diff --git a/changelogs/unreleased/jv-pool-no-dangle.yml b/changelogs/unreleased/jv-pool-no-dangle.yml new file mode 100644 index 0000000000000000000000000000000000000000..766e038f32cf3abbd0ffb6288eda32ebba8fec6b --- /dev/null +++ b/changelogs/unreleased/jv-pool-no-dangle.yml @@ -0,0 +1,5 @@ +--- +title: 'FetchIntoObjectPool: stop creating dangling refs' +merge_request: 1459 +author: +type: fixed diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index a5240fbfd616144448014f14b6577b4ea3dd5c21..1485817855641e1e1f23d867303dce078a83aae6 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -4,12 +4,10 @@ import ( "bufio" "context" "fmt" - "strings" "gitlab.com/gitlab-org/gitaly/internal/command" "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/git/repository" - "gitlab.com/gitlab-org/gitaly/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/internal/helper" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" ) @@ -75,10 +73,6 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos return err } - if err := rescueDanglingObjects(ctx, o); err != nil { - return err - } - packRefs, err := git.Command(ctx, o, "pack-refs", "--all") if err != nil { return err @@ -92,61 +86,22 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos const danglingObjectNamespace = "refs/dangling" -// rescueDanglingObjects creates refs for all dangling objects if finds -// with `git fsck`, which converts those objects from "dangling" to -// "not-dangling". This guards against any object ever being deleted from -// a pool repository. This is a defense in depth against accidental use -// of `git prune`, which could remove Git objects that a pool member -// relies on. There is currently no way for us to reliably determine if -// an object is still used anywhere, so the only safe thing to do is to -// assume that every object _is_ used. -func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { - fsck, err := git.Command(ctx, repo, "fsck", "--connectivity-only", "--dangling") - if err != nil { - return err - } - - updater, err := updateref.New(ctx, repo) - if err != nil { - return err - } - - scanner := bufio.NewScanner(fsck) - for scanner.Scan() { - split := strings.SplitN(scanner.Text(), " ", 3) - if len(split) != 3 { - continue - } - - if split[0] != "dangling" { - continue - } - - ref := danglingObjectNamespace + "/" + split[2] - if err := updater.Create(ref, split[2]); err != nil { - return err - } - } - - if err := scanner.Err(); err != nil { - return err - } - - if err := fsck.Wait(); err != nil { - return fmt.Errorf("git fsck: %v", err) - } - - return updater.Wait() -} - func repackPool(ctx context.Context, pool repository.GitRepo) error { - repackArgs := []string{ - "-c", "pack.island=" + sourceRefNamespace + "/heads", - "-c", "pack.island=" + sourceRefNamespace + "/tags", - "-c", "pack.writeBitmapHashCache=true", - "repack", "-aidb", - } - repackCmd, err := git.Command(ctx, pool, repackArgs...) + globalOpts := []git.Option{ + git.ValueFlag{"-c", "pack.island=" + sourceRefNamespace + "/heads"}, + git.ValueFlag{"-c", "pack.island=" + sourceRefNamespace + "/tags"}, + git.ValueFlag{"-c", "pack.writeBitmapHashCache=true"}, + } + + repackCmd, err := git.SafeCmd(ctx, pool, globalOpts, git.SubCmd{ + Name: "repack", + Flags: []git.Option{ + git.Flag{"-ad"}, + git.Flag{"--keep-unreachable"}, // Prevent data loss + git.Flag{"--delta-islands"}, // Performance + git.Flag{"--write-bitmap-index"}, // Performance + }, + }) if err != nil { return err } diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index a7ff6ba32081cc2cd684b7cdc17d4481c2a35030..3c2cb70eccc9816b8e7ce2347be8f07e034d644f 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -3,6 +3,7 @@ package objectpool import ( "fmt" "io/ioutil" + "os/exec" "path/filepath" "strings" "testing" @@ -13,7 +14,13 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/testhelper" ) -func TestFetchFromOriginDangling(t *testing.T) { +const ( + existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b" + existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa" + existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" +) + +func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { source, _, cleanup := testhelper.NewTestRepo(t) defer cleanup() @@ -25,12 +32,6 @@ func TestFetchFromOriginDangling(t *testing.T) { require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool") - const ( - existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b" - existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa" - existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" - ) - // We want to have some objects that are guaranteed to be dangling. Use // random data to make each object unique. nonce, err := text.RandomHex(4) @@ -73,16 +74,48 @@ func TestFetchFromOriginDangling(t *testing.T) { require.Contains(t, fsckBeforeLines, l, "test setup sanity check") } - // We expect this second run to convert the dangling objects into - // non-dangling objects. - require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") + // Make sure dangling objects are not loose + testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "repack", "-adk")...) + + objectsDir := filepath.Join(pool.FullPath(), "objects") + packPrefix := filepath.Join(objectsDir, "pack/pack-") + infoPrefix := filepath.Join(objectsDir, "info") + + findOut := text.ChompBytes(testhelper.MustRunCommand(t, nil, "find", objectsDir, "-type", "f")) + for _, f := range strings.Split(findOut, "\n") { + if strings.HasPrefix(f, packPrefix) || strings.HasPrefix(f, infoPrefix) { + continue + } + + t.Fatalf("%s does not look like a packfile or info file", f) + } + + unreachableObjectsExist := func(msg string) { + for _, oid := range []string{newBlob, newTree, newCommit, newTag} { + check := exec.Command("git", "cat-file", "-e", oid) + check.Dir = pool.FullPath() + require.NoError(t, check.Run(), "%s: object %s must still exist", msg, oid) + } + } + + unreachableObjectsExist("verify test setup") - refsArgs := append(baseArgs, "for-each-ref", "--format=%(refname) %(objectname)") - refsAfter := testhelper.MustRunCommand(t, nil, "git", refsArgs...) - refsAfterLines := strings.Split(string(refsAfter), "\n") - for _, id := range []string{newBlob, newTree, newCommit, newTag} { - require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id)) + // Call function twice in case ordering effects hide deletion + for i := 0; i < 2; i++ { + require.NoError(t, pool.FetchFromOrigin(ctx, source), "fetch into pool to see if objects stay around %d", i) } + + unreachableObjectsExist("after FetchFromOrigin") +} + +func listDanglingRefs(t *testing.T, pool *ObjectPool) []string { + forEachRefArgs := []string{"-C", pool.FullPath(), "for-each-ref", "--format=%(refname)", "refs/dangling"} + dangling := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", forEachRefArgs...)) + if len(dangling) == 0 { + return nil + } + + return strings.Split(dangling, "\n") } func TestFetchFromOriginDeltaIslands(t *testing.T) {