From 6924eedf962b01a4a4f9761573e1a766943c9e99 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 12:08:15 +0200 Subject: [PATCH 01/13] Stop creating dangling refs, and remove existing --- internal/git/objectpool/fetch.go | 38 +++++----------- internal/git/objectpool/fetch_test.go | 63 ++++++++------------------- 2 files changed, 29 insertions(+), 72 deletions(-) diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index a5240fbfd61..bbb97f6b014 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -4,7 +4,6 @@ import ( "bufio" "context" "fmt" - "strings" "gitlab.com/gitlab-org/gitaly/internal/command" "gitlab.com/gitlab-org/gitaly/internal/git" @@ -75,7 +74,7 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos return err } - if err := rescueDanglingObjects(ctx, o); err != nil { + if err := removeDanglingRefs(ctx, o); err != nil { return err } @@ -92,16 +91,13 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos const danglingObjectNamespace = "refs/dangling" -// rescueDanglingObjects creates refs for all dangling objects if finds -// with `git fsck`, which converts those objects from "dangling" to -// "not-dangling". This guards against any object ever being deleted from -// a pool repository. This is a defense in depth against accidental use -// of `git prune`, which could remove Git objects that a pool member -// relies on. There is currently no way for us to reliably determine if -// an object is still used anywhere, so the only safe thing to do is to -// assume that every object _is_ used. -func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { - fsck, err := git.Command(ctx, repo, "fsck", "--connectivity-only", "--dangling") +// In https://gitlab.com/gitlab-org/gitaly/merge_requests/1297 we +// introduced a mechanism to protect dangling objects in a pool from +// being removed by a manual 'git prune' command. It turned out that +// created way too many refs, so now we want to make sure no such refs +// are left behind. +func removeDanglingRefs(ctx context.Context, repo repository.GitRepo) error { + forEachRef, err := git.Command(ctx, repo, "for-each-ref", "--format=%(refname)", danglingObjectNamespace) if err != nil { return err } @@ -111,19 +107,9 @@ func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { return err } - scanner := bufio.NewScanner(fsck) + scanner := bufio.NewScanner(forEachRef) for scanner.Scan() { - split := strings.SplitN(scanner.Text(), " ", 3) - if len(split) != 3 { - continue - } - - if split[0] != "dangling" { - continue - } - - ref := danglingObjectNamespace + "/" + split[2] - if err := updater.Create(ref, split[2]); err != nil { + if err := updater.Delete(scanner.Text()); err != nil { return err } } @@ -132,8 +118,8 @@ func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { return err } - if err := fsck.Wait(); err != nil { - return fmt.Errorf("git fsck: %v", err) + if err := forEachRef.Wait(); err != nil { + return fmt.Errorf("git for-each-ref: %v", err) } return updater.Wait() diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index a7ff6ba3208..3ad001f0064 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -1,7 +1,6 @@ package objectpool import ( - "fmt" "io/ioutil" "path/filepath" "strings" @@ -13,7 +12,7 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/testhelper" ) -func TestFetchFromOriginDangling(t *testing.T) { +func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { source, _, cleanup := testhelper.NewTestRepo(t) defer cleanup() @@ -31,58 +30,30 @@ func TestFetchFromOriginDangling(t *testing.T) { existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" ) - // We want to have some objects that are guaranteed to be dangling. Use - // random data to make each object unique. - nonce, err := text.RandomHex(4) - require.NoError(t, err) - + // Simulate "dangling refs" as created by https://gitlab.com/gitlab-org/gitaly/merge_requests/1297 baseArgs := []string{"-C", pool.FullPath()} - - // A blob with random contents should be unique. - newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") - newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) - - // A tree with a randomly named blob entry should be unique. - newTreeArgs := append(baseArgs, "mktree") - newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) - newTree := text.ChompBytes(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) - - // A commit with a random message should be unique. - newCommitArgs := append(baseArgs, "commit-tree", existingTree) - newCommit := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) - - // A tag with random hex characters in its name should be unique. - newTagName := "tag-" + nonce - newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit) - testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...) - newTag := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) - - // `git tag` automatically creates a ref, so our new tag is not dangling. - // Deleting the ref should fix that. - testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) - - fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...) - fsckBeforeLines := strings.Split(string(fsckBefore), "\n") - - for _, l := range []string{ - fmt.Sprintf("dangling blob %s", newBlob), - fmt.Sprintf("dangling tree %s", newTree), - fmt.Sprintf("dangling commit %s", newCommit), - fmt.Sprintf("dangling tag %s", newTag), - } { - require.Contains(t, fsckBeforeLines, l, "test setup sanity check") + for _, oid := range []string{existingTree, existingCommit, existingBlob} { + args := append(baseArgs, "update-ref", "refs/dangling/"+oid, oid) + testhelper.MustRunCommand(t, nil, "git", args...) } + require.Len(t, listDanglingRefs(t, pool), 3, "test setup sanity check") + + require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") // We expect this second run to convert the dangling objects into // non-dangling objects. require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") + require.Empty(t, listDanglingRefs(t, pool), "dangling refs should be gone") +} - refsArgs := append(baseArgs, "for-each-ref", "--format=%(refname) %(objectname)") - refsAfter := testhelper.MustRunCommand(t, nil, "git", refsArgs...) - refsAfterLines := strings.Split(string(refsAfter), "\n") - for _, id := range []string{newBlob, newTree, newCommit, newTag} { - require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id)) +func listDanglingRefs(t *testing.T, pool *ObjectPool) []string { + forEachRefArgs := []string{"-C", pool.FullPath(), "for-each-ref", "--format=%(refname)", "refs/dangling"} + dangling := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", forEachRefArgs...)) + if len(dangling) == 0 { + return nil } + + return strings.Split(dangling, "\n") } func TestFetchFromOriginDeltaIslands(t *testing.T) { -- GitLab From 2e3f2949535cf7c8c191e1a8431a9096a1d1778f Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 12:50:34 +0200 Subject: [PATCH 02/13] Prevent repack from deleting unreachable objects --- internal/git/objectpool/fetch.go | 2 +- internal/git/objectpool/fetch_test.go | 79 +++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index bbb97f6b014..76d4d177705 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -130,7 +130,7 @@ func repackPool(ctx context.Context, pool repository.GitRepo) error { "-c", "pack.island=" + sourceRefNamespace + "/heads", "-c", "pack.island=" + sourceRefNamespace + "/tags", "-c", "pack.writeBitmapHashCache=true", - "repack", "-aidb", + "repack", "-aidbk", } repackCmd, err := git.Command(ctx, pool, repackArgs...) if err != nil { diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 3ad001f0064..ab4ab8bccc3 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -1,7 +1,9 @@ package objectpool import ( + "fmt" "io/ioutil" + "os/exec" "path/filepath" "strings" "testing" @@ -12,6 +14,12 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/testhelper" ) +const ( + existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b" + existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa" + existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" +) + func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { source, _, cleanup := testhelper.NewTestRepo(t) defer cleanup() @@ -24,14 +32,9 @@ func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool") - const ( - existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b" - existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa" - existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" - ) + baseArgs := []string{"-C", pool.FullPath()} // Simulate "dangling refs" as created by https://gitlab.com/gitlab-org/gitaly/merge_requests/1297 - baseArgs := []string{"-C", pool.FullPath()} for _, oid := range []string{existingTree, existingCommit, existingBlob} { args := append(baseArgs, "update-ref", "refs/dangling/"+oid, oid) testhelper.MustRunCommand(t, nil, "git", args...) @@ -46,6 +49,70 @@ func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { require.Empty(t, listDanglingRefs(t, pool), "dangling refs should be gone") } +func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { + source, _, cleanup := testhelper.NewTestRepo(t) + defer cleanup() + + pool, err := NewObjectPool(source.StorageName, testhelper.NewTestObjectPoolName(t)) + require.NoError(t, err) + + ctx, cancel := testhelper.Context() + defer cancel() + + require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool") + + // We want to have some objects that are guaranteed to be dangling. Use + // random data to make each object unique. + nonce, err := text.RandomHex(4) + require.NoError(t, err) + + baseArgs := []string{"-C", pool.FullPath()} + + // A blob with random contents should be unique. + newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") + newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) + // A tree with a randomly named blob entry should be unique. + newTreeArgs := append(baseArgs, "mktree") + newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) + newTree := text.ChompBytes(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) + + // A commit with a random message should be unique. + newCommitArgs := append(baseArgs, "commit-tree", existingTree) + newCommit := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) + + // A tag with random hex characters in its name should be unique. + newTagName := "tag-" + nonce + newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit) + testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...) + newTag := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) + + // `git tag` automatically creates a ref, so our new tag is not dangling. + // Deleting the ref should fix that. + testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) + + unreachableObjectsExist := func(msg string) { + for _, oid := range []string{newBlob, newTree, newCommit, newTag} { + check := exec.Command("git", "cat-file", "-e", oid) + check.Dir = pool.FullPath() + require.NoError(t, check.Run(), "%s: object %s must still exist", msg, oid) + } + } + + unreachableObjectsExist("setup, before repack") + + // Make sure dangling objects are not loose + testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "repack", "-adk")...) + + unreachableObjectsExist("setup, after repack") + + // Call function twice in case ordering effects hide deletion + for i := 0; i < 2; i++ { + require.NoError(t, pool.FetchFromOrigin(ctx, source), "fetch into pool to see if objects stay around %d", i) + } + + unreachableObjectsExist("after FetchFromOrigin") +} + func listDanglingRefs(t *testing.T, pool *ObjectPool) []string { forEachRefArgs := []string{"-C", pool.FullPath(), "for-each-ref", "--format=%(refname)", "refs/dangling"} dangling := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", forEachRefArgs...)) -- GitLab From 459ff043e081f28d35fd7206c29da20097c68862 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 12:51:58 +0200 Subject: [PATCH 03/13] fix comment --- internal/git/objectpool/fetch_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index ab4ab8bccc3..53e3361f4cc 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -43,8 +43,7 @@ func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") - // We expect this second run to convert the dangling objects into - // non-dangling objects. + // We expect this second run to remove all refs under refs/dangling require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") require.Empty(t, listDanglingRefs(t, pool), "dangling refs should be gone") } -- GitLab From dbd87c628e521ce967fe1e86d68059e44b4f5fb6 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 12:52:23 +0200 Subject: [PATCH 04/13] whitespace --- internal/git/objectpool/fetch_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 53e3361f4cc..1bbea268506 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -70,6 +70,7 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { // A blob with random contents should be unique. newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) + // A tree with a randomly named blob entry should be unique. newTreeArgs := append(baseArgs, "mktree") newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) -- GitLab From ab00b76d2a032b13e66e6e9dd06df8392bf17cf1 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 12:56:58 +0200 Subject: [PATCH 05/13] Add back fsck sanity check --- internal/git/objectpool/fetch_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 1bbea268506..354a79ba277 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -90,6 +90,17 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { // Deleting the ref should fix that. testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) + fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...) + fsckBeforeLines := strings.Split(string(fsckBefore), "\n") + for _, l := range []string{ + fmt.Sprintf("dangling blob %s", newBlob), + fmt.Sprintf("dangling tree %s", newTree), + fmt.Sprintf("dangling commit %s", newCommit), + fmt.Sprintf("dangling tag %s", newTag), + } { + require.Contains(t, fsckBeforeLines, l, "test setup sanity check") + } + unreachableObjectsExist := func(msg string) { for _, oid := range []string{newBlob, newTree, newCommit, newTag} { check := exec.Command("git", "cat-file", "-e", oid) -- GitLab From 704e63e30d7595d30d53659c1f4607d28ecb0e53 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 12:57:35 +0200 Subject: [PATCH 06/13] whitespace --- internal/git/objectpool/fetch_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 354a79ba277..e1f58ac3cea 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -92,6 +92,7 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...) fsckBeforeLines := strings.Split(string(fsckBefore), "\n") + for _, l := range []string{ fmt.Sprintf("dangling blob %s", newBlob), fmt.Sprintf("dangling tree %s", newTree), -- GitLab From f001163ee6c3e814ac90dab82ecaa604c64cc17e Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 14:58:10 +0200 Subject: [PATCH 07/13] Check test setup --- internal/git/objectpool/fetch_test.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index e1f58ac3cea..ca6dceea695 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "os/exec" "path/filepath" + "regexp" "strings" "testing" @@ -114,9 +115,21 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { // Make sure dangling objects are not loose testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "repack", "-adk")...) - unreachableObjectsExist("setup, after repack") + objectsDir := filepath.Join(pool.FullPath(), "objects") + packRegex, err := regexp.Compile("^" + filepath.Join(objectsDir, "pack/pack-")) + require.NoError(t, err) + + findOut := text.ChompBytes(testhelper.MustRunCommand(t, nil, "find", objectsDir, "-type", "f")) + for _, f := range strings.Split(findOut, "\n") { + if strings.Contains(f, "/info/") { + continue + } + + require.Regexp(t, packRegex, f, "expect only packfiles") + } + // Call function twice in case ordering effects hide deletion for i := 0; i < 2; i++ { require.NoError(t, pool.FetchFromOrigin(ctx, source), "fetch into pool to see if objects stay around %d", i) -- GitLab From 40ff07b2bb1e24596a524a6d8e864b1cd0e82119 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 16:38:07 +0200 Subject: [PATCH 08/13] changelog --- changelogs/unreleased/jv-pool-no-dangle.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelogs/unreleased/jv-pool-no-dangle.yml diff --git a/changelogs/unreleased/jv-pool-no-dangle.yml b/changelogs/unreleased/jv-pool-no-dangle.yml new file mode 100644 index 00000000000..c443e38d809 --- /dev/null +++ b/changelogs/unreleased/jv-pool-no-dangle.yml @@ -0,0 +1,5 @@ +--- +title: 'FetchIntoObjectPool: stop creating dangling refs, and remove existing' +merge_request: 1459 +author: +type: bug -- GitLab From 5ad50b265d9ca7bdc0e35cc1c7c357d2cb9c07a8 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 16:46:14 +0200 Subject: [PATCH 09/13] fix changelog --- changelogs/unreleased/jv-pool-no-dangle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelogs/unreleased/jv-pool-no-dangle.yml b/changelogs/unreleased/jv-pool-no-dangle.yml index c443e38d809..1100bbdd6fe 100644 --- a/changelogs/unreleased/jv-pool-no-dangle.yml +++ b/changelogs/unreleased/jv-pool-no-dangle.yml @@ -2,4 +2,4 @@ title: 'FetchIntoObjectPool: stop creating dangling refs, and remove existing' merge_request: 1459 author: -type: bug +type: fixed -- GitLab From d508da9bafca99763e18a4fe080e88db53327bd3 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 16:48:29 +0200 Subject: [PATCH 10/13] remove duplicate method call --- internal/git/objectpool/fetch_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index ca6dceea695..7c8044893f0 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -42,10 +42,8 @@ func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { } require.Len(t, listDanglingRefs(t, pool), 3, "test setup sanity check") - require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") + require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch (should remove dangling refs)") - // We expect this second run to remove all refs under refs/dangling - require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") require.Empty(t, listDanglingRefs(t, pool), "dangling refs should be gone") } -- GitLab From 892d17a0a865219e3475573ea80e81b5b676fda2 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 29 Aug 2019 16:52:53 +0200 Subject: [PATCH 11/13] Streamline test --- internal/git/objectpool/fetch_test.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 7c8044893f0..82882f5428d 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -101,19 +101,8 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { require.Contains(t, fsckBeforeLines, l, "test setup sanity check") } - unreachableObjectsExist := func(msg string) { - for _, oid := range []string{newBlob, newTree, newCommit, newTag} { - check := exec.Command("git", "cat-file", "-e", oid) - check.Dir = pool.FullPath() - require.NoError(t, check.Run(), "%s: object %s must still exist", msg, oid) - } - } - - unreachableObjectsExist("setup, before repack") - // Make sure dangling objects are not loose testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "repack", "-adk")...) - unreachableObjectsExist("setup, after repack") objectsDir := filepath.Join(pool.FullPath(), "objects") packRegex, err := regexp.Compile("^" + filepath.Join(objectsDir, "pack/pack-")) @@ -128,6 +117,16 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { require.Regexp(t, packRegex, f, "expect only packfiles") } + unreachableObjectsExist := func(msg string) { + for _, oid := range []string{newBlob, newTree, newCommit, newTag} { + check := exec.Command("git", "cat-file", "-e", oid) + check.Dir = pool.FullPath() + require.NoError(t, check.Run(), "%s: object %s must still exist", msg, oid) + } + } + + unreachableObjectsExist("verify test setup") + // Call function twice in case ordering effects hide deletion for i := 0; i < 2; i++ { require.NoError(t, pool.FetchFromOrigin(ctx, source), "fetch into pool to see if objects stay around %d", i) -- GitLab From 5e4065965ef38593067babeb3d4e77be0d12971d Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 30 Aug 2019 16:19:26 +0200 Subject: [PATCH 12/13] Simplify check --- internal/git/objectpool/fetch_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 82882f5428d..749c0bd95f7 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "os/exec" "path/filepath" - "regexp" "strings" "testing" @@ -105,16 +104,16 @@ func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "repack", "-adk")...) objectsDir := filepath.Join(pool.FullPath(), "objects") - packRegex, err := regexp.Compile("^" + filepath.Join(objectsDir, "pack/pack-")) - require.NoError(t, err) + packPrefix := filepath.Join(objectsDir, "pack/pack-") + infoPrefix := filepath.Join(objectsDir, "info") findOut := text.ChompBytes(testhelper.MustRunCommand(t, nil, "find", objectsDir, "-type", "f")) for _, f := range strings.Split(findOut, "\n") { - if strings.Contains(f, "/info/") { + if strings.HasPrefix(f, packPrefix) || strings.HasPrefix(f, infoPrefix) { continue } - require.Regexp(t, packRegex, f, "expect only packfiles") + t.Fatalf("%s does not look like a packfile or info file", f) } unreachableObjectsExist := func(msg string) { -- GitLab From 012ec310aab499b01f199eea409b0d6f22def3dc Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 22 Oct 2019 16:40:41 +0200 Subject: [PATCH 13/13] Stop creating dangling refs --- changelogs/unreleased/jv-pool-no-dangle.yml | 2 +- internal/git/objectpool/fetch.go | 61 +++++---------------- internal/git/objectpool/fetch_test.go | 26 --------- 3 files changed, 16 insertions(+), 73 deletions(-) diff --git a/changelogs/unreleased/jv-pool-no-dangle.yml b/changelogs/unreleased/jv-pool-no-dangle.yml index 1100bbdd6fe..766e038f32c 100644 --- a/changelogs/unreleased/jv-pool-no-dangle.yml +++ b/changelogs/unreleased/jv-pool-no-dangle.yml @@ -1,5 +1,5 @@ --- -title: 'FetchIntoObjectPool: stop creating dangling refs, and remove existing' +title: 'FetchIntoObjectPool: stop creating dangling refs' merge_request: 1459 author: type: fixed diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index 76d4d177705..14858178556 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -8,7 +8,6 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/command" "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/git/repository" - "gitlab.com/gitlab-org/gitaly/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/internal/helper" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" ) @@ -74,10 +73,6 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos return err } - if err := removeDanglingRefs(ctx, o); err != nil { - return err - } - packRefs, err := git.Command(ctx, o, "pack-refs", "--all") if err != nil { return err @@ -91,48 +86,22 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos const danglingObjectNamespace = "refs/dangling" -// In https://gitlab.com/gitlab-org/gitaly/merge_requests/1297 we -// introduced a mechanism to protect dangling objects in a pool from -// being removed by a manual 'git prune' command. It turned out that -// created way too many refs, so now we want to make sure no such refs -// are left behind. -func removeDanglingRefs(ctx context.Context, repo repository.GitRepo) error { - forEachRef, err := git.Command(ctx, repo, "for-each-ref", "--format=%(refname)", danglingObjectNamespace) - if err != nil { - return err - } - - updater, err := updateref.New(ctx, repo) - if err != nil { - return err - } - - scanner := bufio.NewScanner(forEachRef) - for scanner.Scan() { - if err := updater.Delete(scanner.Text()); err != nil { - return err - } - } - - if err := scanner.Err(); err != nil { - return err - } - - if err := forEachRef.Wait(); err != nil { - return fmt.Errorf("git for-each-ref: %v", err) - } - - return updater.Wait() -} - func repackPool(ctx context.Context, pool repository.GitRepo) error { - repackArgs := []string{ - "-c", "pack.island=" + sourceRefNamespace + "/heads", - "-c", "pack.island=" + sourceRefNamespace + "/tags", - "-c", "pack.writeBitmapHashCache=true", - "repack", "-aidbk", - } - repackCmd, err := git.Command(ctx, pool, repackArgs...) + globalOpts := []git.Option{ + git.ValueFlag{"-c", "pack.island=" + sourceRefNamespace + "/heads"}, + git.ValueFlag{"-c", "pack.island=" + sourceRefNamespace + "/tags"}, + git.ValueFlag{"-c", "pack.writeBitmapHashCache=true"}, + } + + repackCmd, err := git.SafeCmd(ctx, pool, globalOpts, git.SubCmd{ + Name: "repack", + Flags: []git.Option{ + git.Flag{"-ad"}, + git.Flag{"--keep-unreachable"}, // Prevent data loss + git.Flag{"--delta-islands"}, // Performance + git.Flag{"--write-bitmap-index"}, // Performance + }, + }) if err != nil { return err } diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 749c0bd95f7..3c2cb70eccc 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -20,32 +20,6 @@ const ( existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" ) -func TestFetchFromOriginRemoveDanglingRefs(t *testing.T) { - source, _, cleanup := testhelper.NewTestRepo(t) - defer cleanup() - - pool, err := NewObjectPool(source.StorageName, testhelper.NewTestObjectPoolName(t)) - require.NoError(t, err) - - ctx, cancel := testhelper.Context() - defer cancel() - - require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool") - - baseArgs := []string{"-C", pool.FullPath()} - - // Simulate "dangling refs" as created by https://gitlab.com/gitlab-org/gitaly/merge_requests/1297 - for _, oid := range []string{existingTree, existingCommit, existingBlob} { - args := append(baseArgs, "update-ref", "refs/dangling/"+oid, oid) - testhelper.MustRunCommand(t, nil, "git", args...) - } - require.Len(t, listDanglingRefs(t, pool), 3, "test setup sanity check") - - require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch (should remove dangling refs)") - - require.Empty(t, listDanglingRefs(t, pool), "dangling refs should be gone") -} - func TestFetchFromOriginKeepUnreachableObjects(t *testing.T) { source, _, cleanup := testhelper.NewTestRepo(t) defer cleanup() -- GitLab