From 70b57d3995b81f1c8401d62d12029ceff50aed5b Mon Sep 17 00:00:00 2001 From: James Fargher Date: Wed, 27 Oct 2021 16:11:40 +1300 Subject: [PATCH 1/6] Move RefsDecoder into git package RefsDecoder decodes the output from git-show-ref. There is nothing specific to backups here and moving it to the git package should allow us to reuse the logic elsewhere. --- internal/backup/backup.go | 2 +- internal/{backup/refs.go => git/decoder.go} | 8 +++----- internal/{backup/refs_test.go => git/decoder_test.go} | 8 ++++---- 3 files changed, 8 insertions(+), 10 deletions(-) rename internal/{backup/refs.go => git/decoder.go} (80%) rename internal/{backup/refs_test.go => git/decoder_test.go} (89%) diff --git a/internal/backup/backup.go b/internal/backup/backup.go index 5dc959dd2f2..7c23db2ee71 100644 --- a/internal/backup/backup.go +++ b/internal/backup/backup.go @@ -337,7 +337,7 @@ func (mgr *Manager) sendKnownRefs(ctx context.Context, step *Step, repo *gitalyp } defer reader.Close() - d := NewRefsDecoder(reader) + d := git.NewRefsDecoder(reader) for { var ref git.Reference diff --git a/internal/backup/refs.go b/internal/git/decoder.go similarity index 80% rename from internal/backup/refs.go rename to internal/git/decoder.go index cbce8bc2e0e..9fcf0d922c2 100644 --- a/internal/backup/refs.go +++ b/internal/git/decoder.go @@ -1,12 +1,10 @@ -package backup +package git import ( "bufio" "fmt" "io" "strings" - - "gitlab.com/gitlab-org/gitaly/v14/internal/git" ) // RefsDecoder parses the output format of git-show-ref @@ -24,7 +22,7 @@ func NewRefsDecoder(r io.Reader) *RefsDecoder { // Decode reads and parses the next reference. Returns io.EOF when the end of // the reader has been reached. -func (d *RefsDecoder) Decode(ref *git.Reference) error { +func (d *RefsDecoder) Decode(ref *Reference) error { if d.err != nil { return d.err } @@ -44,7 +42,7 @@ func (d *RefsDecoder) Decode(ref *git.Reference) error { return fmt.Errorf("refs decoder: invalid line: %q", line) } - *ref = git.NewReference(git.ReferenceName(splits[1]), splits[0]) + *ref = NewReference(ReferenceName(splits[1]), splits[0]) return nil } diff --git a/internal/backup/refs_test.go b/internal/git/decoder_test.go similarity index 89% rename from internal/backup/refs_test.go rename to internal/git/decoder_test.go index 3aa713f4b62..2bd8810b3b9 100644 --- a/internal/backup/refs_test.go +++ b/internal/git/decoder_test.go @@ -1,4 +1,4 @@ -package backup +package git_test import ( "bytes" @@ -6,7 +6,7 @@ import ( "testing" "github.com/stretchr/testify/require" - "gitlab.com/gitlab-org/gitaly/v14/internal/git" + . "gitlab.com/gitlab-org/gitaly/v14/internal/git" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" @@ -33,9 +33,9 @@ func TestRefsDecoder(t *testing.T) { d := NewRefsDecoder(stream) - var refs []git.Reference + var refs []Reference for { - var ref git.Reference + var ref Reference err := d.Decode(&ref) if err == io.EOF { -- GitLab From ee281c1a5c6448144501bd2672b873c628b2f2fb Mon Sep 17 00:00:00 2001 From: James Fargher Date: Wed, 27 Oct 2021 16:17:27 +1300 Subject: [PATCH 2/6] git: Rename NewRefsDecoder to NewShowRefDecoder In anticipation of interpreting more reference listings from git, we rename the constructor to be specific to the type of output that is being decoded. Most other refs output use a null-byte as a separator. So we make the initial changes to make this separator configurable. --- internal/backup/backup.go | 2 +- internal/git/decoder.go | 12 +++++++----- internal/git/decoder_test.go | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/internal/backup/backup.go b/internal/backup/backup.go index 7c23db2ee71..3cbb7eaf397 100644 --- a/internal/backup/backup.go +++ b/internal/backup/backup.go @@ -337,7 +337,7 @@ func (mgr *Manager) sendKnownRefs(ctx context.Context, step *Step, repo *gitalyp } defer reader.Close() - d := git.NewRefsDecoder(reader) + d := git.NewShowRefDecoder(reader) for { var ref git.Reference diff --git a/internal/git/decoder.go b/internal/git/decoder.go index 9fcf0d922c2..75b34c2828a 100644 --- a/internal/git/decoder.go +++ b/internal/git/decoder.go @@ -7,16 +7,18 @@ import ( "strings" ) -// RefsDecoder parses the output format of git-show-ref +// RefsDecoder parses git output for git references type RefsDecoder struct { r *bufio.Reader + sep string err error } -// NewRefsDecoder returns a new RefsDecoder -func NewRefsDecoder(r io.Reader) *RefsDecoder { +// NewShowRefDecoder returns a new RefsDecoder to decode the output from git-show-ref +func NewShowRefDecoder(r io.Reader) *RefsDecoder { return &RefsDecoder{ - r: bufio.NewReader(r), + r: bufio.NewReader(r), + sep: " ", } } @@ -34,7 +36,7 @@ func (d *RefsDecoder) Decode(ref *Reference) error { line = line[:len(line)-1] } - splits := strings.SplitN(line, " ", 2) + splits := strings.SplitN(line, d.sep, 2) if len(splits) != 2 { if d.err != nil { return d.err diff --git a/internal/git/decoder_test.go b/internal/git/decoder_test.go index 2bd8810b3b9..a31b361bb1c 100644 --- a/internal/git/decoder_test.go +++ b/internal/git/decoder_test.go @@ -13,7 +13,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" ) -func TestRefsDecoder(t *testing.T) { +func TestRefsDecoder_gitShowRef(t *testing.T) { cfg := testcfg.Build(t) ctx, cancel := testhelper.Context() @@ -31,7 +31,7 @@ func TestRefsDecoder(t *testing.T) { output := gittest.Exec(t, cfg, "-C", repoPath, "show-ref") stream := bytes.NewBuffer(output) - d := NewRefsDecoder(stream) + d := NewShowRefDecoder(stream) var refs []Reference for { -- GitLab From fe5a154a1a70c5a0696efddfbd7dccf2db82c670 Mon Sep 17 00:00:00 2001 From: James Fargher Date: Thu, 28 Oct 2021 10:23:57 +1300 Subject: [PATCH 3/6] git: Add ref decoder constructor for decoding for-each-ref output --- internal/git/decoder.go | 28 ++++++++++++++++++++++++---- internal/git/localrepo/refs.go | 2 +- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/internal/git/decoder.go b/internal/git/decoder.go index 75b34c2828a..f73889ed241 100644 --- a/internal/git/decoder.go +++ b/internal/git/decoder.go @@ -22,6 +22,23 @@ func NewShowRefDecoder(r io.Reader) *RefsDecoder { } } +// ForEachRefFormat can be passed to the `for-each-ref` `--format` flag +// to retrieve references that can be parsed by `NewForEachRefDecoder`. +const ForEachRefFormat = "%(objectname)%00%(refname)" + +// ForEachRefSymbolicFormat can be passed to the `for-each-ref` `--format` flag +// to retrieve references that can be parsed by `NewForEachRefDecoder` and that +// distinguishes symbolic references. +const ForEachRefSymbolicFormat = "%(objectname)%00%(refname)%00%(symref)" + +// NewForEachRefDecoder ... +func NewForEachRefDecoder(r io.Reader) *RefsDecoder { + return &RefsDecoder{ + r: bufio.NewReader(r), + sep: "\x00", + } +} + // Decode reads and parses the next reference. Returns io.EOF when the end of // the reader has been reached. func (d *RefsDecoder) Decode(ref *Reference) error { @@ -36,15 +53,18 @@ func (d *RefsDecoder) Decode(ref *Reference) error { line = line[:len(line)-1] } - splits := strings.SplitN(line, d.sep, 2) - if len(splits) != 2 { + splits := strings.SplitN(line, d.sep, 3) + switch { + case len(splits) == 3 && len(splits[2]) > 0: + *ref = NewSymbolicReference(ReferenceName(splits[1]), splits[0]) + case len(splits) >= 2: + *ref = NewReference(ReferenceName(splits[1]), splits[0]) + default: if d.err != nil { return d.err } return fmt.Errorf("refs decoder: invalid line: %q", line) } - *ref = NewReference(ReferenceName(splits[1]), splits[0]) - return nil } diff --git a/internal/git/localrepo/refs.go b/internal/git/localrepo/refs.go index 0ac4bf96900..db4193df21a 100644 --- a/internal/git/localrepo/refs.go +++ b/internal/git/localrepo/refs.go @@ -93,7 +93,7 @@ func (repo *Repo) GetReferences(ctx context.Context, patterns ...string) ([]git. } func (repo *Repo) getReferences(ctx context.Context, limit uint, patterns ...string) ([]git.Reference, error) { - flags := []git.Option{git.Flag{Name: "--format=%(refname)%00%(objectname)%00%(symref)"}} + flags := []git.Option{git.ValueFlag{Name: "--format", Value: git.ForEachRefSymbolicFormat}} if limit > 0 { flags = append(flags, git.Flag{Name: fmt.Sprintf("--count=%d", limit)}) } -- GitLab From af6a9bc8e8d10ac0636165662f5fc1e3bff7a40f Mon Sep 17 00:00:00 2001 From: James Fargher Date: Thu, 28 Oct 2021 10:24:59 +1300 Subject: [PATCH 4/6] localrepo: Replace manual for-each-ref decoding in localrepo Previously ref parsing was done in place. This replaces the parsing with a shared implementation. --- internal/git/localrepo/refs.go | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/internal/git/localrepo/refs.go b/internal/git/localrepo/refs.go index db4193df21a..eef4b6c0c32 100644 --- a/internal/git/localrepo/refs.go +++ b/internal/git/localrepo/refs.go @@ -107,25 +107,20 @@ func (repo *Repo) getReferences(ctx context.Context, limit uint, patterns ...str return nil, err } - scanner := bufio.NewScanner(cmd) + decoder := git.NewForEachRefDecoder(cmd) var refs []git.Reference - for scanner.Scan() { - line := bytes.SplitN(scanner.Bytes(), []byte{0}, 3) - if len(line) != 3 { - return nil, errors.New("unexpected reference format") - } - - if len(line[2]) == 0 { - refs = append(refs, git.NewReference(git.ReferenceName(line[0]), string(line[1]))) - } else { - refs = append(refs, git.NewSymbolicReference(git.ReferenceName(line[0]), string(line[1]))) + for { + var ref git.Reference + err := decoder.Decode(&ref) + if err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("reading standard input: %v", err) } + refs = append(refs, ref) } - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("reading standard input: %v", err) - } if err := cmd.Wait(); err != nil { return nil, err } -- GitLab From c78886eb5b6f5ca3d22a9715cc6f9a5a229f3c52 Mon Sep 17 00:00:00 2001 From: James Fargher Date: Thu, 28 Oct 2021 14:49:42 +1300 Subject: [PATCH 5/6] gitpipe: Replace manual for-each-ref decoding Previously ref parsing was done in place. This replaces the parsing with a shared implementation. To enable the git output to be decoded with the same code the format was changed to a consistent NUL byte separator. `PEELED` had to be lower-cased as git was interpreting %00PEELED as an escape code. --- internal/git/gitpipe/revision.go | 30 ++++++++------------ internal/git/gitpipe/revision_test.go | 2 +- internal/gitaly/service/ref/find_all_tags.go | 2 +- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/internal/git/gitpipe/revision.go b/internal/git/gitpipe/revision.go index 0f97a9d8ad0..27472c3db54 100644 --- a/internal/git/gitpipe/revision.go +++ b/internal/git/gitpipe/revision.go @@ -5,8 +5,8 @@ import ( "bytes" "context" "fmt" + "io" "strconv" - "strings" "time" "gitlab.com/gitlab-org/gitaly/v14/internal/git" @@ -368,7 +368,7 @@ func ForEachRef( // referenced commit's object. It would thus be about 2-3x slower to use the // default format, and instead we move the burden into the next pipeline step by // default. - format: "%(objectname) %(refname)", + format: git.ForEachRefFormat, } for _, opt := range opts { opt(&cfg) @@ -401,33 +401,27 @@ func ForEachRef( return } - scanner := bufio.NewScanner(forEachRef) - for scanner.Scan() { - line := scanner.Text() - - oidAndRef := strings.SplitN(line, " ", 2) - if len(oidAndRef) != 2 { + decoder := git.NewForEachRefDecoder(forEachRef) + for { + var ref git.Reference + err := decoder.Decode(&ref) + if err == io.EOF { + break + } else if err != nil { sendRevisionResult(ctx, resultChan, RevisionResult{ - err: fmt.Errorf("invalid for-each-ref format: %q", line), + err: fmt.Errorf("scanning for-each-ref output: %w", err), }) return } if isDone := sendRevisionResult(ctx, resultChan, RevisionResult{ - OID: git.ObjectID(oidAndRef[0]), - ObjectName: []byte(oidAndRef[1]), + OID: git.ObjectID(ref.Target), + ObjectName: []byte(ref.Name), }); isDone { return } } - if err := scanner.Err(); err != nil { - sendRevisionResult(ctx, resultChan, RevisionResult{ - err: fmt.Errorf("scanning for-each-ref output: %w", err), - }) - return - } - if err := forEachRef.Wait(); err != nil { sendRevisionResult(ctx, resultChan, RevisionResult{ err: fmt.Errorf("for-each-ref pipeline command: %w", err), diff --git a/internal/git/gitpipe/revision_test.go b/internal/git/gitpipe/revision_test.go index 8b0d7bd254c..b31d30ff7da 100644 --- a/internal/git/gitpipe/revision_test.go +++ b/internal/git/gitpipe/revision_test.go @@ -572,7 +572,7 @@ func TestForEachRef(t *testing.T) { t.Run("tag with format", func(t *testing.T) { refs := readRefs(t, repo, []string{"refs/tags/v1.0.0"}, - WithForEachRefFormat("%(objectname) tag\n%(*objectname) peeled"), + WithForEachRefFormat("%(objectname)%00tag\n%(*objectname)%00peeled"), ) require.Equal(t, refs, []RevisionResult{ diff --git a/internal/gitaly/service/ref/find_all_tags.go b/internal/gitaly/service/ref/find_all_tags.go index 47eb787a246..23a925c04b1 100644 --- a/internal/gitaly/service/ref/find_all_tags.go +++ b/internal/gitaly/service/ref/find_all_tags.go @@ -50,7 +50,7 @@ func (s *server) findAllTags(ctx context.Context, repo *localrepo.Repo, sortFiel repo, []string{"refs/tags/"}, gitpipe.WithSortField(sortField), - gitpipe.WithForEachRefFormat("%(objectname) %(refname)%(if)%(*objectname)%(then)\n%(objectname)^{} PEELED%(end)"), + gitpipe.WithForEachRefFormat("%(objectname)%00%(refname)%(if)%(*objectname)%(then)\n%(objectname)^{}%00peeled%(end)"), ) catfileObjectsIter := gitpipe.CatfileObject(ctx, objectReader, forEachRefIter) -- GitLab From 5436b8c94256624716b5e61f0d263bfe3c823828 Mon Sep 17 00:00:00 2001 From: James Fargher Date: Mon, 8 Nov 2021 13:10:00 +1300 Subject: [PATCH 6/6] git: Use spaces instead of the NUL-byte There is a bug that prevents the NUL-byte being outputted for some variations of `--format`. So instead, since ref names cannot have spaces anyway, switch to using spaces. --- internal/git/decoder.go | 6 +++--- internal/git/gitpipe/revision_test.go | 2 +- internal/gitaly/service/ref/find_all_tags.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/git/decoder.go b/internal/git/decoder.go index f73889ed241..e788dede9ad 100644 --- a/internal/git/decoder.go +++ b/internal/git/decoder.go @@ -24,18 +24,18 @@ func NewShowRefDecoder(r io.Reader) *RefsDecoder { // ForEachRefFormat can be passed to the `for-each-ref` `--format` flag // to retrieve references that can be parsed by `NewForEachRefDecoder`. -const ForEachRefFormat = "%(objectname)%00%(refname)" +const ForEachRefFormat = "%(objectname) %(refname)" // ForEachRefSymbolicFormat can be passed to the `for-each-ref` `--format` flag // to retrieve references that can be parsed by `NewForEachRefDecoder` and that // distinguishes symbolic references. -const ForEachRefSymbolicFormat = "%(objectname)%00%(refname)%00%(symref)" +const ForEachRefSymbolicFormat = "%(objectname) %(refname) %(symref)" // NewForEachRefDecoder ... func NewForEachRefDecoder(r io.Reader) *RefsDecoder { return &RefsDecoder{ r: bufio.NewReader(r), - sep: "\x00", + sep: " ", } } diff --git a/internal/git/gitpipe/revision_test.go b/internal/git/gitpipe/revision_test.go index b31d30ff7da..8b0d7bd254c 100644 --- a/internal/git/gitpipe/revision_test.go +++ b/internal/git/gitpipe/revision_test.go @@ -572,7 +572,7 @@ func TestForEachRef(t *testing.T) { t.Run("tag with format", func(t *testing.T) { refs := readRefs(t, repo, []string{"refs/tags/v1.0.0"}, - WithForEachRefFormat("%(objectname)%00tag\n%(*objectname)%00peeled"), + WithForEachRefFormat("%(objectname) tag\n%(*objectname) peeled"), ) require.Equal(t, refs, []RevisionResult{ diff --git a/internal/gitaly/service/ref/find_all_tags.go b/internal/gitaly/service/ref/find_all_tags.go index 23a925c04b1..849427c1b11 100644 --- a/internal/gitaly/service/ref/find_all_tags.go +++ b/internal/gitaly/service/ref/find_all_tags.go @@ -50,7 +50,7 @@ func (s *server) findAllTags(ctx context.Context, repo *localrepo.Repo, sortFiel repo, []string{"refs/tags/"}, gitpipe.WithSortField(sortField), - gitpipe.WithForEachRefFormat("%(objectname)%00%(refname)%(if)%(*objectname)%(then)\n%(objectname)^{}%00peeled%(end)"), + gitpipe.WithForEachRefFormat("%(objectname) %(refname)%(if)%(*objectname)%(then)\n%(objectname)^{} peeled%(end)"), ) catfileObjectsIter := gitpipe.CatfileObject(ctx, objectReader, forEachRefIter) -- GitLab