From 6d4a0530b180bfd09483c131961a25f3934e861f Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Fri, 11 Jul 2025 11:02:45 +0200 Subject: [PATCH 1/5] log: Add EachPathLastCommit function In the future we want to run a single `git-last-modified(1)` command to get all the data at once. To support this, add a wrapper EachPathLastCommit around LastCommitForPath that calls the given function for each path. --- internal/git/log/last_commit.go | 26 +++++++++++++++++++ .../commit/list_last_commits_for_tree.go | 18 ++++++++----- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/internal/git/log/last_commit.go b/internal/git/log/last_commit.go index d63be7f2a63..a6880a62bd1 100644 --- a/internal/git/log/last_commit.go +++ b/internal/git/log/last_commit.go @@ -61,6 +61,32 @@ func LastCommitForPath( return catfile.GetCommit(ctx, objectReader, git.Revision(commitID)) } +// EachPathLastCommit returns the last commit that modified each path in `paths`. +// It does so by calling the provided function `fn` for each path in `paths` with the +// related commit. It is up to the caller to decide what to do with the commit then. +func EachPathLastCommit( + ctx context.Context, + objectReader catfile.ObjectContentReader, + repo gitcmd.RepositoryExecutor, + revision git.Revision, + paths []string, + options *gitalypb.GlobalOptions, + fn func(string, *catfile.Commit) error, +) error { + for _, path := range paths { + c, err := LastCommitForPath(ctx, objectReader, repo, revision, path, options) + if err != nil { + return fmt.Errorf("last commit for %q failed: %w", err) + } + err = fn(path, c) + if err != nil { + return fmt.Errorf("callback last commit: %w", err) + } + } + + return nil +} + // GitLogCommand returns a Command that executes git log with the given the arguments func GitLogCommand(ctx context.Context, repo gitcmd.RepositoryExecutor, revisions []git.Revision, paths []string, options *gitalypb.GlobalOptions, extraArgs ...gitcmd.Option) (*command.Command, error) { args := make([]string, len(revisions)) diff --git a/internal/gitaly/service/commit/list_last_commits_for_tree.go b/internal/gitaly/service/commit/list_last_commits_for_tree.go index 40f612bd998..4c4e98eb037 100644 --- a/internal/gitaly/service/commit/list_last_commits_for_tree.go +++ b/internal/gitaly/service/commit/list_last_commits_for_tree.go @@ -9,6 +9,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/command" "gitlab.com/gitlab-org/gitaly/v16/internal/git" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gitcmd" "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/git/log" @@ -67,14 +68,15 @@ func (s *server) listLastCommitsForTree(in *gitalypb.ListLastCommitsForTreeReque limit = len(entries) } - for _, entry := range entries[offset:limit] { - commit, err := log.LastCommitForPath(ctx, objectReader, repo, git.Revision(in.GetRevision()), entry.Path, in.GetGlobalOptions()) - if err != nil { - return err - } + entries = entries[offset:limit] + paths := make([]string, 0, len(entries)) + for _, e := range entries { + paths = append(paths, e.Path) + } + err = log.EachPathLastCommit(ctx, objectReader, repo, git.Revision(in.GetRevision()), paths, in.GetGlobalOptions(), func(path string, commit *catfile.Commit) error { commitForTree := &gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ - PathBytes: []byte(entry.Path), + PathBytes: []byte(path), Commit: commit.GitCommit, } @@ -86,6 +88,10 @@ func (s *server) listLastCommitsForTree(in *gitalypb.ListLastCommitsForTreeReque batch = batch[0:0] } + return nil + }) + if err != nil { + return err } if err := cmd.Wait(); err != nil { -- GitLab From e4101228a7711a9e31cb0632b32db094fbea0ee7 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Fri, 11 Jul 2025 15:25:49 +0200 Subject: [PATCH 2/5] commit: Use git-last-modified(1) for ListLastCommitsForTree RPC At the moment we do N+1 Git calls to fill in the ListLastCommitsForTreeResponse. To feed this data, we first do `git ls-tree` to get the list of tree entries, than get the correct page, and run `git log --max-count=1` for each path. This is ineffecient. We've been working hard to upstream the git-last-modified(1) command to Git[1]. This command can be used to replace all the git-log(1) calls. Implement it's use behind a feature flag. [1]: https://gitlab.com/gitlab-org/git/-/issues/114 --- internal/featureflag/ff_git_last_modified.go | 9 +++ internal/git/gitcmd/command_description.go | 3 + internal/git/log/last_commit.go | 68 ++++++++++++++++++- .../commit/list_last_commits_for_tree_test.go | 34 ++++++---- 4 files changed, 100 insertions(+), 14 deletions(-) create mode 100644 internal/featureflag/ff_git_last_modified.go diff --git a/internal/featureflag/ff_git_last_modified.go b/internal/featureflag/ff_git_last_modified.go new file mode 100644 index 00000000000..a575463e087 --- /dev/null +++ b/internal/featureflag/ff_git_last_modified.go @@ -0,0 +1,9 @@ +package featureflag + +// GitLastModified enables the use of native git-last-modified(1) +var GitLastModified = NewFeatureFlag( + "git_last_modified", + "v18.5", + "https://gitlab.com/gitlab-org/gitaly/-/issues/6911", + false, +) diff --git a/internal/git/gitcmd/command_description.go b/internal/git/gitcmd/command_description.go index 0446d2ecbdb..7b46fdd6aa1 100644 --- a/internal/git/gitcmd/command_description.go +++ b/internal/git/gitcmd/command_description.go @@ -198,6 +198,9 @@ var commandDescriptions = map[string]commandDescription{ "log": { flags: scNoRefUpdates, }, + "last-modified": { + flags: scNoRefUpdates, + }, "ls-remote": { flags: scNoRefUpdates, opts: func(context.Context) []GlobalOption { diff --git a/internal/git/log/last_commit.go b/internal/git/log/last_commit.go index a6880a62bd1..e360362ab22 100644 --- a/internal/git/log/last_commit.go +++ b/internal/git/log/last_commit.go @@ -1,15 +1,22 @@ package log import ( + "bufio" + "bytes" "context" + "errors" "fmt" + "io" "regexp" + "slices" "strings" "gitlab.com/gitlab-org/gitaly/v16/internal/command" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" "gitlab.com/gitlab-org/gitaly/v16/internal/git" "gitlab.com/gitlab-org/gitaly/v16/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gitcmd" + "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" ) @@ -73,10 +80,69 @@ func EachPathLastCommit( options *gitalypb.GlobalOptions, fn func(string, *catfile.Commit) error, ) error { + if featureflag.GitLastModified.IsEnabled(ctx) && featureflag.GitMaster.IsEnabled(ctx) { + stderr := &bytes.Buffer{} + + cmd, err := repo.Exec(ctx, gitcmd.Command{ + Name: "last-modified", + Flags: []gitcmd.Option{ + gitcmd.Flag{Name: "-t"}, + gitcmd.Flag{Name: "--max-depth=1"}, + gitcmd.Flag{Name: "-z"}, + }, + Args: []string{revision.String()}, + PostSepArgs: paths, + }, + append(gitcmd.ConvertGlobalOptions(options), + gitcmd.WithSetupStdout(), gitcmd.WithStderr(stderr))...) + if err != nil { + return err + } + + reader := bufio.NewReader(cmd) + for { + blurb, err := reader.ReadBytes(0x00) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + blurb = bytes.TrimSuffix(blurb, []byte{0x00}) + + if len(blurb) == 0 { + break + } + + oid, path, found := strings.Cut(string(blurb), "\t") + if !found { + return fmt.Errorf("last-modified tab not found") + } + if !slices.Contains(paths, path) { + continue + } + commit, err := catfile.GetCommit(ctx, objectReader, git.Revision(strings.TrimPrefix(oid, "^"))) + if err != nil { + return fmt.Errorf("get commit %v, %w", oid, err) + } + err = fn(path, commit) + if err != nil { + return fmt.Errorf("each last path %v, %w", oid, err) + } + } + + if err := cmd.Wait(); err != nil { + return structerr.NewInternal("%w", err). + WithMetadata("stderr", stderr.String()). + WithMetadata("command", "last-modified") + } + return nil + } + for _, path := range paths { c, err := LastCommitForPath(ctx, objectReader, repo, revision, path, options) if err != nil { - return fmt.Errorf("last commit for %q failed: %w", err) + return fmt.Errorf("last commit for %q failed: %w", path, err) } err = fn(path, c) if err != nil { diff --git a/internal/gitaly/service/commit/list_last_commits_for_tree_test.go b/internal/gitaly/service/commit/list_last_commits_for_tree_test.go index f42eca89a5d..02149546701 100644 --- a/internal/gitaly/service/commit/list_last_commits_for_tree_test.go +++ b/internal/gitaly/service/commit/list_last_commits_for_tree_test.go @@ -6,6 +6,7 @@ import ( "unicode/utf8" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" @@ -18,6 +19,12 @@ import ( func TestListLastCommitsForTree(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets( + featureflag.GitLastModified, + ).Run(t, testListLastCommitsForTree) +} + +func testListLastCommitsForTree(t *testing.T, ctx context.Context) { commitResponse := func(path string, commit *gitalypb.GitCommit) *gitalypb.ListLastCommitsForTreeResponse_CommitForTree { return &gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ PathBytes: []byte(path), @@ -141,18 +148,20 @@ func TestListLastCommitsForTree(t *testing.T) { { desc: "root directory", setup: func(t *testing.T, ctx context.Context, data TestData) setupData { + request := &gitalypb.ListLastCommitsForTreeRequest{ + Repository: data.repoProto, + Revision: data.parentCommit.GetId(), + Path: []byte("/"), + Limit: 5, + } + expectedCommits := []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ + commitResponse("subdir", data.parentCommit), + commitResponse("changed", data.parentCommit), + commitResponse("unchanged", data.childCommit), + } return setupData{ - request: &gitalypb.ListLastCommitsForTreeRequest{ - Repository: data.repoProto, - Revision: data.parentCommit.GetId(), - Path: []byte("/"), - Limit: 5, - }, - expectedCommits: []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ - commitResponse("subdir", data.parentCommit), - commitResponse("changed", data.parentCommit), - commitResponse("unchanged", data.childCommit), - }, + request: request, + expectedCommits: expectedCommits, } }, }, @@ -316,7 +325,6 @@ func TestListLastCommitsForTree(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { t.Parallel() - ctx := testhelper.Context(t) cfg, client := setupCommitService(t, ctx) repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) @@ -363,7 +371,7 @@ func TestListLastCommitsForTree(t *testing.T) { ) []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree { return append(result, response.GetCommits()...) }) - testhelper.RequireGrpcError(t, setup.expectedErr, err) + testhelper.RequireGrpcErrorContains(t, setup.expectedErr, err) testhelper.ProtoEqual(t, setup.expectedCommits, commits) }) } -- GitLab From c660656b4b6c3bb266521e4551e60c148cadc9d1 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Fri, 25 Jul 2025 16:15:48 +0200 Subject: [PATCH 3/5] commit: Implement benchmark for ListLastCommitsForTree To test performance of git-last-modified(1) vs the old approach of doing `git-log -1` for each path separetely, add a benchmark test. --- .../commit/list_last_commits_for_tree_test.go | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/internal/gitaly/service/commit/list_last_commits_for_tree_test.go b/internal/gitaly/service/commit/list_last_commits_for_tree_test.go index 02149546701..ad20b2c832e 100644 --- a/internal/gitaly/service/commit/list_last_commits_for_tree_test.go +++ b/internal/gitaly/service/commit/list_last_commits_for_tree_test.go @@ -2,6 +2,7 @@ package commit import ( "context" + "fmt" "testing" "unicode/utf8" @@ -376,3 +377,45 @@ func testListLastCommitsForTree(t *testing.T, ctx context.Context) { }) } } + +func BenchmarkListLastCommitsForTree(b *testing.B) { + ctx := testhelper.Context(b) + cfg, client := setupCommitService(b, ctx) + + repoProto, _ := gittest.CreateRepository(b, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + Seed: "benchmark.git", + }) + request := &gitalypb.ListLastCommitsForTreeRequest{ + Repository: repoProto, + Revision: "58f4691876e4301fda53285b0413c64ed67a4585", + Path: []byte("/"), + Limit: 25, + } + + for _, limit := range []int32{5, 25, 50} { + b.Run(fmt.Sprintf("limit=%d", limit), func(b *testing.B) { + request.Limit = limit + + testhelper.NewFeatureSets( + featureflag.GitLastModified, + ).Bench(b, func(b *testing.B, ctx context.Context) { + for i := int32(0); i < 120; i += limit { + request.Offset = i + + stream, err := client.ListLastCommitsForTree(ctx, request) + require.NoError(b, err) + + commits, err := testhelper.ReceiveAndFold(stream.Recv, func( + result []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree, + response *gitalypb.ListLastCommitsForTreeResponse, + ) []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree { + return append(result, response.GetCommits()...) + }) + require.NoError(b, err) + require.NotEmpty(b, commits) + } + }) + }) + } +} -- GitLab From 3385839268a7932d89930a9b8e01f2da38d5fb40 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Fri, 19 Sep 2025 17:15:12 +0200 Subject: [PATCH 4/5] commit: Sort entries of ListLastCommitsForTreeResponse by path GitLab rails expects the paths in order. This is the case since gitlab-org/gitlab@66052c17a9cba2bf72bc342d7823565b501a479. Here a limit of `limit + 1` is taken, but later they call #first(limit) on the results. log.EachPathLastCommit uses git-last-modified(1), which returns paths in reverse-chronological order. This means the oldest entry gets dropped, leaving the tree behind with missing data. Workaround the issue by sorting the entries in the reponse by path. --- .../commit/list_last_commits_for_tree.go | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/internal/gitaly/service/commit/list_last_commits_for_tree.go b/internal/gitaly/service/commit/list_last_commits_for_tree.go index 4c4e98eb037..e58a99c7da6 100644 --- a/internal/gitaly/service/commit/list_last_commits_for_tree.go +++ b/internal/gitaly/service/commit/list_last_commits_for_tree.go @@ -74,10 +74,29 @@ func (s *server) listLastCommitsForTree(in *gitalypb.ListLastCommitsForTreeReque paths = append(paths, e.Path) } + // GitLab rails expects the paths in order. This is the case since + // gitlab-org/gitlab@66052c17a9cba2bf72bc342d7823565b501a479 + // Here a limit of `limit + 1` is taken, but later they call + // #first(limit) on the results. + // log.EachPathLastCommit uses git-last-modified(1), which returns paths + // in reverse-chronological order. This means the oldest entry gets + // dropped, leaving the tree behind with missing data. + commitsForPaths := make(map[string]*gitalypb.GitCommit, len(paths)) err = log.EachPathLastCommit(ctx, objectReader, repo, git.Revision(in.GetRevision()), paths, in.GetGlobalOptions(), func(path string, commit *catfile.Commit) error { + commitsForPaths[path] = commit.GitCommit + return nil + }) + if err != nil { + return err + } + for _, path := range paths { + commit, ok := commitsForPaths[path] + if !ok { + return fmt.Errorf("commit not resolved for %q", path) + } commitForTree := &gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ PathBytes: []byte(path), - Commit: commit.GitCommit, + Commit: commit, } batch = append(batch, commitForTree) @@ -88,10 +107,6 @@ func (s *server) listLastCommitsForTree(in *gitalypb.ListLastCommitsForTreeReque batch = batch[0:0] } - return nil - }) - if err != nil { - return err } if err := cmd.Wait(); err != nil { -- GitLab From 6d2509d3dd82c9610a690047b07a2969d6d399ff Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Thu, 11 Sep 2025 10:31:54 +0200 Subject: [PATCH 5/5] Makefile: Bump GIT_VERSION_MASTER To make the tests pass, use a Git version that has git-last-modified(1). --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a40e8d0973e..5728333bc90 100644 --- a/Makefile +++ b/Makefile @@ -157,7 +157,7 @@ GIT_VERSION ?= # Do not modify the format, it's automatically updated by renovate-gitlab-bot. The timestamp # is used for version comparison by renovate. # renovate: 1754054220 -GIT_VERSION_MASTER ?= 23466173824c0a0f835c0d790c2f38156ae6284a +GIT_VERSION_MASTER ?= 4975ec3473b4bc61bc8a3df1ef29d0b7e7959e87 GIT_VERSION_PREV ?= 41d0310a83aba75a19585d8fbbf021938d8d2ace # # GIT_VERSION_x_xx defines versions for each instance of bundled Git we ship. When a new -- GitLab