diff --git a/Makefile b/Makefile index a40e8d0973e4ed3e4872e2ff310cfaa20318d428..5728333bc90279328492a1fceb69f05ded1086c5 100644 --- a/Makefile +++ b/Makefile @@ -157,7 +157,7 @@ GIT_VERSION ?= # Do not modify the format, it's automatically updated by renovate-gitlab-bot. The timestamp # is used for version comparison by renovate. # renovate: 1754054220 -GIT_VERSION_MASTER ?= 23466173824c0a0f835c0d790c2f38156ae6284a +GIT_VERSION_MASTER ?= 4975ec3473b4bc61bc8a3df1ef29d0b7e7959e87 GIT_VERSION_PREV ?= 41d0310a83aba75a19585d8fbbf021938d8d2ace # # GIT_VERSION_x_xx defines versions for each instance of bundled Git we ship. When a new diff --git a/internal/featureflag/ff_git_last_modified.go b/internal/featureflag/ff_git_last_modified.go new file mode 100644 index 0000000000000000000000000000000000000000..a575463e087e2ce1682d20f1c67e21629a7d3caf --- /dev/null +++ b/internal/featureflag/ff_git_last_modified.go @@ -0,0 +1,9 @@ +package featureflag + +// GitLastModified enables the use of native git-last-modified(1) +var GitLastModified = NewFeatureFlag( + "git_last_modified", + "v18.5", + "https://gitlab.com/gitlab-org/gitaly/-/issues/6911", + false, +) diff --git a/internal/git/gitcmd/command_description.go b/internal/git/gitcmd/command_description.go index 0446d2ecbdb1413dcb8c2aebe26a3a28bc0e7a3b..7b46fdd6aa1f31d50d3b604105822bd7fa1e7822 100644 --- a/internal/git/gitcmd/command_description.go +++ b/internal/git/gitcmd/command_description.go @@ -198,6 +198,9 @@ var commandDescriptions = map[string]commandDescription{ "log": { flags: scNoRefUpdates, }, + "last-modified": { + flags: scNoRefUpdates, + }, "ls-remote": { flags: scNoRefUpdates, opts: func(context.Context) []GlobalOption { diff --git a/internal/git/log/last_commit.go b/internal/git/log/last_commit.go index d63be7f2a63eedc38db3cad1588cbf8ccb74a352..e360362ab22bc4103e4d7e9166f2785e71e01383 100644 --- a/internal/git/log/last_commit.go +++ b/internal/git/log/last_commit.go @@ -1,15 +1,22 @@ package log import ( + "bufio" + "bytes" "context" + "errors" "fmt" + "io" "regexp" + "slices" "strings" "gitlab.com/gitlab-org/gitaly/v16/internal/command" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" "gitlab.com/gitlab-org/gitaly/v16/internal/git" "gitlab.com/gitlab-org/gitaly/v16/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gitcmd" + "gitlab.com/gitlab-org/gitaly/v16/internal/structerr" "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb" ) @@ -61,6 +68,91 @@ func LastCommitForPath( return catfile.GetCommit(ctx, objectReader, git.Revision(commitID)) } +// EachPathLastCommit returns the last commit that modified each path in `paths`. +// It does so by calling the provided function `fn` for each path in `paths` with the +// related commit. It is up to the caller to decide what to do with the commit then. +func EachPathLastCommit( + ctx context.Context, + objectReader catfile.ObjectContentReader, + repo gitcmd.RepositoryExecutor, + revision git.Revision, + paths []string, + options *gitalypb.GlobalOptions, + fn func(string, *catfile.Commit) error, +) error { + if featureflag.GitLastModified.IsEnabled(ctx) && featureflag.GitMaster.IsEnabled(ctx) { + stderr := &bytes.Buffer{} + + cmd, err := repo.Exec(ctx, gitcmd.Command{ + Name: "last-modified", + Flags: []gitcmd.Option{ + gitcmd.Flag{Name: "-t"}, + gitcmd.Flag{Name: "--max-depth=1"}, + gitcmd.Flag{Name: "-z"}, + }, + Args: []string{revision.String()}, + PostSepArgs: paths, + }, + append(gitcmd.ConvertGlobalOptions(options), + gitcmd.WithSetupStdout(), gitcmd.WithStderr(stderr))...) + if err != nil { + return err + } + + reader := bufio.NewReader(cmd) + for { + blurb, err := reader.ReadBytes(0x00) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + blurb = bytes.TrimSuffix(blurb, []byte{0x00}) + + if len(blurb) == 0 { + break + } + + oid, path, found := strings.Cut(string(blurb), "\t") + if !found { + return fmt.Errorf("last-modified tab not found") + } + if !slices.Contains(paths, path) { + continue + } + commit, err := catfile.GetCommit(ctx, objectReader, git.Revision(strings.TrimPrefix(oid, "^"))) + if err != nil { + return fmt.Errorf("get commit %v, %w", oid, err) + } + err = fn(path, commit) + if err != nil { + return fmt.Errorf("each last path %v, %w", oid, err) + } + } + + if err := cmd.Wait(); err != nil { + return structerr.NewInternal("%w", err). + WithMetadata("stderr", stderr.String()). + WithMetadata("command", "last-modified") + } + return nil + } + + for _, path := range paths { + c, err := LastCommitForPath(ctx, objectReader, repo, revision, path, options) + if err != nil { + return fmt.Errorf("last commit for %q failed: %w", path, err) + } + err = fn(path, c) + if err != nil { + return fmt.Errorf("callback last commit: %w", err) + } + } + + return nil +} + // GitLogCommand returns a Command that executes git log with the given the arguments func GitLogCommand(ctx context.Context, repo gitcmd.RepositoryExecutor, revisions []git.Revision, paths []string, options *gitalypb.GlobalOptions, extraArgs ...gitcmd.Option) (*command.Command, error) { args := make([]string, len(revisions)) diff --git a/internal/gitaly/service/commit/list_last_commits_for_tree.go b/internal/gitaly/service/commit/list_last_commits_for_tree.go index 40f612bd9981fba4407db8c24879c7b0a2ac1a86..e58a99c7da640a453ed90d032e5a1bcc3679551a 100644 --- a/internal/gitaly/service/commit/list_last_commits_for_tree.go +++ b/internal/gitaly/service/commit/list_last_commits_for_tree.go @@ -9,6 +9,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/command" "gitlab.com/gitlab-org/gitaly/v16/internal/git" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gitcmd" "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/git/log" @@ -67,15 +68,35 @@ func (s *server) listLastCommitsForTree(in *gitalypb.ListLastCommitsForTreeReque limit = len(entries) } - for _, entry := range entries[offset:limit] { - commit, err := log.LastCommitForPath(ctx, objectReader, repo, git.Revision(in.GetRevision()), entry.Path, in.GetGlobalOptions()) - if err != nil { - return err - } + entries = entries[offset:limit] + paths := make([]string, 0, len(entries)) + for _, e := range entries { + paths = append(paths, e.Path) + } + // GitLab rails expects the paths in order. This is the case since + // gitlab-org/gitlab@66052c17a9cba2bf72bc342d7823565b501a479 + // Here a limit of `limit + 1` is taken, but later they call + // #first(limit) on the results. + // log.EachPathLastCommit uses git-last-modified(1), which returns paths + // in reverse-chronological order. This means the oldest entry gets + // dropped, leaving the tree behind with missing data. + commitsForPaths := make(map[string]*gitalypb.GitCommit, len(paths)) + err = log.EachPathLastCommit(ctx, objectReader, repo, git.Revision(in.GetRevision()), paths, in.GetGlobalOptions(), func(path string, commit *catfile.Commit) error { + commitsForPaths[path] = commit.GitCommit + return nil + }) + if err != nil { + return err + } + for _, path := range paths { + commit, ok := commitsForPaths[path] + if !ok { + return fmt.Errorf("commit not resolved for %q", path) + } commitForTree := &gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ - PathBytes: []byte(entry.Path), - Commit: commit.GitCommit, + PathBytes: []byte(path), + Commit: commit, } batch = append(batch, commitForTree) diff --git a/internal/gitaly/service/commit/list_last_commits_for_tree_test.go b/internal/gitaly/service/commit/list_last_commits_for_tree_test.go index f42eca89a5d5dffa0d0d38819864717c21b2a5b6..ad20b2c832e47315770c4be4c06b110762acdde0 100644 --- a/internal/gitaly/service/commit/list_last_commits_for_tree_test.go +++ b/internal/gitaly/service/commit/list_last_commits_for_tree_test.go @@ -2,10 +2,12 @@ package commit import ( "context" + "fmt" "testing" "unicode/utf8" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v16/internal/featureflag" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" @@ -18,6 +20,12 @@ import ( func TestListLastCommitsForTree(t *testing.T) { t.Parallel() + testhelper.NewFeatureSets( + featureflag.GitLastModified, + ).Run(t, testListLastCommitsForTree) +} + +func testListLastCommitsForTree(t *testing.T, ctx context.Context) { commitResponse := func(path string, commit *gitalypb.GitCommit) *gitalypb.ListLastCommitsForTreeResponse_CommitForTree { return &gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ PathBytes: []byte(path), @@ -141,18 +149,20 @@ func TestListLastCommitsForTree(t *testing.T) { { desc: "root directory", setup: func(t *testing.T, ctx context.Context, data TestData) setupData { + request := &gitalypb.ListLastCommitsForTreeRequest{ + Repository: data.repoProto, + Revision: data.parentCommit.GetId(), + Path: []byte("/"), + Limit: 5, + } + expectedCommits := []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ + commitResponse("subdir", data.parentCommit), + commitResponse("changed", data.parentCommit), + commitResponse("unchanged", data.childCommit), + } return setupData{ - request: &gitalypb.ListLastCommitsForTreeRequest{ - Repository: data.repoProto, - Revision: data.parentCommit.GetId(), - Path: []byte("/"), - Limit: 5, - }, - expectedCommits: []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree{ - commitResponse("subdir", data.parentCommit), - commitResponse("changed", data.parentCommit), - commitResponse("unchanged", data.childCommit), - }, + request: request, + expectedCommits: expectedCommits, } }, }, @@ -316,7 +326,6 @@ func TestListLastCommitsForTree(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { t.Parallel() - ctx := testhelper.Context(t) cfg, client := setupCommitService(t, ctx) repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) @@ -363,8 +372,50 @@ func TestListLastCommitsForTree(t *testing.T) { ) []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree { return append(result, response.GetCommits()...) }) - testhelper.RequireGrpcError(t, setup.expectedErr, err) + testhelper.RequireGrpcErrorContains(t, setup.expectedErr, err) testhelper.ProtoEqual(t, setup.expectedCommits, commits) }) } } + +func BenchmarkListLastCommitsForTree(b *testing.B) { + ctx := testhelper.Context(b) + cfg, client := setupCommitService(b, ctx) + + repoProto, _ := gittest.CreateRepository(b, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + Seed: "benchmark.git", + }) + request := &gitalypb.ListLastCommitsForTreeRequest{ + Repository: repoProto, + Revision: "58f4691876e4301fda53285b0413c64ed67a4585", + Path: []byte("/"), + Limit: 25, + } + + for _, limit := range []int32{5, 25, 50} { + b.Run(fmt.Sprintf("limit=%d", limit), func(b *testing.B) { + request.Limit = limit + + testhelper.NewFeatureSets( + featureflag.GitLastModified, + ).Bench(b, func(b *testing.B, ctx context.Context) { + for i := int32(0); i < 120; i += limit { + request.Offset = i + + stream, err := client.ListLastCommitsForTree(ctx, request) + require.NoError(b, err) + + commits, err := testhelper.ReceiveAndFold(stream.Recv, func( + result []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree, + response *gitalypb.ListLastCommitsForTreeResponse, + ) []*gitalypb.ListLastCommitsForTreeResponse_CommitForTree { + return append(result, response.GetCommits()...) + }) + require.NoError(b, err) + require.NotEmpty(b, commits) + } + }) + }) + } +}