From 5338b9b794e8731de38687856dbe93f18670d3a2 Mon Sep 17 00:00:00 2001 From: jramsay Date: Wed, 11 Dec 2019 14:55:42 +0100 Subject: [PATCH 1/3] Repo language detected no more that once a week Noted by analyzing Sidekiq usage patterns[1], the commit languages topped the charts in time spend. Most, almost all, of this time is spend on Gitaly. Now Gitaly could reject the call with a FailedPrecondition if it finds it's scanning the repository too much. The heuristic to be used for this would be cache modification time. The cache will be updated each linguist run, and only RPC per repository is called at once from the client side. [1]: https://gitlab.com/gitlab-com/gl-infra/scalability/issues/64#note_254382111 --- .../zj-repo-languages-failed-precond.yml | 5 +++ internal/service/commit/languages.go | 33 ++++++++++++----- internal/service/commit/languages_test.go | 36 ++++--------------- 3 files changed, 35 insertions(+), 39 deletions(-) create mode 100644 changelogs/unreleased/zj-repo-languages-failed-precond.yml diff --git a/changelogs/unreleased/zj-repo-languages-failed-precond.yml b/changelogs/unreleased/zj-repo-languages-failed-precond.yml new file mode 100644 index 00000000000..75122868347 --- /dev/null +++ b/changelogs/unreleased/zj-repo-languages-failed-precond.yml @@ -0,0 +1,5 @@ +--- +title: Return FailedPrecondition when commit languages are requested too often +merge_request: +author: +type: performance diff --git a/internal/service/commit/languages.go b/internal/service/commit/languages.go index 8d73542e666..32320668c41 100644 --- a/internal/service/commit/languages.go +++ b/internal/service/commit/languages.go @@ -3,14 +3,16 @@ package commit import ( "context" "io/ioutil" + "os" + "path/filepath" "sort" + "time" "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/helper" "gitlab.com/gitlab-org/gitaly/internal/helper/text" "gitlab.com/gitlab-org/gitaly/internal/linguist" "gitlab.com/gitlab-org/gitaly/internal/metadata/featureflag" - "gitlab.com/gitlab-org/gitaly/internal/service/ref" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -19,18 +21,11 @@ import ( func (*server) CommitLanguages(ctx context.Context, req *gitalypb.CommitLanguagesRequest) (*gitalypb.CommitLanguagesResponse, error) { repo := req.Repository - if err := git.ValidateRevisionAllowEmpty(req.Revision); err != nil { + if err := git.ValidateRevision(req.Revision); err != nil { return nil, helper.ErrInvalidArgument(err) } revision := string(req.Revision) - if revision == "" { - defaultBranch, err := ref.DefaultBranchName(ctx, req.Repository) - if err != nil { - return nil, err - } - revision = string(defaultBranch) - } commitID, err := lookupRevision(ctx, repo, revision) if err != nil { @@ -41,6 +36,11 @@ func (*server) CommitLanguages(ctx context.Context, req *gitalypb.CommitLanguage if err != nil { return nil, err } + + if cacheRecentlyUpdated(repoPath) { + return nil, helper.ErrPreconditionFailedf("repository languages recently scanned") + } + stats, err := linguist.Stats(ctx, repoPath, commitID) if err != nil { return nil, err @@ -104,3 +104,18 @@ func lookupRevision(ctx context.Context, repo *gitalypb.Repository, revision str return text.ChompBytes(revParseBytes), nil } + +// Gitaly uses Linguist to detect the languages, and to improve performance, the +// results will be cached. If the modified time is later than a trashold it's +// considered recently updated enough that we do not scan again +func cacheRecentlyUpdated(path string) bool { + fullPath := filepath.Join(path, "language-stats.cache") + + fi, err := os.Stat(fullPath) + if err != nil { + return false + } + + const sentinal = -time.Hour * 24 * 7 + return fi.ModTime().After(time.Now().Add(sentinal)) +} diff --git a/internal/service/commit/languages_test.go b/internal/service/commit/languages_test.go index b456719a52b..7218bcf519b 100644 --- a/internal/service/commit/languages_test.go +++ b/internal/service/commit/languages_test.go @@ -53,6 +53,12 @@ func TestLanguages(t *testing.T) { actualLanguage := resp.Languages[i] requireLanguageEqual(t, &el, actualLanguage) } + + // if another request is made within short succession (1 week), the second + // one is rejected + _, err = client.CommitLanguages(ctx, request) + require.Error(t, err) + testhelper.RequireGrpcError(t, err, codes.FailedPrecondition) } func TestFileCountIsZeroWhenFeatureIsDisabled(t *testing.T) { @@ -92,36 +98,6 @@ func requireLanguageEqual(t *testing.T, expected, actual *gitalypb.CommitLanguag require.Equal(t, expected.Bytes, actual.Bytes) } -func TestLanguagesEmptyRevision(t *testing.T) { - server, serverSocketPath := startTestServices(t) - defer server.Stop() - - client, conn := newCommitServiceClient(t, serverSocketPath) - defer conn.Close() - - testRepo, _, cleanupFn := testhelper.NewTestRepo(t) - defer cleanupFn() - - request := &gitalypb.CommitLanguagesRequest{ - Repository: testRepo, - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - resp, err := client.CommitLanguages(ctx, request) - require.NoError(t, err) - - require.NotZero(t, len(resp.Languages), "number of languages in response") - - foundRuby := false - for _, l := range resp.Languages { - if l.Name == "Ruby" { - foundRuby = true - } - } - require.True(t, foundRuby, "expected to find Ruby as a language on HEAD") -} - func TestInvalidCommitLanguagesRequestRevision(t *testing.T) { server, serverSocketPath := startTestServices(t) defer server.Stop() -- GitLab From 39d50c8bf34940944b236a1483cce330d725dc98 Mon Sep 17 00:00:00 2001 From: Zeger-Jan van de Weg Date: Wed, 11 Dec 2019 23:12:42 +0000 Subject: [PATCH 2/3] Apply suggestion to changelogs/unreleased/zj-repo-languages-failed-precond.yml --- changelogs/unreleased/zj-repo-languages-failed-precond.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelogs/unreleased/zj-repo-languages-failed-precond.yml b/changelogs/unreleased/zj-repo-languages-failed-precond.yml index 75122868347..d38c00a0bf4 100644 --- a/changelogs/unreleased/zj-repo-languages-failed-precond.yml +++ b/changelogs/unreleased/zj-repo-languages-failed-precond.yml @@ -1,5 +1,5 @@ --- title: Return FailedPrecondition when commit languages are requested too often -merge_request: +merge_request: 1690 author: type: performance -- GitLab From 9bf04679206859c2d26a0abe2f9c74a7b96dd54a Mon Sep 17 00:00:00 2001 From: Zeger-Jan van de Weg Date: Fri, 13 Dec 2019 09:03:37 +0000 Subject: [PATCH 3/3] Apply suggestion to internal/service/commit/languages.go --- internal/service/commit/languages.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/service/commit/languages.go b/internal/service/commit/languages.go index 32320668c41..a9c2c06cb17 100644 --- a/internal/service/commit/languages.go +++ b/internal/service/commit/languages.go @@ -116,6 +116,6 @@ func cacheRecentlyUpdated(path string) bool { return false } - const sentinal = -time.Hour * 24 * 7 - return fi.ModTime().After(time.Now().Add(sentinal)) + const sentinel = -time.Hour * 24 * 7 + return fi.ModTime().After(time.Now().Add(sentinel)) } -- GitLab