From aeea043f21714fb90d897b1d2a3b3140cf3448a0 Mon Sep 17 00:00:00 2001 From: Nick Thomas Date: Thu, 3 Dec 2020 15:11:02 +0000 Subject: [PATCH] Speed up pull mirroring by getting updated tags straight from Gitaly Currently, pull mirroring will fetch the full list of tags for a repository twice - before and after the pull - and use the diff between them to work out which new tags have been added. Instead, we can parse the `git fetch` output to learn when tags have been added, and use that to avoid pulling the list of tags twice if no tags have been changed by the fetch. Ideally, we'd use the output of `git fetch` to determine exactly which tags had been changed, but that adds significant complexity since the pointed-to SHAs are not included in the output, and we need that for the push events we create, etc. Still, optimising the common case seems like a decent win to me. --- GITALY_SERVER_VERSION | 2 +- Gemfile | 2 +- Gemfile.lock | 14 ++++++++++---- ee/app/models/ee/project.rb | 4 ++-- ee/app/models/ee/repository.rb | 5 +++-- .../projects/update_mirror_service.rb | 19 ++++++++++++++++++- .../development/fetch_remote_with_status.yml | 8 ++++++++ ee/spec/models/project_spec.rb | 2 +- .../projects/update_mirror_service_spec.rb | 11 ++++++++++- lib/gitlab/git/repository.rb | 4 +++- .../gitaly_client/repository_service.rb | 14 ++++++++++++-- spec/lib/gitlab/git/repository_spec.rb | 3 ++- 12 files changed, 71 insertions(+), 17 deletions(-) create mode 100644 ee/config/feature_flags/development/fetch_remote_with_status.yml diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION index 6bafd9452fa057..6b7681d6552df7 100644 --- a/GITALY_SERVER_VERSION +++ b/GITALY_SERVER_VERSION @@ -1 +1 @@ -c0ea152ccad891cda5fd255c1fea78562aae5e4a +fetch-remote-with-status diff --git a/Gemfile b/Gemfile index e58ee3559a6194..6892ab762a53cd 100644 --- a/Gemfile +++ b/Gemfile @@ -466,7 +466,7 @@ group :ed25519 do end # Gitaly GRPC protocol definitions -gem 'gitaly', '~> 13.7.0.pre.rc1' +gem 'gitaly', git: 'https://gitlab.com/gitlab-org/gitaly', ref: 'fetch-remote-with-status' gem 'grpc', '~> 1.30.2' diff --git a/Gemfile.lock b/Gemfile.lock index 3ec97fc4d5fc2d..0fe61a1031ffc9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,11 @@ +GIT + remote: https://gitlab.com/gitlab-org/gitaly + revision: 1b6e7628d32bd5bf135efa97040f6ca671450fa6 + ref: fetch-remote-with-status + specs: + gitaly (13.7.0.pre.rc2) + grpc (~> 1.0) + GEM remote: https://rubygems.org/ specs: @@ -422,8 +430,6 @@ GEM rails (>= 3.2.0) git (1.7.0) rchardet (~> 1.8) - gitaly (13.7.0.pre.rc1) - grpc (~> 1.0) github-markup (1.7.0) gitlab-chronic (0.10.5) numerizer (~> 0.2) @@ -484,7 +490,7 @@ GEM signet (~> 0.12) google-cloud-env (1.4.0) faraday (>= 0.17.3, < 2.0) - google-protobuf (3.12.4) + google-protobuf (3.14.0) googleapis-common-protos-types (1.0.5) google-protobuf (~> 3.11) googleauth (0.14.0) @@ -1350,7 +1356,7 @@ DEPENDENCIES gettext (~> 3.3) gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails_js (~> 1.3) - gitaly (~> 13.7.0.pre.rc1) + gitaly! github-markup (~> 1.7.0) gitlab-chronic (~> 0.10.5) gitlab-fog-azure-rm (~> 1.0) diff --git a/ee/app/models/ee/project.rb b/ee/app/models/ee/project.rb index 6a84c557b36b95..680c96f0dea9e1 100644 --- a/ee/app/models/ee/project.rb +++ b/ee/app/models/ee/project.rb @@ -295,7 +295,7 @@ def mirror_with_content? mirror? && !empty_repo? end - def fetch_mirror(forced: false) + def fetch_mirror(forced: false, with_status: false) return unless mirror? # Only send the password if it's needed @@ -306,7 +306,7 @@ def fetch_mirror(forced: false) username_only_import_url end - repository.fetch_upstream(url, forced: forced) + repository.fetch_upstream(url, forced: forced, with_status: with_status) end def can_override_approvers? diff --git a/ee/app/models/ee/repository.rb b/ee/app/models/ee/repository.rb index c7640c1c5aeb37..25b17647914a58 100644 --- a/ee/app/models/ee/repository.rb +++ b/ee/app/models/ee/repository.rb @@ -33,9 +33,10 @@ def after_sync expire_content_cache end - def fetch_upstream(url, forced: false) + def fetch_upstream(url, forced: false, with_status: false) add_remote(MIRROR_REMOTE, url) - fetch_remote(MIRROR_REMOTE, ssh_auth: project&.import_data, forced: forced) + + fetch_remote(MIRROR_REMOTE, ssh_auth: project&.import_data, forced: forced, with_status: with_status) end def upstream_branches diff --git a/ee/app/services/projects/update_mirror_service.rb b/ee/app/services/projects/update_mirror_service.rb index 8c9eae551ad737..90c8f9d2a4b803 100644 --- a/ee/app/services/projects/update_mirror_service.rb +++ b/ee/app/services/projects/update_mirror_service.rb @@ -33,7 +33,7 @@ def execute checksum_before = project.repository.checksum update_tags do - project.fetch_mirror(forced: true) + fetch_mirror_with_tag_status end update_branches @@ -95,6 +95,23 @@ def update_branches end end + def fetch_mirror_with_tag_status + return project.fetch_mirror(forced: true) unless Feature.enabled?(:fetch_remote_with_status, project) + + # Watch the stream from gitaly to see if any of the updated refsare tags. + # There isn't enough information to construct the full update without + # further queries, but this lets us skip cache invalidation in the common + # case where no tags have been changed. + tags_updated = false + + fetch_result = project.fetch_mirror(forced: true, with_status: true) do |status| + tags_updated = true if status.update_type == :FETCHED && status.summary = '[new tag]' + tags_updated = true if status.update_type = :TAG_UPDATE + end + + fetch_result && tags_updated + end + def update_tags(&block) old_tags = repository_tags_with_target.each_with_object({}) { |tag, tags| tags[tag.name] = tag } diff --git a/ee/config/feature_flags/development/fetch_remote_with_status.yml b/ee/config/feature_flags/development/fetch_remote_with_status.yml new file mode 100644 index 00000000000000..0332b78a777307 --- /dev/null +++ b/ee/config/feature_flags/development/fetch_remote_with_status.yml @@ -0,0 +1,8 @@ +--- +name: fetch_remote_with_status +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/49140 +rollout_issue_url: +milestone: '13.7' +type: development +group: group::source code +default_enabled: false diff --git a/ee/spec/models/project_spec.rb b/ee/spec/models/project_spec.rb index 2ebc6d8efb2d7a..c6c9ff6a924a8c 100644 --- a/ee/spec/models/project_spec.rb +++ b/ee/spec/models/project_spec.rb @@ -896,7 +896,7 @@ let(:project) { build(:project, :mirror, import_url: import_url, import_data_attributes: { auth_method: auth_method } ) } specify do - expect(project.repository).to receive(:fetch_upstream).with(expected, forced: false) + expect(project.repository).to receive(:fetch_upstream).with(expected, forced: false, with_status: false) project.fetch_mirror end diff --git a/ee/spec/services/projects/update_mirror_service_spec.rb b/ee/spec/services/projects/update_mirror_service_spec.rb index 2900091c1c76a1..e976b025150326 100644 --- a/ee/spec/services/projects/update_mirror_service_spec.rb +++ b/ee/spec/services/projects/update_mirror_service_spec.rb @@ -430,7 +430,7 @@ def create_file(repository) end def stub_fetch_mirror(project, repository: project.repository) - allow(project).to receive(:fetch_mirror) { fetch_mirror(repository) } + allow(project).to receive(:fetch_mirror) { |&blk| fetch_mirror(repository, branch_prefix: branch_prefix, &blk) } end def fetch_mirror(repository) @@ -457,6 +457,15 @@ def fetch_mirror(repository) # New tag that point to a blob rugged.references.create('refs/tags/new-tag-on-blob', 'c74175afd117781cbc983664339a0f599b5bb34e') + + # Imitate the behaviour of the FetchRemoteWithStatus call + update_kls = Gitaly::FetchRemoteWithStatusResponse::Update + if Feature.enabled?(:fetch_remote_with_status) + yield update_kls.new(update_type: :FETCHED, summary: '[new tag]', from_ref: 'new-tag', to_ref: 'new-tag') + # FIXME: check that this is how git fetch reports such a beast + yield update_kls.new(update_type: :FETCHED, summary: '[new tag]', from_ref: 'new-tag-on-blob', to_ref: 'new-tag-on-blob') + # TODO: we're missing a test for the tag update case. Add one to ensure coverage + end end def modify_tag(repository, tag_name) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index f66013792020a6..7861db2e8a86d6 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -801,7 +801,8 @@ def empty? # forced - should we use --force flag? # no_tags - should we use --no-tags flag? # prune - should we use --prune flag? - def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: true) + # with_status - should we return a list of updated refs? + def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: true, with_status: false) wrapped_gitaly_errors do gitaly_repository_client.fetch_remote( remote, @@ -809,6 +810,7 @@ def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: tr forced: forced, no_tags: no_tags, prune: prune, + with_status: with_status, timeout: GITLAB_PROJECTS_TIMEOUT ) end diff --git a/lib/gitlab/gitaly_client/repository_service.rb b/lib/gitlab/gitaly_client/repository_service.rb index e41a406ebd3e0b..1a45bdcd80d62c 100644 --- a/lib/gitlab/gitaly_client/repository_service.rb +++ b/lib/gitlab/gitaly_client/repository_service.rb @@ -70,7 +70,7 @@ def info_attributes end.join end - def fetch_remote(remote, ssh_auth:, forced:, no_tags:, timeout:, prune: true) + def fetch_remote(remote, ssh_auth:, forced:, no_tags:, timeout:, prune: true, with_status: false) request = Gitaly::FetchRemoteRequest.new( repository: @gitaly_repo, remote: remote, force: forced, no_tags: no_tags, timeout: timeout, no_prune: !prune @@ -86,7 +86,17 @@ def fetch_remote(remote, ssh_auth:, forced:, no_tags:, timeout:, prune: true) end end - GitalyClient.call(@storage, :repository_service, :fetch_remote, request, timeout: GitalyClient.long_timeout) + if with_status + response = GitalyClient.call(@storage, :repository_service, :fetch_remote_with_status, request, timeout: GitalyClient.long_timeout) + + response.each do |rsp| + rsp.updates.each do |update| + yield update + end + end + else + GitalyClient.call(@storage, :repository_service, :fetch_remote, request, timeout: GitalyClient.long_timeout) + end end def create_repository diff --git a/spec/lib/gitlab/git/repository_spec.rb b/spec/lib/gitlab/git/repository_spec.rb index 626798199a2fe6..f9a91e4533364c 100644 --- a/spec/lib/gitlab/git/repository_spec.rb +++ b/spec/lib/gitlab/git/repository_spec.rb @@ -520,7 +520,8 @@ def submodule_url(path) forced: true, no_tags: true, timeout: described_class::GITLAB_PROJECTS_TIMEOUT, - prune: false + prune: false, + with_status: false } expect(repository.gitaly_repository_client).to receive(:fetch_remote).with('remote-name', expected_opts) -- GitLab