From c48120c16e0f04a3bf3e86f1351228045c474209 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Wed, 1 Oct 2025 13:49:12 +0200 Subject: [PATCH 01/13] Start returning send_url for digest file when cache is not found --- .../maven/handle_file_request_service.rb | 45 +++++++++++++------ .../maven/handle_file_request_service_spec.rb | 22 ++++++--- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb index 5d9e54d5bd4513..5dac16c7fae634 100644 --- a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb @@ -12,10 +12,6 @@ class HandleFileRequestService < ::VirtualRegistries::BaseService ERRORS = BASE_ERRORS.merge( unauthorized: ServiceResponse.error(message: 'Unauthorized', reason: :unauthorized), no_upstreams: ServiceResponse.error(message: 'No upstreams set', reason: :no_upstreams), - digest_not_found: ServiceResponse.error( - message: 'File of the requested digest not found in cache entries', - reason: :digest_not_found_in_cache_entries - ), fips_unsupported_md5: ServiceResponse.error( message: 'MD5 digest is not supported when FIPS is enabled', reason: :fips_unsupported_md5 @@ -36,7 +32,7 @@ def execute elsif cache_response_still_valid? download_cache_entry else - check_registry_upstreams + build_workhorse_upload_url_response end rescue *::Gitlab::HTTP::HTTP_ERRORS @@ -71,15 +67,25 @@ def cache_entry end strong_memoize_attr :cache_entry - def check_registry_upstreams - service = ::VirtualRegistries::CheckUpstreamsService.new( + def check_registry_upstreams_response + param_path = relative_path[1..] # file extension without the leading slash(/) + ::VirtualRegistries::CheckUpstreamsService.new( registry: registry, - params: { path: path } - ) - response = service.execute - return response unless response.success? + params: { path: param_path } + ).execute + end + strong_memoize_attr :check_registry_upstreams_response - workhorse_upload_url_response(upstream: response.payload[:upstream]) + def build_workhorse_upload_url_response + return check_registry_upstreams_response unless check_registry_upstreams_response.success? + + workhorse_upload_url_response(upstream: check_registry_upstreams_response.payload[:upstream]) + end + + def build_workhorse_send_url_response + return check_registry_upstreams_response unless check_registry_upstreams_response.success? + + workhorse_send_url_response(upstream: check_registry_upstreams_response.payload[:upstream]) end def head_upstream(upstream:) @@ -95,11 +101,11 @@ def head_upstream(upstream:) end def download_cache_entry_digest - return ERRORS[:digest_not_found] unless cache_entry - digest_format = File.extname(path)[1..] # file extension without the leading dot return ERRORS[:fips_unsupported_md5] if digest_format == 'md5' && Gitlab::FIPS.enabled? + return build_workhorse_send_url_response unless cache_entry + create_event(from_upstream: false) cache_entry.bump_downloads_count @@ -175,6 +181,17 @@ def workhorse_upload_url_response(upstream:) ) end + def workhorse_send_url_response(upstream:) + create_event(from_upstream: true) + + ServiceResponse.success( + payload: { + action: :workhorse_send_url, + action_params: { url: upstream.url_for(path) } + } + ) + end + def create_event(from_upstream:) args = { namespace: registry.group, diff --git a/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb b/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb index 9e023ba228fd29..c0bfb21534a410 100644 --- a/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb +++ b/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb @@ -50,11 +50,13 @@ expect(action_params[:file_md5]).to be_instance_of(String) when :download_digest expect(execute.payload[:action_params]).to eq(digest: expected_digest) + when :workhorse_send_url + expect(execute.payload[:action_params]).to eq(url: upstream_digest_url) end end def event_data_from(action) - if action == :workhorse_upload_url + if action == :workhorse_upload_url || action == :workhorse_send_url event_label = 'from_upstream' metric_key = 'counts.count_total_pull_maven_package_file_through_virtual_registry_from_upstream' else @@ -147,27 +149,37 @@ def event_data_from(action) context 'when accessing the sha1 digest' do let(:path) { "#{super()}.sha1" } + let(:upstream_digest_url) { "#{upstream_resource_url}.sha1" } let(:expected_digest) { cache_entry.file_sha1 } it_behaves_like 'returning a service response success response', action: :download_digest context 'when the cache entry does not exist' do - let(:path) { "#{super()}_not_existing.sha1" } + before do + VirtualRegistries::Packages::Maven::Cache::Entry.delete_all + end - it { is_expected.to eq(described_class::ERRORS[:digest_not_found]) } + it_behaves_like 'returning a service response success response', action: :workhorse_send_url end end context 'when accessing the md5 digest' do let(:path) { "#{super()}.md5" } + let(:upstream_digest_url) { "#{upstream_resource_url}.md5" } let(:expected_digest) { cache_entry.file_md5 } it_behaves_like 'returning a service response success response', action: :download_digest context 'when the cache entry does not exist' do - let(:path) { "#{super()}_not_existing.md5" } + before do + VirtualRegistries::Packages::Maven::Cache::Entry.delete_all + end + + it_behaves_like 'returning a service response success response', action: :workhorse_send_url - it { is_expected.to eq(described_class::ERRORS[:digest_not_found]) } + context 'in FIPS mode', :fips_mode do + it { is_expected.to eq(described_class::ERRORS[:fips_unsupported_md5]) } + end end context 'in FIPS mode', :fips_mode do -- GitLab From aa40f63cc0bde536bbe8a0ba7343c7de4bdfbe98 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Wed, 1 Oct 2025 15:39:28 +0200 Subject: [PATCH 02/13] Return workhorse_send_url response --- .../virtual_registries/packages/endpoint.rb | 46 +++++++++++++-- .../packages/maven/endpoints_spec.rb | 57 ++++++++++++++++++- 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/ee/lib/api/concerns/virtual_registries/packages/endpoint.rb b/ee/lib/api/concerns/virtual_registries/packages/endpoint.rb index 62bb17fc692a0f..a2b496e04b4982 100644 --- a/ee/lib/api/concerns/virtual_registries/packages/endpoint.rb +++ b/ee/lib/api/concerns/virtual_registries/packages/endpoint.rb @@ -19,6 +19,14 @@ module Endpoint X-Checksum-Md5 X-Checksum-Sha1 ].freeze + RESPONSE_STATUSES = { + error: :bad_gateway, + timeout: :gateway_timeout + }.freeze + TIMEOUTS = { + open: 10, + read: 10 + }.freeze WEB_BROWSER_ERROR_MESSAGE = 'This endpoint is not meant to be accessed by a web browser.' UPSTREAM_GID_HEADER = 'X-Gitlab-Virtual-Registry-Upstream-Global-Id' MAX_FILE_SIZE = 5.gigabytes @@ -35,6 +43,8 @@ def send_successful_response_from(service_response:) case action when :workhorse_upload_url workhorse_upload_url(**action_params.slice(:url, :upstream)) + when :workhorse_send_url + workhorse_send_url(**action_params.slice(:url)) when :download_file extra_response_headers = download_file_extra_response_headers(action_params: action_params) .merge(EXTRA_RESPONSE_HEADERS) @@ -70,12 +80,6 @@ def send_error_response_from!(service_response:) end def workhorse_upload_url(url:, upstream:) - allow_localhost = Gitlab.dev_or_test_env? || - Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services? - # rubocop:disable Naming/InclusiveLanguage -- existing setting - allowed_endpoints = ObjectStoreSettings.enabled_endpoint_uris + - Gitlab::CurrentSettings.outbound_local_requests_whitelist - # rubocop:enable Naming/InclusiveLanguage send_workhorse_headers( Gitlab::Workhorse.send_dependency( upstream.headers, @@ -96,6 +100,26 @@ def workhorse_upload_url(url:, upstream:) ) end + def workhorse_send_url(url:) + send_workhorse_headers( + Gitlab::Workhorse.send_url( + url, + headers: {}, + response_headers: EXTRA_RESPONSE_HEADERS, + allow_localhost: allow_localhost, + allow_redirects: true, + timeouts: TIMEOUTS, + response_statuses: RESPONSE_STATUSES, + ssrf_filter: true, + allowed_endpoints: allowed_endpoints, + restrict_forwarded_response_headers: { + enabled: true, + allow_list: ALLOWED_RESPONSE_HEADERS + } + ) + ) + end + def authorized_upload_response(upstream) ::VirtualRegistries::Cache::EntryUploader.workhorse_authorize( has_length: true, @@ -120,6 +144,16 @@ def ok_empty_response env['api.format'] = :binary # to return data as-is body '' end + + def allow_localhost + Gitlab.dev_or_test_env? || Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services? + end + + def allowed_endpoints + # rubocop:disable Naming/InclusiveLanguage -- existing setting + ObjectStoreSettings.enabled_endpoint_uris + Gitlab::CurrentSettings.outbound_local_requests_whitelist + # rubocop:enable Naming/InclusiveLanguage + end end after_validation do diff --git a/ee/spec/requests/api/virtual_registries/packages/maven/endpoints_spec.rb b/ee/spec/requests/api/virtual_registries/packages/maven/endpoints_spec.rb index 944d62ba73a06b..6f52a26812f3a1 100644 --- a/ee/spec/requests/api/virtual_registries/packages/maven/endpoints_spec.rb +++ b/ee/spec/requests/api/virtual_registries/packages/maven/endpoints_spec.rb @@ -54,6 +54,40 @@ get api(url), headers: headers end + shared_examples 'returning the workhorse send_url response' do + it 'returns a workhorse send_url response' do + subject + + expect(response).to have_gitlab_http_status(:ok) + expect(response.headers[Gitlab::Workhorse::SEND_DATA_HEADER]).to start_with('send-url:') + expect(response.headers['Content-Type']).to eq('application/octet-stream') + expect(response.headers['Content-Length'].to_i).to eq(0) + expect(response.body).to eq('') + + send_data_type, send_data = workhorse_send_data + + expected_restrict_forwarded_response_headers = { + 'Enabled' => true, + 'AllowList' => described_class::ALLOWED_RESPONSE_HEADERS + } + expected_resp_headers = described_class::EXTRA_RESPONSE_HEADERS.deep_transform_values do |value| + [value] + end + + expect(send_data_type).to eq('send-url') + expect(send_data['URL']).to be_present + expect(send_data['Header']).to eq({}) + expect(send_data['ResponseHeaders']).to eq(expected_resp_headers) + expect(send_data['AllowLocalhost']).to be_truthy + expect(send_data['AllowRedirects']).to be_truthy + expect(send_data['ResponseHeaderTimeout']).to eq('10s') + expect(send_data['DialTimeout']).to eq('10s') + expect(send_data['ErrorResponseStatus']).to eq(502) + expect(send_data['TimeoutResponseStatus']).to eq(504) + expect(send_data['RestrictForwardedResponseHeaders']).to eq(expected_restrict_forwarded_response_headers) + end + end + shared_examples 'returning the workhorse send_dependency response' do let(:enabled_endpoint_uris) { [URI('192.168.1.1')] } let(:outbound_local_requests_allowlist) { ['127.0.0.1'] } @@ -64,7 +98,7 @@ stub_application_setting(outbound_local_requests_whitelist: outbound_local_requests_allowlist) end - it 'returns a workhorse send_url response' do + it 'returns a workhorse upload_url response' do expect(::VirtualRegistries::Cache::EntryUploader).to receive(:workhorse_authorize).with( a_hash_including( use_final_store_path: true, @@ -164,6 +198,26 @@ expect(response.media_type).to eq('text/plain') expect(response.body).to eq(cache_entry.file_sha1) end + + context 'when cache is not found' do + let(:service_response) do + ServiceResponse.success( + payload: { + action: :workhorse_send_url, + action_params: { url: upstream.url_for(path) } + } + ) + end + + before do + allow(::VirtualRegistries::Packages::Maven::HandleFileRequestService) + .to receive(:new) + .with(registry: registry, current_user: user, params: { path: path }) + .and_return(service_double) + end + + it_behaves_like 'returning the workhorse send_url response' + end end end @@ -173,7 +227,6 @@ :unauthorized | :unauthorized :no_upstreams | :bad_request :file_not_found_on_upstreams | :not_found - :digest_not_found_in_cache_entries | :not_found :upstream_not_available | :bad_request :fips_unsupported_md5 | :bad_request end -- GitLab From d8958150ad00bbd5cfd99e52a6a3d9ab80d14eee Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Wed, 1 Oct 2025 18:18:13 +0200 Subject: [PATCH 03/13] Rename path to request_path --- .../maven/handle_file_request_service.rb | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb index 5dac16c7fae634..352e1dcd009f77 100644 --- a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb @@ -23,7 +23,7 @@ class HandleFileRequestService < ::VirtualRegistries::BaseService ).freeze def execute - return ERRORS[:path_not_present] unless path.present? + return ERRORS[:path_not_present] unless request_path.present? return ERRORS[:unauthorized] unless allowed? return ERRORS[:no_upstreams] if registry.upstreams.empty? @@ -89,7 +89,7 @@ def build_workhorse_send_url_response end def head_upstream(upstream:) - url = upstream.url_for(path) + url = upstream.url_for(request_path) headers = upstream.headers ::Gitlab::HTTP.head( @@ -101,7 +101,7 @@ def head_upstream(upstream:) end def download_cache_entry_digest - digest_format = File.extname(path)[1..] # file extension without the leading dot + digest_format = File.extname(request_path)[1..] # file extension without the leading dot return ERRORS[:fips_unsupported_md5] if digest_format == 'md5' && Gitlab::FIPS.enabled? return build_workhorse_send_url_response unless cache_entry @@ -118,7 +118,7 @@ def download_cache_entry_digest end def digest_request? - File.extname(path).in?(DIGEST_EXTENSIONS) + File.extname(request_path).in?(DIGEST_EXTENSIONS) end strong_memoize_attr :digest_request? @@ -141,18 +141,22 @@ def permissions_cache_key ] end - def path + def request_path params[:path] end - def relative_path + def path if digest_request? - "/#{path.chomp(File.extname(path))}" + request_path.chomp(File.extname(request_path)) else - "/#{path}" + request_path end end + def relative_path + "/#{path}" + end + def download_cache_entry create_event(from_upstream: false) cache_entry.bump_downloads_count @@ -176,7 +180,7 @@ def workhorse_upload_url_response(upstream:) ServiceResponse.success( payload: { action: :workhorse_upload_url, - action_params: { url: upstream.url_for(path), upstream: upstream } + action_params: { url: upstream.url_for(request_path), upstream: upstream } } ) end @@ -187,7 +191,7 @@ def workhorse_send_url_response(upstream:) ServiceResponse.success( payload: { action: :workhorse_send_url, - action_params: { url: upstream.url_for(path) } + action_params: { url: upstream.url_for(request_path) } } ) end -- GitLab From 9e70dbd1ba9785b46f0e9e7d04fbeee0780f2f41 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Thu, 2 Oct 2025 15:04:50 +0200 Subject: [PATCH 04/13] Add VirtualRegistries::Packages::Maven::CreateCacheEntryWorker --- .rubocop_todo/fips/md5.yml | 2 + .rubocop_todo/fips/sha1.yml | 2 + ee/app/workers/all_queues.yml | 10 ++ .../maven/create_cache_entry_worker.rb | 74 ++++++++++++++ .../maven/create_cache_entry_worker_spec.rb | 98 +++++++++++++++++++ 5 files changed, 186 insertions(+) create mode 100644 ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb create mode 100644 ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb diff --git a/.rubocop_todo/fips/md5.yml b/.rubocop_todo/fips/md5.yml index 3549851c8ac24e..d873b12fb4d453 100644 --- a/.rubocop_todo/fips/md5.yml +++ b/.rubocop_todo/fips/md5.yml @@ -5,6 +5,7 @@ Fips/MD5: - 'app/models/concerns/checksummable.rb' - 'app/services/packages/go/create_package_service.rb' - 'app/services/packages/maven/metadata/append_package_file_service.rb' + - 'ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb' - 'app/services/packages/rubygems/create_gemspec_service.rb' - 'ee/app/models/license.rb' - 'lib/tasks/migrate/setup_postgresql.rake' @@ -13,3 +14,4 @@ Fips/MD5: - 'spec/lib/gitlab/ci/trace/archive_spec.rb' - 'spec/lib/gitlab/ci/trace/remote_checksum_spec.rb' - 'spec/models/concerns/checksummable_spec.rb' + - 'ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb' diff --git a/.rubocop_todo/fips/sha1.yml b/.rubocop_todo/fips/sha1.yml index d09143e821616f..3b49c58d2a73f3 100644 --- a/.rubocop_todo/fips/sha1.yml +++ b/.rubocop_todo/fips/sha1.yml @@ -31,6 +31,7 @@ Fips/SHA1: - 'ee/spec/services/security/ingestion/tasks/ingest_identifiers_spec.rb' - 'ee/spec/services/security/override_uuids_service_spec.rb' - 'ee/spec/services/vulnerabilities/manually_create_service_spec.rb' + - 'ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb' - 'lib/extracts_path.rb' - 'lib/gitlab/alert_management/fingerprint.rb' - 'lib/gitlab/background_migration/backfill_note_discussion_id.rb' @@ -88,3 +89,4 @@ Fips/SHA1: - 'spec/support/shared_examples/services/alert_management/alert_processing/alert_recovery_shared_examples.rb' - 'spec/validators/sha_validator_spec.rb' - 'spec/workers/update_head_pipeline_for_merge_request_worker_spec.rb' + - 'ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb' diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index cda2e6e11ecf29..d5ba6e6b62abd6 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -1033,6 +1033,16 @@ :idempotent: true :tags: [] :queue_namespace: :dependency_proxy_blob +- :name: dependency_proxy_blob:virtual_registries_packages_maven_create_cache_entry + :worker_name: VirtualRegistries::Packages::Maven::CreateCacheEntryWorker + :feature_category: :virtual_registry + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] + :queue_namespace: :dependency_proxy_blob - :name: deployment:deployments_approval :worker_name: Deployments::ApprovalWorker :feature_category: :continuous_delivery diff --git a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb new file mode 100644 index 00000000000000..2d30f0e6e957a7 --- /dev/null +++ b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module VirtualRegistries + module Packages + module Maven + class CreateCacheEntryWorker + include ApplicationWorker + + data_consistency :sticky + + queue_namespace :dependency_proxy_blob + feature_category :virtual_registry + urgency :low + + defer_on_database_health_signal :gitlab_main, [:virtual_registries_packages_maven_cache_entries], 5.minutes + deduplicate :until_executed + idempotent! + + ResponseError = Class.new(StandardError) + + def perform(upstream_id, path) + upstream = ::VirtualRegistries::Packages::Maven::Upstream.find_by_id(upstream_id) + + return unless upstream + + upload_file, etag = download_file(upstream, path) + + ::VirtualRegistries::Packages::Maven::Cache::Entries::CreateOrUpdateService.new( + upstream: upstream, + params: { + path: path, + file: upload_file, + etag: etag, + content_type: upload_file.content_type + } + ).execute + rescue ResponseError => e + Gitlab::ErrorTracking.log_exception(e, upstream_id: upstream_id, path: path) + end + + private + + def download_file(upstream, path) + url = upstream.url_for(path) + options = { + stream_body: true, + headers: upstream.headers(path) + } + + Tempfile.create(['virtual_registries_packages_maven_file'], encoding: 'ascii-8bit') do |tmp_file| + response = Gitlab::HTTP.get(url, options) do |fragment| + tmp_file.write(fragment) + end + + raise ResponseError, "Received error status #{response.code}" unless response.success? + + tmp_file.rewind + content = tmp_file.read + tmp_file.rewind + + etag = response.headers['etag'] + file_args = { + sha1: Digest::SHA1.hexdigest(content), + content_type: response.headers['content-type'] + } + file_args[:md5] = Digest::MD5.hexdigest(content) unless Gitlab::FIPS.enabled? + + [UploadedFile.new(tmp_file.path, **file_args), etag] + end + end + end + end + end +end diff --git a/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb b/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb new file mode 100644 index 00000000000000..0daaa1a1f0317d --- /dev/null +++ b/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe VirtualRegistries::Packages::Maven::CreateCacheEntryWorker, feature_category: :virtual_registry do + let_it_be(:upstream) { create(:virtual_registries_packages_maven_upstream) } + + let(:worker) { described_class.new } + let(:path) { 'maven/package-name.pom' } + let(:upstream_resource_url) { upstream.url_for(path) } + let(:etag) { 'W/"51f828b51a27ae904e020f679d8f8ce0"' } + let(:content_type) { 'application/octet-stream' } + let(:mock_response) do + { + status: 200, + body: 'file-content', + headers: { + 'etag' => etag, + 'content-type' => content_type + } + } + end + + subject(:perform) { worker.perform(upstream_id, path) } + + before do + stub_request(:get, upstream_resource_url).with(headers: upstream.headers).and_return(mock_response) + end + + it_behaves_like 'worker with data consistency', described_class, data_consistency: :sticky + it_behaves_like 'an idempotent worker' do + let(:job_args) { [upstream.id, path] } + end + + it 'has an until_executed deduplicate strategy' do + expect(described_class.get_deduplicate_strategy).to eq(:until_executed) + end + + describe '#perform' do + context 'when the upstream is found' do + let(:upstream_id) { upstream.id } + let(:expected_uploaded_file_attrs) do + { + sha1: Digest::SHA1.hexdigest(mock_response[:body]), + md5: Digest::MD5.hexdigest(mock_response[:body]), + content_type: content_type + } + end + + it 'creates cache entry' do + expect(VirtualRegistries::Packages::Maven::Cache::Entries::CreateOrUpdateService).to receive(:new) + .with( + upstream: upstream, + params: { + path: path, + file: be_an(UploadedFile).and(have_attributes(expected_uploaded_file_attrs)), + etag: etag, + content_type: content_type + } + ).and_call_original + + perform + end + + it 'downloads file from upstream' do + perform + + assert_requested(:get, upstream_resource_url) do |req| + expect(req.headers).to include(upstream.headers(path).deep_stringify_keys) + end + end + + context 'when upstream returns error' do + let(:mock_response) { { status: 400 } } + + it 'logs errors' do + expect(Gitlab::ErrorTracking).to receive(:log_exception) + .with( + instance_of(described_class::ResponseError), + upstream_id: upstream.id, path: path + ) + + perform + end + end + end + + context 'when the upstream is not found' do + let(:upstream_id) { non_existing_record_id } + + it 'does not create cache entry' do + expect(VirtualRegistries::Packages::Maven::Cache::Entries::CreateOrUpdateService).not_to receive(:new) + + perform + end + end + end +end -- GitLab From 473690578d23db1a7168e6102b257503642f7f36 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Thu, 2 Oct 2025 15:13:49 +0200 Subject: [PATCH 05/13] Allow create or update entry without current_user --- .../maven/cache/entries/create_or_update_service.rb | 2 ++ .../maven/cache/entries/create_or_update_service_spec.rb | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb b/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb index bcda83d6108433..55610078e85125 100644 --- a/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb @@ -57,6 +57,8 @@ def execute private def allowed? + return true unless current_user + can?(current_user, :read_virtual_registry, upstream) end diff --git a/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb b/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb index b1d8bc5c3ff8dc..ab374fd61e3b72 100644 --- a/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb +++ b/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb @@ -139,10 +139,16 @@ it { is_expected.to eq(described_class::ERRORS[:unauthorized]) } end + context 'with no permission user' do + let(:user) { create(:user) } + + it { is_expected.to eq(described_class::ERRORS[:unauthorized]) } + end + context 'with no user' do let(:user) { nil } - it { is_expected.to eq(described_class::ERRORS[:unauthorized]) } + it_behaves_like 'returning a service response success response' end end end -- GitLab From f19317b99e5ef7a78d44bd327c305a349e8176ed Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Thu, 2 Oct 2025 16:27:39 +0200 Subject: [PATCH 06/13] Queue CreateCacheEntryWorker job --- .../maven/handle_file_request_service.rb | 30 +++++++++++++---- .../maven/handle_file_request_service_spec.rb | 33 +++++++++++++++---- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb index 352e1dcd009f77..8e3764c1ead14b 100644 --- a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb @@ -68,24 +68,30 @@ def cache_entry strong_memoize_attr :cache_entry def check_registry_upstreams_response - param_path = relative_path[1..] # file extension without the leading slash(/) ::VirtualRegistries::CheckUpstreamsService.new( registry: registry, - params: { path: param_path } + params: { path: path } ).execute end strong_memoize_attr :check_registry_upstreams_response + def upstream + return unless check_registry_upstreams_response.success? + + check_registry_upstreams_response.payload[:upstream] + end + strong_memoize_attr :upstream + def build_workhorse_upload_url_response - return check_registry_upstreams_response unless check_registry_upstreams_response.success? + return check_registry_upstreams_response unless upstream - workhorse_upload_url_response(upstream: check_registry_upstreams_response.payload[:upstream]) + workhorse_upload_url_response(upstream: upstream) end def build_workhorse_send_url_response - return check_registry_upstreams_response unless check_registry_upstreams_response.success? + return check_registry_upstreams_response unless upstream - workhorse_send_url_response(upstream: check_registry_upstreams_response.payload[:upstream]) + workhorse_send_url_response(upstream: upstream) end def head_upstream(upstream:) @@ -104,7 +110,11 @@ def download_cache_entry_digest digest_format = File.extname(request_path)[1..] # file extension without the leading dot return ERRORS[:fips_unsupported_md5] if digest_format == 'md5' && Gitlab::FIPS.enabled? - return build_workhorse_send_url_response unless cache_entry + unless cache_entry + response = build_workhorse_send_url_response + create_upstream_file_cache_entry if response.success? + return response + end create_event(from_upstream: false) cache_entry.bump_downloads_count @@ -204,6 +214,12 @@ def create_event(from_upstream:) args[:user] = current_user if current_user.is_a?(User) track_internal_event('pull_maven_package_file_through_virtual_registry', **args) end + + def create_upstream_file_cache_entry + ::VirtualRegistries::Packages::Maven::CreateCacheEntryWorker.perform_async( + upstream.id, path + ) + end end end end diff --git a/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb b/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb index c0bfb21534a410..3c0dfa93f98ed7 100644 --- a/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb +++ b/ee/spec/services/virtual_registries/packages/maven/handle_file_request_service_spec.rb @@ -7,11 +7,12 @@ let_it_be(:project) { create(:project, namespace: registry.group) } let_it_be(:user) { create(:user, owner_of: project) } let_it_be(:path) { 'com/test/package/1.2.3/package-1.2.3.pom' } + let_it_be(:request_path) { path } let_it_be(:upstream) { registry.upstreams.first } - let_it_be(:upstream_resource_url) { upstream.url_for(path) } + let_it_be(:upstream_resource_url) { upstream.url_for(request_path) } let(:etag_returned_by_upstream) { nil } - let(:service) { described_class.new(registry: registry, current_user: user, params: { path: path }) } + let(:service) { described_class.new(registry: registry, current_user: user, params: { path: request_path }) } describe '#execute' do subject(:execute) { service.execute } @@ -77,7 +78,7 @@ def event_data_from(action) :virtual_registries_packages_maven_cache_entry, :upstream_checked, :processing, - relative_path: "/#{path}", + relative_path: "/#{request_path}", upstream: upstream, group: registry.group ) @@ -108,7 +109,7 @@ def event_data_from(action) create(:virtual_registries_packages_maven_cache_entry, :upstream_checked, upstream: upstream, - relative_path: "/#{path}", + relative_path: "/#{request_path}", group: registry.group ) end @@ -148,7 +149,7 @@ def event_data_from(action) end context 'when accessing the sha1 digest' do - let(:path) { "#{super()}.sha1" } + let(:request_path) { "#{path}.sha1" } let(:upstream_digest_url) { "#{upstream_resource_url}.sha1" } let(:expected_digest) { cache_entry.file_sha1 } @@ -157,14 +158,23 @@ def event_data_from(action) context 'when the cache entry does not exist' do before do VirtualRegistries::Packages::Maven::Cache::Entry.delete_all + + stub_external_registry_request(etag: etag_returned_by_upstream) end it_behaves_like 'returning a service response success response', action: :workhorse_send_url + + it 'queues background job to create cache entry' do + expect(VirtualRegistries::Packages::Maven::CreateCacheEntryWorker).to receive(:perform_async) + .with(upstream.id, path) + + execute + end end end context 'when accessing the md5 digest' do - let(:path) { "#{super()}.md5" } + let(:request_path) { "#{path}.md5" } let(:upstream_digest_url) { "#{upstream_resource_url}.md5" } let(:expected_digest) { cache_entry.file_md5 } @@ -173,10 +183,19 @@ def event_data_from(action) context 'when the cache entry does not exist' do before do VirtualRegistries::Packages::Maven::Cache::Entry.delete_all + + stub_external_registry_request(etag: etag_returned_by_upstream) end it_behaves_like 'returning a service response success response', action: :workhorse_send_url + it 'queues background job to create cache entry' do + expect(VirtualRegistries::Packages::Maven::CreateCacheEntryWorker).to receive(:perform_async) + .with(upstream.id, path) + + execute + end + context 'in FIPS mode', :fips_mode do it { is_expected.to eq(described_class::ERRORS[:fips_unsupported_md5]) } end @@ -217,7 +236,7 @@ def event_data_from(action) end context 'with no path' do - let(:path) { nil } + let(:request_path) { nil } it { is_expected.to eq(described_class::ERRORS[:path_not_present]) } end -- GitLab From 2d3f63a40ce013673833f903744b9cf8c0be1d7c Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Fri, 3 Oct 2025 00:44:25 +0200 Subject: [PATCH 07/13] Refactor CreateCacheEntryWorker to support object storage --- .../maven/create_cache_entry_worker.rb | 41 ++++++++----------- .../maven/create_cache_entry_worker_spec.rb | 4 +- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb index 2d30f0e6e957a7..d4d27a844bf299 100644 --- a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb +++ b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb @@ -23,15 +23,15 @@ def perform(upstream_id, path) return unless upstream - upload_file, etag = download_file(upstream, path) + file, etag = download_file(upstream, path) ::VirtualRegistries::Packages::Maven::Cache::Entries::CreateOrUpdateService.new( upstream: upstream, params: { path: path, - file: upload_file, + file: file, etag: etag, - content_type: upload_file.content_type + content_type: file.content_type } ).execute rescue ResponseError => e @@ -42,31 +42,26 @@ def perform(upstream_id, path) def download_file(upstream, path) url = upstream.url_for(path) - options = { - stream_body: true, - headers: upstream.headers(path) - } + response = Gitlab::HTTP.get(url, headers: upstream.headers(path)) - Tempfile.create(['virtual_registries_packages_maven_file'], encoding: 'ascii-8bit') do |tmp_file| - response = Gitlab::HTTP.get(url, options) do |fragment| - tmp_file.write(fragment) - end + raise ResponseError, "Received error status #{response.code}" unless response.success? - raise ResponseError, "Received error status #{response.code}" unless response.success? + content = response.body + args = { + content_type: response.headers["content-type"], + sha1: Digest::SHA1.hexdigest(content) + } + args[:md5] = Digest::MD5.hexdigest(content) unless Gitlab::FIPS.enabled? - tmp_file.rewind - content = tmp_file.read - tmp_file.rewind + # Create a temporary file and use UploadedFile to ensure proper object storage handling + temp_file = Tempfile.new(['virtual_registries_packages_maven_file', File.extname(path)]) + temp_file.binmode + temp_file.write(content) + temp_file.rewind - etag = response.headers['etag'] - file_args = { - sha1: Digest::SHA1.hexdigest(content), - content_type: response.headers['content-type'] - } - file_args[:md5] = Digest::MD5.hexdigest(content) unless Gitlab::FIPS.enabled? + file = UploadedFile.new(temp_file.path, **args) - [UploadedFile.new(tmp_file.path, **file_args), etag] - end + [file, response.headers["etag"]] end end end diff --git a/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb b/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb index 0daaa1a1f0317d..4bb7ba4f590f58 100644 --- a/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb +++ b/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb @@ -39,7 +39,7 @@ describe '#perform' do context 'when the upstream is found' do let(:upstream_id) { upstream.id } - let(:expected_uploaded_file_attrs) do + let(:expected_file_attrs) do { sha1: Digest::SHA1.hexdigest(mock_response[:body]), md5: Digest::MD5.hexdigest(mock_response[:body]), @@ -53,7 +53,7 @@ upstream: upstream, params: { path: path, - file: be_an(UploadedFile).and(have_attributes(expected_uploaded_file_attrs)), + file: be_an(UploadedFile).and(have_attributes(expected_file_attrs)), etag: etag, content_type: content_type } -- GitLab From 0dba74ff3effbaba3a6fbec14bffe81307404452 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Fri, 3 Oct 2025 20:42:43 +0200 Subject: [PATCH 08/13] Set file_store attribute default value dynamically --- .../packages/maven/cache/entry.rb | 2 ++ .../cache/entry_uploader.rb | 7 ++++++ .../packages/maven/cache/entry_spec.rb | 22 +++++++++++++++++++ .../cache/entry_uploader_spec.rb | 6 +++++ 4 files changed, 37 insertions(+) diff --git a/ee/app/models/virtual_registries/packages/maven/cache/entry.rb b/ee/app/models/virtual_registries/packages/maven/cache/entry.rb index f6b8659965eab2..c291e88cf9949d 100644 --- a/ee/app/models/virtual_registries/packages/maven/cache/entry.rb +++ b/ee/app/models/virtual_registries/packages/maven/cache/entry.rb @@ -49,6 +49,8 @@ class Entry < ApplicationRecord mount_file_store_uploader ::VirtualRegistries::Cache::EntryUploader + attribute :file_store, default: -> { VirtualRegistries::Cache::EntryUploader.default_store } + before_validation :set_object_storage_key, if: -> { object_storage_key.blank? && upstream } attr_readonly :object_storage_key diff --git a/ee/app/uploaders/virtual_registries/cache/entry_uploader.rb b/ee/app/uploaders/virtual_registries/cache/entry_uploader.rb index a2c5ca9d189976..f7bc23d02191ef 100644 --- a/ee/app/uploaders/virtual_registries/cache/entry_uploader.rb +++ b/ee/app/uploaders/virtual_registries/cache/entry_uploader.rb @@ -8,6 +8,13 @@ class EntryUploader < GitlabUploader storage_location :dependency_proxy + class << self + # A default store allows to initialize the Entry model with the right value + def default_store + object_store_enabled? ? ObjectStorage::Store::REMOTE : ObjectStorage::Store::LOCAL + end + end + alias_method :upload, :model before :cache, :set_content_type diff --git a/ee/spec/models/virtual_registries/packages/maven/cache/entry_spec.rb b/ee/spec/models/virtual_registries/packages/maven/cache/entry_spec.rb index c05fa0d72d5867..4ba0fd6b1d6f36 100644 --- a/ee/spec/models/virtual_registries/packages/maven/cache/entry_spec.rb +++ b/ee/spec/models/virtual_registries/packages/maven/cache/entry_spec.rb @@ -228,6 +228,28 @@ end end + describe 'file_store attribute' do + context 'when object storage is enabled' do + it 'defaults to remote store' do + expect(VirtualRegistries::Cache::EntryUploader).to receive(:object_store_enabled?) + .and_return(true) + + entry = described_class.new + expect(entry.file_store).to eq(ObjectStorage::Store::REMOTE) + end + end + + context 'when object storage is disabled' do + it 'defaults to local store' do + expect(VirtualRegistries::Cache::EntryUploader).to receive(:object_store_enabled?) + .and_return(false) + + entry = described_class.new + expect(entry.file_store).to eq(ObjectStorage::Store::LOCAL) + end + end + end + describe '#filename' do let(:cache_entry) { build(:virtual_registries_packages_maven_cache_entry) } diff --git a/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb b/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb index 069520d028f7f1..cf2e5081cc6136 100644 --- a/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb +++ b/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb @@ -20,6 +20,12 @@ it { is_expected.to include_module(::ObjectStorage::Concern) } end + describe '.default_store' do + it 'returns remote store' do + expect(described_class.default_store).to eq(ObjectStorage::Store::REMOTE) + end + end + describe '#store_dir' do subject { uploader.store_dir } -- GitLab From 21e858538e2174116918477125cdd01683628cb2 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Fri, 3 Oct 2025 22:03:53 +0200 Subject: [PATCH 09/13] Add skip_permission_check instead of using current_user --- .../maven/cache/entries/create_or_update_service.rb | 6 +++++- .../packages/maven/create_cache_entry_worker.rb | 3 ++- .../maven/cache/entries/create_or_update_service_spec.rb | 4 ++-- .../packages/maven/create_cache_entry_worker_spec.rb | 3 ++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb b/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb index 55610078e85125..93b66b898cd960 100644 --- a/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/cache/entries/create_or_update_service.rb @@ -57,7 +57,7 @@ def execute private def allowed? - return true unless current_user + return true if skip_permission_check can?(current_user, :read_virtual_registry, upstream) end @@ -81,6 +81,10 @@ def etag def content_type params[:content_type] end + + def skip_permission_check + params[:skip_permission_check] || false + end end end end diff --git a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb index d4d27a844bf299..39db7ab963a6f3 100644 --- a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb +++ b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb @@ -31,7 +31,8 @@ def perform(upstream_id, path) path: path, file: file, etag: etag, - content_type: file.content_type + content_type: file.content_type, + skip_permission_check: true } ).execute rescue ResponseError => e diff --git a/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb b/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb index ab374fd61e3b72..492df0baa0f528 100644 --- a/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb +++ b/ee/spec/services/virtual_registries/packages/maven/cache/entries/create_or_update_service_spec.rb @@ -145,8 +145,8 @@ it { is_expected.to eq(described_class::ERRORS[:unauthorized]) } end - context 'with no user' do - let(:user) { nil } + context 'with skip_permission_check eq true' do + let(:params) { super().merge({ skip_permission_check: true }) } it_behaves_like 'returning a service response success response' end diff --git a/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb b/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb index 4bb7ba4f590f58..7fc8ae37e3187e 100644 --- a/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb +++ b/ee/spec/workers/virtual_registries/packages/maven/create_cache_entry_worker_spec.rb @@ -55,7 +55,8 @@ path: path, file: be_an(UploadedFile).and(have_attributes(expected_file_attrs)), etag: etag, - content_type: content_type + content_type: content_type, + skip_permission_check: true } ).and_call_original -- GitLab From db20641ed4d7ab870c8e8cb7876302598881a7f9 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Fri, 3 Oct 2025 22:35:07 +0200 Subject: [PATCH 10/13] Fix uploader test --- .../cache/entry_uploader_spec.rb | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb b/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb index cf2e5081cc6136..8dadb47df88b40 100644 --- a/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb +++ b/ee/spec/uploaders/virtual_registries/cache/entry_uploader_spec.rb @@ -21,8 +21,24 @@ end describe '.default_store' do - it 'returns remote store' do - expect(described_class.default_store).to eq(ObjectStorage::Store::REMOTE) + context 'when object_store_enabled? is true' do + before do + allow(described_class).to receive(:object_store_enabled?).and_return(true) + end + + it 'returns remote store' do + expect(described_class.default_store).to eq(ObjectStorage::Store::REMOTE) + end + end + + context 'when object_store_enabled? is false' do + before do + allow(described_class).to receive(:object_store_enabled?).and_return(false) + end + + it 'returns local store' do + expect(described_class.default_store).to eq(ObjectStorage::Store::LOCAL) + end end end -- GitLab From 15ba835a776b11df10680d6ed39be6f0033b4994 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Tue, 21 Oct 2025 18:17:59 +0200 Subject: [PATCH 11/13] Adjust methods order --- .../maven/handle_file_request_service.rb | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb index 8e3764c1ead14b..4cc181c503d4c6 100644 --- a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb @@ -67,6 +67,18 @@ def cache_entry end strong_memoize_attr :cache_entry + def build_workhorse_upload_url_response + return check_registry_upstreams_response unless upstream + + workhorse_upload_url_response(upstream: upstream) + end + + def build_workhorse_send_url_response + return check_registry_upstreams_response unless upstream + + workhorse_send_url_response(upstream: upstream) + end + def check_registry_upstreams_response ::VirtualRegistries::CheckUpstreamsService.new( registry: registry, @@ -82,18 +94,6 @@ def upstream end strong_memoize_attr :upstream - def build_workhorse_upload_url_response - return check_registry_upstreams_response unless upstream - - workhorse_upload_url_response(upstream: upstream) - end - - def build_workhorse_send_url_response - return check_registry_upstreams_response unless upstream - - workhorse_send_url_response(upstream: upstream) - end - def head_upstream(upstream:) url = upstream.url_for(request_path) headers = upstream.headers -- GitLab From 14aca0cb72820120a6e3668d03f53a5fc46cb150 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Tue, 21 Oct 2025 18:19:00 +0200 Subject: [PATCH 12/13] Remove unnecessary strong_memoize_attr :upstream --- .../packages/maven/handle_file_request_service.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb index 4cc181c503d4c6..2ebc0af47a89dc 100644 --- a/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb +++ b/ee/app/services/virtual_registries/packages/maven/handle_file_request_service.rb @@ -92,7 +92,6 @@ def upstream check_registry_upstreams_response.payload[:upstream] end - strong_memoize_attr :upstream def head_upstream(upstream:) url = upstream.url_for(request_path) -- GitLab From abb614c216b91da025d7f47fcc38ac0523494e67 Mon Sep 17 00:00:00 2001 From: Sylvia Shen Date: Wed, 22 Oct 2025 18:07:26 +0200 Subject: [PATCH 13/13] Use Tempfile block and stream_body to download the file --- .../maven/create_cache_entry_worker.rb | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb index 39db7ab963a6f3..c8a63ce2eeccb6 100644 --- a/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb +++ b/ee/app/workers/virtual_registries/packages/maven/create_cache_entry_worker.rb @@ -23,46 +23,68 @@ def perform(upstream_id, path) return unless upstream - file, etag = download_file(upstream, path) - - ::VirtualRegistries::Packages::Maven::Cache::Entries::CreateOrUpdateService.new( - upstream: upstream, - params: { - path: path, - file: file, - etag: etag, - content_type: file.content_type, - skip_permission_check: true - } - ).execute + Tempfile.create('virtual_registries_packages_maven_file') do |temp_file| + temp_file.binmode + + file, etag = download_file(upstream, path, temp_file) + + ::VirtualRegistries::Packages::Maven::Cache::Entries::CreateOrUpdateService.new( + upstream: upstream, + params: { + path: path, + file: file, + etag: etag, + content_type: file.content_type, + skip_permission_check: true + } + ).execute + end rescue ResponseError => e Gitlab::ErrorTracking.log_exception(e, upstream_id: upstream_id, path: path) end private - def download_file(upstream, path) + def download_file(upstream, path, temp_file) url = upstream.url_for(path) - response = Gitlab::HTTP.get(url, headers: upstream.headers(path)) - raise ResponseError, "Received error status #{response.code}" unless response.success? + md5 = Digest::MD5.new unless Gitlab::FIPS.enabled? + sha1 = Digest::SHA1.new + response_header = nil + content_type = nil + etag = nil + + Gitlab::HTTP.get(url, stream_body: true, headers: upstream.headers(path)) do |fragment| + raise ResponseError, "Received error status #{fragment.code}" unless succeeded_response?(fragment) + + if response_header.nil? + response_header = Gitlab::HTTP::Response::Headers.new(fragment.http_response.to_hash) + content_type = response_header['content-type'] + etag = response_header['etag'] + end + + temp_file.write(fragment) + temp_file.flush + + sha1.update(fragment) + md5&.update(fragment) + end - content = response.body args = { - content_type: response.headers["content-type"], - sha1: Digest::SHA1.hexdigest(content) - } - args[:md5] = Digest::MD5.hexdigest(content) unless Gitlab::FIPS.enabled? - - # Create a temporary file and use UploadedFile to ensure proper object storage handling - temp_file = Tempfile.new(['virtual_registries_packages_maven_file', File.extname(path)]) - temp_file.binmode - temp_file.write(content) + content_type: content_type, + sha1: sha1.to_s, + md5: md5.to_s + }.compact + temp_file.rewind file = UploadedFile.new(temp_file.path, **args) - [file, response.headers["etag"]] + [file, etag] + end + + def succeeded_response?(fragment) + fragment.http_response.is_a?(Net::HTTPSuccess) end end end -- GitLab