From 8a0d51db39d2b17c6415d62600d1967564d65816 Mon Sep 17 00:00:00 2001
From: Stan Hu
Date: Fri, 17 Oct 2025 22:28:55 -0700
Subject: [PATCH] Ensure Content-MD5 header is included on AWS backups

Unlike the AWS CLI, the fog-aws gem only includes the `Content-MD5`
HTTP header in a PutObject call if multipart uploads are used. However,
this prevents Amazon Object Lock from working because Object Lock
requires some checksum to be available.

https://github.com/fog/fog-aws/issues/750 is an upstream issue to
include this header by default, but to unblock backups we add it
manually.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/335775

Changelog: fixed
---
 lib/backup/remote_storage.rb    |  55 ++++++++++++++---
 spec/lib/backup/manager_spec.rb | 104 ++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 8 deletions(-)

diff --git a/lib/backup/remote_storage.rb b/lib/backup/remote_storage.rb
index cbf3b3322295d8..8724ffc6aaf98d 100644
--- a/lib/backup/remote_storage.rb
+++ b/lib/backup/remote_storage.rb
@@ -38,6 +38,8 @@ def upload(backup_information:)
       end
     end
 
+    private
+
     def remote_target
       if options.remote_directory
         File.join(options.remote_directory, tar_file)
@@ -47,22 +49,57 @@ def remote_target
     end
 
     def create_attributes
-      attrs = {
+      {
         key: remote_target,
-        body: File.open(File.join(backup_path, tar_file)),
-        multipart_chunk_size: Gitlab.config.backup.upload.multipart_chunk_size,
+        body: File.open(backup_filename),
+        multipart_chunk_size: upload_multipart_chunk_size,
         storage_class: Gitlab.config.backup.upload.storage_class
-      }.merge(encryption_attributes)
+      }.merge(provider_attributes)
+    end
 
+    def provider_attributes
+      if aws_provider?
+        default_attributes.merge(aws_attributes)
+      else
+        default_attributes
+      end
+    end
+
+    def default_attributes
       # Google bucket-only policies prevent setting an ACL. In any case, by default,
       # all objects are set to the default ACL, which is project-private:
       # https://cloud.google.com/storage/docs/json_api/v1/defaultObjectAccessControls
-      attrs[:public] = false unless google_provider?
+      return {} if google_provider?
+
+      { public: false }
+    end
+
+    def aws_attributes
+      attrs = aws_encryption_attributes || {}
+
+      return attrs if ::Gitlab::FIPS.enabled?
+
+      size = File.stat(backup_filename).size
+      # Multipart uploads automatically add the MD5 header for each chunk
+      if size < upload_multipart_chunk_size
+        File.open(backup_filename, 'rb') do |file|
+          # This needs to be base64-encoded: https://datatracker.ietf.org/doc/html/rfc1864
+          attrs[:content_md5] = Base64.encode64(OpenSSL::Digest::MD5.digest(file.read)).strip # rubocop:disable Fips/MD5 -- This is not used in FIPS
+        end
+      end
 
       attrs
     end
 
-    def encryption_attributes
+    def backup_filename
+      @backup_filename ||= File.join(backup_path, tar_file)
+    end
+
+    def upload_multipart_chunk_size
+      Gitlab.config.backup.upload.multipart_chunk_size
+    end
+
+    def aws_encryption_attributes
       return object_storage_config.fog_attributes if object_storage_config.aws_server_side_encryption_enabled?
 
       # Use customer-managed keys. Also, this preserves backward-compatibility
@@ -96,10 +133,12 @@ def encryption_attributes
     end
 
     def google_provider?
-      Gitlab.config.backup.upload.connection&.provider&.downcase == 'google'
+      object_storage_config.google?
     end
 
-    private
+    def aws_provider?
+      object_storage_config.aws?
+    end
 
     def connect_to_remote_directory
       connection = ::Fog::Storage.new(object_storage_config.credentials)
diff --git a/spec/lib/backup/manager_spec.rb b/spec/lib/backup/manager_spec.rb
index 76dadebdb12c81..884a517c545089 100644
--- a/spec/lib/backup/manager_spec.rb
+++ b/spec/lib/backup/manager_spec.rb
@@ -647,6 +647,110 @@
           expect(progress.string).to include('Uploading backup archive to remote storage directory ... done (encrypted with aws:kms)')
         end
       end
+
+      context 'with MD5 checksum for small files' do
+        let(:small_backup_file) { Tempfile.new('small_backup', backup_path) }
+        let(:small_backup_filename) { File.basename(small_backup_file.path) }
+
+        before do
+          # Create a small file (1KB) that's smaller than multipart threshold
+          small_backup_file.write('x' * 1024)
+          small_backup_file.close
+
+          allow_next_instance_of(described_class) do |manager|
+            allow(manager).to receive(:tar_file).and_return(small_backup_filename)
+            allow(manager.remote_storage).to receive(:tar_file).and_return(small_backup_filename)
+          end
+
+          stub_backup_setting(
+            upload: {
+              connection: {
+                provider: 'AWS',
+                aws_access_key_id: 'id',
+                aws_secret_access_key: 'secret'
+              },
+              remote_directory: 'directory',
+              multipart_chunk_size: 5 * 1024 * 1024, # 5MB threshold
+              encryption: nil,
+              encryption_key: nil,
+              storage_class: nil
+            }
+          )
+        end
+
+        after do
+          small_backup_file.unlink
+        end
+
+        it 'includes MD5 checksum for files smaller than multipart threshold' do
+          expect_any_instance_of(Fog::Collection).to receive(:create) do |_, attributes|
+            expect(attributes).to have_key(:content_md5)
+            expect(attributes[:content_md5]).to be_a(String)
+            expect(attributes[:content_md5]).to be_present
+          end
+
+          subject.create # rubocop:disable Rails/SaveBang
+        end
+      end
+
+      context 'with FIPS mode enabled' do
+        before do
+          allow(::Gitlab::FIPS).to receive(:enabled?).and_return(true)
+        end
+
+        it 'does not include MD5 checksum when FIPS is enabled' do
+          expect_any_instance_of(Fog::Collection).to receive(:create) do |_, attributes|
+            expect(attributes).not_to have_key(:content_md5)
+          end
+
+          subject.create # rubocop:disable Rails/SaveBang
+        end
+      end
+
+      context 'with large files and multipart uploads' do
+        let(:large_backup_file) { Tempfile.new('large_backup', backup_path) }
+        let(:large_backup_filename) { File.basename(large_backup_file.path) }
+
+        before do
+          # Create a large file that exceeds multipart threshold
+          # We'll mock the file size instead of creating a huge file
+          allow(File).to receive(:stat).with(large_backup_file.path).and_return(
+            instance_double(File::Stat, size: 10 * 1024 * 1024) # 10MB
+          )
+
+          allow_next_instance_of(described_class) do |manager|
+            allow(manager).to receive(:tar_file).and_return(large_backup_filename)
+            allow(manager.remote_storage).to receive(:tar_file).and_return(large_backup_filename)
+          end
+
+          stub_backup_setting(
+            upload: {
+              connection: {
+                provider: 'AWS',
+                aws_access_key_id: 'id',
+                aws_secret_access_key: 'secret'
+              },
+              remote_directory: 'directory',
+              multipart_chunk_size: 5 * 1024 * 1024, # 5MB threshold
+              encryption: nil,
+              encryption_key: nil,
+              storage_class: nil
+            }
+          )
+        end
+
+        after do
+          large_backup_file.unlink
+        end
+
+        it 'does not include MD5 checksum for files larger than multipart threshold' do
+          expect_any_instance_of(Fog::Collection).to receive(:create) do |_, attributes|
+            expect(attributes).not_to have_key(:content_md5)
+          end
+
+          subject.create # rubocop:disable Rails/SaveBang
+        end
+      end
     end
 
     context 'with Google provider' do
-- 
GitLab
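
The value assigned to `content_md5` in `aws_attributes` is the RFC 1864 form of
the digest: the base64 encoding of the raw (binary) MD5, not the hex string.
Below is a minimal standalone Ruby sketch, not part of the patch, showing that
computation and a stdlib equivalent; the input path is only an example.

    #!/usr/bin/env ruby
    # Sketch of the Content-MD5 value computed in Backup::RemoteStorage#aws_attributes.
    require 'base64'
    require 'digest'
    require 'openssl'

    path = ARGV[0] || __FILE__ # example input; any local file works
    data = File.binread(path)

    # What the patch sends: Base64.encode64 appends a trailing newline, hence .strip.
    patched_value = Base64.encode64(OpenSSL::Digest::MD5.digest(data)).strip

    # Equivalent stdlib one-liner, shown only to illustrate the expected format.
    stdlib_value = Digest::MD5.base64digest(data)

    raise 'mismatch' unless patched_value == stdlib_value

    puts patched_value # e.g. "1B2M2Y8AsgTpgAmY7PhCfg==" for empty input

Since an MD5 digest is 16 bytes, the resulting header value is always a
24-character base64 string ending in `==`.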