From 059b08a7b54cb6f2c405709ce0d397752b8bbe1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zaj=C4=85c?= Date: Sat, 12 Jul 2025 03:04:35 +0200 Subject: [PATCH 1/3] Restore and finalize MigrateRemediationsForVulnerabilityFindings This migration was scheduled on the main database, before the sec decomposition happened so we need to finalize it on the main database. Changelog: added --- ...emediations_for_vulnerability_findings.yml | 8 + ...bility_findings_jobs_from_main_database.rb | 22 ++ ..._vulnerability_findings_to_sec_database.rb | 26 +++ ...remediations_for_vulnerability_findings.rb | 23 ++ db/schema_migrations/20250715081215 | 1 + db/schema_migrations/20250715081315 | 1 + db/schema_migrations/20250715081415 | 1 + ...remediations_for_vulnerability_findings.rb | 166 +++++++++++++++ ...iations_for_vulnerability_findings_spec.rb | 196 ++++++++++++++++++ 9 files changed, 444 insertions(+) create mode 100644 db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml create mode 100644 db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb create mode 100644 db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb create mode 100644 db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb create mode 100644 db/schema_migrations/20250715081215 create mode 100644 db/schema_migrations/20250715081315 create mode 100644 db/schema_migrations/20250715081415 create mode 100644 lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb create mode 100644 spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb diff --git a/db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml b/db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml new file mode 100644 index 00000000000000..8593b811672cf9 --- /dev/null +++ b/db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: MigrateRemediationsForVulnerabilityFindings +description: Migrates remediations from the `vulnerability_occurrences.raw_metadata` column to their own table. +feature_category: vulnerability_management +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/109397 +milestone: '15.10' +queued_migration_version: 20230330103104 +finalized_by: 20250715081415 diff --git a/db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb b/db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb new file mode 100644 index 00000000000000..263a837671ca86 --- /dev/null +++ b/db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class DeleteMigrateRemediationsForVulnerabilityFindingsJobsFromMainDatabase < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + + restrict_gitlab_migration gitlab_schema: :gitlab_main + + milestone '18.3' + + def up + delete_batched_background_migration( + "MigrateRemediationsForVulnerabilityFindings", + :vulnerability_occurrences, + :id, + [] + ) + end + + def down + # no-op + end +end diff --git a/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb b/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb new file mode 100644 index 00000000000000..b45784db1afd8c --- /dev/null +++ b/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class SwitchMigrateRemediationsForVulnerabilityFindingsToSecDatabase < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + restrict_gitlab_migration gitlab_schema: :gitlab_sec + + milestone '18.3' + + def up + migration(:gitlab_main)&.update(gitlab_schema: "gitlab_sec") + end + + def down + migration(:gitlab_sec)&.update(gitlab_schema: "gitlab_main") + end + + def migration(schema) + Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration( + schema, + "MigrateRemediationsForVulnerabilityFindings", + :vulnerability_occurrences, + :id, + [] + ) + end +end diff --git a/db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb b/db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb new file mode 100644 index 00000000000000..a990ce0f49cf5e --- /dev/null +++ b/db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class FinalizeMigrateRemediationsForVulnerabilityFindings < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + + restrict_gitlab_migration gitlab_schema: :gitlab_sec + + milestone '18.3' + + def up + ensure_batched_background_migration_is_finished( + job_class_name: 'MigrateRemediationsForVulnerabilityFindings', + table_name: :vulnerability_occurrences, + column_name: :id, + job_arguments: [], + finalize: true + ) + end + + def down + # no-op + end +end diff --git a/db/schema_migrations/20250715081215 b/db/schema_migrations/20250715081215 new file mode 100644 index 00000000000000..0c24e9aae70603 --- /dev/null +++ b/db/schema_migrations/20250715081215 @@ -0,0 +1 @@ +18620878b0c5f7cb61cfa9ca92b75b0ab3226bd526a1c403df83134045bec12d \ No newline at end of file diff --git a/db/schema_migrations/20250715081315 b/db/schema_migrations/20250715081315 new file mode 100644 index 00000000000000..7695b70975c7eb --- /dev/null +++ b/db/schema_migrations/20250715081315 @@ -0,0 +1 @@ +4de28ca137b6dbf548dc974982eea9d44ec293faa9dd7f6991c426f3c6ec77ae \ No newline at end of file diff --git a/db/schema_migrations/20250715081415 b/db/schema_migrations/20250715081415 new file mode 100644 index 00000000000000..7484cd7a0e9f70 --- /dev/null +++ b/db/schema_migrations/20250715081415 @@ -0,0 +1 @@ +f54d39a3be715f1de20d3f140d98680a2cf5253bf517e0d398055b29152d8735 \ No newline at end of file diff --git a/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb b/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb new file mode 100644 index 00000000000000..bde5ff546a426f --- /dev/null +++ b/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb @@ -0,0 +1,166 @@ +# frozen_string_literal: true + +module Vulnerabilities + # The class is mimicking Vulnerabilites::Remediation + class Remediation < SecApplicationRecord + include FileStoreMounter + include ShaAttribute + + self.table_name = 'vulnerability_remediations' + + sha_attribute :checksum + + mount_file_store_uploader AttachmentUploader + + def retrieve_upload(_identifier, paths) + Upload.find_by(model: self, path: paths) + end + end +end + +module Gitlab + module BackgroundMigration + # The class to migrate the remediation data into their own records from the json attribute + class MigrateRemediationsForVulnerabilityFindings < BatchedMigrationJob + feature_category :vulnerability_management + operation_name :migrate_remediations_for_vulnerability_findings + + # The class to encapsulate checksum and file for uploading + class DiffFile < StringIO + def self.checksum(value) + Digest::SHA256.hexdigest(value) + end + + def self.original_filename(checksum) + "#{checksum}.diff" + end + + # This method is used by the `carrierwave` gem + def original_filename + @original_filename ||= self.class.original_filename(checksum) + end + + def checksum + @checksum ||= self.class.checksum(string) + end + end + + # The class is mimicking Vulnerabilites::Finding + class Finding < SecApplicationRecord + self.table_name = 'vulnerability_occurrences' + + # rubocop:disable Database/JsonbSizeLimit -- this mimics the class as it was + validates :details, json_schema: { filename: 'vulnerability_finding_details', draft: 7 }, if: false + # rubocop:enable Database/JsonbSizeLimit + end + + # The class is mimicking Vulnerabilites::FindingRemediation + class FindingRemediation < SecApplicationRecord + self.table_name = 'vulnerability_findings_remediations' + end + + def perform + each_sub_batch do |sub_batch| + migrate_remediations(sub_batch) + end + end + + private + + def migrate_remediations(sub_batch) + sub_batch.each do |finding| + FindingRemediation.transaction do + remediations = append_remediations_diff_checksum(finding.raw_metadata) + + result_ids = create_remediations(finding, remediations) + + create_finding_remediations(finding.id, result_ids) + end + rescue StandardError => e # rubocop:todo BackgroundMigration/AvoidSilentRescueExceptions -- https://gitlab.com/gitlab-org/gitlab/-/issues/431592 + logger.error( + message: e.message, + class: self.class.name, + model_id: finding.id + ) + end + end + + def create_finding_remediations(finding_id, result_ids) + attrs = result_ids.map do |result_id| + build_finding_remediation_attrs(finding_id, result_id) + end + + return unless attrs.present? + + FindingRemediation.upsert_all( + attrs, + returning: false, + unique_by: [:vulnerability_occurrence_id, :vulnerability_remediation_id] + ) + end + + def create_remediations(finding, remediations) + attrs = remediations.map do |remediation| + build_remediation_attrs(finding, remediation) + end + + return [] unless attrs.present? + + ids_checksums = ::Vulnerabilities::Remediation.upsert_all( + attrs, + returning: %w[id checksum], + unique_by: [:project_id, :checksum] + ) + + ids_checksums.each do |id_checksum| + upload_file(id_checksum['id'], id_checksum['checksum'], remediations) + end + + ids_checksums.pluck('id') + end + + def upload_file(id, checksum, remediations) + deserialized_checksum = Gitlab::Database::ShaAttribute.new.deserialize(checksum) + diff = remediations.find { |rem| rem['checksum'] == deserialized_checksum }["diff"] + file = DiffFile.new(diff) + ::Vulnerabilities::Remediation.find_by(id: id).update!(file: file) + end + + def build_remediation_attrs(finding, remediation) + { + project_id: finding.project_id, + summary: remediation['summary'], + file: DiffFile.original_filename(remediation['checksum']), + checksum: remediation['checksum'], + created_at: Time.current, + updated_at: Time.current + } + end + + def build_finding_remediation_attrs(finding_id, remediation_id) + { + vulnerability_occurrence_id: finding_id, + vulnerability_remediation_id: remediation_id, + created_at: Time.current, + updated_at: Time.current + } + end + + def append_remediations_diff_checksum(metadata) + parsed_metadata = Gitlab::Json.parse(metadata) + + return [] unless parsed_metadata['remediations'] + + parsed_metadata['remediations'].filter_map do |remediation| + next unless remediation && remediation['diff'].present? + + remediation.merge('checksum' => DiffFile.checksum(remediation['diff'])) + end.compact.uniq + end + + def logger + @logger ||= ::Gitlab::AppLogger + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb b/spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb new file mode 100644 index 00000000000000..761b627405a66c --- /dev/null +++ b/spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb @@ -0,0 +1,196 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::MigrateRemediationsForVulnerabilityFindings, feature_category: :vulnerability_management do + let(:vulnerability_occurrences) { table(:vulnerability_occurrences, database: :sec) } + let(:vulnerability_findings_remediations) { table(:vulnerability_findings_remediations, database: :sec) } + let(:vulnerability_remediations) { table(:vulnerability_remediations, database: :sec) } + let(:vulnerability_scanners) { table(:vulnerability_scanners, database: :sec) } + let(:remediation_hash) { { summary: 'summary', diff: "ZGlmZiAtLWdp" } } + let(:organization1) { table(:organizations).create!(name: 'test', path: 'test') } + let(:namespace1) do + table(:namespaces).create!(name: 'namespace 1', path: 'namespace1', + organization_id: organization1.id) + end + + let(:project1) do + table(:projects).create!(namespace_id: namespace1.id, project_namespace_id: namespace1.id, + organization_id: organization1.id) + end + + let(:user) do + table(:users).create!(email: 'test1@example.com', projects_limit: 5, organization_id: organization1.id) + end + + let(:scanner1) do + vulnerability_scanners.create!(project_id: project1.id, external_id: 'test 1', name: 'test scanner 1') + end + + let(:stating_id) { vulnerability_occurrences.pluck(:id).min } + let(:end_id) { vulnerability_occurrences.pluck(:id).max } + + let(:migration) do + described_class.new( + start_id: stating_id, + end_id: end_id, + batch_table: :vulnerability_occurrences, + batch_column: :id, + sub_batch_size: 2, + pause_ms: 2, + connection: SecApplicationRecord.connection + ) + end + + subject(:perform_migration) { migration.perform } + + context 'without the presence of remediation key' do + before do + create_finding!(project1.id, scanner1.id, { other_keys: 'test' }) + end + + it 'does not create any remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + end + end + + context 'with remediation equals to an array of nil element' do + before do + create_finding!(project1.id, scanner1.id, { remediations: [nil] }) + end + + it 'does not create any remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + end + end + + context 'with remediation with empty string as the diff key' do + let!(:finding) do + create_finding!(project1.id, scanner1.id, { remediations: [{ summary: 'summary', diff: '' }] }) + end + + it 'does not create any remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + end + end + + context 'with remediation equals to an array of duplicated elements' do + let!(:finding) do + create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash, remediation_hash] }) + end + + it 'creates new remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.to change { vulnerability_remediations.count }.by(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding.id).length).to eq(1) + end + end + + context 'with existing remediations within raw_metadata' do + let!(:finding1) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + let!(:finding2) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + + it 'creates new remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.to change { vulnerability_remediations.count }.by(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding1.id).length).to eq(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding2.id).length).to eq(1) + end + + context 'when create throws exception other than ActiveRecord::RecordNotUnique' do + before do + allow(migration).to receive(:create_finding_remediations).and_raise(StandardError) + end + + it 'rolls back all related transactions' do + expect(Gitlab::AppLogger).to receive(:error).with({ + class: described_class.name, message: StandardError.to_s, model_id: finding1.id + }) + expect(Gitlab::AppLogger).to receive(:error).with({ + class: described_class.name, message: StandardError.to_s, model_id: finding2.id + }) + expect { perform_migration }.not_to change { vulnerability_remediations.count } + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding1.id).length).to eq(0) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding2.id).length).to eq(0) + end + end + end + + context 'with existing remediation records' do + let!(:finding) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + + before do + vulnerability_remediations.create!(project_id: project1.id, summary: remediation_hash[:summary], + checksum: checksum(remediation_hash[:diff]), file: Tempfile.new.path) + end + + it 'does not create new remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding.id).length).to eq(1) + end + end + + context 'with same raw_metadata for different projects' do + let(:namespace2) do + table(:namespaces).create!(name: 'namespace 2', path: 'namespace2', organization_id: organization1.id) + end + + let(:project2) do + table(:projects).create!(namespace_id: namespace2.id, project_namespace_id: namespace2.id, + organization_id: organization1.id) + end + + let(:scanner2) do + table(:vulnerability_scanners, database: :sec).create!(project_id: project2.id, external_id: 'test 2', + name: 'test scanner 2') + end + + let!(:finding1) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + let!(:finding2) { create_finding!(project2.id, scanner2.id, { remediations: [remediation_hash] }) } + + it 'creates new remediation for each project' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.to change { vulnerability_remediations.count }.by(2) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding1.id).length).to eq(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding2.id).length).to eq(1) + end + end + + private + + def create_finding!(project_id, scanner_id, raw_metadata) + identifier = table(:vulnerability_identifiers, database: :sec).create!(project_id: project_id, + external_type: 'uuid-v5', external_id: 'uuid-v5', + fingerprint: OpenSSL::Digest.hexdigest('SHA256', SecureRandom.uuid), name: 'Identifier for UUIDv5 2 2') + + finding = table(:vulnerability_occurrences, database: :sec).create!( + project_id: project_id, scanner_id: scanner_id, + primary_identifier_id: identifier.id, name: 'test', severity: 4, report_type: 0, + uuid: SecureRandom.uuid, location: { "image" => "alpine:3.4" }, + location_fingerprint: 'test', metadata_version: 'test', + raw_metadata: raw_metadata.to_json) + + vulnerability = table(:vulnerabilities, database: :sec).create!(project_id: project_id, author_id: user.id, + title: 'test', severity: 4, report_type: 0, finding_id: finding.id) + + finding.update!(vulnerability_id: vulnerability.id) + + finding + end + + def checksum(value) + sha = Digest::SHA256.hexdigest(value) + Gitlab::Database::ShaAttribute.new.serialize(sha) + end +end -- GitLab From 0969a85852861a82b8247b51d9a8032eb9e0b74d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zaj=C4=85c?= Date: Fri, 25 Jul 2025 16:49:45 +0200 Subject: [PATCH 2/3] Add uploads_sharding_key method --- .../migrate_remediations_for_vulnerability_findings.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb b/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb index bde5ff546a426f..88bb59fc29b2c5 100644 --- a/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb +++ b/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb @@ -15,6 +15,10 @@ class Remediation < SecApplicationRecord def retrieve_upload(_identifier, paths) Upload.find_by(model: self, path: paths) end + + def uploads_sharding_key + { project_id: project_id } + end end end -- GitLab From 59fcf187f39a03968f0767fb6a0ef06c8c35dcbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zaj=C4=85c?= Date: Mon, 28 Jul 2025 14:29:42 +0200 Subject: [PATCH 3/3] Do not use application code --- ..._vulnerability_findings_to_sec_database.rb | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb b/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb index b45784db1afd8c..6f1f3c5c47f791 100644 --- a/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb +++ b/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb @@ -7,20 +7,20 @@ class SwitchMigrateRemediationsForVulnerabilityFindingsToSecDatabase < Gitlab::D milestone '18.3' def up - migration(:gitlab_main)&.update(gitlab_schema: "gitlab_sec") + execute <<~SQL + UPDATE batched_background_migrations + SET gitlab_schema = 'gitlab_sec' + WHERE job_class_name = 'MigrateRemediationsForVulnerabilityFindings' + AND table_name = 'vulnerability_occurrences'; + SQL end def down - migration(:gitlab_sec)&.update(gitlab_schema: "gitlab_main") - end - - def migration(schema) - Gitlab::Database::BackgroundMigration::BatchedMigration.find_for_configuration( - schema, - "MigrateRemediationsForVulnerabilityFindings", - :vulnerability_occurrences, - :id, - [] - ) + execute <<~SQL + UPDATE batched_background_migrations + SET gitlab_schema = 'gitlab_main' + WHERE job_class_name = 'MigrateRemediationsForVulnerabilityFindings' + AND table_name = 'vulnerability_occurrences'; + SQL end end -- GitLab