diff --git a/db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml b/db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml new file mode 100644 index 0000000000000000000000000000000000000000..8593b811672cf92f0db6e100dc9ac563c77f5e8a --- /dev/null +++ b/db/docs/batched_background_migrations/migrate_remediations_for_vulnerability_findings.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: MigrateRemediationsForVulnerabilityFindings +description: Migrates remediations from the `vulnerability_occurrences.raw_metadata` column to their own table. +feature_category: vulnerability_management +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/109397 +milestone: '15.10' +queued_migration_version: 20230330103104 +finalized_by: 20250715081415 diff --git a/db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb b/db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb new file mode 100644 index 0000000000000000000000000000000000000000..263a837671ca8614e8b3f3c1d97a472ba3b38fa7 --- /dev/null +++ b/db/post_migrate/20250715081215_delete_migrate_remediations_for_vulnerability_findings_jobs_from_main_database.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class DeleteMigrateRemediationsForVulnerabilityFindingsJobsFromMainDatabase < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + + restrict_gitlab_migration gitlab_schema: :gitlab_main + + milestone '18.3' + + def up + delete_batched_background_migration( + "MigrateRemediationsForVulnerabilityFindings", + :vulnerability_occurrences, + :id, + [] + ) + end + + def down + # no-op + end +end diff --git a/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb b/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb new file mode 100644 index 0000000000000000000000000000000000000000..6f1f3c5c47f791fc4f388e7ffe0895b78bbaf773 --- /dev/null +++ b/db/post_migrate/20250715081315_switch_migrate_remediations_for_vulnerability_findings_to_sec_database.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class SwitchMigrateRemediationsForVulnerabilityFindingsToSecDatabase < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + restrict_gitlab_migration gitlab_schema: :gitlab_sec + + milestone '18.3' + + def up + execute <<~SQL + UPDATE batched_background_migrations + SET gitlab_schema = 'gitlab_sec' + WHERE job_class_name = 'MigrateRemediationsForVulnerabilityFindings' + AND table_name = 'vulnerability_occurrences'; + SQL + end + + def down + execute <<~SQL + UPDATE batched_background_migrations + SET gitlab_schema = 'gitlab_main' + WHERE job_class_name = 'MigrateRemediationsForVulnerabilityFindings' + AND table_name = 'vulnerability_occurrences'; + SQL + end +end diff --git a/db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb b/db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb new file mode 100644 index 0000000000000000000000000000000000000000..a990ce0f49cf5e11bc17c4e2c4140d8d598f957e --- /dev/null +++ b/db/post_migrate/20250715081415_finalize_migrate_remediations_for_vulnerability_findings.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class FinalizeMigrateRemediationsForVulnerabilityFindings < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! + + restrict_gitlab_migration gitlab_schema: :gitlab_sec + + milestone '18.3' + + def up + ensure_batched_background_migration_is_finished( + job_class_name: 'MigrateRemediationsForVulnerabilityFindings', + table_name: :vulnerability_occurrences, + column_name: :id, + job_arguments: [], + finalize: true + ) + end + + def down + # no-op + end +end diff --git a/db/schema_migrations/20250715081215 b/db/schema_migrations/20250715081215 new file mode 100644 index 0000000000000000000000000000000000000000..0c24e9aae7060308799be363f1752dbd210b54d6 --- /dev/null +++ b/db/schema_migrations/20250715081215 @@ -0,0 +1 @@ +18620878b0c5f7cb61cfa9ca92b75b0ab3226bd526a1c403df83134045bec12d \ No newline at end of file diff --git a/db/schema_migrations/20250715081315 b/db/schema_migrations/20250715081315 new file mode 100644 index 0000000000000000000000000000000000000000..7695b70975c7ebc529af3a5332753acd023f5e5c --- /dev/null +++ b/db/schema_migrations/20250715081315 @@ -0,0 +1 @@ +4de28ca137b6dbf548dc974982eea9d44ec293faa9dd7f6991c426f3c6ec77ae \ No newline at end of file diff --git a/db/schema_migrations/20250715081415 b/db/schema_migrations/20250715081415 new file mode 100644 index 0000000000000000000000000000000000000000..7484cd7a0e9f7050adeb0b5fd74c5ee8a2258421 --- /dev/null +++ b/db/schema_migrations/20250715081415 @@ -0,0 +1 @@ +f54d39a3be715f1de20d3f140d98680a2cf5253bf517e0d398055b29152d8735 \ No newline at end of file diff --git a/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb b/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb new file mode 100644 index 0000000000000000000000000000000000000000..88bb59fc29b2c5595c00eac26e928f2f80911a3e --- /dev/null +++ b/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings.rb @@ -0,0 +1,170 @@ +# frozen_string_literal: true + +module Vulnerabilities + # The class is mimicking Vulnerabilites::Remediation + class Remediation < SecApplicationRecord + include FileStoreMounter + include ShaAttribute + + self.table_name = 'vulnerability_remediations' + + sha_attribute :checksum + + mount_file_store_uploader AttachmentUploader + + def retrieve_upload(_identifier, paths) + Upload.find_by(model: self, path: paths) + end + + def uploads_sharding_key + { project_id: project_id } + end + end +end + +module Gitlab + module BackgroundMigration + # The class to migrate the remediation data into their own records from the json attribute + class MigrateRemediationsForVulnerabilityFindings < BatchedMigrationJob + feature_category :vulnerability_management + operation_name :migrate_remediations_for_vulnerability_findings + + # The class to encapsulate checksum and file for uploading + class DiffFile < StringIO + def self.checksum(value) + Digest::SHA256.hexdigest(value) + end + + def self.original_filename(checksum) + "#{checksum}.diff" + end + + # This method is used by the `carrierwave` gem + def original_filename + @original_filename ||= self.class.original_filename(checksum) + end + + def checksum + @checksum ||= self.class.checksum(string) + end + end + + # The class is mimicking Vulnerabilites::Finding + class Finding < SecApplicationRecord + self.table_name = 'vulnerability_occurrences' + + # rubocop:disable Database/JsonbSizeLimit -- this mimics the class as it was + validates :details, json_schema: { filename: 'vulnerability_finding_details', draft: 7 }, if: false + # rubocop:enable Database/JsonbSizeLimit + end + + # The class is mimicking Vulnerabilites::FindingRemediation + class FindingRemediation < SecApplicationRecord + self.table_name = 'vulnerability_findings_remediations' + end + + def perform + each_sub_batch do |sub_batch| + migrate_remediations(sub_batch) + end + end + + private + + def migrate_remediations(sub_batch) + sub_batch.each do |finding| + FindingRemediation.transaction do + remediations = append_remediations_diff_checksum(finding.raw_metadata) + + result_ids = create_remediations(finding, remediations) + + create_finding_remediations(finding.id, result_ids) + end + rescue StandardError => e # rubocop:todo BackgroundMigration/AvoidSilentRescueExceptions -- https://gitlab.com/gitlab-org/gitlab/-/issues/431592 + logger.error( + message: e.message, + class: self.class.name, + model_id: finding.id + ) + end + end + + def create_finding_remediations(finding_id, result_ids) + attrs = result_ids.map do |result_id| + build_finding_remediation_attrs(finding_id, result_id) + end + + return unless attrs.present? + + FindingRemediation.upsert_all( + attrs, + returning: false, + unique_by: [:vulnerability_occurrence_id, :vulnerability_remediation_id] + ) + end + + def create_remediations(finding, remediations) + attrs = remediations.map do |remediation| + build_remediation_attrs(finding, remediation) + end + + return [] unless attrs.present? + + ids_checksums = ::Vulnerabilities::Remediation.upsert_all( + attrs, + returning: %w[id checksum], + unique_by: [:project_id, :checksum] + ) + + ids_checksums.each do |id_checksum| + upload_file(id_checksum['id'], id_checksum['checksum'], remediations) + end + + ids_checksums.pluck('id') + end + + def upload_file(id, checksum, remediations) + deserialized_checksum = Gitlab::Database::ShaAttribute.new.deserialize(checksum) + diff = remediations.find { |rem| rem['checksum'] == deserialized_checksum }["diff"] + file = DiffFile.new(diff) + ::Vulnerabilities::Remediation.find_by(id: id).update!(file: file) + end + + def build_remediation_attrs(finding, remediation) + { + project_id: finding.project_id, + summary: remediation['summary'], + file: DiffFile.original_filename(remediation['checksum']), + checksum: remediation['checksum'], + created_at: Time.current, + updated_at: Time.current + } + end + + def build_finding_remediation_attrs(finding_id, remediation_id) + { + vulnerability_occurrence_id: finding_id, + vulnerability_remediation_id: remediation_id, + created_at: Time.current, + updated_at: Time.current + } + end + + def append_remediations_diff_checksum(metadata) + parsed_metadata = Gitlab::Json.parse(metadata) + + return [] unless parsed_metadata['remediations'] + + parsed_metadata['remediations'].filter_map do |remediation| + next unless remediation && remediation['diff'].present? + + remediation.merge('checksum' => DiffFile.checksum(remediation['diff'])) + end.compact.uniq + end + + def logger + @logger ||= ::Gitlab::AppLogger + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb b/spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..761b627405a66cfd7cb927316024ca3b11af3be0 --- /dev/null +++ b/spec/lib/gitlab/background_migration/migrate_remediations_for_vulnerability_findings_spec.rb @@ -0,0 +1,196 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::MigrateRemediationsForVulnerabilityFindings, feature_category: :vulnerability_management do + let(:vulnerability_occurrences) { table(:vulnerability_occurrences, database: :sec) } + let(:vulnerability_findings_remediations) { table(:vulnerability_findings_remediations, database: :sec) } + let(:vulnerability_remediations) { table(:vulnerability_remediations, database: :sec) } + let(:vulnerability_scanners) { table(:vulnerability_scanners, database: :sec) } + let(:remediation_hash) { { summary: 'summary', diff: "ZGlmZiAtLWdp" } } + let(:organization1) { table(:organizations).create!(name: 'test', path: 'test') } + let(:namespace1) do + table(:namespaces).create!(name: 'namespace 1', path: 'namespace1', + organization_id: organization1.id) + end + + let(:project1) do + table(:projects).create!(namespace_id: namespace1.id, project_namespace_id: namespace1.id, + organization_id: organization1.id) + end + + let(:user) do + table(:users).create!(email: 'test1@example.com', projects_limit: 5, organization_id: organization1.id) + end + + let(:scanner1) do + vulnerability_scanners.create!(project_id: project1.id, external_id: 'test 1', name: 'test scanner 1') + end + + let(:stating_id) { vulnerability_occurrences.pluck(:id).min } + let(:end_id) { vulnerability_occurrences.pluck(:id).max } + + let(:migration) do + described_class.new( + start_id: stating_id, + end_id: end_id, + batch_table: :vulnerability_occurrences, + batch_column: :id, + sub_batch_size: 2, + pause_ms: 2, + connection: SecApplicationRecord.connection + ) + end + + subject(:perform_migration) { migration.perform } + + context 'without the presence of remediation key' do + before do + create_finding!(project1.id, scanner1.id, { other_keys: 'test' }) + end + + it 'does not create any remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + end + end + + context 'with remediation equals to an array of nil element' do + before do + create_finding!(project1.id, scanner1.id, { remediations: [nil] }) + end + + it 'does not create any remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + end + end + + context 'with remediation with empty string as the diff key' do + let!(:finding) do + create_finding!(project1.id, scanner1.id, { remediations: [{ summary: 'summary', diff: '' }] }) + end + + it 'does not create any remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + end + end + + context 'with remediation equals to an array of duplicated elements' do + let!(:finding) do + create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash, remediation_hash] }) + end + + it 'creates new remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.to change { vulnerability_remediations.count }.by(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding.id).length).to eq(1) + end + end + + context 'with existing remediations within raw_metadata' do + let!(:finding1) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + let!(:finding2) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + + it 'creates new remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.to change { vulnerability_remediations.count }.by(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding1.id).length).to eq(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding2.id).length).to eq(1) + end + + context 'when create throws exception other than ActiveRecord::RecordNotUnique' do + before do + allow(migration).to receive(:create_finding_remediations).and_raise(StandardError) + end + + it 'rolls back all related transactions' do + expect(Gitlab::AppLogger).to receive(:error).with({ + class: described_class.name, message: StandardError.to_s, model_id: finding1.id + }) + expect(Gitlab::AppLogger).to receive(:error).with({ + class: described_class.name, message: StandardError.to_s, model_id: finding2.id + }) + expect { perform_migration }.not_to change { vulnerability_remediations.count } + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding1.id).length).to eq(0) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding2.id).length).to eq(0) + end + end + end + + context 'with existing remediation records' do + let!(:finding) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + + before do + vulnerability_remediations.create!(project_id: project1.id, summary: remediation_hash[:summary], + checksum: checksum(remediation_hash[:diff]), file: Tempfile.new.path) + end + + it 'does not create new remediation' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.not_to change { vulnerability_remediations.count } + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding.id).length).to eq(1) + end + end + + context 'with same raw_metadata for different projects' do + let(:namespace2) do + table(:namespaces).create!(name: 'namespace 2', path: 'namespace2', organization_id: organization1.id) + end + + let(:project2) do + table(:projects).create!(namespace_id: namespace2.id, project_namespace_id: namespace2.id, + organization_id: organization1.id) + end + + let(:scanner2) do + table(:vulnerability_scanners, database: :sec).create!(project_id: project2.id, external_id: 'test 2', + name: 'test scanner 2') + end + + let!(:finding1) { create_finding!(project1.id, scanner1.id, { remediations: [remediation_hash] }) } + let!(:finding2) { create_finding!(project2.id, scanner2.id, { remediations: [remediation_hash] }) } + + it 'creates new remediation for each project' do + expect(Gitlab::AppLogger).not_to receive(:error) + + expect { perform_migration }.to change { vulnerability_remediations.count }.by(2) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding1.id).length).to eq(1) + expect(vulnerability_findings_remediations.where(vulnerability_occurrence_id: finding2.id).length).to eq(1) + end + end + + private + + def create_finding!(project_id, scanner_id, raw_metadata) + identifier = table(:vulnerability_identifiers, database: :sec).create!(project_id: project_id, + external_type: 'uuid-v5', external_id: 'uuid-v5', + fingerprint: OpenSSL::Digest.hexdigest('SHA256', SecureRandom.uuid), name: 'Identifier for UUIDv5 2 2') + + finding = table(:vulnerability_occurrences, database: :sec).create!( + project_id: project_id, scanner_id: scanner_id, + primary_identifier_id: identifier.id, name: 'test', severity: 4, report_type: 0, + uuid: SecureRandom.uuid, location: { "image" => "alpine:3.4" }, + location_fingerprint: 'test', metadata_version: 'test', + raw_metadata: raw_metadata.to_json) + + vulnerability = table(:vulnerabilities, database: :sec).create!(project_id: project_id, author_id: user.id, + title: 'test', severity: 4, report_type: 0, finding_id: finding.id) + + finding.update!(vulnerability_id: vulnerability.id) + + finding + end + + def checksum(value) + sha = Digest::SHA256.hexdigest(value) + Gitlab::Database::ShaAttribute.new.serialize(sha) + end +end