diff --git a/db/docs/batched_background_migrations/delete_push_rules_duplicates.yml b/db/docs/batched_background_migrations/delete_push_rules_duplicates.yml new file mode 100644 index 0000000000000000000000000000000000000000..c68fda3c72e7bf852319c943e2032611b202d13d --- /dev/null +++ b/db/docs/batched_background_migrations/delete_push_rules_duplicates.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: DeletePushRulesDuplicates +description: Deletes duplicated records with the same `project_id` from `push_rules` table +feature_category: source_code_management +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/199307 +milestone: '18.3' +queued_migration_version: 20250725143908 +finalized_by: # version of the migration that finalized this BBM diff --git a/db/post_migrate/20250725143908_queue_delete_push_rules_duplicates.rb b/db/post_migrate/20250725143908_queue_delete_push_rules_duplicates.rb new file mode 100644 index 0000000000000000000000000000000000000000..5e994b138d8f881f378cb953d439ec080a6cd8ee --- /dev/null +++ b/db/post_migrate/20250725143908_queue_delete_push_rules_duplicates.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class QueueDeletePushRulesDuplicates < Gitlab::Database::Migration[2.3] + milestone '18.3' + + restrict_gitlab_migration gitlab_schema: :gitlab_main + + MIGRATION = "DeletePushRulesDuplicates" + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 + + def up + queue_batched_background_migration( + MIGRATION, + :push_rules, + :id, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration(MIGRATION, :push_rules, :id, []) + end +end diff --git a/db/schema_migrations/20250725143908 b/db/schema_migrations/20250725143908 new file mode 100644 index 0000000000000000000000000000000000000000..57d92fa050368b6faa790a2b3017d34d7b467518 --- /dev/null +++ b/db/schema_migrations/20250725143908 @@ -0,0 +1 @@ +caa148c2db9bc73a2b3cadc0b050b5134799a0a39178a1dfde887734ae39e276 \ No newline at end of file diff --git a/lib/gitlab/background_migration/delete_push_rules_duplicates.rb b/lib/gitlab/background_migration/delete_push_rules_duplicates.rb new file mode 100644 index 0000000000000000000000000000000000000000..5cb893e307abd31f4ec0a888910f77ffb4a097d6 --- /dev/null +++ b/lib/gitlab/background_migration/delete_push_rules_duplicates.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + class DeletePushRulesDuplicates < BatchedMigrationJob + class Project < ::ApplicationRecord + self.table_name = 'projects' + + has_one :push_rule, class_name: '::Gitlab::BackgroundMigration::DeletePushRulesDuplicates::PushRule' + end + + class PushRule < ::ApplicationRecord + self.table_name = 'push_rules' + belongs_to :project, class_name: '::Gitlab::BackgroundMigration::DeletePushRulesDuplicates::Project' + end + + operation_name :delete_push_rule_duplicates + feature_category :source_code_management + + def perform + each_sub_batch do |sub_batch| + project_ids = sub_batch.pluck(:project_id).uniq.compact + + duplicated_project_ids = PushRule.where(project_id: project_ids) + .group(:project_id) + .having('count(*) > 1') + .pluck(:project_id) + + next if duplicated_project_ids.blank? + + projects_with_push_rules = Project.where(id: duplicated_project_ids) + .includes(:push_rule) + .index_by(&:id) + + ids_to_delete = [] + + duplicated_project_ids.each do |project_id| + project = projects_with_push_rules[project_id] + canonical_push_rule = project&.push_rule + + next unless canonical_push_rule + + # Get all push rule IDs for this project except the canonical one + duplicate_ids = PushRule.where(project_id: project_id) + .where.not(id: canonical_push_rule.id) + .pluck(:id) + + ids_to_delete.concat(duplicate_ids) + end + + # Bulk delete all duplicates + PushRule.where(id: ids_to_delete).delete_all if ids_to_delete.any? + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/delete_push_rules_duplicates_spec.rb b/spec/lib/gitlab/background_migration/delete_push_rules_duplicates_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..5e13cfac07453ac1c7b99f02a1ab68df433b891f --- /dev/null +++ b/spec/lib/gitlab/background_migration/delete_push_rules_duplicates_spec.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::DeletePushRulesDuplicates, feature_category: :source_code_management do + let(:migration) do + described_class.new( + start_id: 1, + end_id: 5000, + batch_table: :push_rules, + batch_column: :id, + sub_batch_size: 500, + pause_ms: 0, + connection: ActiveRecord::Base.connection + ) + end + + let(:organizations) { table(:organizations) } + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:push_rules) { table(:push_rules) } + + let!(:organization) { organizations.create!(name: 'organization', path: 'organization') } + let!(:namespace) do + namespaces.create!(name: 'test-namespace', path: 'test-namespace', organization_id: organization.id) + end + + let!(:project1) { create_project_for_push_rule('test-project-1') } + let!(:project2) { create_project_for_push_rule('test-project-2') } + let!(:project3) { create_project_for_push_rule('test-project-3') } + + describe '#perform' do + let!(:record_1) { push_rules.create!(id: 1, project_id: project1.id) } + let!(:record_2_duplicate) { push_rules.create!(id: 2, project_id: project1.id) } + let!(:record_3) { push_rules.create!(id: 3, project_id: project2.id) } + let!(:record_4) { push_rules.create!(id: 4, project_id: project3.id) } + let!(:record_5_duplicate) { push_rules.create!(id: 5, project_id: project3.id) } + let!(:record_6_duplicate) { push_rules.create!(id: 6, project_id: project3.id) } + let!(:record_7_no_project) { push_rules.create!(id: 7, project_id: nil) } + + it 'deletes duplicate records' do + expect { migration.perform }.to change { push_rules.count }.from(7).to(4) + expect(push_rules.find_by(id: record_1.id)).to be_present + expect(push_rules.find_by(id: record_3.id)).to be_present + expect(push_rules.find_by(id: record_4.id)).to be_present + expect(push_rules.find_by(id: record_7_no_project.id)).to be_present + + expect(push_rules.find_by(id: record_2_duplicate.id)).to be_nil + expect(push_rules.find_by(id: record_5_duplicate.id)).to be_nil + expect(push_rules.find_by(id: record_6_duplicate.id)).to be_nil + end + + it 'also removes records outside of the batch range' do + push_rules.create!(id: 6001, project_id: project1.id) + + expect { migration.perform }.to change { push_rules.count }.from(8).to(4) + + expect(push_rules.find_by(id: 6001)).to be_nil + end + + it 'handles empty batches gracefully' do + push_rules.delete_all + + expect { migration.perform }.not_to raise_error + expect(push_rules.count).to eq(0) + end + + it 'handles batches without duplicates' do + push_rules.delete_all + push_rules.create!(id: 1, project_id: project1.id) + push_rules.create!(id: 2, project_id: project2.id) + + expect { migration.perform }.not_to change { push_rules.count } + expect(push_rules.count).to eq(2) + end + end + + private + + def create_project_for_push_rule(name) + project_namespace = namespaces.create!( + name: name, + path: name, + type: 'Project', + organization_id: organization.id + ) + projects.create!( + name: name, + path: name, + namespace_id: namespace.id, + organization_id: organization.id, + project_namespace_id: project_namespace.id + ) + end +end diff --git a/spec/migrations/20250725143908_queue_delete_push_rules_duplicates_spec.rb b/spec/migrations/20250725143908_queue_delete_push_rules_duplicates_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..44f8352e2903fcbf7907ffc9626f5f58f1dad60c --- /dev/null +++ b/spec/migrations/20250725143908_queue_delete_push_rules_duplicates_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe QueueDeletePushRulesDuplicates, migration: :gitlab_main, feature_category: :source_code_management do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_main, + table_name: :push_rules, + column_name: :id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end