diff --git a/app/services/loose_foreign_keys/batch_cleaner_service.rb b/app/services/loose_foreign_keys/batch_cleaner_service.rb index 833b78430c2d4a014504bfc4af7eee9de34b9971..2af187c5acebe535fc3e5a094153f355bf47c286 100644 --- a/app/services/loose_foreign_keys/batch_cleaner_service.rb +++ b/app/services/loose_foreign_keys/batch_cleaner_service.rb @@ -34,6 +34,27 @@ def initialize( end def execute + if feature_category_additions_enabled? + new_cleaner_service + else + original_cleaner_service + end + + return if modification_tracker.over_limit? + + # At this point, all associations are cleaned up, we can update the status of the parent records + update_count = Gitlab::Database::SharedModel.using_connection(connection) do + LooseForeignKeys::DeletedRecord.mark_records_processed(deleted_parent_records) + end + + deleted_records_counter.increment({ table: parent_table, db_config_name: db_config_name }, update_count) + end + + private + + attr_reader :parent_table, :loose_foreign_key_definitions, :deleted_parent_records, :modification_tracker, :deleted_records_counter, :deleted_records_rescheduled_count, :deleted_records_incremented_count, :connection, :logger + + def original_cleaner_service loose_foreign_key_definitions.each do |loose_foreign_key_definition| run_cleaner_service(loose_foreign_key_definition, with_skip_locked: true) @@ -49,20 +70,47 @@ def execute break end end + end - return if modification_tracker.over_limit? + def new_cleaner_service + loose_foreign_key_definitions.each do |loose_foreign_key_definition| + break if process_definition_with_feature_category(loose_foreign_key_definition) + end + end - # At this point, all associations are cleaned up, we can update the status of the parent records - update_count = Gitlab::Database::SharedModel.using_connection(connection) do - LooseForeignKeys::DeletedRecord.mark_records_processed(deleted_parent_records) + def process_definition_with_feature_category(loose_foreign_key_definition) + feature_category = find_child_table_feature_category(loose_foreign_key_definition.from_table) + + Gitlab::ApplicationContext.with_context(feature_category: feature_category) do + process_cleaner_services(loose_foreign_key_definition) end + end - deleted_records_counter.increment({ table: parent_table, db_config_name: db_config_name }, update_count) + def process_cleaner_services(loose_foreign_key_definition) + run_cleaner_service(loose_foreign_key_definition, with_skip_locked: true) + + if modification_tracker.over_limit? + handle_over_limit + return true + end + + run_cleaner_service(loose_foreign_key_definition, with_skip_locked: false) + + if modification_tracker.over_limit? + handle_over_limit + return true + end + + false end - private + def find_child_table_feature_category(table) + Gitlab::Database::Dictionary.entries.find_by_table_name(table)&.feature_categories&.first || :database + end - attr_reader :parent_table, :loose_foreign_key_definitions, :deleted_parent_records, :modification_tracker, :deleted_records_counter, :deleted_records_rescheduled_count, :deleted_records_incremented_count, :connection, :logger + def feature_category_additions_enabled? + Feature.enabled?(:loose_foreign_key_worker_improvements, Feature.current_pod) + end def handle_over_limit records_to_reschedule = [] diff --git a/config/feature_flags/gitlab_com_derisk/loose_foreign_key_worker_improvements.yml b/config/feature_flags/gitlab_com_derisk/loose_foreign_key_worker_improvements.yml new file mode 100644 index 0000000000000000000000000000000000000000..20df92ff33c29b30d0ce9eb96666e16834f4c46b --- /dev/null +++ b/config/feature_flags/gitlab_com_derisk/loose_foreign_key_worker_improvements.yml @@ -0,0 +1,10 @@ +--- +name: loose_foreign_key_worker_improvements +description: +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/573367 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/209695 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/578006 +milestone: '18.6' +group: group::database frameworks +type: gitlab_com_derisk +default_enabled: false diff --git a/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb b/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb index fa3afb65f8aa8cca68a37b06844a60ca147a3b29..0708cd8a4fc6db3ecef3382c1fa2054a2e1b5591 100644 --- a/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb +++ b/spec/services/loose_foreign_keys/batch_cleaner_service_spec.rb @@ -137,222 +137,248 @@ def create_table_structure migration.drop_table :_test_loose_fk_child_table_4 end - context 'when parent records are deleted' do - let(:deleted_records_counter) { Gitlab::Metrics.client.get(:loose_foreign_key_processed_deleted_records) } + shared_examples 'cleans up loose foreign key records' do + context 'when parent records are deleted' do + let(:deleted_records_counter) { Gitlab::Metrics.client.get(:loose_foreign_key_processed_deleted_records) } - before do - parent_record_1.delete + before do + parent_record_1.delete - expect(loose_fk_child_table_1.count).to eq(4) - expect(loose_fk_child_table_2.count).to eq(4) - expect(loose_fk_child_table_4.count).to eq(4) + expect(loose_fk_child_table_1.count).to eq(4) + expect(loose_fk_child_table_2.count).to eq(4) + expect(loose_fk_child_table_4.count).to eq(4) - described_class.new( - parent_table: '_test_loose_fk_parent_table', - loose_foreign_key_definitions: loose_foreign_key_definitions, - deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), - connection: ::ApplicationRecord.connection - ).execute - end + described_class.new( + parent_table: '_test_loose_fk_parent_table', + loose_foreign_key_definitions: loose_foreign_key_definitions, + deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), + connection: ::ApplicationRecord.connection + ).execute + end - it 'cleans up the child records' do - expect(loose_fk_child_table_1.where(parent_id: parent_record_1.id)).to be_empty - expect(loose_fk_child_table_2.where(parent_id_with_different_column: nil).count).to eq(2) - expect(loose_fk_child_table_4.where(parent_id: parent_record_1.id, association_type: 'association_type_x')).to be_empty - end + it 'cleans up the child records' do + expect(loose_fk_child_table_1.where(parent_id: parent_record_1.id)).to be_empty + expect(loose_fk_child_table_2.where(parent_id_with_different_column: nil).count).to eq(2) + expect(loose_fk_child_table_4.where(parent_id: parent_record_1.id, association_type: 'association_type_x')).to be_empty + end - it 'updates the child records' do - expect(loose_fk_child_table_3.where(parent_id: parent_record_1.id, status: 4).count).to eq(2) - end + it 'updates the child records' do + expect(loose_fk_child_table_3.where(parent_id: parent_record_1.id, status: 4).count).to eq(2) + end - it 'cleans up the pending parent DeletedRecord' do - expect(LooseForeignKeys::DeletedRecord.status_pending.count).to eq(0) - expect(LooseForeignKeys::DeletedRecord.status_processed.count).to eq(1) - end + it 'cleans up the pending parent DeletedRecord' do + expect(LooseForeignKeys::DeletedRecord.status_pending.count).to eq(0) + expect(LooseForeignKeys::DeletedRecord.status_processed.count).to eq(1) + end - it 'records the DeletedRecord status updates', :prometheus do - counter = Gitlab::Metrics.client.get(:loose_foreign_key_processed_deleted_records) + it 'records the DeletedRecord status updates', :prometheus do + counter = Gitlab::Metrics.client.get(:loose_foreign_key_processed_deleted_records) - expect(counter.get(table: loose_fk_parent_table.table_name, db_config_name: 'main')).to eq(1) - end + expect(counter.get(table: loose_fk_parent_table.table_name, db_config_name: 'main')).to eq(1) + end - it 'does not delete unrelated records' do - expect(loose_fk_child_table_1.where(parent_id: other_parent_record.id).count).to eq(2) - expect(loose_fk_child_table_2.where(parent_id_with_different_column: other_parent_record.id).count).to eq(2) - expect(loose_fk_child_table_4.where(parent_id: parent_record_1.id, association_type: 'association_type_y').count).to eq(2) - end + it 'does not delete unrelated records' do + expect(loose_fk_child_table_1.where(parent_id: other_parent_record.id).count).to eq(2) + expect(loose_fk_child_table_2.where(parent_id_with_different_column: other_parent_record.id).count).to eq(2) + expect(loose_fk_child_table_4.where(parent_id: parent_record_1.id, association_type: 'association_type_y').count).to eq(2) + end - it 'does not update unrelated records' do - expect(loose_fk_child_table_3.where(parent_id: other_parent_record.id, status: 1).count).to eq(2) + it 'does not update unrelated records' do + expect(loose_fk_child_table_3.where(parent_id: other_parent_record.id, status: 1).count).to eq(2) + end end - end - context 'when the child table is partitioned' do - let(:parent_child_table) { table(:_test_p_loose_fk_parent_table) } - let(:partitioned_child_table1) { table("gitlab_partitions_dynamic._test_p_loose_fk_parent_table_100") } - let(:partitioned_child_table2) { table("gitlab_partitions_dynamic._test_p_loose_fk_parent_table_101") } - - let(:loose_foreign_key_definitions) do - [ - ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( - '_test_p_loose_fk_parent_table', - '_test_loose_fk_parent_table', - { - column: 'parent_id', - on_delete: :async_delete, - gitlab_schema: :gitlab_main - } - ) - ] - end + context 'when the child table is partitioned' do + let(:parent_child_table) { table(:_test_p_loose_fk_parent_table) } + let(:partitioned_child_table1) { table("gitlab_partitions_dynamic._test_p_loose_fk_parent_table_100") } + let(:partitioned_child_table2) { table("gitlab_partitions_dynamic._test_p_loose_fk_parent_table_101") } - before do - ApplicationRecord.connection.execute(<<~SQL) - CREATE TABLE _test_p_loose_fk_parent_table ( - parent_id bigint NOT NULL, - created_at timestamptz NOT NULL, - PRIMARY KEY (created_at) - ) PARTITION BY RANGE(created_at); - - CREATE TABLE gitlab_partitions_dynamic._test_p_loose_fk_parent_table_100 PARTITION OF _test_p_loose_fk_parent_table - FOR VALUES FROM ('2020-01-01') TO ('2020-02-01'); - - CREATE TABLE gitlab_partitions_dynamic._test_p_loose_fk_parent_table_101 PARTITION OF _test_p_loose_fk_parent_table - FOR VALUES FROM ('2020-02-01') TO ('2020-03-01'); - SQL - - partitioned_child_table1.create!(parent_id: parent_record_1.id, created_at: '2020-01-02 02:00') - partitioned_child_table2.create!(parent_id: parent_record_1.id, created_at: '2020-02-02 02:00') - partitioned_child_table2.create!(parent_id: other_parent_record.id, created_at: '2020-02-02 03:00') - end + let(:loose_foreign_key_definitions) do + [ + ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( + '_test_p_loose_fk_parent_table', + '_test_loose_fk_parent_table', + { + column: 'parent_id', + on_delete: :async_delete, + gitlab_schema: :gitlab_main + } + ) + ] + end - context 'when parent records are deleted' do - it 'cleans up the child partitioned records' do - expect(parent_child_table.count).to eq(3) - expect(partitioned_child_table1.count).to eq(1) - expect(partitioned_child_table2.count).to eq(2) + before do + ApplicationRecord.connection.execute(<<~SQL) + CREATE TABLE _test_p_loose_fk_parent_table ( + parent_id bigint NOT NULL, + created_at timestamptz NOT NULL, + PRIMARY KEY (created_at) + ) PARTITION BY RANGE(created_at); + + CREATE TABLE gitlab_partitions_dynamic._test_p_loose_fk_parent_table_100 PARTITION OF _test_p_loose_fk_parent_table + FOR VALUES FROM ('2020-01-01') TO ('2020-02-01'); + + CREATE TABLE gitlab_partitions_dynamic._test_p_loose_fk_parent_table_101 PARTITION OF _test_p_loose_fk_parent_table + FOR VALUES FROM ('2020-02-01') TO ('2020-03-01'); + SQL + + partitioned_child_table1.create!(parent_id: parent_record_1.id, created_at: '2020-01-02 02:00') + partitioned_child_table2.create!(parent_id: parent_record_1.id, created_at: '2020-02-02 02:00') + partitioned_child_table2.create!(parent_id: other_parent_record.id, created_at: '2020-02-02 03:00') + end - parent_record_1.delete + context 'when parent records are deleted' do + it 'cleans up the child partitioned records' do + expect(parent_child_table.count).to eq(3) + expect(partitioned_child_table1.count).to eq(1) + expect(partitioned_child_table2.count).to eq(2) - expect_next_instance_of(LooseForeignKeys::PartitionCleanerService) do |service| - expect(service).to receive(:execute).at_least(:once).and_call_original - end.at_least(:once) + parent_record_1.delete - described_class.new( - parent_table: '_test_loose_fk_parent_table', - loose_foreign_key_definitions: loose_foreign_key_definitions, - deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), - connection: ::ApplicationRecord.connection - ).execute + expect_next_instance_of(LooseForeignKeys::PartitionCleanerService) do |service| + expect(service).to receive(:execute).at_least(:once).and_call_original + end.at_least(:once) + + described_class.new( + parent_table: '_test_loose_fk_parent_table', + loose_foreign_key_definitions: loose_foreign_key_definitions, + deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), + connection: ::ApplicationRecord.connection + ).execute - expect(parent_child_table.count).to eq(1) - expect(partitioned_child_table1.count).to eq(0) - expect(partitioned_child_table2.count).to eq(1) + expect(parent_child_table.count).to eq(1) + expect(partitioned_child_table1.count).to eq(0) + expect(partitioned_child_table2.count).to eq(1) + end end end - end - describe 'fair queueing' do - context 'when the execution is over the limit' do - let(:modification_tracker) { instance_double(LooseForeignKeys::ModificationTracker) } - let(:over_limit_return_values) { [true] } - let(:deleted_record) { LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 1).first } - let(:deleted_records_rescheduled_counter) { Gitlab::Metrics.client.get(:loose_foreign_key_rescheduled_deleted_records) } - let(:deleted_records_incremented_counter) { Gitlab::Metrics.client.get(:loose_foreign_key_incremented_deleted_records) } + describe 'fair queueing' do + context 'when the execution is over the limit' do + let(:modification_tracker) { instance_double(LooseForeignKeys::ModificationTracker) } + let(:over_limit_return_values) { [true] } + let(:deleted_record) { LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 1).first } + let(:deleted_records_rescheduled_counter) { Gitlab::Metrics.client.get(:loose_foreign_key_rescheduled_deleted_records) } + let(:deleted_records_incremented_counter) { Gitlab::Metrics.client.get(:loose_foreign_key_incremented_deleted_records) } + + let(:cleaner) do + described_class.new( + parent_table: '_test_loose_fk_parent_table', + loose_foreign_key_definitions: loose_foreign_key_definitions, + deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), + connection: ::ApplicationRecord.connection, + modification_tracker: modification_tracker + ) + end - let(:cleaner) do - described_class.new( - parent_table: '_test_loose_fk_parent_table', - loose_foreign_key_definitions: loose_foreign_key_definitions, - deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), - connection: ::ApplicationRecord.connection, - modification_tracker: modification_tracker - ) - end + before do + parent_record_1.delete + allow(modification_tracker).to receive(:over_limit?).and_return(*over_limit_return_values) + allow(modification_tracker).to receive(:add_deletions) + end - before do - parent_record_1.delete - allow(modification_tracker).to receive(:over_limit?).and_return(*over_limit_return_values) - allow(modification_tracker).to receive(:add_deletions) - end + context 'when the deleted record is under the maximum allowed cleanup attempts' do + it 'updates the cleanup_attempts column', :aggregate_failures do + deleted_record.update!(cleanup_attempts: 1) + + cleaner.execute - context 'when the deleted record is under the maximum allowed cleanup attempts' do - it 'updates the cleanup_attempts column', :aggregate_failures do - deleted_record.update!(cleanup_attempts: 1) + expect(deleted_record.reload.cleanup_attempts).to eq(2) + expect(deleted_records_incremented_counter.get(table: loose_fk_parent_table.table_name, db_config_name: 'main')).to eq(1) + end - cleaner.execute + context 'when the deleted record is above the maximum allowed cleanup attempts' do + it 'reschedules the record', :aggregate_failures do + deleted_record.update!(cleanup_attempts: LooseForeignKeys::BatchCleanerService::CLEANUP_ATTEMPTS_BEFORE_RESCHEDULE + 1) - expect(deleted_record.reload.cleanup_attempts).to eq(2) - expect(deleted_records_incremented_counter.get(table: loose_fk_parent_table.table_name, db_config_name: 'main')).to eq(1) - end + freeze_time do + cleaner.execute + + expect(deleted_record.reload).to have_attributes( + cleanup_attempts: 0, + consume_after: 5.minutes.from_now + ) + expect(deleted_records_rescheduled_counter.get(table: loose_fk_parent_table.table_name, db_config_name: 'main')).to eq(1) + end + end + end + + describe 'when over limit happens on the second cleanup call without skip locked' do + # over_limit? is called twice, we test here the 2nd call + # - When invoking cleanup with SKIP LOCKED + # - When invoking cleanup (no SKIP LOCKED) + let(:over_limit_return_values) { [false, true] } - context 'when the deleted record is above the maximum allowed cleanup attempts' do - it 'reschedules the record', :aggregate_failures do - deleted_record.update!(cleanup_attempts: LooseForeignKeys::BatchCleanerService::CLEANUP_ATTEMPTS_BEFORE_RESCHEDULE + 1) + it 'updates the cleanup_attempts column' do + expect(cleaner).to receive(:run_cleaner_service).twice + + deleted_record.update!(cleanup_attempts: 1) - freeze_time do cleaner.execute - expect(deleted_record.reload).to have_attributes( - cleanup_attempts: 0, - consume_after: 5.minutes.from_now - ) - expect(deleted_records_rescheduled_counter.get(table: loose_fk_parent_table.table_name, db_config_name: 'main')).to eq(1) + expect(deleted_record.reload.cleanup_attempts).to eq(2) end end end + end + end - describe 'when over limit happens on the second cleanup call without skip locked' do - # over_limit? is called twice, we test here the 2nd call - # - When invoking cleanup with SKIP LOCKED - # - When invoking cleanup (no SKIP LOCKED) - let(:over_limit_return_values) { [false, true] } + describe 'when the definition is invalid' do + let(:loose_foreign_key_definitions) do + [ + ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( + '_test_loose_fk_child_table_1', + '_test_loose_fk_parent_table', + { + column: 'parent_id', + on_delete: :async_delete, + gitlab_schema: :gitlab_main + } + ), + ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( + '_test_loose_fk_child_table_2', + '_test_loose_fk_parent_table', + { + column: 'parent_id_with_different_column', + on_delete: :not_valid, + gitlab_schema: :gitlab_main + } + ) + ] + end - it 'updates the cleanup_attempts column' do - expect(cleaner).to receive(:run_cleaner_service).twice + before do + parent_record_1.delete + end - deleted_record.update!(cleanup_attempts: 1) + it 'logs error and skips the definition' do + expect(Sidekiq.logger).to receive(:error).with("Invalid on_delete argument: not_valid").twice + expect(Sidekiq.logger).to receive(:error).with("Invalid on_delete argument for definition: _test_loose_fk_child_table_2").twice - cleaner.execute + described_class.new( + parent_table: '_test_loose_fk_parent_table', + loose_foreign_key_definitions: loose_foreign_key_definitions, + deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), + connection: ::ApplicationRecord.connection + ).execute - expect(deleted_record.reload.cleanup_attempts).to eq(2) - end - end + expect(loose_fk_child_table_1.where(parent_id: parent_record_1.id)).to be_empty + expect(loose_fk_child_table_2.where(parent_id_with_different_column: parent_record_1.id).count).to eq(2) end end end - describe 'when the definition is invalid' do - let(:loose_foreign_key_definitions) do - [ - ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( - '_test_loose_fk_child_table_1', - '_test_loose_fk_parent_table', - { - column: 'parent_id', - on_delete: :async_delete, - gitlab_schema: :gitlab_main - } - ), - ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( - '_test_loose_fk_child_table_2', - '_test_loose_fk_parent_table', - { - column: 'parent_id_with_different_column', - on_delete: :not_valid, - gitlab_schema: :gitlab_main - } - ) - ] + context 'with feature flag enabled (default)' do + before do + stub_feature_flags(loose_foreign_key_worker_improvements: true) end - before do + it_behaves_like 'cleans up loose foreign key records' + + it 'uses new cleaner service with feature category context' do parent_record_1.delete - end - it 'logs error and skips the definition' do - expect(Sidekiq.logger).to receive(:error).with("Invalid on_delete argument: not_valid").twice - expect(Sidekiq.logger).to receive(:error).with("Invalid on_delete argument for definition: _test_loose_fk_child_table_2").twice + expect(Gitlab::ApplicationContext).to receive(:with_context).exactly(4).times.and_call_original described_class.new( parent_table: '_test_loose_fk_parent_table', @@ -360,9 +386,27 @@ def create_table_structure deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), connection: ::ApplicationRecord.connection ).execute + end + end - expect(loose_fk_child_table_1.where(parent_id: parent_record_1.id)).to be_empty - expect(loose_fk_child_table_2.where(parent_id_with_different_column: parent_record_1.id).count).to eq(2) + context 'with feature flag disabled' do + before do + stub_feature_flags(loose_foreign_key_worker_improvements: false) + end + + it_behaves_like 'cleans up loose foreign key records' + + it 'uses original cleaner service without feature category context' do + parent_record_1.delete + + expect(Gitlab::ApplicationContext).not_to receive(:with_context) + + described_class.new( + parent_table: '_test_loose_fk_parent_table', + loose_foreign_key_definitions: loose_foreign_key_definitions, + deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100), + connection: ::ApplicationRecord.connection + ).execute end end end