From e63b7b1f567ba6c597c85084e5817902fd92f47a Mon Sep 17 00:00:00 2001
From: Kerri Miller
Date: Thu, 17 Jul 2025 17:02:07 -0400
Subject: [PATCH] Backfill project_id on merge_request_diff_files where nil

This is part of the groundwork for sharding and eventual partitioning.

Changelog: added
---
 ...project_id_on_merge_request_diff_files.yml |  8 ++++
 ..._project_id_on_merge_request_diff_files.rb | 32 ++++++++++++++
 db/schema_migrations/20250717193053           |  1 +
 ..._project_id_on_merge_request_diff_files.rb | 42 +++++++++++++++++++
 ...ect_id_on_merge_request_diff_files_spec.rb | 36 ++++++++++++++++
 ...ect_id_on_merge_request_diff_files_spec.rb | 27 ++++++++++++
 6 files changed, 146 insertions(+)
 create mode 100644 db/docs/batched_background_migrations/backfill_project_id_on_merge_request_diff_files.yml
 create mode 100644 db/post_migrate/20250717193053_queue_backfill_project_id_on_merge_request_diff_files.rb
 create mode 100644 db/schema_migrations/20250717193053
 create mode 100644 lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files.rb
 create mode 100644 spec/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files_spec.rb
 create mode 100644 spec/migrations/20250717193053_queue_backfill_project_id_on_merge_request_diff_files_spec.rb

diff --git a/db/docs/batched_background_migrations/backfill_project_id_on_merge_request_diff_files.yml b/db/docs/batched_background_migrations/backfill_project_id_on_merge_request_diff_files.yml
new file mode 100644
index 00000000000000..6429acd4c4e6da
--- /dev/null
+++ b/db/docs/batched_background_migrations/backfill_project_id_on_merge_request_diff_files.yml
@@ -0,0 +1,8 @@
+---
+migration_job_name: BackfillProjectIdOnMergeRequestDiffFiles
+description: Backfills nil project_ids on merge_request_diff_files
+feature_category: source_code_management
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/123123123123
+milestone: '18.3'
+queued_migration_version: 20250717193053
+finalized_by: # version of the migration that finalized this BBM
diff --git a/db/post_migrate/20250717193053_queue_backfill_project_id_on_merge_request_diff_files.rb b/db/post_migrate/20250717193053_queue_backfill_project_id_on_merge_request_diff_files.rb
new file mode 100644
index 00000000000000..12640ab7645f4e
--- /dev/null
+++ b/db/post_migrate/20250717193053_queue_backfill_project_id_on_merge_request_diff_files.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+# Queues the batched background migration that fills in `project_id` on
+# `merge_request_diff_files` rows where it is still NULL.
+class QueueBackfillProjectIdOnMergeRequestDiffFiles < Gitlab::Database::Migration[2.3]
+  milestone '18.3'
+
+  restrict_gitlab_migration gitlab_schema: :gitlab_main
+
+  MIGRATION = "BackfillProjectIdOnMergeRequestDiffFiles"
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 1000
+  SUB_BATCH_SIZE = 100
+
+  def up
+    # NOTE(review): batching runs over `project_id`, the column the job
+    # restricts to NULL via `scope_to` -- confirm the resulting batches can
+    # still contain the rows that need backfilling.
+    queue_batched_background_migration(
+      MIGRATION,
+      :merge_request_diff_files,
+      :project_id,
+      job_interval: DELAY_INTERVAL,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :merge_request_diff_files, :project_id, [])
+  end
+end
diff --git a/db/schema_migrations/20250717193053 b/db/schema_migrations/20250717193053
new file mode 100644
index 00000000000000..0b593a4378a447
--- /dev/null
+++ b/db/schema_migrations/20250717193053
@@ -0,0 +1 @@
+2caddcd00637f8b18382524ddd31016f2c0e7c58224e232b7536e99610575213
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files.rb b/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files.rb
new file mode 100644
index 00000000000000..57388d749a1c2d
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+# See https://docs.gitlab.com/ee/development/database/batched_background_migrations.html
+# for more information on how to use batched background migrations
+
+module Gitlab
+  module BackgroundMigration
+    # Copies `project_id` from `merge_request_diffs` onto the
+    # `merge_request_diff_files` rows that do not have one yet.
+    class BackfillProjectIdOnMergeRequestDiffFiles < BatchedMigrationJob
+      feature_category :source_code_management
+
+      operation_name :backfill_project_id_on_merge_request_diff_files
+      scope_to ->(relation) { relation.where(project_id: nil) } # rubocop: disable Database/AvoidScopeTo -- `project_id` is an indexed column
+
+      # Builds the UPDATE statement that backfills one sub-batch.
+      #
+      # NOTE(review): rows are matched on `merge_request_diff_files.id`;
+      # confirm the table actually has an `id` column.
+      #
+      # @param sub_batch [#select] relation-like object; `select(:id).to_sql`
+      #   supplies the IN (...) subquery
+      # @return [String] the SQL to execute
+      def construct_query(sub_batch:)
+        <<~SQL
+          UPDATE merge_request_diff_files
+          SET project_id = merge_request_diffs.project_id
+          FROM merge_request_diffs
+          WHERE merge_request_diffs.id = merge_request_diff_files.merge_request_diff_id
+          AND merge_request_diff_files.id IN (#{sub_batch.select(:id).to_sql})
+        SQL
+      end
+
+      # Runs the backfill UPDATE once per sub-batch.
+      def perform
+        each_sub_batch do |sub_batch|
+          connection.execute(construct_query(sub_batch: sub_batch))
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files_spec.rb
new file mode 100644
index 00000000000000..11cfb4ef27222e
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_project_id_on_merge_request_diff_files_spec.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::BackfillProjectIdOnMergeRequestDiffFiles, feature_category: :source_code_management do
+  let(:merge_request_diff_files) { table(:merge_request_diff_files) }
+  let(:sub_batch) { merge_request_diff_files.where(project_id: nil) }
+
+  let(:migration) do
+    described_class.new(
+      start_id: 1,
+      end_id: 1,
+      batch_table: :merge_request_diff_files,
+      batch_column: :project_id,
+      sub_batch_size: 1,
+      pause_ms: 0,
+      connection: ApplicationRecord.connection
+    )
+  end
+
+  # NOTE(review): this only pins the generated SQL. Add a `#perform` example
+  # that backfills real rows before this migration is finalized.
+  describe '#construct_query' do
+    subject(:query) { migration.construct_query(sub_batch: sub_batch) }
+
+    it 'copies project_id from the parent merge_request_diffs row' do
+      expect(query).to include('UPDATE merge_request_diff_files')
+      expect(query).to include('SET project_id = merge_request_diffs.project_id')
+      expect(query).to include('merge_request_diffs.id = merge_request_diff_files.merge_request_diff_id')
+    end
+
+    it 'limits the update to the rows of the given sub-batch' do
+      expect(query).to include("merge_request_diff_files.id IN (#{sub_batch.select(:id).to_sql})")
+    end
+  end
+end
diff --git a/spec/migrations/20250717193053_queue_backfill_project_id_on_merge_request_diff_files_spec.rb b/spec/migrations/20250717193053_queue_backfill_project_id_on_merge_request_diff_files_spec.rb
new file mode 100644
index 00000000000000..ea045822267882
--- /dev/null
+++ b/spec/migrations/20250717193053_queue_backfill_project_id_on_merge_request_diff_files_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueBackfillProjectIdOnMergeRequestDiffFiles, migration: :gitlab_main, feature_category: :source_code_management do
+  let!(:batched_migration) { described_class::MIGRATION }
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> {
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> {
+        expect(batched_migration).to have_scheduled_batched_migration(
+          gitlab_schema: :gitlab_main,
+          table_name: :merge_request_diff_files,
+          column_name: :project_id,
+          interval: described_class::DELAY_INTERVAL,
+          batch_size: described_class::BATCH_SIZE,
+          sub_batch_size: described_class::SUB_BATCH_SIZE
+        )
+      }
+    end
+  end
+end
-- 
GitLab