From 381e79aed91490c53a797506a942bd60a629b5df Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 13 Oct 2025 18:12:33 +0200 Subject: [PATCH 01/17] Create background migration to backfill job definitions Changelog: other --- .../move_ci_builds_metadata.yml | 8 +++ ...013133259_queue_move_ci_builds_metadata.rb | 49 +++++++++++++++ db/schema_migrations/20251013133259 | 1 + .../move_ci_builds_metadata.rb | 61 +++++++++++++++++++ .../move_ci_builds_metadata_spec.rb | 37 +++++++++++ ...3259_queue_move_ci_builds_metadata_spec.rb | 27 ++++++++ 6 files changed, 183 insertions(+) create mode 100644 db/docs/batched_background_migrations/move_ci_builds_metadata.yml create mode 100644 db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb create mode 100644 db/schema_migrations/20251013133259 create mode 100644 lib/gitlab/background_migration/move_ci_builds_metadata.rb create mode 100644 spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb create mode 100644 spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb diff --git a/db/docs/batched_background_migrations/move_ci_builds_metadata.yml b/db/docs/batched_background_migrations/move_ci_builds_metadata.yml new file mode 100644 index 00000000000000..0140a11d13fa2e --- /dev/null +++ b/db/docs/batched_background_migrations/move_ci_builds_metadata.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: MoveCiBuildsMetadata +description: Copies the data from p_ci_builds_metadata into the deduplicated tables +feature_category: continuous_integration +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/1 +milestone: '18.6' +queued_migration_version: 20251013133259 +finalized_by: # version of the migration that finalized this BBM diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb new file mode 100644 index 00000000000000..2a63eef1f75d49 --- /dev/null +++ 
b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +class QueueMoveCiBuildsMetadata < Gitlab::Database::Migration[2.3] + milestone '18.6' + + restrict_gitlab_migration gitlab_schema: :gitlab_ci + + MIGRATION = "MoveCiBuildsMetadata" + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 + PARTITION_ID = 107 + + def up + queue_batched_background_migration( + MIGRATION, + :p_ci_builds_metadata, + :id, + :partition_id, + PARTITION_ID, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE, + batch_min_value: batch_min_value, + batch_max_value: batch_max_value + ) + end + + def down + delete_batched_background_migration( + MIGRATION, + :p_ci_builds_metadata, + :id, + [:partition_id, PARTITION_ID] + ) + end + + private + + def batch_min_value + connection.select_value(<<~SQL) + SELECT COALESCE(MIN(id), 1) FROM p_ci_builds_metadata WHERE partition_id = #{PARTITION_ID}; + SQL + end + + def batch_max_value + connection.select_value(<<~SQL) + SELECT MAX(id) FROM p_ci_builds_metadata WHERE partition_id = #{PARTITION_ID}; + SQL + end +end diff --git a/db/schema_migrations/20251013133259 b/db/schema_migrations/20251013133259 new file mode 100644 index 00000000000000..a3ff3ed2ec7408 --- /dev/null +++ b/db/schema_migrations/20251013133259 @@ -0,0 +1 @@ +22629f248e9f5c789ce81c53e5f2dec3e977d23ebeefb6227dbd56b0ddc1924a \ No newline at end of file diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb new file mode 100644 index 00000000000000..f3f25fd9a731ab --- /dev/null +++ b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +# rubocop:disable Database/AvoidScopeTo -- uses partition pruning, doesn't need a specialized index +module Gitlab + module BackgroundMigration + class MoveCiBuildsMetadata < BatchedMigrationJob + feature_category :continuous_integration + operation_name 
:create_job_definition_from_builds_metadata + + scope_to ->(relation) { relation.where([@job_arguments].to_h) } + + def self.job_arguments_count + 2 + end + + def perform + each_sub_batch do |sub_batch| + # Assumes that all the metadata records that are associated with a job that + # already has a job definition instance record don't need to be migrated + sub_batch.where.not('EXISTS (?)', scoped_definition_instances.select(1)).each do |metadata| + job_definition = find_or_create_job_definition_from(metadata) + create_job_definition_instance(metadata, job_definition) + end + end + end + + # TODO + def find_or_create_job_definition_from(metadata) + instance = definition_model.where(partition_id: metadata.partition_id).new + instance.save! + instance + end + + def create_job_definition_instance(metadata, job_definition) + definition_instance_model.create!( + job_id: metadata.build_id, + partition_id: metadata.partition_id, + job_definition_id: job_definition.id, + project_id: metadata.project_id + ) + end + + def scoped_definition_instances + definition_instance_model + .where('p_ci_job_definition_instances.partition_id = p_ci_builds_metadata.partition_id') + .where('p_ci_job_definition_instances.job_id = p_ci_builds_metadata.build_id') + end + + def definition_model + @definition_model ||= define_batchable_model( + :p_ci_job_definitions, connection: connection, primary_key: :id) + end + + def definition_instance_model + @definition_instance_model ||= define_batchable_model( + :p_ci_job_definition_instances, connection: connection, primary_key: :id) + end + end + end +end +# rubocop:enable Database/AvoidScopeTo diff --git a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb new file mode 100644 index 00000000000000..ede6a3b5463390 --- /dev/null +++ b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +require 
'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::MoveCiBuildsMetadata, feature_category: :continuous_integration, migration: :gitlab_ci do + let(:pipelines_table) { table(:p_ci_pipelines, primary_key: :id) } + let(:builds_table) { table(:p_ci_builds, primary_key: :id) } + let(:builds_metadata_table) { table(:p_ci_builds_metadata, primary_key: :id) } + + let(:pipeline) { pipelines_table.create!(partition_id: 100, project_id: 1) } + let(:job) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + + let(:migration_attrs) do + { + start_id: builds_metadata_table.minimum(:id), + end_id: builds_metadata_table.maximum(:id), + batch_table: :p_ci_builds_metadata, + batch_column: :id, + sub_batch_size: 2, + pause_ms: 0, + connection: ApplicationRecord.connection, + job_arguments: ['partition_id', 100] + } + end + + let(:migration) { described_class.new(**migration_attrs) } + + before do + builds_metadata_table.create!(partition_id: 100, build_id: job.id, project_id: 1) + end + + describe '#perform' do + it 'does not raise errors' do + expect { migration.perform }.not_to raise_error + end + end +end diff --git a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb new file mode 100644 index 00000000000000..bf6abf0b5e1a71 --- /dev/null +++ b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! 
+ +RSpec.describe QueueMoveCiBuildsMetadata, migration: :gitlab_ci, feature_category: :continuous_integration do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_ci, + table_name: :p_ci_builds_metadata, + column_name: :id, + job_arguments: [:partition_id, described_class::PARTITION_ID], + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end -- GitLab From 672ae6c62e24e2b58f9363f578bafb050ca79d06 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 13 Oct 2025 18:19:17 +0200 Subject: [PATCH 02/17] Update dictionary file --- .../batched_background_migrations/move_ci_builds_metadata.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/docs/batched_background_migrations/move_ci_builds_metadata.yml b/db/docs/batched_background_migrations/move_ci_builds_metadata.yml index 0140a11d13fa2e..56cf8b3b010071 100644 --- a/db/docs/batched_background_migrations/move_ci_builds_metadata.yml +++ b/db/docs/batched_background_migrations/move_ci_builds_metadata.yml @@ -2,7 +2,7 @@ migration_job_name: MoveCiBuildsMetadata description: Copies the data from p_ci_builds_metadata into the deduplicated tables feature_category: continuous_integration -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/1 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/208674 milestone: '18.6' queued_migration_version: 20251013133259 finalized_by: # version of the migration that finalized this BBM -- GitLab From c1447efd5dbdaedd9b3958a332521eb7f5188842 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Wed, 15 Oct 2025 10:54:25 +0200 Subject: [PATCH 03/17] Create job definitions and 
update builds --- .../move_ci_builds_metadata.rb | 135 ++++++++++++++++-- 1 file changed, 123 insertions(+), 12 deletions(-) diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb index f3f25fd9a731ab..f7042d0635f8a6 100644 --- a/lib/gitlab/background_migration/move_ci_builds_metadata.rb +++ b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true # rubocop:disable Database/AvoidScopeTo -- uses partition pruning, doesn't need a specialized index +# rubocop:disable Metrics/ClassLength -- TODO refactor module Gitlab module BackgroundMigration class MoveCiBuildsMetadata < BatchedMigrationJob @@ -17,27 +18,55 @@ def perform each_sub_batch do |sub_batch| # Assumes that all the metadata records that are associated with a job that # already has a job definition instance record don't need to be migrated - sub_batch.where.not('EXISTS (?)', scoped_definition_instances.select(1)).each do |metadata| - job_definition = find_or_create_job_definition_from(metadata) - create_job_definition_instance(metadata, job_definition) - end + available_metadata = sub_batch.where.not('EXISTS (?)', scoped_definition_instances.select(1)).to_a + setup_definitions(available_metadata) + update_jobs(available_metadata) end end - # TODO - def find_or_create_job_definition_from(metadata) - instance = definition_model.where(partition_id: metadata.partition_id).new - instance.save! 
- instance + def setup_definitions(available_metadata) + tag_names_by_job_id = load_tags_for(available_metadata) + run_steps_by_job_id = load_run_steps_for(available_metadata) + + definition_instances_attrs = available_metadata.map do |metadata| + tag_list = tag_names_by_job_id.fetch(metadata.build_id) { [] } + run_steps = run_steps_by_job_id.fetch(metadata.build_id) { [] } + + definition = find_or_create_job_definition_from(metadata, tag_list, run_steps) + job_definition_instance_attrs(metadata, definition) + end + + definition_instance_model.insert_all(definition_instances_attrs, unique_by: [:job_id, :partition_id]) + end + + def update_jobs(available_metadata) + scoped_metadata_sql = <<~SQL + "p_ci_builds"."id" = "p_ci_builds_metadata"."build_id" + AND "p_ci_builds"."partition_id" = "p_ci_builds_metadata"."partition_id" + SQL + + update_sql = <<~SQL + scoped_user_id = COALESCE(p_ci_builds.scoped_user_id, (p_ci_builds_metadata.config_options->>'scoped_user_id')::bigint), + timeout = COALESCE(p_ci_builds.timeout, p_ci_builds_metadata.timeout), + timeout_source = COALESCE(p_ci_builds.timeout_source, p_ci_builds_metadata.timeout_source::smallint), + exit_code = COALESCE(p_ci_builds.exit_code, p_ci_builds_metadata.exit_code), + debug_trace_enabled = COALESCE(p_ci_builds.debug_trace_enabled, p_ci_builds_metadata.debug_trace_enabled) + FROM p_ci_builds_metadata + SQL + + job_model + .where(scoped_metadata_sql) + .where([:id, :partition_id] => available_metadata.pluck(:build_id, :partition_id)) + .update_all(update_sql) end - def create_job_definition_instance(metadata, job_definition) - definition_instance_model.create!( + def job_definition_instance_attrs(metadata, job_definition) + { job_id: metadata.build_id, partition_id: metadata.partition_id, job_definition_id: job_definition.id, project_id: metadata.project_id - ) + } end def scoped_definition_instances @@ -55,7 +84,89 @@ def definition_instance_model @definition_instance_model ||= define_batchable_model( 
:p_ci_job_definition_instances, connection: connection, primary_key: :id) end + + def job_taggings_model + @job_taggings_model ||= define_batchable_model( + :p_ci_build_tags, connection: connection, primary_key: :id) + end + + def job_model + @job_execution_config_model ||= define_batchable_model( + :p_ci_builds, connection: connection, primary_key: :id) + end + + def find_or_create_job_definition_from(metadata, tag_list, run_steps) + config = generate_definition_config(metadata, tag_list, run_steps) + checksum = compute_checksum(config) + current_time = Time.current + + attrs = { + project_id: metadata.project_id, + partition_id: metadata.partition_id, + config: config, + checksum: checksum, + created_at: current_time, + updated_at: current_time, + interruptible: config.fetch(:interruptible, false) + } + + find_or_create_definition_by(attrs) + end + + def generate_definition_config(metadata, tag_list, run_steps) + config = { + options: metadata.config_options, + yaml_variables: metadata.config_variables, + id_tokens: metadata.id_tokens, + secrets: metadata.secrets + } + config[:interruptible] = metadata.interruptible unless metadata.interruptible.nil? + config[:tag_list] = tag_list + config[:run_steps] = run_steps || [] + config + end + + def compute_checksum(config) + Digest::SHA256.hexdigest(Gitlab::Json.dump(config)) + end + + # rubocop:disable Performance/ActiveRecordSubtransactions -- No longer a problem? + # rubocop:disable BackgroundMigration/AvoidSilentRescueExceptions -- specific exception + def find_or_create_definition_by(attrs) + record = definition_model.find_by(attrs.slice(:project_id, :partition_id, :checksum)) + return record if record.present? 
+ + definition_model.transaction(requires_new: true) { definition_model.create(attrs) } + rescue ActiveRecord::RecordNotUnique + definition_model.find_by!(attrs.slice(:project_id, :partition_id, :checksum)) + end + # rubocop:enable BackgroundMigration/AvoidSilentRescueExceptions + # rubocop:enable Performance/ActiveRecordSubtransactions + + def load_tags_for(metadata_records) + job_taggings_model + .where([:build_id, :partition_id] => metadata_records.pluck(:build_id, :partition_id)) + .joins('INNER JOIN "tags" ON "tags"."id" = "p_ci_build_tags"."tag_id"') + .group(:build_id) + .pluck(:build_id, Arel.sql('COALESCE(array_agg(tags.name ORDER BY tags.name), ARRAY[]::text[])')) + .to_h + end + + def load_run_steps_for(metadata_records) + join_sql = <<~SQL + INNER JOIN "p_ci_builds_execution_configs" + ON "p_ci_builds"."execution_config_id" = "p_ci_builds_execution_configs"."id" + AND "p_ci_builds"."partition_id" = "p_ci_builds_execution_configs"."partition_id" + SQL + + job_model + .where([:id, :partition_id] => metadata_records.pluck(:build_id, :partition_id)) + .joins(join_sql) + .pluck(Arel.sql('p_ci_builds.id'), Arel.sql('p_ci_builds_execution_configs.run_steps')) + .to_h + end end end end # rubocop:enable Database/AvoidScopeTo +# rubocop:enable Metrics/ClassLength -- GitLab From 2069e9a5264ce1ae0eca3d8493084c9bcf196d50 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Wed, 15 Oct 2025 17:16:40 +0200 Subject: [PATCH 04/17] Add tests for migration --- .../move_ci_builds_metadata.rb | 47 ++- .../move_ci_builds_metadata_spec.rb | 279 +++++++++++++++++- 2 files changed, 316 insertions(+), 10 deletions(-) diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb index f7042d0635f8a6..5678155272bfaa 100644 --- a/lib/gitlab/background_migration/move_ci_builds_metadata.rb +++ b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -21,6 +21,8 @@ def perform available_metadata = 
sub_batch.where.not('EXISTS (?)', scoped_definition_instances.select(1)).to_a setup_definitions(available_metadata) update_jobs(available_metadata) + update_job_artifacts(available_metadata) + copy_environments(sub_batch) end end @@ -40,7 +42,7 @@ def setup_definitions(available_metadata) end def update_jobs(available_metadata) - scoped_metadata_sql = <<~SQL + scoped_metadata_sql = <<~SQL.squish "p_ci_builds"."id" = "p_ci_builds_metadata"."build_id" AND "p_ci_builds"."partition_id" = "p_ci_builds_metadata"."partition_id" SQL @@ -60,6 +62,40 @@ def update_jobs(available_metadata) .update_all(update_sql) end + def update_job_artifacts(available_metadata) + scoped_metadata_sql = <<~SQL.squish + "p_ci_job_artifacts"."job_id" = "p_ci_builds_metadata"."build_id" + AND "p_ci_job_artifacts"."partition_id" = "p_ci_builds_metadata"."partition_id" + SQL + + update_sql = <<~SQL.squish + exposed_as = COALESCE( + p_ci_job_artifacts.exposed_as, + p_ci_builds_metadata.config_options->'artifacts'->>'expose_as' + ), + exposed_paths = COALESCE( + p_ci_job_artifacts.exposed_paths, + CASE + WHEN p_ci_builds_metadata.config_options->'artifacts'->'paths' IS NOT NULL + THEN ARRAY( + SELECT jsonb_array_elements_text(p_ci_builds_metadata.config_options->'artifacts'->'paths') + ) + ELSE NULL + END + ) + FROM p_ci_builds_metadata + SQL + + job_artifact_model + .where(scoped_metadata_sql) + .where([:job_id, :partition_id] => available_metadata.pluck(:build_id, :partition_id)) + .update_all(update_sql) + end + + def copy_environments(sub_batch) + # plug https://gitlab.com/gitlab-org/gitlab/-/merge_requests/201868 + end + def job_definition_instance_attrs(metadata, job_definition) { job_id: metadata.build_id, @@ -95,6 +131,11 @@ def job_model :p_ci_builds, connection: connection, primary_key: :id) end + def job_artifact_model + @job_artifact_model ||= define_batchable_model( + :p_ci_job_artifacts, connection: connection, primary_key: :id) + end + def find_or_create_job_definition_from(metadata, 
tag_list, run_steps) config = generate_definition_config(metadata, tag_list, run_steps) checksum = compute_checksum(config) @@ -122,7 +163,7 @@ def generate_definition_config(metadata, tag_list, run_steps) } config[:interruptible] = metadata.interruptible unless metadata.interruptible.nil? config[:tag_list] = tag_list - config[:run_steps] = run_steps || [] + config[:run_steps] = run_steps config end @@ -153,7 +194,7 @@ def load_tags_for(metadata_records) end def load_run_steps_for(metadata_records) - join_sql = <<~SQL + join_sql = <<~SQL.squish INNER JOIN "p_ci_builds_execution_configs" ON "p_ci_builds"."execution_config_id" = "p_ci_builds_execution_configs"."id" AND "p_ci_builds"."partition_id" = "p_ci_builds_execution_configs"."partition_id" diff --git a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb index ede6a3b5463390..6c6b9ae6175b2c 100644 --- a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb +++ b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb @@ -6,9 +6,39 @@ let(:pipelines_table) { table(:p_ci_pipelines, primary_key: :id) } let(:builds_table) { table(:p_ci_builds, primary_key: :id) } let(:builds_metadata_table) { table(:p_ci_builds_metadata, primary_key: :id) } + let(:artifacts_table) { table(:p_ci_job_artifacts, primary_key: :id) } + let(:tags_table) { table(:tags, primary_key: :id) } + let(:taggings_table) { table(:p_ci_build_tags, primary_key: :id) } - let(:pipeline) { pipelines_table.create!(partition_id: 100, project_id: 1) } - let(:job) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + let(:definitions_table) { table(:p_ci_job_definitions, primary_key: :id) } + let(:definition_instances_table) { table(:p_ci_job_definition_instances, primary_key: :build_id) } + let(:execution_configs_table) { table(:p_ci_builds_execution_configs, primary_key: :id) } + + let!(:pipeline) { 
pipelines_table.create!(partition_id: 100, project_id: 1) } + let!(:job_a) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + let!(:job_b) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + + let(:duplicate_configs) do + { + config_options: { image: 'ruby', script: 'rspec' }, + config_variables: { 'HOME' => '~' }, + id_tokens: { 'VAULT_ID_TOKEN' => { aud: 'https://gitlab.test' } }, + secrets: { DATABASE_PASSWORD: { vault: 'production/db/password' } }, + interruptible: true + } + end + + let!(:metadata_a) do + builds_metadata_table.create!( + partition_id: 100, project_id: 1, build_id: job_a.id, **duplicate_configs + ) + end + + let!(:metadata_b) do + builds_metadata_table.create!( + partition_id: 100, project_id: 1, build_id: job_b.id, **duplicate_configs + ) + end let(:migration_attrs) do { @@ -25,13 +55,248 @@ let(:migration) { described_class.new(**migration_attrs) } - before do - builds_metadata_table.create!(partition_id: 100, build_id: job.id, project_id: 1) - end - - describe '#perform' do + describe '#perform', :aggregate_failures do it 'does not raise errors' do expect { migration.perform }.not_to raise_error end + + it 'creates unique job definitions' do + expect { migration.perform } + .to change { definition_instances_table.where(job_id: [job_a.id, job_b.id]).count }.by(2) + .and change { definitions_table.count }.by(1) + + job_definition = find_definition(job_a) + + expect(job_definition.checksum).to be_present + expect(job_definition.project_id).to eq(job_a.project_id) + expect(job_definition.partition_id).to eq(job_a.partition_id) + expect(job_definition.interruptible).to eq(metadata_a.interruptible) + + expect(job_definition.config).to match({ + 'options' => { 'image' => 'ruby', 'script' => 'rspec' }, + 'secrets' => { 'DATABASE_PASSWORD' => { 'vault' => 'production/db/password' } }, + 'tag_list' => [], + 'id_tokens' => { 'VAULT_ID_TOKEN' => { 'aud' => 'https://gitlab.test' } }, + 
'run_steps' => [], + 'interruptible' => true, + 'yaml_variables' => { 'HOME' => '~' } + }) + end + + context 'when jobs have tags' do + let(:tag_a) { tags_table.create!(name: 'ruby') } + let(:tag_b) { tags_table.create!(name: 'rails') } + let(:tag_c) { tags_table.create!(name: 'postgresql') } + let(:tag_d) { tags_table.create!(name: 'docker') } + + before do + taggings_table.insert_all([ + { build_id: job_a.id, partition_id: job_a.partition_id, project_id: job_a.project_id, tag_id: tag_a.id }, + { build_id: job_a.id, partition_id: job_a.partition_id, project_id: job_a.project_id, tag_id: tag_c.id }, + { build_id: job_b.id, partition_id: job_b.partition_id, project_id: job_b.project_id, tag_id: tag_b.id }, + { build_id: job_b.id, partition_id: job_b.partition_id, project_id: job_b.project_id, tag_id: tag_d.id } + ], unique_by: [:id, :partition_id]) + end + + it 'creates job definitions with tags' do + expect { migration.perform }.to change { definitions_table.count }.by(2) + + job_definition_a = find_definition(job_a) + job_definition_b = find_definition(job_b) + + expect(job_definition_a.config['tag_list']).to eq(%w[postgresql ruby]) + expect(job_definition_b.config['tag_list']).to eq(%w[docker rails]) + end + end + + context 'when jobs have execution configs' do + let(:run_steps) do + [{ 'name' => 'metrics', 'step' => 'gitlab.com/components/cicd-components/metrics@ref' }] + end + + let!(:pipeline_a) { pipelines_table.create!(partition_id: 100, project_id: 1) } + let!(:pipeline_b) { pipelines_table.create!(partition_id: 100, project_id: 1) } + + let!(:execution_config_a) do + execution_configs_table.create!( + partition_id: 100, project_id: 1, pipeline_id: pipeline_a.id, run_steps: run_steps + ) + end + + let!(:execution_config_b) do + execution_configs_table.create!( + partition_id: 100, project_id: 1, pipeline_id: pipeline_b.id, run_steps: run_steps + ) + end + + let!(:job_a) do + builds_table.create!( + partition_id: 100, project_id: 1, commit_id: pipeline_a.id, 
execution_config_id: execution_config_a.id + ) + end + + let!(:job_b) do + builds_table.create!( + partition_id: 100, project_id: 1, commit_id: pipeline_b.id, execution_config_id: execution_config_b.id + ) + end + + let(:tag_a) { tags_table.create!(name: 'ruby') } + let(:tag_b) { tags_table.create!(name: 'rails') } + + before do + taggings_table.insert_all([ + { build_id: job_a.id, partition_id: job_a.partition_id, project_id: job_a.project_id, tag_id: tag_a.id }, + { build_id: job_a.id, partition_id: job_a.partition_id, project_id: job_a.project_id, tag_id: tag_b.id }, + { build_id: job_b.id, partition_id: job_b.partition_id, project_id: job_b.project_id, tag_id: tag_a.id }, + { build_id: job_b.id, partition_id: job_b.partition_id, project_id: job_b.project_id, tag_id: tag_b.id } + ], unique_by: [:id, :partition_id]) + end + + it 'creates job definitions with tags' do + expect { migration.perform }.to change { definitions_table.count }.by(1) + + job_definition = find_definition(job_a) + + expect(job_definition.config['tag_list']).to eq(%w[rails ruby]) + expect(job_definition.config['run_steps']).to eq(run_steps) + end + end + + context 'if p_ci_builds need to be updated' do + let!(:job_c) do + builds_table.create!( + partition_id: 100, project_id: 1, commit_id: pipeline.id, + timeout: 2800, timeout_source: 2, exit_code: 137, + debug_trace_enabled: false, scoped_user_id: 10 + ) + end + + let!(:metadata_a) do + builds_metadata_table.create!( + partition_id: 100, project_id: 1, build_id: job_a.id, + timeout: 3600, timeout_source: 2, exit_code: 0, + debug_trace_enabled: true, **duplicate_configs + ) + end + + let!(:metadata_b) do + builds_metadata_table.create!( + partition_id: 100, project_id: 1, build_id: job_b.id, + timeout: 1800, timeout_source: 1, exit_code: 1, + debug_trace_enabled: false, + **duplicate_configs.deep_merge(config_options: { scoped_user_id: 50 }) + ) + end + + let!(:metadata_c) do + builds_metadata_table.create!( + partition_id: 100, project_id: 
1, build_id: job_c.id, + timeout: 1800, timeout_source: 1, exit_code: 0, + debug_trace_enabled: true, + **duplicate_configs.deep_merge(config_options: { scoped_user_id: 60 }) + ) + end + + it 'updates jobs from metadata attributes' do + expect { migration.perform }.not_to raise_error + [job_a, job_b, job_c].each(&:reload) + + expect(job_a.timeout).to eq(3600) + expect(job_a.timeout_source).to eq(2) + expect(job_a.exit_code).to eq(0) + expect(job_a.debug_trace_enabled).to be(true) + expect(job_a.scoped_user_id).to be_nil + + expect(job_b.timeout).to eq(1800) + expect(job_b.timeout_source).to eq(1) + expect(job_b.exit_code).to eq(1) + expect(job_b.debug_trace_enabled).to be(false) + expect(job_b.scoped_user_id).to eq(50) + + expect(job_c.timeout).to eq(2800) + expect(job_c.timeout_source).to eq(2) + expect(job_c.exit_code).to eq(137) + expect(job_c.debug_trace_enabled).to be(false) + expect(job_c.scoped_user_id).to eq(10) + end + end + + context 'if p_ci_job_artifacts need to be updated' do + let!(:metadata_b) do + artifacts_options = { + config_options: { + artifacts: { + expose_as: 'string_b', + paths: ['my/path/b1', 'my/path/b2'] + } + } + } + + builds_metadata_table.create!( + partition_id: 100, project_id: 1, build_id: job_b.id, + **duplicate_configs.deep_merge(artifacts_options) + ) + end + + let!(:job_c) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + + let!(:metadata_c) do + artifacts_options = { + config_options: { + artifacts: { + expose_as: 'string_c', + paths: ['my/path/c1', 'my/path/c2'] + } + } + } + + builds_metadata_table.create!( + partition_id: 100, project_id: 1, build_id: job_c.id, + **duplicate_configs.deep_merge(artifacts_options) + ) + end + + let!(:artifact_a) do + artifacts_table.create!( + job_id: job_a.id, partition_id: job_a.partition_id, + project_id: job_a.project_id, file_type: 1 + ) + end + + let!(:artifact_b) do + artifacts_table.create!( + job_id: job_b.id, partition_id: job_b.partition_id, + 
project_id: job_b.project_id, file_type: 1 + ) + end + + let!(:artifact_c) do + artifacts_table.create!( + job_id: job_c.id, partition_id: job_c.partition_id, + project_id: job_c.project_id, file_type: 1, + exposed_as: 'artif_string', exposed_paths: ['artif/path/1', 'artif/path/2'] + ) + end + + it 'updates jobs from metadata attributes' do + expect { migration.perform }.not_to raise_error + [artifact_a, artifact_b, artifact_c].each(&:reload) + + expect(artifact_a.exposed_as).to be_nil + expect(artifact_a.exposed_paths).to be_nil + + expect(artifact_b.exposed_as).to eq('string_b') + expect(artifact_b.exposed_paths).to eq(['my/path/b1', 'my/path/b2']) + + expect(artifact_c.exposed_as).to eq('artif_string') + expect(artifact_c.exposed_paths).to eq(['artif/path/1', 'artif/path/2']) + end + end + + def find_definition(job) + instance = definition_instances_table.find_by(job_id: job.id) + definitions_table.find(instance.job_definition_id) + end end end -- GitLab From c456f8872161b092a1d3ca230a26da637f1268ee Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Thu, 16 Oct 2025 15:56:12 +0200 Subject: [PATCH 05/17] Apply reviewer feedback --- .../move_ci_builds_metadata.rb | 1 + .../move_ci_builds_metadata_spec.rb | 45 +++++++++++++++---- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb index 5678155272bfaa..e6f6c4bfa7b25f 100644 --- a/lib/gitlab/background_migration/move_ci_builds_metadata.rb +++ b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -87,6 +87,7 @@ def update_job_artifacts(available_metadata) SQL job_artifact_model + .where(file_type: 2) # metadata .where(scoped_metadata_sql) .where([:job_id, :partition_id] => available_metadata.pluck(:build_id, :partition_id)) .update_all(update_sql) diff --git a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb 
b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb index 6c6b9ae6175b2c..f10e96b1d88e3f 100644 --- a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb +++ b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb @@ -264,6 +264,13 @@ ) end + let!(:artifact_meta_a) do + artifacts_table.create!( + job_id: job_a.id, partition_id: job_a.partition_id, + project_id: job_a.project_id, file_type: 2 + ) + end + let!(:artifact_b) do artifacts_table.create!( job_id: job_b.id, partition_id: job_b.partition_id, @@ -271,26 +278,46 @@ ) end + let!(:artifact_meta_b) do + artifacts_table.create!( + job_id: job_b.id, partition_id: job_b.partition_id, + project_id: job_b.project_id, file_type: 2 + ) + end + let!(:artifact_c) do artifacts_table.create!( job_id: job_c.id, partition_id: job_c.partition_id, - project_id: job_c.project_id, file_type: 1, + project_id: job_c.project_id, file_type: 1 + ) + end + + let!(:artifact_meta_c) do + artifacts_table.create!( + job_id: job_c.id, partition_id: job_c.partition_id, + project_id: job_c.project_id, file_type: 2, exposed_as: 'artif_string', exposed_paths: ['artif/path/1', 'artif/path/2'] ) end - it 'updates jobs from metadata attributes' do + it 'updates metadata type artifacts from metadata attributes' do expect { migration.perform }.not_to raise_error - [artifact_a, artifact_b, artifact_c].each(&:reload) + [artifact_a, artifact_b, artifact_c, artifact_meta_a, artifact_meta_b, artifact_meta_c].each(&:reload) expect(artifact_a.exposed_as).to be_nil expect(artifact_a.exposed_paths).to be_nil - - expect(artifact_b.exposed_as).to eq('string_b') - expect(artifact_b.exposed_paths).to eq(['my/path/b1', 'my/path/b2']) - - expect(artifact_c.exposed_as).to eq('artif_string') - expect(artifact_c.exposed_paths).to eq(['artif/path/1', 'artif/path/2']) + expect(artifact_meta_a.exposed_as).to be_nil + expect(artifact_meta_a.exposed_paths).to be_nil + + expect(artifact_b.exposed_as).to be_nil + 
expect(artifact_b.exposed_paths).to be_nil + expect(artifact_meta_b.exposed_as).to eq('string_b') + expect(artifact_meta_b.exposed_paths).to eq(['my/path/b1', 'my/path/b2']) + + expect(artifact_c.exposed_as).to be_nil + expect(artifact_c.exposed_paths).to be_nil + expect(artifact_meta_c.exposed_as).to eq('artif_string') + expect(artifact_meta_c.exposed_paths).to eq(['artif/path/1', 'artif/path/2']) end end -- GitLab From 361c1f13cca367f669f84a2dcb13d634bee67446 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Thu, 16 Oct 2025 17:10:45 +0200 Subject: [PATCH 06/17] Enqueue migrations for each partition --- ...013133259_queue_move_ci_builds_metadata.rb | 60 +++++++++++-------- ...3259_queue_move_ci_builds_metadata_spec.rb | 2 +- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index 2a63eef1f75d49..b1cea1cf0f5ca1 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -8,42 +8,54 @@ class QueueMoveCiBuildsMetadata < Gitlab::Database::Migration[2.3] MIGRATION = "MoveCiBuildsMetadata" BATCH_SIZE = 1000 SUB_BATCH_SIZE = 100 - PARTITION_ID = 107 def up - queue_batched_background_migration( - MIGRATION, - :p_ci_builds_metadata, - :id, - :partition_id, - PARTITION_ID, - batch_size: BATCH_SIZE, - sub_batch_size: SUB_BATCH_SIZE, - batch_min_value: batch_min_value, - batch_max_value: batch_max_value - ) + Gitlab::Database::PostgresPartitionedTable.each_partition(:p_ci_builds_metadata) do |partition| + partition_ids = partition.condition.scan(/\d+/).map(&:to_i) + + queue_batched_background_migration( + MIGRATION, + :p_ci_builds_metadata, + :id, + :partition_id, + partition_ids, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE, + batch_min_value: batch_min_value(partition_ids), + batch_max_value: batch_max_value(partition_ids) + ) + 
end end def down - delete_batched_background_migration( - MIGRATION, - :p_ci_builds_metadata, - :id, - [:partition_id, PARTITION_ID] - ) + Gitlab::Database::PostgresPartitionedTable.each_partition(:p_ci_builds_metadata) do |partition| + partition_ids = partition.condition.scan(/\d+/).map(&:to_i) + + delete_batched_background_migration( + MIGRATION, + :p_ci_builds_metadata, + :id, + [:partition_id, partition_ids] + ) + end end private - def batch_min_value - connection.select_value(<<~SQL) - SELECT COALESCE(MIN(id), 1) FROM p_ci_builds_metadata WHERE partition_id = #{PARTITION_ID}; + def batch_min_value(ids) + connection.select_value(ActiveRecord::Base.sanitize_sql_array([<<~SQL, ids])) + SELECT COALESCE(MIN(id), 1) FROM p_ci_builds_metadata WHERE partition_id in (?); SQL end - def batch_max_value - connection.select_value(<<~SQL) - SELECT MAX(id) FROM p_ci_builds_metadata WHERE partition_id = #{PARTITION_ID}; + def batch_max_value(ids) + connection.select_value(ActiveRecord::Base.sanitize_sql_array([<<~SQL, ids])) + SELECT MAX(id) FROM p_ci_builds_metadata WHERE partition_id in (?); SQL end + + # Workaround to allow a single migration to enqueue multiple background migrations + def assign_attributes_safely(migration, max_batch_size, batch_table_name, gitlab_schema, _queued_migration_version) + super(migration, max_batch_size, batch_table_name, gitlab_schema, nil) + end end diff --git a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb index bf6abf0b5e1a71..088b359e8081c4 100644 --- a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb +++ b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb @@ -17,7 +17,7 @@ gitlab_schema: :gitlab_ci, table_name: :p_ci_builds_metadata, column_name: :id, - job_arguments: [:partition_id, described_class::PARTITION_ID], + job_arguments: [:partition_id, [100]], batch_size: described_class::BATCH_SIZE, 
sub_batch_size: described_class::SUB_BATCH_SIZE ) -- GitLab From e9982041c7adb687e1b27ba5710b4b047a4da36e Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Fri, 17 Oct 2025 11:50:16 +0200 Subject: [PATCH 07/17] Copy environments to the new tables --- ...013133259_queue_move_ci_builds_metadata.rb | 4 +- .../move_ci_builds_metadata.rb | 88 ++++- .../move_ci_builds_metadata_spec.rb | 321 ++++++++++++++++-- 3 files changed, 366 insertions(+), 47 deletions(-) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index b1cea1cf0f5ca1..68e0638225d961 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -6,8 +6,8 @@ class QueueMoveCiBuildsMetadata < Gitlab::Database::Migration[2.3] restrict_gitlab_migration gitlab_schema: :gitlab_ci MIGRATION = "MoveCiBuildsMetadata" - BATCH_SIZE = 1000 - SUB_BATCH_SIZE = 100 + BATCH_SIZE = 500 + SUB_BATCH_SIZE = 50 def up Gitlab::Database::PostgresPartitionedTable.each_partition(:p_ci_builds_metadata) do |partition| diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb index e6f6c4bfa7b25f..d5937d25851960 100644 --- a/lib/gitlab/background_migration/move_ci_builds_metadata.rb +++ b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -43,8 +43,8 @@ def setup_definitions(available_metadata) def update_jobs(available_metadata) scoped_metadata_sql = <<~SQL.squish - "p_ci_builds"."id" = "p_ci_builds_metadata"."build_id" - AND "p_ci_builds"."partition_id" = "p_ci_builds_metadata"."partition_id" + p_ci_builds.id = p_ci_builds_metadata.build_id + AND p_ci_builds.partition_id = p_ci_builds_metadata.partition_id SQL update_sql = <<~SQL @@ -64,8 +64,8 @@ def update_jobs(available_metadata) def update_job_artifacts(available_metadata) scoped_metadata_sql = <<~SQL.squish - 
"p_ci_job_artifacts"."job_id" = "p_ci_builds_metadata"."build_id" - AND "p_ci_job_artifacts"."partition_id" = "p_ci_builds_metadata"."partition_id" + p_ci_job_artifacts.job_id = p_ci_builds_metadata.build_id + AND p_ci_job_artifacts.partition_id = p_ci_builds_metadata.partition_id SQL update_sql = <<~SQL.squish @@ -93,10 +93,6 @@ def update_job_artifacts(available_metadata) .update_all(update_sql) end - def copy_environments(sub_batch) - # plug https://gitlab.com/gitlab-org/gitlab/-/merge_requests/201868 - end - def job_definition_instance_attrs(metadata, job_definition) { job_id: metadata.build_id, @@ -188,7 +184,7 @@ def find_or_create_definition_by(attrs) def load_tags_for(metadata_records) job_taggings_model .where([:build_id, :partition_id] => metadata_records.pluck(:build_id, :partition_id)) - .joins('INNER JOIN "tags" ON "tags"."id" = "p_ci_build_tags"."tag_id"') + .joins('INNER JOIN tags ON tags.id = p_ci_build_tags.tag_id') .group(:build_id) .pluck(:build_id, Arel.sql('COALESCE(array_agg(tags.name ORDER BY tags.name), ARRAY[]::text[])')) .to_h @@ -196,9 +192,9 @@ def load_tags_for(metadata_records) def load_run_steps_for(metadata_records) join_sql = <<~SQL.squish - INNER JOIN "p_ci_builds_execution_configs" - ON "p_ci_builds"."execution_config_id" = "p_ci_builds_execution_configs"."id" - AND "p_ci_builds"."partition_id" = "p_ci_builds_execution_configs"."partition_id" + INNER JOIN p_ci_builds_execution_configs + ON p_ci_builds.execution_config_id = p_ci_builds_execution_configs.id + AND p_ci_builds.partition_id = p_ci_builds_execution_configs.partition_id SQL job_model @@ -207,6 +203,74 @@ def load_run_steps_for(metadata_records) .pluck(Arel.sql('p_ci_builds.id'), Arel.sql('p_ci_builds_execution_configs.run_steps')) .to_h end + + def copy_environments(sub_batch) + job_environment_attributes = fetch_environment_attributes(sub_batch) + return if job_environment_attributes.empty? 
+ + bulk_insert_job_environments(job_environment_attributes) + end + + def fetch_environment_attributes(relation) + join_sql = <<~SQL.squish + INNER JOIN p_ci_builds + ON p_ci_builds.partition_id = p_ci_builds_metadata.partition_id + AND p_ci_builds.id = p_ci_builds_metadata.build_id + SQL + + select_sql = <<~SQL.squish + p_ci_builds_metadata.project_id, + p_ci_builds_metadata.build_id AS ci_job_id, + p_ci_builds_metadata.expanded_environment_name, + p_ci_builds_metadata.config_options -> 'environment' AS options, + p_ci_builds.commit_id AS ci_pipeline_id + SQL + + relation + .where.not(expanded_environment_name: nil) + .joins(join_sql) + .select(select_sql) + .map { |metadata| extract_environment_attributes(metadata) } + end + + def extract_environment_attributes(metadata) + attributes = metadata.attributes.slice( + 'project_id', 'ci_job_id', 'ci_pipeline_id', 'expanded_environment_name', 'options' + ) + + options = attributes['options'] || {} + kubernetes_options = options['kubernetes']&.slice('namespace') + options = options.slice('action', 'deployment_tier') + options['kubernetes'] = kubernetes_options if kubernetes_options.present? 
+ + attributes['options'] = options.to_json + attributes + end + + def bulk_insert_job_environments(attributes) + values_list = Arel::Nodes::ValuesList.new(attributes.map(&:values)).to_sql + command = <<~SQL.squish + WITH ci_job_attributes (project_id, ci_job_id, ci_pipeline_id, expanded_environment_name, options) AS (#{values_list}) + INSERT INTO job_environments (project_id, environment_id, ci_job_id, ci_pipeline_id, deployment_id, expanded_environment_name, options) + SELECT + ci_job_attributes.project_id, + environments.id, + ci_job_id, + ci_pipeline_id, + deployments.id, + expanded_environment_name, + options::jsonb + FROM + ci_job_attributes + INNER JOIN environments ON environments.project_id = ci_job_attributes.project_id + AND environments.name = ci_job_attributes.expanded_environment_name + LEFT JOIN deployments ON deployments.deployable_id = ci_job_attributes.ci_job_id + AND deployments.deployable_type = 'CommitStatus' + ON CONFLICT DO NOTHING; + SQL + + ApplicationRecord.connection.execute(command) + end end end end diff --git a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb index f10e96b1d88e3f..94eae922508b66 100644 --- a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb +++ b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb @@ -1,22 +1,51 @@ # frozen_string_literal: true require 'spec_helper' +# rubocop:disable RSpec/MultipleMemoizedHelpers -- We need extra helpers to define tables + +RSpec.describe Gitlab::BackgroundMigration::MoveCiBuildsMetadata, feature_category: :continuous_integration do + let(:pipelines_table) { table(:p_ci_pipelines, primary_key: :id, database: :ci) } + let(:builds_table) { table(:p_ci_builds, primary_key: :id, database: :ci) } + let(:builds_metadata_table) { table(:p_ci_builds_metadata, primary_key: :id, database: :ci) } + let(:artifacts_table) { table(:p_ci_job_artifacts, primary_key: :id, 
database: :ci) } + let(:tags_table) { table(:tags, primary_key: :id, database: :ci) } + let(:taggings_table) { table(:p_ci_build_tags, primary_key: :id, database: :ci) } + let(:definitions_table) { table(:p_ci_job_definitions, primary_key: :id, database: :ci) } + let(:definition_instances_table) { table(:p_ci_job_definition_instances, primary_key: :build_id, database: :ci) } + let(:execution_configs_table) { table(:p_ci_builds_execution_configs, primary_key: :id, database: :ci) } + + let(:organizations_table) { table(:organizations, database: :main) } + let(:namespaces_table) { table(:namespaces, database: :main) } + let(:projects_table) { table(:projects, database: :main) } + let(:environments_table) { table(:environments, database: :main) } + let(:deployments_table) { table(:deployments, database: :main) } + let(:job_environments_table) { table(:job_environments, database: :main) } + + let(:organization) do + organizations_table.create!(name: 'organization', path: 'organization') + end + + let(:namespace) do + namespaces_table.create!(name: "namespace", path: "namespace", organization_id: organization.id) + end + + let(:project) do + projects_table.create!( + namespace_id: namespace.id, + project_namespace_id: namespace.id, + organization_id: organization.id + ) + end -RSpec.describe Gitlab::BackgroundMigration::MoveCiBuildsMetadata, feature_category: :continuous_integration, migration: :gitlab_ci do - let(:pipelines_table) { table(:p_ci_pipelines, primary_key: :id) } - let(:builds_table) { table(:p_ci_builds, primary_key: :id) } - let(:builds_metadata_table) { table(:p_ci_builds_metadata, primary_key: :id) } - let(:artifacts_table) { table(:p_ci_job_artifacts, primary_key: :id) } - let(:tags_table) { table(:tags, primary_key: :id) } - let(:taggings_table) { table(:p_ci_build_tags, primary_key: :id) } + let(:pipeline) { pipelines_table.create!(partition_id: 100, project_id: project.id) } - let(:definitions_table) { table(:p_ci_job_definitions, primary_key: :id) } 
- let(:definition_instances_table) { table(:p_ci_job_definition_instances, primary_key: :build_id) } - let(:execution_configs_table) { table(:p_ci_builds_execution_configs, primary_key: :id) } + let!(:job_a) do + builds_table.create!(partition_id: pipeline.partition_id, project_id: project.id, commit_id: pipeline.id) + end - let!(:pipeline) { pipelines_table.create!(partition_id: 100, project_id: 1) } - let!(:job_a) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } - let!(:job_b) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + let!(:job_b) do + builds_table.create!(partition_id: pipeline.partition_id, project_id: project.id, commit_id: pipeline.id) + end let(:duplicate_configs) do { @@ -30,13 +59,13 @@ let!(:metadata_a) do builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_a.id, **duplicate_configs + partition_id: job_a.partition_id, project_id: project.id, build_id: job_a.id, **duplicate_configs ) end let!(:metadata_b) do builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_b.id, **duplicate_configs + partition_id: job_b.partition_id, project_id: project.id, build_id: job_b.id, **duplicate_configs ) end @@ -48,7 +77,7 @@ batch_column: :id, sub_batch_size: 2, pause_ms: 0, - connection: ApplicationRecord.connection, + connection: Ci::ApplicationRecord.connection, job_arguments: ['partition_id', 100] } end @@ -114,31 +143,31 @@ [{ 'name' => 'metrics', 'step' => 'gitlab.com/components/cicd-components/metrics@ref' }] end - let!(:pipeline_a) { pipelines_table.create!(partition_id: 100, project_id: 1) } - let!(:pipeline_b) { pipelines_table.create!(partition_id: 100, project_id: 1) } + let!(:pipeline_a) { pipelines_table.create!(partition_id: 100, project_id: project.id) } + let!(:pipeline_b) { pipelines_table.create!(partition_id: 100, project_id: project.id) } let!(:execution_config_a) do execution_configs_table.create!( - partition_id: 100, 
project_id: 1, pipeline_id: pipeline_a.id, run_steps: run_steps - ) + partition_id: pipeline_a.partition_id, project_id: project.id, + pipeline_id: pipeline_a.id, run_steps: run_steps) end let!(:execution_config_b) do execution_configs_table.create!( - partition_id: 100, project_id: 1, pipeline_id: pipeline_b.id, run_steps: run_steps - ) + partition_id: pipeline_b.partition_id, project_id: project.id, + pipeline_id: pipeline_b.id, run_steps: run_steps) end let!(:job_a) do builds_table.create!( - partition_id: 100, project_id: 1, commit_id: pipeline_a.id, execution_config_id: execution_config_a.id - ) + partition_id: pipeline_a.partition_id, project_id: project.id, + commit_id: pipeline_a.id, execution_config_id: execution_config_a.id) end let!(:job_b) do builds_table.create!( - partition_id: 100, project_id: 1, commit_id: pipeline_b.id, execution_config_id: execution_config_b.id - ) + partition_id: pipeline_b.partition_id, project_id: project.id, + commit_id: pipeline_b.id, execution_config_id: execution_config_b.id) end let(:tag_a) { tags_table.create!(name: 'ruby') } @@ -166,7 +195,7 @@ context 'if p_ci_builds need to be updated' do let!(:job_c) do builds_table.create!( - partition_id: 100, project_id: 1, commit_id: pipeline.id, + partition_id: pipeline.partition_id, project_id: project.id, commit_id: pipeline.id, timeout: 2800, timeout_source: 2, exit_code: 137, debug_trace_enabled: false, scoped_user_id: 10 ) @@ -174,7 +203,7 @@ let!(:metadata_a) do builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_a.id, + partition_id: job_a.partition_id, project_id: project.id, build_id: job_a.id, timeout: 3600, timeout_source: 2, exit_code: 0, debug_trace_enabled: true, **duplicate_configs ) @@ -182,7 +211,7 @@ let!(:metadata_b) do builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_b.id, + partition_id: job_b.partition_id, project_id: project.id, build_id: job_b.id, timeout: 1800, timeout_source: 1, exit_code: 1, 
debug_trace_enabled: false, **duplicate_configs.deep_merge(config_options: { scoped_user_id: 50 }) @@ -191,7 +220,7 @@ let!(:metadata_c) do builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_c.id, + partition_id: job_c.partition_id, project_id: project.id, build_id: job_c.id, timeout: 1800, timeout_source: 1, exit_code: 0, debug_trace_enabled: true, **duplicate_configs.deep_merge(config_options: { scoped_user_id: 60 }) @@ -234,12 +263,14 @@ } builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_b.id, + partition_id: job_b.partition_id, project_id: project.id, build_id: job_b.id, **duplicate_configs.deep_merge(artifacts_options) ) end - let!(:job_c) { builds_table.create!(partition_id: 100, project_id: 1, commit_id: pipeline.id) } + let!(:job_c) do + builds_table.create!(partition_id: pipeline.partition_id, project_id: project.id, commit_id: pipeline.id) + end let!(:metadata_c) do artifacts_options = { @@ -252,7 +283,7 @@ } builds_metadata_table.create!( - partition_id: 100, project_id: 1, build_id: job_c.id, + partition_id: job_c.partition_id, project_id: project.id, build_id: job_c.id, **duplicate_configs.deep_merge(artifacts_options) ) end @@ -321,9 +352,233 @@ end end + context 'if environments need to be moved' do + let(:namespace_a) do + namespaces_table.create!(name: "namespace_a", path: "namespace_a", organization_id: organization.id) + end + + let(:namespace_b) do + namespaces_table.create!(name: "namespace_b", path: "namespace_b", organization_id: organization.id) + end + + let!(:project_a) do + projects_table.create!( + namespace_id: namespace_a.id, + project_namespace_id: namespace_a.id, + organization_id: organization.id + ) + end + + let!(:project_b) do + projects_table.create!( + namespace_id: namespace_b.id, + project_namespace_id: namespace_b.id, + organization_id: organization.id + ) + end + + let!(:staging_a) { environments_table.create!(project_id: project_a.id, name: 'staging_a', slug: 
'stg_a') } + let!(:staging_b) { environments_table.create!(project_id: project_b.id, name: 'staging_b', slug: 'stg_b') } + let!(:production_a) { environments_table.create!(project_id: project_a.id, name: 'production_a', slug: 'prod_a') } + let!(:production_b) { environments_table.create!(project_id: project_b.id, name: 'production_b', slug: 'prod_b') } + + let!(:pipeline_a) { pipelines_table.create!(partition_id: 100, project_id: project_a.id) } + let!(:pipeline_b) { pipelines_table.create!(partition_id: 100, project_id: project_b.id) } + + let!(:job_a) do + builds_table.create!(partition_id: pipeline_a.partition_id, commit_id: pipeline_a.id, project_id: project_a.id) + end + + let!(:job_b) do + builds_table.create!(partition_id: pipeline_b.partition_id, commit_id: pipeline_b.id, project_id: project_b.id) + end + + let!(:job_c) do + builds_table.create!(partition_id: pipeline_a.partition_id, commit_id: pipeline_a.id, project_id: project_a.id) + end + + let!(:job_d) do + builds_table.create!(partition_id: pipeline_b.partition_id, commit_id: pipeline_b.id, project_id: project_b.id) + end + + let!(:job_e) do + builds_table.create!(partition_id: pipeline_a.partition_id, commit_id: pipeline_a.id, project_id: project_a.id) + end + + let!(:job_f) do + builds_table.create!(partition_id: pipeline_b.partition_id, commit_id: pipeline_b.id, project_id: project_b.id) + end + + let!(:job_g) do + builds_table.create!(partition_id: pipeline_a.partition_id, commit_id: pipeline_a.id, project_id: project_a.id) + end + + let!(:job_h) do + builds_table.create!(partition_id: pipeline_b.partition_id, commit_id: pipeline_b.id, project_id: project_b.id) + end + + let!(:deployment_a) do + deployments_table.create!( + project_id: project_a.id, environment_id: staging_a.id, deployable_type: 'CommitStatus', + deployable_id: job_a.id, iid: 1, ref: 'main', sha: 'aaaaaa', tag: true, status: 0) + end + + let!(:deployment_b) do + deployments_table.create!( + project_id: project_b.id, 
environment_id: staging_b.id, deployable_id: job_b.id, iid: 1, + ref: 'main', sha: 'aaaaaa', tag: false, status: 0) + end + + let!(:deployment_c) do + deployments_table.create!( + project_id: project_b.id, environment_id: production_b.id, deployable_type: 'CommitStatus', + deployable_id: job_d.id, iid: 2, ref: 'main', sha: 'aaaaaa', tag: false, status: 0) + end + + let!(:metadata_a) do + environment_name = 'staging_a' + options = { script: 'example', environment: { name: environment_name } } + + builds_metadata_table.create!( + partition_id: pipeline_a.partition_id, build_id: job_a.id, project_id: project_a.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata_b) do + environment_name = 'staging_b' + options = { environment: { name: 'staging_b', action: 'stop', deployment_tier: 'staging' } } + + builds_metadata_table.create!( + partition_id: pipeline_b.partition_id, build_id: job_b.id, project_id: project_b.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata_c) do + environment_name = 'production_a' + options = { script: 'example', environment: { deployment_tier: 'testing' } } + + builds_metadata_table.create!( + partition_id: pipeline_a.partition_id, build_id: job_c.id, project_id: project_a.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata_d) do + environment_name = 'production_b' + options = { script: 'example', + environment: { name: environment_name, kubernetes: { namespace: 'namespace', agent: 'agent' } } } + + builds_metadata_table.create!( + partition_id: pipeline_b.partition_id, build_id: job_d.id, project_id: project_b.id, + expanded_environment_name: environment_name, config_options: options) + end + + # Skipped: environment name is blank + let!(:metadata_e) do + environment_name = nil + options = { script: 'example', environment: { name: 'excluded' } } + + builds_metadata_table.create!( + partition_id: 
pipeline_a.partition_id, build_id: job_e.id, project_id: project_a.id, + expanded_environment_name: environment_name, config_options: options) + end + + # Skipped: environment name is present but environment has since been deleted + let!(:metadata_f) do + environment_name = 'non-existing' + options = { script: 'example', environment: { name: 'deleted' } } + + builds_metadata_table.create!( + partition_id: pipeline_b.partition_id, build_id: job_f.id, project_id: project_b.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:metadata_g) do + environment_name = 'staging_a' + options = nil + + builds_metadata_table.create!( + partition_id: pipeline_a.partition_id, build_id: job_g.id, project_id: project_a.id, + expanded_environment_name: environment_name, config_options: options) + end + + # Skipped: job environment record already exists + let!(:metadata_h) do + environment_name = 'staging_b' + options = { environment: { name: 'staging_b', action: 'stop', deployment_tier: 'staging' } } + + builds_metadata_table.create!( + partition_id: pipeline_b.partition_id, build_id: job_h.id, project_id: project_b.id, + expanded_environment_name: environment_name, config_options: options) + end + + let!(:existing_job_environment) do + job_environments_table.create!( + project_id: project_b.id, environment_id: staging_b.id, ci_pipeline_id: pipeline_b.id, + ci_job_id: job_h.id, expanded_environment_name: staging_b.name, + options: { action: 'stop', deployment_tier: 'staging' }) + end + + describe '#perform' do + it 'constructs job_environment records from associated records', :aggregate_failures do + expect { migration.perform }.to change { job_environments_table.count }.from(1).to(6) + + job_environment_a = job_environments_table.where(ci_job_id: job_a.id).first + expect(job_environment_a).to have_attributes( + project_id: project_a.id, + environment_id: staging_a.id, + ci_pipeline_id: pipeline_a.id, + deployment_id: deployment_a.id, + 
expanded_environment_name: staging_a.name, + options: {} + ) + + job_environment_b = job_environments_table.where(ci_job_id: job_b.id).first + expect(job_environment_b).to have_attributes( + project_id: project_b.id, + environment_id: staging_b.id, + ci_pipeline_id: pipeline_b.id, + deployment_id: nil, + expanded_environment_name: staging_b.name, + options: { 'action' => 'stop', 'deployment_tier' => 'staging' } + ) + + job_environment_c = job_environments_table.where(ci_job_id: job_c.id).first + expect(job_environment_c).to have_attributes( + project_id: project_a.id, + environment_id: production_a.id, + ci_pipeline_id: pipeline_a.id, + deployment_id: nil, + expanded_environment_name: production_a.name, + options: { 'deployment_tier' => 'testing' } + ) + + job_environment_d = job_environments_table.where(ci_job_id: job_d.id).first + expect(job_environment_d).to have_attributes( + project_id: project_b.id, + environment_id: production_b.id, + ci_pipeline_id: pipeline_b.id, + deployment_id: deployment_c.id, + expanded_environment_name: production_b.name, + options: { 'kubernetes' => { 'namespace' => 'namespace' } } + ) + + job_environment_e = job_environments_table.where(ci_job_id: job_g.id).first + expect(job_environment_e).to have_attributes( + project_id: project_a.id, + environment_id: staging_a.id, + ci_pipeline_id: pipeline_a.id, + deployment_id: nil, + expanded_environment_name: staging_a.name, + options: {} + ) + end + end + end + def find_definition(job) instance = definition_instances_table.find_by(job_id: job.id) definitions_table.find(instance.job_definition_id) end end end +# rubocop:enable RSpec/MultipleMemoizedHelpers -- GitLab From 9959b5392d64bc491218c09928ad75b6575e9a6d Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Fri, 17 Oct 2025 16:26:14 +0200 Subject: [PATCH 08/17] Add tests for enqueuing on partitions --- ...013133259_queue_move_ci_builds_metadata.rb | 2 +- ...3259_queue_move_ci_builds_metadata_spec.rb | 31 ++++++++++++++++++- 2 files
changed, 31 insertions(+), 2 deletions(-) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index 68e0638225d961..a0823f18d5d836 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -5,7 +5,7 @@ class QueueMoveCiBuildsMetadata < Gitlab::Database::Migration[2.3] restrict_gitlab_migration gitlab_schema: :gitlab_ci - MIGRATION = "MoveCiBuildsMetadata" + MIGRATION = 'MoveCiBuildsMetadata' BATCH_SIZE = 500 SUB_BATCH_SIZE = 50 diff --git a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb index 088b359e8081c4..d6ee80ac08b947 100644 --- a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb +++ b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb @@ -6,7 +6,18 @@ RSpec.describe QueueMoveCiBuildsMetadata, migration: :gitlab_ci, feature_category: :continuous_integration do let!(:batched_migration) { described_class::MIGRATION } - it 'schedules a new batched migration' do + before do + Ci::ApplicationRecord.connection.execute(<<~SQL) + CREATE TABLE IF NOT EXISTS "gitlab_partitions_dynamic"."ci_builds_metadata_100" + PARTITION OF "p_ci_builds_metadata" FOR VALUES IN (100); + CREATE TABLE IF NOT EXISTS "gitlab_partitions_dynamic"."ci_builds_metadata_101" + PARTITION OF "p_ci_builds_metadata" FOR VALUES IN (101); + CREATE TABLE IF NOT EXISTS "gitlab_partitions_dynamic"."ci_builds_metadata_102" + PARTITION OF "p_ci_builds_metadata" FOR VALUES IN (102); + SQL + end + + it 'schedules new batched migrations' do reversible_migration do |migration| migration.before -> { expect(batched_migration).not_to have_scheduled_batched_migration @@ -21,6 +32,24 @@ batch_size: described_class::BATCH_SIZE, sub_batch_size: described_class::SUB_BATCH_SIZE ) + + expect(batched_migration).to 
have_scheduled_batched_migration( + gitlab_schema: :gitlab_ci, + table_name: :p_ci_builds_metadata, + column_name: :id, + job_arguments: [:partition_id, [101]], + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_ci, + table_name: :p_ci_builds_metadata, + column_name: :id, + job_arguments: [:partition_id, [102]], + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) } end end -- GitLab From 1fd27f8270997f65193b49ac2a909c1002577ff0 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 20 Oct 2025 11:05:45 +0200 Subject: [PATCH 09/17] Limit migration testing to first 2 partitions --- db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index a0823f18d5d836..61f64aa74d00fb 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -12,6 +12,7 @@ class QueueMoveCiBuildsMetadata < Gitlab::Database::Migration[2.3] def up Gitlab::Database::PostgresPartitionedTable.each_partition(:p_ci_builds_metadata) do |partition| partition_ids = partition.condition.scan(/\d+/).map(&:to_i) + next unless (partition_ids & [100, 101]).any? 
queue_batched_background_migration( MIGRATION, -- GitLab From 2ab22bf946de389c3b9ffcc956016b1bd4444cc7 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 20 Oct 2025 13:20:29 +0200 Subject: [PATCH 10/17] Update test matchers --- .../support/matchers/background_migrations_matchers.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/spec/support/matchers/background_migrations_matchers.rb b/spec/support/matchers/background_migrations_matchers.rb index 2257622fda583c..b082b2bfe8ff61 100644 --- a/spec/support/matchers/background_migrations_matchers.rb +++ b/spec/support/matchers/background_migrations_matchers.rb @@ -33,9 +33,13 @@ end define_method :does_not_match? do |migration| - batched_migrations = - Gitlab::Database::BackgroundMigration::BatchedMigration - .where(job_class_name: migration) + batched_migrations = if [gitlab_schema, table_name, column_name, job_arguments].all?(&:present?) + Gitlab::Database::BackgroundMigration::BatchedMigration + .for_configuration(gitlab_schema, migration, table_name, column_name, job_arguments) + else + Gitlab::Database::BackgroundMigration::BatchedMigration + .where(job_class_name: migration) + end expect(batched_migrations.count).to( be(0), -- GitLab From 8362151397efc2febafb1cff3cdb34e6bcf6ef44 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 20 Oct 2025 13:22:35 +0200 Subject: [PATCH 11/17] Skip empty partitions --- ...013133259_queue_move_ci_builds_metadata.rb | 12 +++++++++--- ...3259_queue_move_ci_builds_metadata_spec.rb | 19 ++++++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index 61f64aa74d00fb..c31c96d6247b57 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -12,7 +12,7 @@ class QueueMoveCiBuildsMetadata < 
Gitlab::Database::Migration[2.3] def up Gitlab::Database::PostgresPartitionedTable.each_partition(:p_ci_builds_metadata) do |partition| partition_ids = partition.condition.scan(/\d+/).map(&:to_i) - next unless (partition_ids & [100, 101]).any? + next if empty_partition?(partition_ids) queue_batched_background_migration( MIGRATION, @@ -45,13 +45,19 @@ def down def batch_min_value(ids) connection.select_value(ActiveRecord::Base.sanitize_sql_array([<<~SQL, ids])) - SELECT COALESCE(MIN(id), 1) FROM p_ci_builds_metadata WHERE partition_id in (?); + SELECT COALESCE(MIN(id), 1) FROM p_ci_builds_metadata WHERE partition_id IN (?); SQL end def batch_max_value(ids) connection.select_value(ActiveRecord::Base.sanitize_sql_array([<<~SQL, ids])) - SELECT MAX(id) FROM p_ci_builds_metadata WHERE partition_id in (?); + SELECT MAX(id) FROM p_ci_builds_metadata WHERE partition_id IN (?); + SQL + end + + def empty_partition?(ids) + !connection.select_value(ActiveRecord::Base.sanitize_sql_array([<<~SQL, ids])) + SELECT true FROM p_ci_builds_metadata WHERE partition_id IN (?) 
LIMIT 1; SQL end diff --git a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb index d6ee80ac08b947..8809328af11dc2 100644 --- a/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb +++ b/spec/migrations/20251013133259_queue_move_ci_builds_metadata_spec.rb @@ -6,6 +6,20 @@ RSpec.describe QueueMoveCiBuildsMetadata, migration: :gitlab_ci, feature_category: :continuous_integration do let!(:batched_migration) { described_class::MIGRATION } + let(:pipelines_table) { table(:p_ci_pipelines, primary_key: :id, database: :ci) } + let(:builds_table) { table(:p_ci_builds, primary_key: :id, database: :ci) } + let(:builds_metadata_table) { table(:p_ci_builds_metadata, primary_key: :id, database: :ci) } + let(:pipeline_a) { pipelines_table.create!(partition_id: 100, project_id: 1) } + let(:pipeline_b) { pipelines_table.create!(partition_id: 101, project_id: 1) } + + let(:job_a) do + builds_table.create!(partition_id: pipeline_a.partition_id, project_id: 1, commit_id: pipeline_a.id) + end + + let(:job_b) do + builds_table.create!(partition_id: pipeline_b.partition_id, project_id: 1, commit_id: pipeline_b.id) + end + before do Ci::ApplicationRecord.connection.execute(<<~SQL) CREATE TABLE IF NOT EXISTS "gitlab_partitions_dynamic"."ci_builds_metadata_100" @@ -15,6 +29,9 @@ CREATE TABLE IF NOT EXISTS "gitlab_partitions_dynamic"."ci_builds_metadata_102" PARTITION OF "p_ci_builds_metadata" FOR VALUES IN (102); SQL + + builds_metadata_table.create!(partition_id: job_a.partition_id, project_id: 1, build_id: job_a.id) + builds_metadata_table.create!(partition_id: job_b.partition_id, project_id: 1, build_id: job_b.id) end it 'schedules new batched migrations' do @@ -42,7 +59,7 @@ sub_batch_size: described_class::SUB_BATCH_SIZE ) - expect(batched_migration).to have_scheduled_batched_migration( + expect(batched_migration).not_to have_scheduled_batched_migration( gitlab_schema: 
:gitlab_ci, table_name: :p_ci_builds_metadata, column_name: :id, -- GitLab From c924a60c160d4eb1d1862d90c3f6860395ac9798 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 20 Oct 2025 13:23:02 +0200 Subject: [PATCH 12/17] Skip empty batches --- .../move_ci_builds_metadata.rb | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb index d5937d25851960..7a3d4997a7ca79 100644 --- a/lib/gitlab/background_migration/move_ci_builds_metadata.rb +++ b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -42,6 +42,9 @@ def setup_definitions(available_metadata) end def update_jobs(available_metadata) + filters = available_metadata.pluck(:build_id, :partition_id) + return if filters.empty? + scoped_metadata_sql = <<~SQL.squish p_ci_builds.id = p_ci_builds_metadata.build_id AND p_ci_builds.partition_id = p_ci_builds_metadata.partition_id @@ -58,11 +61,14 @@ def update_jobs(available_metadata) job_model .where(scoped_metadata_sql) - .where([:id, :partition_id] => available_metadata.pluck(:build_id, :partition_id)) + .where([:id, :partition_id] => filters) .update_all(update_sql) end def update_job_artifacts(available_metadata) + filters = available_metadata.pluck(:build_id, :partition_id) + return if filters.empty? 
+ scoped_metadata_sql = <<~SQL.squish p_ci_job_artifacts.job_id = p_ci_builds_metadata.build_id AND p_ci_job_artifacts.partition_id = p_ci_builds_metadata.partition_id @@ -89,7 +95,7 @@ def update_job_artifacts(available_metadata) job_artifact_model .where(file_type: 2) # metadata .where(scoped_metadata_sql) - .where([:job_id, :partition_id] => available_metadata.pluck(:build_id, :partition_id)) + .where([:job_id, :partition_id] => filters) .update_all(update_sql) end @@ -182,8 +188,11 @@ def find_or_create_definition_by(attrs) # rubocop:enable Performance/ActiveRecordSubtransactions def load_tags_for(metadata_records) + filters = metadata_records.pluck(:build_id, :partition_id) + return {} if filters.empty? + job_taggings_model - .where([:build_id, :partition_id] => metadata_records.pluck(:build_id, :partition_id)) + .where([:build_id, :partition_id] => filters) .joins('INNER JOIN tags ON tags.id = p_ci_build_tags.tag_id') .group(:build_id) .pluck(:build_id, Arel.sql('COALESCE(array_agg(tags.name ORDER BY tags.name), ARRAY[]::text[])')) @@ -191,6 +200,9 @@ def load_tags_for(metadata_records) end def load_run_steps_for(metadata_records) + filters = metadata_records.pluck(:build_id, :partition_id) + return {} if filters.empty? 
+ join_sql = <<~SQL.squish INNER JOIN p_ci_builds_execution_configs ON p_ci_builds.execution_config_id = p_ci_builds_execution_configs.id @@ -198,7 +210,7 @@ def load_run_steps_for(metadata_records) SQL job_model - .where([:id, :partition_id] => metadata_records.pluck(:build_id, :partition_id)) + .where([:id, :partition_id] => filters) .joins(join_sql) .pluck(Arel.sql('p_ci_builds.id'), Arel.sql('p_ci_builds_execution_configs.run_steps')) .to_h -- GitLab From f010569f3d89d618c67a8875aa6f333bd4a6191b Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 20 Oct 2025 16:10:41 +0200 Subject: [PATCH 13/17] Test migration on the first partition --- db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index c31c96d6247b57..c98fb83cd4a57b 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -14,6 +14,8 @@ def up partition_ids = partition.condition.scan(/\d+/).map(&:to_i) next if empty_partition?(partition_ids) + next unless partition_ids.include?(100) + queue_batched_background_migration( MIGRATION, :p_ci_builds_metadata, -- GitLab From 8ce93b786fef75bf11f048902726c65541cddb85 Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Mon, 20 Oct 2025 17:08:07 +0200 Subject: [PATCH 14/17] Test migration on the second partition --- db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index c98fb83cd4a57b..4023c5e1aebb61 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -14,7 +14,7 @@ def up 
partition_ids = partition.condition.scan(/\d+/).map(&:to_i) next if empty_partition?(partition_ids) - next unless partition_ids.include?(100) + next unless partition_ids.include?(101) queue_batched_background_migration( MIGRATION, -- GitLab From 4d6d104c496b4451814ea590397ef7b3a64dda4d Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Tue, 21 Oct 2025 10:25:26 +0200 Subject: [PATCH 15/17] Remove testing changes --- .../20251013133259_queue_move_ci_builds_metadata.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb index 4023c5e1aebb61..111fec32655e04 100644 --- a/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb +++ b/db/post_migrate/20251013133259_queue_move_ci_builds_metadata.rb @@ -6,16 +6,14 @@ class QueueMoveCiBuildsMetadata < Gitlab::Database::Migration[2.3] restrict_gitlab_migration gitlab_schema: :gitlab_ci MIGRATION = 'MoveCiBuildsMetadata' - BATCH_SIZE = 500 - SUB_BATCH_SIZE = 50 + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 def up Gitlab::Database::PostgresPartitionedTable.each_partition(:p_ci_builds_metadata) do |partition| partition_ids = partition.condition.scan(/\d+/).map(&:to_i) next if empty_partition?(partition_ids) - next unless partition_ids.include?(101) - queue_batched_background_migration( MIGRATION, :p_ci_builds_metadata, -- GitLab From b892082dc459c2e7fc8f07f70e4955b4e2e618ec Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Tue, 21 Oct 2025 10:45:41 +0200 Subject: [PATCH 16/17] Remove sub transactions --- .../background_migration/move_ci_builds_metadata.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/gitlab/background_migration/move_ci_builds_metadata.rb b/lib/gitlab/background_migration/move_ci_builds_metadata.rb index 7a3d4997a7ca79..5c10bf85b0dda8 100644 --- a/lib/gitlab/background_migration/move_ci_builds_metadata.rb +++ 
b/lib/gitlab/background_migration/move_ci_builds_metadata.rb @@ -174,18 +174,14 @@ def compute_checksum(config) Digest::SHA256.hexdigest(Gitlab::Json.dump(config)) end - # rubocop:disable Performance/ActiveRecordSubtransactions -- No longer a problem? - # rubocop:disable BackgroundMigration/AvoidSilentRescueExceptions -- specific exception def find_or_create_definition_by(attrs) - record = definition_model.find_by(attrs.slice(:project_id, :partition_id, :checksum)) + unique_attr_names = %i[project_id checksum partition_id] + record = definition_model.find_by(attrs.slice(*unique_attr_names)) return record if record.present? - definition_model.transaction(requires_new: true) { definition_model.create(attrs) } - rescue ActiveRecord::RecordNotUnique - definition_model.find_by!(attrs.slice(:project_id, :partition_id, :checksum)) + definition_model.insert_all([attrs], unique_by: unique_attr_names) + definition_model.find_by!(attrs.slice(*unique_attr_names)) end - # rubocop:enable BackgroundMigration/AvoidSilentRescueExceptions - # rubocop:enable Performance/ActiveRecordSubtransactions def load_tags_for(metadata_records) filters = metadata_records.pluck(:build_id, :partition_id) -- GitLab From 95bd656f756ab33f60e2d53c5979f09165915c3c Mon Sep 17 00:00:00 2001 From: Marius Bobin Date: Fri, 24 Oct 2025 10:06:00 +0200 Subject: [PATCH 17/17] Precreate partitions --- .../move_ci_builds_metadata_spec.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb index 94eae922508b66..eca77c0ba5f571 100644 --- a/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb +++ b/spec/lib/gitlab/background_migration/move_ci_builds_metadata_spec.rb @@ -84,6 +84,16 @@ let(:migration) { described_class.new(**migration_attrs) } + before do + Ci::ApplicationRecord.connection.execute(<<~SQL) + CREATE TABLE IF NOT EXISTS 
gitlab_partitions_dynamic.ci_job_definitions_100 + PARTITION OF p_ci_job_definitions FOR VALUES IN (100); + + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.ci_job_definition_instances_100 + PARTITION OF p_ci_job_definition_instances FOR VALUES IN (100); + SQL + end + describe '#perform', :aggregate_failures do it 'does not raise errors' do expect { migration.perform }.not_to raise_error -- GitLab