From 9542245533ed807d376f32a2e6476c7a9273af2c Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Mon, 9 Jun 2025 23:27:56 +0100 Subject: [PATCH 01/10] PoC: split build metadata into `job_prototype` and `job_info` tables --- app/models/ci/build.rb | 15 +- app/models/ci/build_metadata.rb | 47 +----- app/models/ci/job_info.rb | 40 +++++ app/models/ci/job_prototype.rb | 52 +++++++ app/models/ci/processable.rb | 14 +- app/models/concerns/ci/metadatable.rb | 140 ++++++++++++++++-- .../concerns/ci/partitionable/testing.rb | 2 + app/presenters/ci/build_metadata_presenter.rb | 2 + app/serializers/build_details_entity.rb | 2 +- app/serializers/build_metadata_entity.rb | 1 + .../feature_flags/wip/ci_job_prototypes.yml | 10 ++ config/initializers/postgres_partitioning.rb | 2 + db/docs/p_ci_job_infos.yml | 14 ++ db/docs/p_ci_job_prototypes.yml | 14 ++ ...250609131551_create_p_ci_job_prototypes.rb | 50 +++++++ ...609151252_add_prototype_id_to_ci_builds.rb | 35 +++++ .../20250609194050_create_p_ci_job_infos.rb | 54 +++++++ db/schema_migrations/20250609131551 | 1 + db/schema_migrations/20250609151252 | 1 + db/schema_migrations/20250609194050 | 1 + db/structure.sql | 48 ++++++ ee/app/models/concerns/ee/ci/metadatable.rb | 12 +- .../services/ci/register_job_service_spec.rb | 2 +- .../entities/ci/job_request/runner_info.rb | 2 +- lib/ci/job_token/jwt.rb | 2 +- lib/gitlab/ci/build/step.rb | 6 +- lib/gitlab/ci/jwt.rb | 2 +- lib/gitlab/ci/jwt_v2.rb | 2 +- lib/gitlab/ci/pipeline/seed/build.rb | 47 +++++- lib/gitlab/ci/pipeline/seed/context.rb | 8 + spec/lib/ci/job_token/jwt_spec.rb | 6 +- spec/lib/gitlab/ci/jwt_spec.rb | 4 +- spec/models/ci/build_metadata_spec.rb | 1 + .../api/ci/runner/jobs_request_post_spec.rb | 4 +- 34 files changed, 552 insertions(+), 91 deletions(-) create mode 100644 app/models/ci/job_info.rb create mode 100644 app/models/ci/job_prototype.rb create mode 100644 config/feature_flags/wip/ci_job_prototypes.yml create mode 100644 db/docs/p_ci_job_infos.yml create mode 100644 db/docs/p_ci_job_prototypes.yml create mode 100644 db/migrate/20250609131551_create_p_ci_job_prototypes.rb create mode 100644 db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb create mode 100644 db/migrate/20250609194050_create_p_ci_job_infos.rb create mode 100644 db/schema_migrations/20250609131551 create mode 100644 db/schema_migrations/20250609151252 create mode 100644 db/schema_migrations/20250609194050 diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 344dde3d046242..34ad4f899c0082 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -140,7 +140,6 @@ class Build < Ci::Processable delegate :google_play_integration, to: :project delegate :diffblue_cover_integration, to: :project delegate :ensure_persistent_ref, to: :pipeline - delegate :enable_debug_trace!, to: :metadata serialize :options # rubocop:disable Cop/ActiveRecordSerialize serialize :yaml_variables, coder: Gitlab::Serializer::Ci::Variables # rubocop:disable Cop/ActiveRecordSerialize @@ -246,6 +245,10 @@ def with_preloads preload(:job_artifacts_archive, :job_artifacts, :tags, project: [:namespace]) end + # TODO: need to propagate attributes for prototype_id and job_infos.* + # TODO: do the same for Ci::Bridge. + # TODO: the setter methods from metadatable will fail here! Need to explicitly + # propagate the metadata or prototype/info records. def clone_accessors %i[pipeline project ref tag options name allow_failure stage_idx @@ -346,7 +349,7 @@ def supported_keyset_orderings # rubocop:enable CodeReuse/ServiceClass # after_transition pending: :running do |build| - build.ensure_metadata.update_timeout_state + build.update_timeout_state end after_transition pending: :running do |build| @@ -447,12 +450,6 @@ def auto_retry_allowed? auto_retry.allowed? end - def exit_code=(value) - return unless value - - ensure_metadata.exit_code = value.to_i.clamp(0, Gitlab::Database::MAX_SMALLINT_VALUE) - end - def auto_retry_expected? failed? && auto_retry_allowed? end @@ -1097,7 +1094,7 @@ def max_test_cases_per_report def debug_mode? # perform the check on both sides in case the runner version is old - metadata&.debug_trace_enabled? || + debug_trace_enabled? || Gitlab::Utils.to_boolean(variables['CI_DEBUG_SERVICES']&.value, default: false) || Gitlab::Utils.to_boolean(variables['CI_DEBUG_TRACE']&.value, default: false) end diff --git a/app/models/ci/build_metadata.rb b/app/models/ci/build_metadata.rb index 2b15920d0f9182..1b68e3d6db6d01 100644 --- a/app/models/ci/build_metadata.rb +++ b/app/models/ci/build_metadata.rb @@ -45,20 +45,7 @@ class BuildMetadata < Ci::ApplicationRecord scope :with_interruptible, -> { where(interruptible: true) } scope :with_exposed_artifacts, -> { where(has_exposed_artifacts: true) } - enum :timeout_source, { - unknown_timeout_source: 1, - project_timeout_source: 2, - runner_timeout_source: 3, - job_timeout_source: 4 - } - - def update_timeout_state - timeout = timeout_with_highest_precedence - - return unless timeout - - update(timeout: timeout.value, timeout_source: timeout.source) - end + enum :timeout_source, ::Ci::JobInfo.timeout_sources def enable_debug_trace! self.debug_trace_enabled = true @@ -71,37 +58,5 @@ def enable_debug_trace! def set_build_project self.project_id ||= build.project_id end - - def timeout_with_highest_precedence - [(job_timeout || project_timeout), runner_timeout].compact.min_by(&:value) - end - - def project_timeout - strong_memoize(:project_timeout) do - BuildTimeout.new(project&.build_timeout, :project_timeout_source) - end - end - - def job_timeout - return unless build.options - - strong_memoize(:job_timeout) do - if timeout_from_options = build.options[:job_timeout] - BuildTimeout.new(timeout_from_options, :job_timeout_source) - end - end - end - - def runner_timeout - return unless runner_timeout_set? - - strong_memoize(:runner_timeout) do - BuildTimeout.new(build.runner.maximum_timeout, :runner_timeout_source) - end - end - - def runner_timeout_set? - build.runner&.maximum_timeout.to_i > 0 - end end end diff --git a/app/models/ci/job_info.rb b/app/models/ci/job_info.rb new file mode 100644 index 00000000000000..0db6b6bb0a27eb --- /dev/null +++ b/app/models/ci/job_info.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Ci + class JobInfo < Ci::ApplicationRecord + include Ci::Partitionable + include ChronicDurationAttribute + + self.table_name = :p_ci_job_infos + self.primary_key = :id + + # POC NOTES: copied from Pipeline. There should be 1 unique prototype per partition. + partitionable scope: :job, partitioned: true + query_constraints :id, :partition_id + + belongs_to :project + + belongs_to :job, + ->(info) { in_partition(info) }, + partition_foreign_key: :partition_id, + class_name: 'CommitStatus' + + validates :job, presence: true + + chronic_duration_attr_reader :timeout_human_readable, :timeout + + enum :timeout_source, { + unknown_timeout_source: 1, + project_timeout_source: 2, + runner_timeout_source: 3, + job_timeout_source: 4 + } + + scope :with_interruptible, -> { where(interruptible: true) } + + scope :scoped_job, -> do + where(arel_table[:job_id].eq(Ci::Build.arel_table[:id])) + .where(arel_table[:partition_id].eq(Ci::Build.arel_table[:partition_id])) + end + end +end diff --git a/app/models/ci/job_prototype.rb b/app/models/ci/job_prototype.rb new file mode 100644 index 00000000000000..993b396e3bc6ef --- /dev/null +++ b/app/models/ci/job_prototype.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Ci + class JobPrototype < Ci::ApplicationRecord + include Ci::Partitionable + + self.table_name = :p_ci_job_prototypes + self.primary_key = :id + + # POC NOTES: copied from Pipeline. There should be 1 unique prototype per partition. + partitionable scope: ->(_) { Ci::Pipeline.current_partition_value }, partitioned: true + + # TODO: add schema that includes `options` and `id_tokens`, `secrets`, etc. + # validates :config, json_schema: { filename: 'job_prototype_config' } + + # TODO: add other validations + + attribute :config, ::Gitlab::Database::Type::SymbolizedJsonb.new + + def self.find_or_initialize_for_seed(partition_id:, project_id:, config:) + job_checksum = calculate_checksum_for_seed(config) + + finder_attributes = { + partition_id: partition_id, + project_id: project_id, + checksum: job_checksum + } + + prototype = find_by(**finder_attributes) + return prototype if prototype + + new(finder_attributes.merge(config: config)) + end + + # Used when creating a pipeline. Job datastructure does not match + # the datastructure used for persistence. + def self.calculate_checksum_for_seed(attrs) + # TODO: we need to filter out some attributes such as: + # enqueue_immediately, scoped_user_id, downstream_errors, environment (?) + attrs.hash + + # TODO: consider including a lot more data initially and scale back later if needed. + # E.g. `when`, `allow_failure`, `coverage_regex`, and other attributes outside `options` + # could be unique across many jobs. Later we could consider dropping columns from `p_ci_builds` + # while retaining the unique data. + end + + def enable_debug_trace! + update!(debug_trace_enabled: true) + end + end +end diff --git a/app/models/ci/processable.rb b/app/models/ci/processable.rb index 304fb01f6ecb4e..e5fd070f0e7048 100644 --- a/app/models/ci/processable.rb +++ b/app/models/ci/processable.rb @@ -36,14 +36,20 @@ class Processable < ::CommitStatus where('NOT EXISTS (?)', needs) end + # TODO: check if query performance remain acceptable scope :interruptible, -> do - joins(:metadata).merge(Ci::BuildMetadata.with_interruptible) + joins(:metadata, :job_info) + .merge(Ci::BuildMetadata.with_interruptible) + .or(Ci::JobInfo.with_interruptible) end + # TODO: check if query performance remain acceptable scope :not_interruptible, -> do - joins(:metadata).where.not( - Ci::BuildMetadata.table_name => { id: Ci::BuildMetadata.scoped_build.with_interruptible.select(:id) } - ) + joins(:metadata, :job_info) + .where.not( + Ci::BuildMetadata.table_name => { id: Ci::BuildMetadata.scoped_build.with_interruptible.select(Ci::BuildMetadata.arel_table[:id]) }) + .where.not( + Ci::JobInfo.table_name => { job_id: Ci::JobInfo.scoped_job.with_interruptible.select(Ci::JobInfo.arel_table[:job_id]) }) end state_machine :status do diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index 5497d22afdb51e..92d5202ee289c9 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -16,14 +16,20 @@ module Metadatable partition_foreign_key: :partition_id, inverse_of: :build, autosave: true + + has_one :job_info, + ->(build) { where(partition_id: build.partition_id) }, + class_name: 'Ci::JobInfo', + foreign_key: :job_id, + partition_foreign_key: :partition_id, + inverse_of: :job, + autosave: true + + belongs_to :prototype, class_name: 'Ci::JobPrototype', autosave: true accepts_nested_attributes_for :metadata - delegate :timeout, to: :metadata, prefix: true, allow_nil: true - delegate :interruptible, to: :metadata, prefix: false, allow_nil: true delegate :environment_auto_stop_in, to: :metadata, prefix: false, allow_nil: true - delegate :id_tokens, to: :metadata, allow_nil: true - delegate :exit_code, to: :metadata, allow_nil: true before_validation :ensure_metadata, on: :create @@ -40,6 +46,28 @@ def ensure_metadata metadata || build_metadata(project: project) end + def timeout_value + if prototype.present? + job_info.timeout + else + metadata&.timeout + end + end + + def update_timeout_state + timeout = timeout_with_highest_precedence + + return unless timeout + + # If we have prototype data we are splitting p_ci_builds_metadata between prototype record + # and `p_ci_job_infos`. + if prototype.present? + job_info.update(timeout: timeout.value, timeout_source: timeout.source) + else + ensure_metadata.update(timeout: timeout.value, timeout_source: timeout.source) + end + end + def degenerated? self.options.blank? end @@ -53,17 +81,40 @@ def degenerate! end end + def exit_code + if prototype.present? + job_info&.exit_code + else + metadata&.exit_code + end + end + + def exit_code=(value) + return unless value + + safe_value = value.to_i.clamp(0, Gitlab::Database::MAX_SMALLINT_VALUE) + + if prototype.present? + job_info.exit_code = safe_value + else + ensure_metadata.exit_code = safe_value + end + end + def options - read_metadata_attribute(:options, :config_options, {}) + read_metadata_attribute(:options, :config_options, :options, {}) end def yaml_variables - read_metadata_attribute(:yaml_variables, :config_variables, []) + read_metadata_attribute(:yaml_variables, :config_variables, :yaml_variables, []) end def options=(value) + raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + write_metadata_attribute(:options, :config_options, value) + # TODO: ensure this data gets persisted in `p_ci_builds_info` if intrinsic and mutable ensure_metadata.tap do |metadata| # Store presence of exposed artifacts in build metadata to make it easier to query metadata.has_exposed_artifacts = value&.dig(:artifacts, :expose_as).present? @@ -72,25 +123,55 @@ def options=(value) end def yaml_variables=(value) - write_metadata_attribute(:yaml_variables, :config_variables, value) + raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + + write_metadata_attribute(:yaml_variables, :config_variables, :yaml_variables, value) end def interruptible - metadata&.interruptible + if prototype.present? + job_info&.interruptible + else + metadata&.interruptible + end end def interruptible=(value) + raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + ensure_metadata.interruptible = value end + def id_tokens + read_metadata_attribute(nil, :id_tokens, :id_tokens, {}) + end + def id_tokens? - metadata&.id_tokens.present? + id_tokens.present? end def id_tokens=(value) + raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + ensure_metadata.id_tokens = value end + def enable_debug_trace! + if prototype.present? + prototype.enable_debug_trace! + else + ensure_metadata.enable_debug_trace! + end + end + + def debug_trace_enabled? + if prototype.present? + prototype.debug_trace_enabled? + else + metadata&.debug_trace_enabled? + end + end + def enqueue_immediately? !!options[:enqueue_immediately] end @@ -103,14 +184,51 @@ def set_enqueue_immediately! private - def read_metadata_attribute(legacy_key, metadata_key, default_value = nil) - read_attribute(legacy_key) || metadata&.read_attribute(metadata_key) || default_value + def read_metadata_attribute(legacy_key, metadata_key, prototype_key, default_value = nil) + (legacy_key && read_attribute(legacy_key)) || + (prototype&.config && prototype.config[prototype_key]) || + metadata&.read_attribute(metadata_key) || + default_value end + # TODO: remove this after feature flag ci_job_prototypes is removed. def write_metadata_attribute(legacy_key, metadata_key, value) ensure_metadata.write_attribute(metadata_key, value) write_attribute(legacy_key, nil) end + + # ----- TIMEOUT METHODS ----- + def timeout_with_highest_precedence + [(job_timeout || project_timeout), runner_timeout].compact.min_by(&:value) + end + + def project_timeout + strong_memoize(:project_timeout) do + BuildTimeout.new(project&.build_timeout, :project_timeout_source) + end + end + + def job_timeout + return unless options + + strong_memoize(:job_timeout) do + if timeout_from_options = options[:job_timeout] + BuildTimeout.new(timeout_from_options, :job_timeout_source) + end + end + end + + def runner_timeout + return unless runner_timeout_set? + + strong_memoize(:runner_timeout) do + BuildTimeout.new(runner.maximum_timeout, :runner_timeout_source) + end + end + + def runner_timeout_set? + runner&.maximum_timeout.to_i > 0 + end end end diff --git a/app/models/concerns/ci/partitionable/testing.rb b/app/models/concerns/ci/partitionable/testing.rb index 284cf32d251953..7a41e69d4ffd07 100644 --- a/app/models/concerns/ci/partitionable/testing.rb +++ b/app/models/concerns/ci/partitionable/testing.rb @@ -22,6 +22,8 @@ module Testing Ci::JobAnnotation Ci::JobArtifact Ci::JobArtifactReport + Ci::JobInfo + Ci::JobPrototype Ci::JobVariable Ci::Pipeline Ci::PendingBuild diff --git a/app/presenters/ci/build_metadata_presenter.rb b/app/presenters/ci/build_metadata_presenter.rb index 7465e6daab045b..d3e2d09c11c5c7 100644 --- a/app/presenters/ci/build_metadata_presenter.rb +++ b/app/presenters/ci/build_metadata_presenter.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true module Ci + # TODO: this needs to be Ci::JobInfoPresenter that internally delegates to + # metadata when prototype doesn't exist yet. class BuildMetadataPresenter < Gitlab::View::Presenter::Delegated TIMEOUT_SOURCES = { unknown_timeout_source: nil, diff --git a/app/serializers/build_details_entity.rb b/app/serializers/build_details_entity.rb index df1d249ac894c9..af0097b61d46ad 100644 --- a/app/serializers/build_details_entity.rb +++ b/app/serializers/build_details_entity.rb @@ -8,7 +8,7 @@ class BuildDetailsEntity < Ci::JobEntity expose :stuck?, as: :stuck expose :user, using: UserEntity expose :runner, using: RunnerEntity - expose :metadata, using: BuildMetadataEntity + expose :metadata, using: BuildMetadataEntity # TODO: add Ci::JobInfoPresenter as abstraction expose :pipeline, using: Ci::PipelineEntity expose :deployment_status, if: ->(*) { build.deployment_job? } do diff --git a/app/serializers/build_metadata_entity.rb b/app/serializers/build_metadata_entity.rb index 6242ee8957d5c7..1d73c62512dd9e 100644 --- a/app/serializers/build_metadata_entity.rb +++ b/app/serializers/build_metadata_entity.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true class BuildMetadataEntity < Grape::Entity + # TODO: add Ci::JobInfoPresenter as abstraction expose :timeout_human_readable expose :timeout_source do |metadata| metadata.present.timeout_source diff --git a/config/feature_flags/wip/ci_job_prototypes.yml b/config/feature_flags/wip/ci_job_prototypes.yml new file mode 100644 index 00000000000000..e77480c66518dd --- /dev/null +++ b/config/feature_flags/wip/ci_job_prototypes.yml @@ -0,0 +1,10 @@ +--- +name: ci_job_prototypes +description: +feature_issue_url: +introduced_by_url: +rollout_issue_url: +milestone: '18.1' +group: group::ci platform +type: wip +default_enabled: false diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index a26ca2cf2472b2..3f2903880f3318 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -25,6 +25,8 @@ Ci::JobAnnotation, Ci::JobArtifact, Ci::JobArtifactReport, + Ci::JobInfo, + Ci::JobPrototype, Ci::Pipeline, Ci::PipelineConfig, Ci::PipelineVariable, diff --git a/db/docs/p_ci_job_infos.yml b/db/docs/p_ci_job_infos.yml new file mode 100644 index 00000000000000..5be8969735f419 --- /dev/null +++ b/db/docs/p_ci_job_infos.yml @@ -0,0 +1,14 @@ +--- +table_name: p_ci_job_infos +classes: +- Ci::JobInfo +feature_categories: +- continuous_integration +description: Extra information about `p_ci_builds` table +introduced_by_url: +milestone: '18.1' +gitlab_schema: gitlab_ci +sharding_key: + project_id: projects +table_size: small +schema_inconsistencies: \ No newline at end of file diff --git a/db/docs/p_ci_job_prototypes.yml b/db/docs/p_ci_job_prototypes.yml new file mode 100644 index 00000000000000..f4765949ab6bea --- /dev/null +++ b/db/docs/p_ci_job_prototypes.yml @@ -0,0 +1,14 @@ +--- +table_name: p_ci_job_prototypes +classes: +- Ci::JobPrototype +feature_categories: +- continuous_integration +description: Unique job configurations across pipelines +introduced_by_url: +milestone: '18.1' +gitlab_schema: gitlab_ci +sharding_key: + project_id: projects +table_size: small +schema_inconsistencies: \ No newline at end of file diff --git a/db/migrate/20250609131551_create_p_ci_job_prototypes.rb b/db/migrate/20250609131551_create_p_ci_job_prototypes.rb new file mode 100644 index 00000000000000..08ce1637302e96 --- /dev/null +++ b/db/migrate/20250609131551_create_p_ci_job_prototypes.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class CreatePCiJobPrototypes < Gitlab::Database::Migration[2.3] + include Gitlab::Database::PartitioningMigrationHelpers + + disable_ddl_transaction! + milestone '18.1' + + TABLE_NAME = :p_ci_job_prototypes + UNIQUE_INDEX = :index_ci_job_prototypes_on_partition_id_and_checksum + + def up + create_table TABLE_NAME, primary_key: [:id, :partition_id], options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| + t.bigserial :id, null: false + t.bigint :partition_id, null: false + t.bigint :project_id, null: false + # POC NOTES: I did not include interruptible, secrets, id_tokens as separate columns + # I first want to see if they are really needed long term and if they really need to + # be indexed. + # This includes also `yaml_variables` + t.jsonb :config, default: {}, null: false # renaming from `options` so we can more transparently see provenance of data + t.text :checksum, limit: 255, null: false + t.timestamps null: false + end + + add_concurrent_partitioned_index(TABLE_NAME, [:partition_id, :project_id, :checksum], unique: true, name: UNIQUE_INDEX) + + # TODO: need to do the proper partitioning steps + with_lock_retries do + connection.execute(<<~SQL) + LOCK TABLE ONLY #{TABLE_NAME} IN ACCESS EXCLUSIVE MODE; + SQL + + connection.execute(<<~SQL) + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.ci_job_prototypes_100 + PARTITION OF #{TABLE_NAME} + FOR VALUES IN (100); + SQL + end + end + + def down + remove_concurrent_partitioned_index_by_name(TABLE_NAME, UNIQUE_INDEX) + + drop_table TABLE_NAME + end +end diff --git a/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb b/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb new file mode 100644 index 00000000000000..98dd2b624d7d91 --- /dev/null +++ b/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddPrototypeIdToCiBuilds < Gitlab::Database::Migration[2.3] + # When using the methods "add_concurrent_index" or "remove_concurrent_index" + # you must disable the use of transactions + # as these methods can not run in an existing transaction. + # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure + # that either of them is the _only_ method called in the migration, + # any other changes should go in a separate migration. + # This ensures that upon failure _only_ the index creation or removing fails + # and can be retried or reverted easily. + # + # To disable transactions uncomment the following line and remove these + # comments: + # disable_ddl_transaction! + # + # Configure the `gitlab_schema` to perform data manipulation (DML). + # Visit: https://docs.gitlab.com/ee/development/database/migrations_for_multiple_databases.html + # restrict_gitlab_migration gitlab_schema: :gitlab_main + + # Add dependent 'batched_background_migrations.queued_migration_version' values. + # DEPENDENT_BATCHED_BACKGROUND_MIGRATIONS = [] + milestone '18.1' + + def up + add_column :p_ci_builds, :prototype_id, :bigint + end + + def down + remove_column :p_ci_builds, :prototype_id + end +end diff --git a/db/migrate/20250609194050_create_p_ci_job_infos.rb b/db/migrate/20250609194050_create_p_ci_job_infos.rb new file mode 100644 index 00000000000000..b0a11cc73afb72 --- /dev/null +++ b/db/migrate/20250609194050_create_p_ci_job_infos.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class CreatePCiJobInfos < Gitlab::Database::Migration[2.3] + # When using the methods "add_concurrent_index" or "remove_concurrent_index" + # you must disable the use of transactions + # as these methods can not run in an existing transaction. + # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure + # that either of them is the _only_ method called in the migration, + # any other changes should go in a separate migration. + # This ensures that upon failure _only_ the index creation or removing fails + # and can be retried or reverted easily. + # + # To disable transactions uncomment the following line and remove these + # comments: + disable_ddl_transaction! + milestone '18.1' + + TABLE_NAME = :p_ci_job_infos + + def up + # TODO: there may be more columns to add to this table. + create_table TABLE_NAME, primary_key: [:job_id, :partition_id], options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| + t.bigint :job_id, null: false + t.bigint :partition_id, null: false + t.bigint :project_id, null: false + t.boolean :interruptible, index: true + t.boolean :debug_trace_enabled + t.integer :timeout + t.integer :timeout_source, limit: 2 + t.integer :exit_code, limit: 2 + t.timestamps null: false + end + + # TODO: need to do the proper partitioning steps + with_lock_retries do + connection.execute(<<~SQL) + LOCK TABLE ONLY #{TABLE_NAME} IN ACCESS EXCLUSIVE MODE; + SQL + + connection.execute(<<~SQL) + CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.ci_job_infos_100 + PARTITION OF #{TABLE_NAME} + FOR VALUES IN (100); + SQL + end + end + + def down + drop_table TABLE_NAME + end +end diff --git a/db/schema_migrations/20250609131551 b/db/schema_migrations/20250609131551 new file mode 100644 index 00000000000000..7d6abfaa8c193d --- /dev/null +++ b/db/schema_migrations/20250609131551 @@ -0,0 +1 @@ +b38064da0f3e9378d1479040b859fefe1b3ceac3be757fe9531ecf9568a3217a \ No newline at end of file diff --git a/db/schema_migrations/20250609151252 b/db/schema_migrations/20250609151252 new file mode 100644 index 00000000000000..8aba2761879122 --- /dev/null +++ b/db/schema_migrations/20250609151252 @@ -0,0 +1 @@ +674d381e0dc94c2a8d791c2ee5ed4bd333dfc3b68795251449ac19308152dc88 \ No newline at end of file diff --git a/db/schema_migrations/20250609194050 b/db/schema_migrations/20250609194050 new file mode 100644 index 00000000000000..02dd2da56c000f --- /dev/null +++ b/db/schema_migrations/20250609194050 @@ -0,0 +1 @@ +89137f98d833f3edfec262e8a6311bfcefccf31047d2ee94c28f7f057d663e04 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 1daf7885ab5744..a06d450447ff7d 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -4648,6 +4648,7 @@ CREATE TABLE p_ci_builds ( user_id bigint, execution_config_id bigint, upstream_pipeline_partition_id bigint, + prototype_id bigint, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)), CONSTRAINT check_9aa9432137 CHECK ((project_id IS NOT NULL)) ) @@ -4728,6 +4729,32 @@ CREATE TABLE p_ci_job_artifacts ( ) PARTITION BY LIST (partition_id); +CREATE TABLE p_ci_job_infos ( + job_id bigint NOT NULL, + partition_id bigint NOT NULL, + project_id bigint NOT NULL, + interruptible boolean, + debug_trace_enabled boolean, + timeout integer, + timeout_source smallint, + exit_code smallint, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL +) +PARTITION BY LIST (partition_id); + +CREATE TABLE p_ci_job_prototypes ( + id bigint NOT NULL, + partition_id bigint NOT NULL, + project_id bigint NOT NULL, + config jsonb DEFAULT '{}'::jsonb NOT NULL, + checksum text NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + CONSTRAINT check_c43b283d76 CHECK ((char_length(checksum) <= 255)) +) +PARTITION BY LIST (partition_id); + CREATE TABLE p_ci_pipeline_variables ( key character varying NOT NULL, value text, @@ -18919,6 +18946,15 @@ CREATE SEQUENCE p_ci_job_annotations_id_seq ALTER SEQUENCE p_ci_job_annotations_id_seq OWNED BY p_ci_job_annotations.id; +CREATE SEQUENCE p_ci_job_prototypes_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE p_ci_job_prototypes_id_seq OWNED BY p_ci_job_prototypes.id; + CREATE SEQUENCE p_ci_workloads_id_seq START WITH 1 INCREMENT BY 1 @@ -27802,6 +27838,8 @@ ALTER TABLE ONLY p_catalog_resource_sync_events ALTER COLUMN id SET DEFAULT next ALTER TABLE ONLY p_ci_builds_metadata ALTER COLUMN id SET DEFAULT nextval('ci_builds_metadata_id_seq'::regclass); +ALTER TABLE ONLY p_ci_job_prototypes ALTER COLUMN id SET DEFAULT nextval('p_ci_job_prototypes_id_seq'::regclass); + ALTER TABLE ONLY p_ci_workloads ALTER COLUMN id SET DEFAULT nextval('p_ci_workloads_id_seq'::regclass); ALTER TABLE ONLY p_knowledge_graph_enabled_namespaces ALTER COLUMN id SET DEFAULT nextval('p_knowledge_graph_enabled_namespaces_id_seq'::regclass); @@ -30557,6 +30595,12 @@ ALTER TABLE ONLY p_ci_job_artifact_reports ALTER TABLE ONLY p_ci_job_artifacts ADD CONSTRAINT p_ci_job_artifacts_pkey PRIMARY KEY (id, partition_id); +ALTER TABLE ONLY p_ci_job_infos + ADD CONSTRAINT p_ci_job_infos_pkey PRIMARY KEY (job_id, partition_id); + +ALTER TABLE ONLY p_ci_job_prototypes + ADD CONSTRAINT p_ci_job_prototypes_pkey PRIMARY KEY (id, partition_id); + ALTER TABLE ONLY p_ci_pipeline_variables ADD CONSTRAINT p_ci_pipeline_variables_pkey PRIMARY KEY (id, partition_id); @@ -34470,6 +34514,8 @@ CREATE INDEX index_ci_instance_runner_monthly_usages_on_project_and_month ON ci_ CREATE UNIQUE INDEX index_ci_instance_variables_on_key ON ci_instance_variables USING btree (key); +CREATE UNIQUE INDEX index_ci_job_prototypes_on_partition_id_and_checksum ON ONLY p_ci_job_prototypes USING btree (partition_id, project_id, checksum); + CREATE INDEX index_ci_job_token_authorizations_on_origin_project_id ON ci_job_token_authorizations USING btree (origin_project_id); CREATE INDEX index_ci_job_token_group_scope_links_on_added_by_id ON ci_job_token_group_scope_links USING btree (added_by_id); @@ -36474,6 +36520,8 @@ CREATE INDEX index_p_ci_job_annotations_on_project_id ON ONLY p_ci_job_annotatio CREATE INDEX index_p_ci_job_artifact_reports_on_project_id ON ONLY p_ci_job_artifact_reports USING btree (project_id); +CREATE INDEX index_p_ci_job_infos_on_interruptible ON ONLY p_ci_job_infos USING btree (interruptible); + CREATE INDEX index_p_ci_pipeline_variables_on_project_id ON ONLY p_ci_pipeline_variables USING btree (project_id); CREATE INDEX index_p_ci_pipelines_config_on_project_id ON ONLY p_ci_pipelines_config USING btree (project_id); diff --git a/ee/app/models/concerns/ee/ci/metadatable.rb b/ee/app/models/concerns/ee/ci/metadatable.rb index 015f2c72d10bc3..caf339d58f0a9e 100644 --- a/ee/app/models/concerns/ee/ci/metadatable.rb +++ b/ee/app/models/concerns/ee/ci/metadatable.rb @@ -5,15 +5,21 @@ module Ci module Metadatable extend ActiveSupport::Concern - prepended do - delegate :secrets, to: :metadata, allow_nil: true + def secrets + if prototype.present? + prototype.config&.dig(:secrets) + else + metadata&.secrets + end end def secrets? - !!metadata&.secrets? + secrets.present? end def secrets=(value) + raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + ensure_metadata.secrets = value end end diff --git a/ee/spec/services/ci/register_job_service_spec.rb b/ee/spec/services/ci/register_job_service_spec.rb index c96af99035b010..156ad8bddc9ba0 100644 --- a/ee/spec/services/ci/register_job_service_spec.rb +++ b/ee/spec/services/ci/register_job_service_spec.rb @@ -259,7 +259,7 @@ allow_next_found_instance_of(Ci::Build) do |pending_build| expect(pending_build).to receive(:run!).ordered.and_call_original expect(pending_build).to receive(:ensure_metadata).ordered.and_return(stubbed_build_metadata) - expect(stubbed_build_metadata).to receive(:update_timeout_state).ordered + expect(pending_build).to receive(:update_timeout_state).ordered expect(pending_build).to receive(:job_jwt_variables).ordered.and_call_original end diff --git a/lib/api/entities/ci/job_request/runner_info.rb b/lib/api/entities/ci/job_request/runner_info.rb index 96336a1080eb1a..501873f0a9cfbf 100644 --- a/lib/api/entities/ci/job_request/runner_info.rb +++ b/lib/api/entities/ci/job_request/runner_info.rb @@ -5,7 +5,7 @@ module Entities module Ci module JobRequest class RunnerInfo < Grape::Entity - expose :metadata_timeout, as: :timeout + expose :timeout_value, as: :timeout expose :runner_session_url end end diff --git a/lib/ci/job_token/jwt.rb b/lib/ci/job_token/jwt.rb index 09a7c11e89b984..9bbd1aa89e7ce1 100644 --- a/lib/ci/job_token/jwt.rb +++ b/lib/ci/job_token/jwt.rb @@ -73,7 +73,7 @@ def subject_type end def expire_time(job) - ttl = [::JSONWebToken::Token::DEFAULT_EXPIRE_TIME, job.metadata_timeout.to_i].max + ttl = [::JSONWebToken::Token::DEFAULT_EXPIRE_TIME, job.timeout_value.to_i].max Time.current + ttl + LEEWAY end diff --git a/lib/gitlab/ci/build/step.rb b/lib/gitlab/ci/build/step.rb index 0fb87943a6f5a5..c4ddc8b34c3b37 100644 --- a/lib/gitlab/ci/build/step.rb +++ b/lib/gitlab/ci/build/step.rb @@ -15,7 +15,7 @@ class << self def from_commands(job) self.new(:script).tap do |step| step.script = job.options[:before_script].to_a + job.options[:script].to_a - step.timeout = job.metadata_timeout + step.timeout = job.timeout_value step.when = WHEN_ON_SUCCESS end end @@ -25,7 +25,7 @@ def from_release(job) self.new(:release).tap do |step| step.script = Gitlab::Ci::Build::Releaser.new(job: job).script - step.timeout = job.metadata_timeout + step.timeout = job.timeout_value step.when = WHEN_ON_SUCCESS end end @@ -36,7 +36,7 @@ def from_after_script(job) self.new(:after_script).tap do |step| step.script = after_script - step.timeout = job.metadata_timeout + step.timeout = job.timeout_value step.when = WHEN_ALWAYS step.allow_failure = true end diff --git a/lib/gitlab/ci/jwt.rb b/lib/gitlab/ci/jwt.rb index 67c34f744fd123..d46f8126cf86ce 100644 --- a/lib/gitlab/ci/jwt.rb +++ b/lib/gitlab/ci/jwt.rb @@ -7,7 +7,7 @@ class Jwt < JwtBase DEFAULT_EXPIRE_TIME = 60 * 5 def self.for_build(build) - self.new(build, ttl: build.metadata_timeout).encoded + self.new(build, ttl: build.timeout_value).encoded end def initialize(build, ttl:) diff --git a/lib/gitlab/ci/jwt_v2.rb b/lib/gitlab/ci/jwt_v2.rb index 0f601155e01e0d..c40150508b467a 100644 --- a/lib/gitlab/ci/jwt_v2.rb +++ b/lib/gitlab/ci/jwt_v2.rb @@ -12,7 +12,7 @@ class JwtV2 < Jwt def self.for_build( build, aud:, sub_components: [:project_path, :ref_type, :ref], target_audience: nil) - new(build, ttl: build.metadata_timeout, aud: aud, sub_components: sub_components, + new(build, ttl: build.timeout_value, aud: aud, sub_components: sub_components, target_audience: target_audience).encoded end diff --git a/lib/gitlab/ci/pipeline/seed/build.rb b/lib/gitlab/ci/pipeline/seed/build.rb index c47fbebca5fbed..d701f687d1b288 100644 --- a/lib/gitlab/ci/pipeline/seed/build.rb +++ b/lib/gitlab/ci/pipeline/seed/build.rb @@ -65,7 +65,7 @@ def errors strong_memoize_attr :errors def attributes - @seed_attributes + attrs = @seed_attributes .deep_merge(pipeline_attributes) .deep_merge(rules_attributes) .deep_merge(allow_failure_criteria_attributes) @@ -73,7 +73,10 @@ def attributes .deep_merge(runner_tags) .deep_merge(build_execution_config_attribute) .deep_merge(scoped_user_id_attribute) - .except(:stage) + + attrs = add_prototype_association(attrs) + attrs = add_job_info_association(attrs) + attrs.except(:stage) end def bridge? @@ -229,6 +232,46 @@ def calculate_yaml_variables! from: @context.root_variables, to: @job_variables, inheritance: @root_variables_inheritance ) end + + # TODO: this currently fires N+1 queries when checking for existing prototypes. + # We should eventually look into querying prototypes in bulk. + def add_prototype_association(attrs) + return attrs if Feature.disabled?(:ci_job_prototypes) + + # POC NOTES: we delete these because we want this data to move to new home + yaml_variables = attrs.delete(:yaml_variables) + options = attrs.delete(:options) + id_tokens = attrs.delete(:id_tokens) + secrets = attrs.delete(:secrets) + + # TODO: need to decide on a structure and stick to it long term + # we can consider adding a version number for each record in the future. + config = { + options: options, + id_tokens: id_tokens, + yaml_variables: yaml_variables, + secrets: secrets + } + + attrs.merge( + prototype: @context.find_or_build_prototype( + partition_id: attrs.fetch(:partition_id), + project_id: attrs.fetch(:project).id, + config: config) + ) + end + + def add_job_info_association(attrs) + return attrs if Feature.disabled?(:ci_job_prototypes) + + interruptible = attrs.delete(:interruptible) + attrs.merge( + job_info: Ci::JobInfo.new( + partition_id: attrs.fetch(:partition_id), + project_id: attrs.fetch(:project).id, + interruptible: interruptible) + ) + end end end end diff --git a/lib/gitlab/ci/pipeline/seed/context.rb b/lib/gitlab/ci/pipeline/seed/context.rb index 0c2a312e87051c..cf6f137ce18d3d 100644 --- a/lib/gitlab/ci/pipeline/seed/context.rb +++ b/lib/gitlab/ci/pipeline/seed/context.rb @@ -12,6 +12,7 @@ def initialize(pipeline, root_variables: [], logger: nil) @root_variables = root_variables @logger = logger || build_logger @execution_configs = {} + @prototypes = {} end def find_or_build_execution_config(attributes) @@ -23,6 +24,13 @@ def find_or_build_execution_config(attributes) ) end + def find_or_build_prototype(partition_id:, project_id:, config:) + @prototypes[config] ||= ::Ci::JobPrototype.find_or_initialize_for_seed( + partition_id: partition_id, + project_id: project_id, + config: config) + end + private def build_logger diff --git a/spec/lib/ci/job_token/jwt_spec.rb b/spec/lib/ci/job_token/jwt_spec.rb index da82a9496595c3..1fb4d749daf8b2 100644 --- a/spec/lib/ci/job_token/jwt_spec.rb +++ b/spec/lib/ci/job_token/jwt_spec.rb @@ -128,16 +128,16 @@ it 'returns expiration time with leeway' do freeze_time do - allow(job).to receive(:metadata_timeout).and_return(2.hours) + allow(job).to receive(:timeout_value).and_return(2.hours) expected_time = Time.current + 2.hours + described_class::LEEWAY expect(expire_time).to eq(expected_time) end end - it 'uses default expire time when metadata_timeout is smaller' do + it 'uses default expire time when timeout_value is smaller' do freeze_time do - allow(job).to receive(:metadata_timeout).and_return(1.minute) + allow(job).to receive(:timeout_value).and_return(1.minute) expected_time = Time.current + ::JSONWebToken::Token::DEFAULT_EXPIRE_TIME + diff --git a/spec/lib/gitlab/ci/jwt_spec.rb b/spec/lib/gitlab/ci/jwt_spec.rb index 20eac17c6bfb18..74a19d4152bdda 100644 --- a/spec/lib/gitlab/ci/jwt_spec.rb +++ b/spec/lib/gitlab/ci/jwt_spec.rb @@ -244,7 +244,7 @@ end it 'generates JWT for the given job with ttl equal to build timeout' do - expect(build).to receive(:metadata_timeout).and_return(3_600) + expect(build).to receive(:timeout_value).and_return(3_600) payload, _headers = JWT.decode(jwt, rsa_key.public_key, true, { algorithm: 'RS256' }) ttl = payload["exp"] - payload["iat"] @@ -253,7 +253,7 @@ end it 'generates JWT for the given job with default ttl if build timeout is not set' do - expect(build).to receive(:metadata_timeout).and_return(nil) + expect(build).to receive(:timeout_value).and_return(nil) payload, _headers = JWT.decode(jwt, rsa_key.public_key, true, { algorithm: 'RS256' }) ttl = payload["exp"] - payload["iat"] diff --git a/spec/models/ci/build_metadata_spec.rb b/spec/models/ci/build_metadata_spec.rb index 85fa6af6fad14e..ece1f83e4cc9b8 100644 --- a/spec/models/ci/build_metadata_spec.rb +++ b/spec/models/ci/build_metadata_spec.rb @@ -26,6 +26,7 @@ it { is_expected.to belong_to(:build) } it { is_expected.to belong_to(:project) } + # TODO: move to metadatable concern specs describe '#update_timeout_state' do subject { metadata } diff --git a/spec/requests/api/ci/runner/jobs_request_post_spec.rb b/spec/requests/api/ci/runner/jobs_request_post_spec.rb index 9f2895afc9eaa6..c2c8de89e39d18 100644 --- a/spec/requests/api/ci/runner/jobs_request_post_spec.rb +++ b/spec/requests/api/ci/runner/jobs_request_post_spec.rb @@ -217,12 +217,12 @@ let(:expected_steps) do [{ 'name' => 'script', 'script' => %w[echo], - 'timeout' => job.metadata_timeout, + 'timeout' => job.timeout_value, 'when' => 'on_success', 'allow_failure' => false }, { 'name' => 'after_script', 'script' => %w[ls date], - 'timeout' => job.metadata_timeout, + 'timeout' => job.timeout_value, 'when' => 'always', 'allow_failure' => true }] end -- GitLab From c505c62854f01e48142bb510909b8e7ff8bc15fe Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Tue, 10 Jun 2025 21:26:29 +0100 Subject: [PATCH 02/10] fix Rubocop offenses --- app/models/ci/build.rb | 2 +- app/models/ci/processable.rb | 8 ++++-- app/models/concerns/ci/metadatable.rb | 24 ++++++++---------- ...250609131551_create_p_ci_job_prototypes.rb | 12 ++++++--- ...609151252_add_prototype_id_to_ci_builds.rb | 25 ++----------------- .../20250609194050_create_p_ci_job_infos.rb | 5 ++-- lib/gitlab/ci/pipeline/seed/build.rb | 4 +-- 7 files changed, 33 insertions(+), 47 deletions(-) diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 34ad4f899c0082..952fe2ed34e9a5 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -348,7 +348,7 @@ def supported_keyset_orderings # rubocop:enable CodeReuse/ServiceClass # - after_transition pending: :running do |build| + after_transition pending: :running do |build| # rubocop:disable Sytle/SymbolProc -- reason: old code build.update_timeout_state end diff --git a/app/models/ci/processable.rb b/app/models/ci/processable.rb index e5fd070f0e7048..8e5d85514c4c5c 100644 --- a/app/models/ci/processable.rb +++ b/app/models/ci/processable.rb @@ -47,9 +47,13 @@ class Processable < ::CommitStatus scope :not_interruptible, -> do joins(:metadata, :job_info) .where.not( - Ci::BuildMetadata.table_name => { id: Ci::BuildMetadata.scoped_build.with_interruptible.select(Ci::BuildMetadata.arel_table[:id]) }) + Ci::BuildMetadata.table_name => { + id: Ci::BuildMetadata.scoped_build.with_interruptible.select(Ci::BuildMetadata.arel_table[:id]) + }) .where.not( - Ci::JobInfo.table_name => { job_id: Ci::JobInfo.scoped_job.with_interruptible.select(Ci::JobInfo.arel_table[:job_id]) }) + Ci::JobInfo.table_name => { + job_id: Ci::JobInfo.scoped_job.with_interruptible.select(Ci::JobInfo.arel_table[:job_id]) + }) end state_machine :status do diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index 92d5202ee289c9..afa60650312ea5 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -16,7 +16,7 @@ module Metadatable partition_foreign_key: :partition_id, inverse_of: :build, autosave: true - + has_one :job_info, ->(build) { where(partition_id: build.partition_id) }, class_name: 'Ci::JobInfo', @@ -199,32 +199,30 @@ def write_metadata_attribute(legacy_key, metadata_key, value) # ----- TIMEOUT METHODS ----- def timeout_with_highest_precedence - [(job_timeout || project_timeout), runner_timeout].compact.min_by(&:value) + [job_timeout || project_timeout, runner_timeout].compact.min_by(&:value) end def project_timeout - strong_memoize(:project_timeout) do - BuildTimeout.new(project&.build_timeout, :project_timeout_source) - end + BuildTimeout.new(project&.build_timeout, :project_timeout_source) end + strong_memoize_attr :project_timeout def job_timeout return unless options - strong_memoize(:job_timeout) do - if timeout_from_options = options[:job_timeout] - BuildTimeout.new(timeout_from_options, :job_timeout_source) - end - end + timeout_from_options = options[:job_timeout] + return unless timeout_from_options + + BuildTimeout.new(timeout_from_options, :job_timeout_source) if timeout_from_options end + strong_memoize_attr :job_timeout def runner_timeout return unless runner_timeout_set? - strong_memoize(:runner_timeout) do - BuildTimeout.new(runner.maximum_timeout, :runner_timeout_source) - end + BuildTimeout.new(runner.maximum_timeout, :runner_timeout_source) end + strong_memoize_attr :runner_timeout def runner_timeout_set? runner&.maximum_timeout.to_i > 0 diff --git a/db/migrate/20250609131551_create_p_ci_job_prototypes.rb b/db/migrate/20250609131551_create_p_ci_job_prototypes.rb index 08ce1637302e96..650dcfa618e053 100644 --- a/db/migrate/20250609131551_create_p_ci_job_prototypes.rb +++ b/db/migrate/20250609131551_create_p_ci_job_prototypes.rb @@ -13,7 +13,8 @@ class CreatePCiJobPrototypes < Gitlab::Database::Migration[2.3] UNIQUE_INDEX = :index_ci_job_prototypes_on_partition_id_and_checksum def up - create_table TABLE_NAME, primary_key: [:id, :partition_id], options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| + create_table TABLE_NAME, primary_key: [:id, :partition_id], + options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| t.bigserial :id, null: false t.bigint :partition_id, null: false t.bigint :project_id, null: false @@ -21,12 +22,15 @@ def up # I first want to see if they are really needed long term and if they really need to # be indexed. # This includes also `yaml_variables` - t.jsonb :config, default: {}, null: false # renaming from `options` so we can more transparently see provenance of data + + # renaming from `options` so we can more transparently see provenance of data + t.jsonb :config, default: {}, null: false t.text :checksum, limit: 255, null: false - t.timestamps null: false + t.timestamps_with_timezone null: false end - add_concurrent_partitioned_index(TABLE_NAME, [:partition_id, :project_id, :checksum], unique: true, name: UNIQUE_INDEX) + add_concurrent_partitioned_index( + TABLE_NAME, [:partition_id, :project_id, :checksum], unique: true, name: UNIQUE_INDEX) # TODO: need to do the proper partitioning steps with_lock_retries do diff --git a/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb b/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb index 98dd2b624d7d91..aee2753c907cf5 100644 --- a/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb +++ b/db/migrate/20250609151252_add_prototype_id_to_ci_builds.rb @@ -1,32 +1,11 @@ # frozen_string_literal: true -# See https://docs.gitlab.com/ee/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. - class AddPrototypeIdToCiBuilds < Gitlab::Database::Migration[2.3] - # When using the methods "add_concurrent_index" or "remove_concurrent_index" - # you must disable the use of transactions - # as these methods can not run in an existing transaction. - # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure - # that either of them is the _only_ method called in the migration, - # any other changes should go in a separate migration. - # This ensures that upon failure _only_ the index creation or removing fails - # and can be retried or reverted easily. - # - # To disable transactions uncomment the following line and remove these - # comments: - # disable_ddl_transaction! - # - # Configure the `gitlab_schema` to perform data manipulation (DML). - # Visit: https://docs.gitlab.com/ee/development/database/migrations_for_multiple_databases.html - # restrict_gitlab_migration gitlab_schema: :gitlab_main - - # Add dependent 'batched_background_migrations.queued_migration_version' values. - # DEPENDENT_BATCHED_BACKGROUND_MIGRATIONS = [] milestone '18.1' def up - add_column :p_ci_builds, :prototype_id, :bigint + # TODO: create association table + add_column :p_ci_builds, :prototype_id, :bigint # rubocop:disable Migration/PreventAddingColumns -- PoC: we will create association table end def down diff --git a/db/migrate/20250609194050_create_p_ci_job_infos.rb b/db/migrate/20250609194050_create_p_ci_job_infos.rb index b0a11cc73afb72..c945b730fb49b0 100644 --- a/db/migrate/20250609194050_create_p_ci_job_infos.rb +++ b/db/migrate/20250609194050_create_p_ci_job_infos.rb @@ -22,7 +22,8 @@ class CreatePCiJobInfos < Gitlab::Database::Migration[2.3] def up # TODO: there may be more columns to add to this table. - create_table TABLE_NAME, primary_key: [:job_id, :partition_id], options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| + create_table TABLE_NAME, primary_key: [:job_id, :partition_id], + options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| t.bigint :job_id, null: false t.bigint :partition_id, null: false t.bigint :project_id, null: false @@ -31,7 +32,7 @@ def up t.integer :timeout t.integer :timeout_source, limit: 2 t.integer :exit_code, limit: 2 - t.timestamps null: false + t.timestamps_with_timezone null: false end # TODO: need to do the proper partitioning steps diff --git a/lib/gitlab/ci/pipeline/seed/build.rb b/lib/gitlab/ci/pipeline/seed/build.rb index d701f687d1b288..6055548f3c7481 100644 --- a/lib/gitlab/ci/pipeline/seed/build.rb +++ b/lib/gitlab/ci/pipeline/seed/build.rb @@ -236,7 +236,7 @@ def calculate_yaml_variables! # TODO: this currently fires N+1 queries when checking for existing prototypes. # We should eventually look into querying prototypes in bulk. def add_prototype_association(attrs) - return attrs if Feature.disabled?(:ci_job_prototypes) + return attrs if Feature.disabled?(:ci_job_prototypes, @pipeline.project) # POC NOTES: we delete these because we want this data to move to new home yaml_variables = attrs.delete(:yaml_variables) @@ -262,7 +262,7 @@ def add_prototype_association(attrs) end def add_job_info_association(attrs) - return attrs if Feature.disabled?(:ci_job_prototypes) + return attrs if Feature.disabled?(:ci_job_prototypes, @pipeline.project) interruptible = attrs.delete(:interruptible) attrs.merge( -- GitLab From c3351a5ff8bbcddc7d5380885a6b1df641fa155b Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Tue, 10 Jun 2025 22:33:19 +0100 Subject: [PATCH 03/10] enable setters for options and yaml_variables --- app/models/ci/job_info.rb | 8 +++++- app/models/ci/job_prototype.rb | 20 ++++++++++++--- app/models/concerns/ci/metadatable.rb | 37 +++++++++++++++++---------- spec/factories/ci/job_prototypes.rb | 7 +++++ 4 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 spec/factories/ci/job_prototypes.rb diff --git a/app/models/ci/job_info.rb b/app/models/ci/job_info.rb index 0db6b6bb0a27eb..a155ce67e9ed28 100644 --- a/app/models/ci/job_info.rb +++ b/app/models/ci/job_info.rb @@ -17,7 +17,9 @@ class JobInfo < Ci::ApplicationRecord belongs_to :job, ->(info) { in_partition(info) }, partition_foreign_key: :partition_id, - class_name: 'CommitStatus' + class_name: 'CommitStatus', + inverse_of: :job_info, + optional: true validates :job, presence: true @@ -36,5 +38,9 @@ class JobInfo < Ci::ApplicationRecord where(arel_table[:job_id].eq(Ci::Build.arel_table[:id])) .where(arel_table[:partition_id].eq(Ci::Build.arel_table[:partition_id])) end + + def enable_debug_trace! + update!(debug_trace_enabled: true) + end end end diff --git a/app/models/ci/job_prototype.rb b/app/models/ci/job_prototype.rb index 993b396e3bc6ef..a9ff6dd474a0ac 100644 --- a/app/models/ci/job_prototype.rb +++ b/app/models/ci/job_prototype.rb @@ -37,7 +37,8 @@ def self.find_or_initialize_for_seed(partition_id:, project_id:, config:) def self.calculate_checksum_for_seed(attrs) # TODO: we need to filter out some attributes such as: # enqueue_immediately, scoped_user_id, downstream_errors, environment (?) - attrs.hash + # TODO: Use SHA256 digest and convert to json before conversion. + attrs.hash.to_s # TODO: consider including a lot more data initially and scale back later if needed. # E.g. `when`, `allow_failure`, `coverage_regex`, and other attributes outside `options` @@ -45,8 +46,21 @@ def self.calculate_checksum_for_seed(attrs) # while retaining the unique data. end - def enable_debug_trace! - update!(debug_trace_enabled: true) + def options=(value) + update_config(options: value) + end + + def yaml_variables=(value) + update_config(yaml_variables: value) + end + + private + + def update_config(attrs) + # TODO: this should eventually raise an exception. Allowing only because the specs + # contain a lot of factory code that uses the setter methods. + self.config = (config || {}).merge(attrs) + self.checksum = self.class.calculate_checksum_for_seed(config) end end end diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index afa60650312ea5..89961f2f5ae919 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -7,6 +7,7 @@ module Ci # module Metadatable extend ActiveSupport::Concern + include Gitlab::Utils::StrongMemoize included do has_one :metadata, @@ -110,22 +111,26 @@ def yaml_variables end def options=(value) - raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) - - write_metadata_attribute(:options, :config_options, value) - - # TODO: ensure this data gets persisted in `p_ci_builds_info` if intrinsic and mutable - ensure_metadata.tap do |metadata| - # Store presence of exposed artifacts in build metadata to make it easier to query - metadata.has_exposed_artifacts = value&.dig(:artifacts, :expose_as).present? - metadata.environment_auto_stop_in = value&.dig(:environment, :auto_stop_in) + if Feature.enabled?(:ci_job_prototypes, project) + ensure_prototype.options = value + else + write_metadata_attribute(:options, :config_options, value) + + # TODO: ensure this data gets persisted in `p_ci_builds_info` if intrinsic and mutable + ensure_metadata.tap do |metadata| + # Store presence of exposed artifacts in build metadata to make it easier to query + metadata.has_exposed_artifacts = value&.dig(:artifacts, :expose_as).present? + metadata.environment_auto_stop_in = value&.dig(:environment, :auto_stop_in) + end end end def yaml_variables=(value) - raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) - - write_metadata_attribute(:yaml_variables, :config_variables, :yaml_variables, value) + if Feature.enabled?(:ci_job_prototypes, project) + ensure_prototype.yaml_variables = value + else + write_metadata_attribute(:yaml_variables, :config_variables, value) + end end def interruptible @@ -158,7 +163,7 @@ def id_tokens=(value) def enable_debug_trace! if prototype.present? - prototype.enable_debug_trace! + job_info.enable_debug_trace! else ensure_metadata.enable_debug_trace! end @@ -166,7 +171,7 @@ def enable_debug_trace! def debug_trace_enabled? if prototype.present? - prototype.debug_trace_enabled? + job_info.debug_trace_enabled? else metadata&.debug_trace_enabled? end @@ -227,6 +232,10 @@ def runner_timeout def runner_timeout_set? runner&.maximum_timeout.to_i > 0 end + + def ensure_prototype + prototype || build_prototype(project_id: project_id, partition_id: partition_id) + end end end diff --git a/spec/factories/ci/job_prototypes.rb b/spec/factories/ci/job_prototypes.rb new file mode 100644 index 00000000000000..6ca471e5071f10 --- /dev/null +++ b/spec/factories/ci/job_prototypes.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :ci_job_prototype, class: 'Ci::JobPrototype' do + # TODO: fill this in and link from `ci_processable` factories. + end +end -- GitLab From 30ad6886e16f9dfab19b95a7ca61f15da4a46ce9 Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Wed, 11 Jun 2025 00:15:39 +0100 Subject: [PATCH 04/10] Fix factories --- app/models/ci/build.rb | 5 ---- app/models/ci/build_metadata.rb | 1 - app/models/ci/job_info.rb | 7 +++++ app/models/ci/job_prototype.rb | 12 ++++---- app/models/ci/pipeline.rb | 2 +- app/models/concerns/ci/metadatable.rb | 28 ++++++++++++++----- .../ci/find_exposed_artifacts_service.rb | 2 +- .../20250609194050_create_p_ci_job_infos.rb | 1 + db/structure.sql | 5 ++-- spec/factories/ci/job_prototypes.rb | 17 ++++++++++- spec/factories/ci/processable.rb | 7 +++++ 11 files changed, 63 insertions(+), 24 deletions(-) diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 952fe2ed34e9a5..5cabb7151bdd73 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -168,11 +168,6 @@ class Build < Ci::Processable ) end - scope :with_exposed_artifacts, -> do - joins(:metadata).merge(Ci::BuildMetadata.with_exposed_artifacts) - .includes(:metadata, :job_artifacts_metadata) - end - scope :with_artifacts_not_expired, -> { with_downloadable_artifacts.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.current) } scope :with_pipeline_locked_artifacts, -> { joins(:pipeline).where('pipeline.locked': Ci::Pipeline.lockeds[:artifacts_locked]) } scope :last_month, -> { where('created_at > ?', Date.today - 1.month) } diff --git a/app/models/ci/build_metadata.rb b/app/models/ci/build_metadata.rb index 1b68e3d6db6d01..8bf43c11fbb680 100644 --- a/app/models/ci/build_metadata.rb +++ b/app/models/ci/build_metadata.rb @@ -43,7 +43,6 @@ class BuildMetadata < Ci::ApplicationRecord end scope :with_interruptible, -> { where(interruptible: true) } - scope :with_exposed_artifacts, -> { where(has_exposed_artifacts: true) } enum :timeout_source, ::Ci::JobInfo.timeout_sources diff --git a/app/models/ci/job_info.rb b/app/models/ci/job_info.rb index a155ce67e9ed28..b081bfefe80b4b 100644 --- a/app/models/ci/job_info.rb +++ b/app/models/ci/job_info.rb @@ -42,5 +42,12 @@ class JobInfo < Ci::ApplicationRecord def enable_debug_trace! update!(debug_trace_enabled: true) end + + # POC NOTES: I'm not convinced about this being in this class because it's processing data + # An alternative would be to store it in `p_ci_builds_metadata` and archive it later but the overhead + # of keeping `p_ci_builds_metadata` (even if with reduced data) may not still justify that. + def set_enqueue_immediately! + update!(enqueue_immediately: true) + end end end diff --git a/app/models/ci/job_prototype.rb b/app/models/ci/job_prototype.rb index a9ff6dd474a0ac..a2e7a4c26d0398 100644 --- a/app/models/ci/job_prototype.rb +++ b/app/models/ci/job_prototype.rb @@ -46,13 +46,13 @@ def self.calculate_checksum_for_seed(attrs) # while retaining the unique data. end - def options=(value) - update_config(options: value) - end + # def options=(value) + # update_config(options: value) + # end - def yaml_variables=(value) - update_config(yaml_variables: value) - end + # def yaml_variables=(value) + # update_config(yaml_variables: value) + # end private diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index b4e698f95b73f2..51efb09667bc1d 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -1256,7 +1256,7 @@ def has_archive_artifacts? end def has_exposed_artifacts? - complete? && builds.latest.with_exposed_artifacts.exists? + complete? && builds.latest.select_with_exposed_artifacts.any? end def has_erasable_artifacts? diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index 89961f2f5ae919..1bfbd8b03b6fbd 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -37,10 +37,18 @@ module Metadatable scope :with_project_and_metadata, -> do joins(:metadata).includes(:metadata).preload(:project) end + + def self.select_with_exposed_artifacts + includes(:metadata, :prototype).select(&:has_exposed_artifacts?) + end end def has_exposed_artifacts? - !!metadata&.has_exposed_artifacts? + if prototype.present? + options.dig(:artifacts, :expose_as).present? + else + !!metadata&.has_exposed_artifacts? + end end def ensure_metadata @@ -112,7 +120,7 @@ def yaml_variables def options=(value) if Feature.enabled?(:ci_job_prototypes, project) - ensure_prototype.options = value + # ensure_prototype.options = value else write_metadata_attribute(:options, :config_options, value) @@ -127,7 +135,7 @@ def options=(value) def yaml_variables=(value) if Feature.enabled?(:ci_job_prototypes, project) - ensure_prototype.yaml_variables = value + # ensure_prototype.yaml_variables = value else write_metadata_attribute(:yaml_variables, :config_variables, value) end @@ -178,13 +186,19 @@ def debug_trace_enabled? end def enqueue_immediately? - !!options[:enqueue_immediately] + return job_info.enqueue_immediately? if job_info.present? + + !!options[:enqueue_immediately] # existing data persisted in metadata end def set_enqueue_immediately! - # ensures that even if `config_options: nil` in the database we set the - # new value correctly. - self.options = options.merge(enqueue_immediately: true) + if prototype.present? + job_info.set_enqueue_immediately! + else + # ensures that even if `config_options: nil` in the database we set the + # new value correctly. + self.options = options.merge(enqueue_immediately: true) + end end private diff --git a/app/services/ci/find_exposed_artifacts_service.rb b/app/services/ci/find_exposed_artifacts_service.rb index abbeb101be2db3..eb26bfe7722265 100644 --- a/app/services/ci/find_exposed_artifacts_service.rb +++ b/app/services/ci/find_exposed_artifacts_service.rb @@ -15,7 +15,7 @@ class FindExposedArtifactsService < ::BaseService def for_pipeline(pipeline, limit: MAX_EXPOSED_ARTIFACTS) results = [] - pipeline.builds.latest.with_exposed_artifacts.find_each do |job| + pipeline.builds.latest.select_with_exposed_artifacts.each do |job| if job_exposed_artifacts = for_job(job) results << job_exposed_artifacts end diff --git a/db/migrate/20250609194050_create_p_ci_job_infos.rb b/db/migrate/20250609194050_create_p_ci_job_infos.rb index c945b730fb49b0..c53352f9bcd6f3 100644 --- a/db/migrate/20250609194050_create_p_ci_job_infos.rb +++ b/db/migrate/20250609194050_create_p_ci_job_infos.rb @@ -29,6 +29,7 @@ def up t.bigint :project_id, null: false t.boolean :interruptible, index: true t.boolean :debug_trace_enabled + t.boolean :enqueue_immediately t.integer :timeout t.integer :timeout_source, limit: 2 t.integer :exit_code, limit: 2 diff --git a/db/structure.sql b/db/structure.sql index a06d450447ff7d..cd2f27f4090d6f 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -4735,11 +4735,12 @@ CREATE TABLE p_ci_job_infos ( project_id bigint NOT NULL, interruptible boolean, debug_trace_enabled boolean, + enqueue_immediately boolean, timeout integer, timeout_source smallint, exit_code smallint, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL ) PARTITION BY LIST (partition_id); diff --git a/spec/factories/ci/job_prototypes.rb b/spec/factories/ci/job_prototypes.rb index 6ca471e5071f10..07f90f7eb8db4a 100644 --- a/spec/factories/ci/job_prototypes.rb +++ b/spec/factories/ci/job_prototypes.rb @@ -2,6 +2,21 @@ FactoryBot.define do factory :ci_job_prototype, class: 'Ci::JobPrototype' do - # TODO: fill this in and link from `ci_processable` factories. + # config do + # { + # options: { + # image: 'image:1.0', + # services: ['postgres'], + # script: ['ls -a'] + # }, + # yaml_variables: [ + # { key: 'DB_NAME', value: 'postgres', public: true } + # ] + # } + # end + + # checksum do + # ::Ci::JobPrototype.calculate_checksum_for_seed(config) + # end end end diff --git a/spec/factories/ci/processable.rb b/spec/factories/ci/processable.rb index d5a149b670cee0..976dac061fd30e 100644 --- a/spec/factories/ci/processable.rb +++ b/spec/factories/ci/processable.rb @@ -11,6 +11,13 @@ scheduling_type { 'stage' } partition_id { pipeline.partition_id } + prototype do + ::Ci::JobPrototype.find_or_initialize_for_seed( + partition_id: partition_id, + project_id: project.id, + config: { options: options, yaml_variables: yaml_variables }) + end + options do {} end -- GitLab From 51a9b178d69cf8c7a3f27cc5ad8ab3d3c8cecd6d Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Fri, 20 Jun 2025 21:04:12 +0100 Subject: [PATCH 05/10] iteration 2 --- app/models/ci/build.rb | 12 +++- app/models/ci/build_metadata.rb | 2 +- app/models/ci/job_info.rb | 53 -------------- app/models/ci/job_prototype.rb | 9 ++- app/models/ci/processable.rb | 15 ++-- app/models/concerns/ci/metadatable.rb | 70 +++++++------------ .../concerns/ci/partitionable/testing.rb | 1 - config/initializers/postgres_partitioning.rb | 1 - db/docs/p_ci_job_infos.yml | 14 ---- ...250609131551_create_p_ci_job_prototypes.rb | 8 +-- .../20250609194050_create_p_ci_job_infos.rb | 3 +- ...21504_add_intrinsic_data_to_p_ci_builds.rb | 22 ++++++ db/schema_migrations/20250620121504 | 1 + db/structure.sql | 12 +++- ee/app/models/concerns/ee/ci/metadatable.rb | 2 +- lib/gitlab/ci/pipeline/seed/build.rb | 29 ++------ spec/factories/ci/job_prototypes.rb | 22 ------ 17 files changed, 97 insertions(+), 179 deletions(-) delete mode 100644 app/models/ci/job_info.rb delete mode 100644 db/docs/p_ci_job_infos.yml create mode 100644 db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb create mode 100644 db/schema_migrations/20250620121504 delete mode 100644 spec/factories/ci/job_prototypes.rb diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 5cabb7151bdd73..67dcf47436b4ea 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -13,6 +13,7 @@ class Build < Ci::Processable include Ci::TrackEnvironmentUsage include EachBatch include Ci::Taggable + include ChronicDurationAttribute extend ::Gitlab::Utils::Override @@ -144,6 +145,15 @@ class Build < Ci::Processable serialize :options # rubocop:disable Cop/ActiveRecordSerialize serialize :yaml_variables, coder: Gitlab::Serializer::Ci::Variables # rubocop:disable Cop/ActiveRecordSerialize + chronic_duration_attr_reader :timeout_human_readable, :timeout + + enum :timeout_source, { + unknown_timeout_source: 1, + project_timeout_source: 2, + runner_timeout_source: 3, + job_timeout_source: 4 + } + delegate :name, to: :project, prefix: true validates :coverage, numericality: true, allow_blank: true @@ -240,7 +250,7 @@ def with_preloads preload(:job_artifacts_archive, :job_artifacts, :tags, project: [:namespace]) end - # TODO: need to propagate attributes for prototype_id and job_infos.* + # TODO: need to propagate attributes for prototype_id # TODO: do the same for Ci::Bridge. # TODO: the setter methods from metadatable will fail here! Need to explicitly # propagate the metadata or prototype/info records. diff --git a/app/models/ci/build_metadata.rb b/app/models/ci/build_metadata.rb index 8bf43c11fbb680..b487aa9af4d3f1 100644 --- a/app/models/ci/build_metadata.rb +++ b/app/models/ci/build_metadata.rb @@ -44,7 +44,7 @@ class BuildMetadata < Ci::ApplicationRecord scope :with_interruptible, -> { where(interruptible: true) } - enum :timeout_source, ::Ci::JobInfo.timeout_sources + enum :timeout_source, ::Ci::Build.timeout_sources def enable_debug_trace! self.debug_trace_enabled = true diff --git a/app/models/ci/job_info.rb b/app/models/ci/job_info.rb deleted file mode 100644 index b081bfefe80b4b..00000000000000 --- a/app/models/ci/job_info.rb +++ /dev/null @@ -1,53 +0,0 @@ -# frozen_string_literal: true - -module Ci - class JobInfo < Ci::ApplicationRecord - include Ci::Partitionable - include ChronicDurationAttribute - - self.table_name = :p_ci_job_infos - self.primary_key = :id - - # POC NOTES: copied from Pipeline. There should be 1 unique prototype per partition. - partitionable scope: :job, partitioned: true - query_constraints :id, :partition_id - - belongs_to :project - - belongs_to :job, - ->(info) { in_partition(info) }, - partition_foreign_key: :partition_id, - class_name: 'CommitStatus', - inverse_of: :job_info, - optional: true - - validates :job, presence: true - - chronic_duration_attr_reader :timeout_human_readable, :timeout - - enum :timeout_source, { - unknown_timeout_source: 1, - project_timeout_source: 2, - runner_timeout_source: 3, - job_timeout_source: 4 - } - - scope :with_interruptible, -> { where(interruptible: true) } - - scope :scoped_job, -> do - where(arel_table[:job_id].eq(Ci::Build.arel_table[:id])) - .where(arel_table[:partition_id].eq(Ci::Build.arel_table[:partition_id])) - end - - def enable_debug_trace! - update!(debug_trace_enabled: true) - end - - # POC NOTES: I'm not convinced about this being in this class because it's processing data - # An alternative would be to store it in `p_ci_builds_metadata` and archive it later but the overhead - # of keeping `p_ci_builds_metadata` (even if with reduced data) may not still justify that. - def set_enqueue_immediately! - update!(enqueue_immediately: true) - end - end -end diff --git a/app/models/ci/job_prototype.rb b/app/models/ci/job_prototype.rb index a2e7a4c26d0398..e12c510f1841f5 100644 --- a/app/models/ci/job_prototype.rb +++ b/app/models/ci/job_prototype.rb @@ -17,6 +17,13 @@ class JobPrototype < Ci::ApplicationRecord attribute :config, ::Gitlab::Database::Type::SymbolizedJsonb.new + scope :with_interruptible, -> { where(interruptible: true) } + + scope :scoped_job, -> do + where(arel_table[:job_id].eq(Ci::Build.arel_table[:id])) + .where(arel_table[:partition_id].eq(Ci::Build.arel_table[:partition_id])) + end + def self.find_or_initialize_for_seed(partition_id:, project_id:, config:) job_checksum = calculate_checksum_for_seed(config) @@ -29,7 +36,7 @@ def self.find_or_initialize_for_seed(partition_id:, project_id:, config:) prototype = find_by(**finder_attributes) return prototype if prototype - new(finder_attributes.merge(config: config)) + new(finder_attributes.merge(config: config, interruptible: config[:interruptible])) end # Used when creating a pipeline. Job datastructure does not match diff --git a/app/models/ci/processable.rb b/app/models/ci/processable.rb index 8e5d85514c4c5c..4f07f80e098a8e 100644 --- a/app/models/ci/processable.rb +++ b/app/models/ci/processable.rb @@ -17,6 +17,7 @@ class Processable < ::CommitStatus has_one :sourced_pipeline, class_name: 'Ci::Sources::Pipeline', foreign_key: :source_job_id, inverse_of: :source_job has_one :trigger, through: :pipeline + belongs_to :prototype, class_name: 'Ci::JobPrototype', autosave: true belongs_to :resource_group, class_name: 'Ci::ResourceGroup', inverse_of: :processables accepts_nested_attributes_for :needs @@ -38,21 +39,21 @@ class Processable < ::CommitStatus # TODO: check if query performance remain acceptable scope :interruptible, -> do - joins(:metadata, :job_info) + joins(:metadata, :prototype) .merge(Ci::BuildMetadata.with_interruptible) - .or(Ci::JobInfo.with_interruptible) + .or(Ci::JobPrototype.with_interruptible) end # TODO: check if query performance remain acceptable scope :not_interruptible, -> do - joins(:metadata, :job_info) + joins(:metadata, :prototype) .where.not( Ci::BuildMetadata.table_name => { id: Ci::BuildMetadata.scoped_build.with_interruptible.select(Ci::BuildMetadata.arel_table[:id]) }) .where.not( - Ci::JobInfo.table_name => { - job_id: Ci::JobInfo.scoped_job.with_interruptible.select(Ci::JobInfo.arel_table[:job_id]) + Ci::JobPrototype.table_name => { + job_id: Ci::JobPrototype.scoped_job.with_interruptible.select(Ci::JobPrototype.arel_table[:job_id]) }) end @@ -274,8 +275,10 @@ def manual_job? self.when == 'manual' end + # POC NOTES: the `playble?` condition embeds the `!archived?` check. + # This will make `manual_confirmation` a processing data. def manual_confirmation_message - options[:manual_confirmation] if manual_job? + options[:manual_confirmation] if manual_job? && playable? end private diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index 1bfbd8b03b6fbd..e11ea578fddd5d 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -18,16 +18,6 @@ module Metadatable inverse_of: :build, autosave: true - has_one :job_info, - ->(build) { where(partition_id: build.partition_id) }, - class_name: 'Ci::JobInfo', - foreign_key: :job_id, - partition_foreign_key: :partition_id, - inverse_of: :job, - autosave: true - - belongs_to :prototype, class_name: 'Ci::JobPrototype', autosave: true - accepts_nested_attributes_for :metadata delegate :environment_auto_stop_in, to: :metadata, prefix: false, allow_nil: true @@ -57,7 +47,7 @@ def ensure_metadata def timeout_value if prototype.present? - job_info.timeout + self.timeout else metadata&.timeout end @@ -69,9 +59,9 @@ def update_timeout_state return unless timeout # If we have prototype data we are splitting p_ci_builds_metadata between prototype record - # and `p_ci_job_infos`. + # and `p_ci_builds`. if prototype.present? - job_info.update(timeout: timeout.value, timeout_source: timeout.source) + self.update(timeout: timeout.value, timeout_source: timeout.source) else ensure_metadata.update(timeout: timeout.value, timeout_source: timeout.source) end @@ -92,7 +82,7 @@ def degenerate! def exit_code if prototype.present? - job_info&.exit_code + self[:exit_code] else metadata&.exit_code end @@ -104,7 +94,7 @@ def exit_code=(value) safe_value = value.to_i.clamp(0, Gitlab::Database::MAX_SMALLINT_VALUE) if prototype.present? - job_info.exit_code = safe_value + self[:exit_code] = safe_value else ensure_metadata.exit_code = safe_value end @@ -119,38 +109,34 @@ def yaml_variables end def options=(value) - if Feature.enabled?(:ci_job_prototypes, project) - # ensure_prototype.options = value - else - write_metadata_attribute(:options, :config_options, value) - - # TODO: ensure this data gets persisted in `p_ci_builds_info` if intrinsic and mutable - ensure_metadata.tap do |metadata| - # Store presence of exposed artifacts in build metadata to make it easier to query - metadata.has_exposed_artifacts = value&.dig(:artifacts, :expose_as).present? - metadata.environment_auto_stop_in = value&.dig(:environment, :auto_stop_in) - end + raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? + + write_metadata_attribute(:options, :config_options, value) + + # TODO: ensure this data gets persisted in `p_ci_builds_info` if intrinsic and mutable + ensure_metadata.tap do |metadata| + # Store presence of exposed artifacts in build metadata to make it easier to query + metadata.has_exposed_artifacts = value&.dig(:artifacts, :expose_as).present? + metadata.environment_auto_stop_in = value&.dig(:environment, :auto_stop_in) end end def yaml_variables=(value) - if Feature.enabled?(:ci_job_prototypes, project) - # ensure_prototype.yaml_variables = value - else - write_metadata_attribute(:yaml_variables, :config_variables, value) - end + raise 'prototypes is immutable' if prototype.present? # TODO: do we need feature flag check? + + write_metadata_attribute(:yaml_variables, :config_variables, value) end def interruptible if prototype.present? - job_info&.interruptible + prototype.interruptible else metadata&.interruptible end end def interruptible=(value) - raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? ensure_metadata.interruptible = value end @@ -164,14 +150,14 @@ def id_tokens? end def id_tokens=(value) - raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? ensure_metadata.id_tokens = value end def enable_debug_trace! if prototype.present? - job_info.enable_debug_trace! + update!(debug_trace_enabled: true) else ensure_metadata.enable_debug_trace! end @@ -179,26 +165,18 @@ def enable_debug_trace! def debug_trace_enabled? if prototype.present? - job_info.debug_trace_enabled? + self[:debug_trace_enabled] else metadata&.debug_trace_enabled? end end def enqueue_immediately? - return job_info.enqueue_immediately? if job_info.present? - - !!options[:enqueue_immediately] # existing data persisted in metadata + # We moved this to Redis in a different MR. end def set_enqueue_immediately! - if prototype.present? - job_info.set_enqueue_immediately! - else - # ensures that even if `config_options: nil` in the database we set the - # new value correctly. - self.options = options.merge(enqueue_immediately: true) - end + # We moved this to Redis in a different MR. end private diff --git a/app/models/concerns/ci/partitionable/testing.rb b/app/models/concerns/ci/partitionable/testing.rb index 7a41e69d4ffd07..1cca8f31a6c086 100644 --- a/app/models/concerns/ci/partitionable/testing.rb +++ b/app/models/concerns/ci/partitionable/testing.rb @@ -22,7 +22,6 @@ module Testing Ci::JobAnnotation Ci::JobArtifact Ci::JobArtifactReport - Ci::JobInfo Ci::JobPrototype Ci::JobVariable Ci::Pipeline diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index 3f2903880f3318..cc2b53bd4db5a2 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -25,7 +25,6 @@ Ci::JobAnnotation, Ci::JobArtifact, Ci::JobArtifactReport, - Ci::JobInfo, Ci::JobPrototype, Ci::Pipeline, Ci::PipelineConfig, diff --git a/db/docs/p_ci_job_infos.yml b/db/docs/p_ci_job_infos.yml deleted file mode 100644 index 5be8969735f419..00000000000000 --- a/db/docs/p_ci_job_infos.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -table_name: p_ci_job_infos -classes: -- Ci::JobInfo -feature_categories: -- continuous_integration -description: Extra information about `p_ci_builds` table -introduced_by_url: -milestone: '18.1' -gitlab_schema: gitlab_ci -sharding_key: - project_id: projects -table_size: small -schema_inconsistencies: \ No newline at end of file diff --git a/db/migrate/20250609131551_create_p_ci_job_prototypes.rb b/db/migrate/20250609131551_create_p_ci_job_prototypes.rb index 650dcfa618e053..24f82a705c1b68 100644 --- a/db/migrate/20250609131551_create_p_ci_job_prototypes.rb +++ b/db/migrate/20250609131551_create_p_ci_job_prototypes.rb @@ -18,13 +18,9 @@ def up t.bigserial :id, null: false t.bigint :partition_id, null: false t.bigint :project_id, null: false - # POC NOTES: I did not include interruptible, secrets, id_tokens as separate columns - # I first want to see if they are really needed long term and if they really need to - # be indexed. - # This includes also `yaml_variables` - - # renaming from `options` so we can more transparently see provenance of data + t.boolean :interruptible, default: false, null: false, index: true t.jsonb :config, default: {}, null: false + # TODO: checksum should be bytea: https://docs.gitlab.com/development/database/sha1_as_binary/ t.text :checksum, limit: 255, null: false t.timestamps_with_timezone null: false end diff --git a/db/migrate/20250609194050_create_p_ci_job_infos.rb b/db/migrate/20250609194050_create_p_ci_job_infos.rb index c53352f9bcd6f3..147679dde84bd7 100644 --- a/db/migrate/20250609194050_create_p_ci_job_infos.rb +++ b/db/migrate/20250609194050_create_p_ci_job_infos.rb @@ -22,8 +22,9 @@ class CreatePCiJobInfos < Gitlab::Database::Migration[2.3] def up # TODO: there may be more columns to add to this table. - create_table TABLE_NAME, primary_key: [:job_id, :partition_id], + create_table TABLE_NAME, primary_key: [:id, :partition_id], options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| + t.bigserial :id, null: false t.bigint :job_id, null: false t.bigint :partition_id, null: false t.bigint :project_id, null: false diff --git a/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb b/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb new file mode 100644 index 00000000000000..ef080a6017f685 --- /dev/null +++ b/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class AddIntrinsicDataToPCiBuilds < Gitlab::Database::Migration[2.3] + # disable_ddl_transaction! + milestone '18.1' + + def up + # rubocop:disable Migration/PreventAddingColumns -- PoC: we will create association table + add_column :p_ci_builds, :debug_trace_enabled, :boolean + add_column :p_ci_builds, :timeout, :integer + add_column :p_ci_builds, :timeout_source, :integer, limit: 2 + add_column :p_ci_builds, :exit_code, :integer, limit: 2 + # rubocop:enable Migration/PreventAddingColumns end + end + + def down + remove_column :p_ci_builds, :debug_trace_enabled + remove_column :p_ci_builds, :timeout + remove_column :p_ci_builds, :timeout_source + remove_column :p_ci_builds, :exit_code + end +end diff --git a/db/schema_migrations/20250620121504 b/db/schema_migrations/20250620121504 new file mode 100644 index 00000000000000..a2c36746df4a2c --- /dev/null +++ b/db/schema_migrations/20250620121504 @@ -0,0 +1 @@ +14bbacc5f209a0739098c6267828bf8fb409d5789823a54ddf3dac12230db8c1 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index cd2f27f4090d6f..355be497740c92 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -4649,6 +4649,10 @@ CREATE TABLE p_ci_builds ( execution_config_id bigint, upstream_pipeline_partition_id bigint, prototype_id bigint, + debug_trace_enabled boolean, + timeout integer, + timeout_source smallint, + exit_code smallint, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)), CONSTRAINT check_9aa9432137 CHECK ((project_id IS NOT NULL)) ) @@ -4730,6 +4734,7 @@ CREATE TABLE p_ci_job_artifacts ( PARTITION BY LIST (partition_id); CREATE TABLE p_ci_job_infos ( + id bigint NOT NULL, job_id bigint NOT NULL, partition_id bigint NOT NULL, project_id bigint NOT NULL, @@ -4748,10 +4753,11 @@ CREATE TABLE p_ci_job_prototypes ( id bigint NOT NULL, partition_id bigint NOT NULL, project_id bigint NOT NULL, + interruptible boolean DEFAULT false NOT NULL, config jsonb DEFAULT '{}'::jsonb NOT NULL, checksum text NOT NULL, - created_at timestamp(6) without time zone NOT NULL, - updated_at timestamp(6) without time zone NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, CONSTRAINT check_c43b283d76 CHECK ((char_length(checksum) <= 255)) ) PARTITION BY LIST (partition_id); @@ -36523,6 +36529,8 @@ CREATE INDEX index_p_ci_job_artifact_reports_on_project_id ON ONLY p_ci_job_arti CREATE INDEX index_p_ci_job_infos_on_interruptible ON ONLY p_ci_job_infos USING btree (interruptible); +CREATE INDEX index_p_ci_job_prototypes_on_interruptible ON ONLY p_ci_job_prototypes USING btree (interruptible); + CREATE INDEX index_p_ci_pipeline_variables_on_project_id ON ONLY p_ci_pipeline_variables USING btree (project_id); CREATE INDEX index_p_ci_pipelines_config_on_project_id ON ONLY p_ci_pipelines_config USING btree (project_id); diff --git a/ee/app/models/concerns/ee/ci/metadatable.rb b/ee/app/models/concerns/ee/ci/metadatable.rb index caf339d58f0a9e..cfed3cb2349076 100644 --- a/ee/app/models/concerns/ee/ci/metadatable.rb +++ b/ee/app/models/concerns/ee/ci/metadatable.rb @@ -18,7 +18,7 @@ def secrets? end def secrets=(value) - raise "this data is immutable" if Feature.enabled?(:ci_job_prototypes, project) + raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? ensure_metadata.secrets = value end diff --git a/lib/gitlab/ci/pipeline/seed/build.rb b/lib/gitlab/ci/pipeline/seed/build.rb index 6055548f3c7481..6a7a418c3bfe27 100644 --- a/lib/gitlab/ci/pipeline/seed/build.rb +++ b/lib/gitlab/ci/pipeline/seed/build.rb @@ -75,7 +75,6 @@ def attributes .deep_merge(scoped_user_id_attribute) attrs = add_prototype_association(attrs) - attrs = add_job_info_association(attrs) attrs.except(:stage) end @@ -238,19 +237,15 @@ def calculate_yaml_variables! def add_prototype_association(attrs) return attrs if Feature.disabled?(:ci_job_prototypes, @pipeline.project) - # POC NOTES: we delete these because we want this data to move to new home - yaml_variables = attrs.delete(:yaml_variables) - options = attrs.delete(:options) - id_tokens = attrs.delete(:id_tokens) - secrets = attrs.delete(:secrets) - # TODO: need to decide on a structure and stick to it long term # we can consider adding a version number for each record in the future. + # POC NOTES: we delete these because we want this data to move to new home config = { - options: options, - id_tokens: id_tokens, - yaml_variables: yaml_variables, - secrets: secrets + options: attrs.delete(:options), + id_tokens: attrs.delete(:id_tokens), + yaml_variables: attrs.delete(:yaml_variables), + secrets: attrs.delete(:secrets), + interruptible: attrs.delete(:interruptible) } attrs.merge( @@ -260,18 +255,6 @@ def add_prototype_association(attrs) config: config) ) end - - def add_job_info_association(attrs) - return attrs if Feature.disabled?(:ci_job_prototypes, @pipeline.project) - - interruptible = attrs.delete(:interruptible) - attrs.merge( - job_info: Ci::JobInfo.new( - partition_id: attrs.fetch(:partition_id), - project_id: attrs.fetch(:project).id, - interruptible: interruptible) - ) - end end end end diff --git a/spec/factories/ci/job_prototypes.rb b/spec/factories/ci/job_prototypes.rb deleted file mode 100644 index 07f90f7eb8db4a..00000000000000 --- a/spec/factories/ci/job_prototypes.rb +++ /dev/null @@ -1,22 +0,0 @@ -# frozen_string_literal: true - -FactoryBot.define do - factory :ci_job_prototype, class: 'Ci::JobPrototype' do - # config do - # { - # options: { - # image: 'image:1.0', - # services: ['postgres'], - # script: ['ls -a'] - # }, - # yaml_variables: [ - # { key: 'DB_NAME', value: 'postgres', public: true } - # ] - # } - # end - - # checksum do - # ::Ci::JobPrototype.calculate_checksum_for_seed(config) - # end - end -end -- GitLab From 59b567d9217692eb2919f60d2fad67971e19af27 Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Tue, 24 Jun 2025 16:28:50 +0100 Subject: [PATCH 06/10] Use feature flags to switch data sources --- app/models/ci/job_prototype.rb | 10 ++ app/models/concerns/ci/metadatable.rb | 95 +++++++------------ app/presenters/ci/build_metadata_presenter.rb | 12 +-- app/serializers/build_details_entity.rb | 1 + app/serializers/build_metadata_entity.rb | 6 +- .../wip/read_from_ci_job_prototypes.yml | 10 ++ .../wip/stop_writing_ci_builds_metadata.yml | 10 ++ ...pes.yml => write_to_ci_job_prototypes.yml} | 2 +- .../20250609194050_create_p_ci_job_infos.rb | 57 ----------- ...21504_add_intrinsic_data_to_p_ci_builds.rb | 5 +- db/schema_migrations/20250609194050 | 1 - db/structure.sql | 22 +---- ee/app/models/concerns/ee/ci/metadatable.rb | 10 +- lib/gitlab/ci/pipeline/seed/build.rb | 40 ++++---- 14 files changed, 105 insertions(+), 176 deletions(-) create mode 100644 config/feature_flags/wip/read_from_ci_job_prototypes.yml create mode 100644 config/feature_flags/wip/stop_writing_ci_builds_metadata.yml rename config/feature_flags/wip/{ci_job_prototypes.yml => write_to_ci_job_prototypes.yml} (82%) delete mode 100644 db/migrate/20250609194050_create_p_ci_job_infos.rb delete mode 100644 db/schema_migrations/20250609194050 diff --git a/app/models/ci/job_prototype.rb b/app/models/ci/job_prototype.rb index e12c510f1841f5..56a3693a06cbf4 100644 --- a/app/models/ci/job_prototype.rb +++ b/app/models/ci/job_prototype.rb @@ -7,6 +7,16 @@ class JobPrototype < Ci::ApplicationRecord self.table_name = :p_ci_job_prototypes self.primary_key = :id + # IMPORTANT: append new attributes at the end of this list. Do not change the order! + # Order is important for the checksum calculation. + CONFIG_ATTRIBUTES = [ + :options, + :yaml_variables, + :id_tokens, + :secrets, + :interruptible + ].freeze + # POC NOTES: copied from Pipeline. There should be 1 unique prototype per partition. partitionable scope: ->(_) { Ci::Pipeline.current_partition_value }, partitioned: true diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index e11ea578fddd5d..1ecfb693a3a9c5 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -9,6 +9,8 @@ module Metadatable extend ActiveSupport::Concern include Gitlab::Utils::StrongMemoize + ForbiddenWriteError = Class.new(StandardError) + included do has_one :metadata, ->(build) { where(partition_id: build.partition_id) }, @@ -34,23 +36,20 @@ def self.select_with_exposed_artifacts end def has_exposed_artifacts? - if prototype.present? - options.dig(:artifacts, :expose_as).present? - else - !!metadata&.has_exposed_artifacts? - end + options.dig(:artifacts, :expose_as).present? end def ensure_metadata - metadata || build_metadata(project: project) + metadata || + (can_write_metadata? && build_metadata(project: project)) + end + + def timeout_human_readable + (use_prototype? && super) || metadata&.timeout_human_readable end def timeout_value - if prototype.present? - self.timeout - else - metadata&.timeout - end + (use_prototype? && self.timeout) || metadata&.timeout end def update_timeout_state @@ -58,13 +57,8 @@ def update_timeout_state return unless timeout - # If we have prototype data we are splitting p_ci_builds_metadata between prototype record - # and `p_ci_builds`. - if prototype.present? - self.update(timeout: timeout.value, timeout_source: timeout.source) - else - ensure_metadata.update(timeout: timeout.value, timeout_source: timeout.source) - end + self.update(timeout: timeout.value, timeout_source: timeout.source) + ensure_metadata.update(timeout: timeout.value, timeout_source: timeout.source) if can_write_metadata? end def degenerated? @@ -74,18 +68,14 @@ def degenerated? def degenerate! self.class.transaction do self.update!(options: nil, yaml_variables: nil) - self.needs.all.delete_all - self.metadata&.destroy + self.prototype&.destroy! + self.metadata&.destroy! yield if block_given? end end def exit_code - if prototype.present? - self[:exit_code] - else - metadata&.exit_code - end + (use_prototype? && self[:exit_code]) || metadata&.exit_code end def exit_code=(value) @@ -93,11 +83,8 @@ def exit_code=(value) safe_value = value.to_i.clamp(0, Gitlab::Database::MAX_SMALLINT_VALUE) - if prototype.present? - self[:exit_code] = safe_value - else - ensure_metadata.exit_code = safe_value - end + self[:exit_code] = safe_value + ensure_metadata.exit_code = safe_value if can_write_metadata? end def options @@ -109,11 +96,9 @@ def yaml_variables end def options=(value) - raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? - write_metadata_attribute(:options, :config_options, value) + return unless can_write_metadata? - # TODO: ensure this data gets persisted in `p_ci_builds_info` if intrinsic and mutable ensure_metadata.tap do |metadata| # Store presence of exposed artifacts in build metadata to make it easier to query metadata.has_exposed_artifacts = value&.dig(:artifacts, :expose_as).present? @@ -122,23 +107,15 @@ def options=(value) end def yaml_variables=(value) - raise 'prototypes is immutable' if prototype.present? # TODO: do we need feature flag check? - write_metadata_attribute(:yaml_variables, :config_variables, value) end def interruptible - if prototype.present? - prototype.interruptible - else - metadata&.interruptible - end + (use_prototype? && prototype&.interruptible) || metadata&.interruptible end def interruptible=(value) - raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? - - ensure_metadata.interruptible = value + ensure_metadata.interruptible = value if can_write_metadata? end def id_tokens @@ -150,25 +127,16 @@ def id_tokens? end def id_tokens=(value) - raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? - - ensure_metadata.id_tokens = value + ensure_metadata.id_tokens = value if can_write_metadata? end def enable_debug_trace! - if prototype.present? - update!(debug_trace_enabled: true) - else - ensure_metadata.enable_debug_trace! - end + update!(debug_trace_enabled: true) + ensure_metadata.enable_debug_trace! if can_write_metadata? end def debug_trace_enabled? - if prototype.present? - self[:debug_trace_enabled] - else - metadata&.debug_trace_enabled? - end + (use_prototype? && self[:debug_trace_enabled]) || metadata&.debug_trace_enabled? end def enqueue_immediately? @@ -183,13 +151,14 @@ def set_enqueue_immediately! def read_metadata_attribute(legacy_key, metadata_key, prototype_key, default_value = nil) (legacy_key && read_attribute(legacy_key)) || - (prototype&.config && prototype.config[prototype_key]) || + (use_prototype? && prototype&.config && prototype.config[prototype_key]) || metadata&.read_attribute(metadata_key) || default_value end - # TODO: remove this after feature flag ci_job_prototypes is removed. def write_metadata_attribute(legacy_key, metadata_key, value) + return unless can_write_metadata? + ensure_metadata.write_attribute(metadata_key, value) write_attribute(legacy_key, nil) end @@ -225,9 +194,15 @@ def runner_timeout_set? runner&.maximum_timeout.to_i > 0 end - def ensure_prototype - prototype || build_prototype(project_id: project_id, partition_id: partition_id) + def can_write_metadata? + Feature.disabled?(:stop_writing_ci_builds_metadata, project) + end + strong_memoize_attr :can_write_metadata? + + def use_prototype? + prototype.present? && Feature.enabled?(:read_from_ci_job_prototypes, project) end + strong_memoize_attr :use_prototype? end end diff --git a/app/presenters/ci/build_metadata_presenter.rb b/app/presenters/ci/build_metadata_presenter.rb index d3e2d09c11c5c7..c3248dcde9c80e 100644 --- a/app/presenters/ci/build_metadata_presenter.rb +++ b/app/presenters/ci/build_metadata_presenter.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true module Ci - # TODO: this needs to be Ci::JobInfoPresenter that internally delegates to - # metadata when prototype doesn't exist yet. + # TODO: this needs to present a job object and leverage the methods in Metadatable + # so we can automatically switch the data source (prototypes/builds). class BuildMetadataPresenter < Gitlab::View::Presenter::Delegated TIMEOUT_SOURCES = { unknown_timeout_source: nil, @@ -11,14 +11,14 @@ class BuildMetadataPresenter < Gitlab::View::Presenter::Delegated job_timeout_source: 'job' }.freeze - presents ::Ci::BuildMetadata, as: :metadata + presents ::Ci::Build, as: :build delegator_override :timeout_source def timeout_source - return unless metadata.timeout_source? + return unless build.timeout_source? - TIMEOUT_SOURCES[metadata.timeout_source.to_sym] || - metadata.timeout_source + TIMEOUT_SOURCES[build.timeout_source.to_sym] || + build.timeout_source end end end diff --git a/app/serializers/build_details_entity.rb b/app/serializers/build_details_entity.rb index af0097b61d46ad..b4168120c865ec 100644 --- a/app/serializers/build_details_entity.rb +++ b/app/serializers/build_details_entity.rb @@ -113,6 +113,7 @@ class BuildDetailsEntity < Ci::JobEntity private alias_method :build, :object + alias_method :metadata, :object def build_failed_issue_options { title: "Job Failed ##{build.id}", diff --git a/app/serializers/build_metadata_entity.rb b/app/serializers/build_metadata_entity.rb index 1d73c62512dd9e..e81d8390ec168c 100644 --- a/app/serializers/build_metadata_entity.rb +++ b/app/serializers/build_metadata_entity.rb @@ -1,9 +1,11 @@ # frozen_string_literal: true class BuildMetadataEntity < Grape::Entity - # TODO: add Ci::JobInfoPresenter as abstraction + # POC NOTES: we are leveraging Metadatable methods here to change the source + # of the data when we switch between builds and prototypes. expose :timeout_human_readable expose :timeout_source do |metadata| - metadata.present.timeout_source + # TODO: `metadata` here refers to the job actually. We should improve the readability. + metadata.present.timeout_source # calls Ci::BuildMetadataPresenter#timeout_source end end diff --git a/config/feature_flags/wip/read_from_ci_job_prototypes.yml b/config/feature_flags/wip/read_from_ci_job_prototypes.yml new file mode 100644 index 00000000000000..cdac92d42c7d3d --- /dev/null +++ b/config/feature_flags/wip/read_from_ci_job_prototypes.yml @@ -0,0 +1,10 @@ +--- +name: read_from_ci_job_prototypes +description: +feature_issue_url: +introduced_by_url: +rollout_issue_url: +milestone: '18.1' +group: group::ci platform +type: wip +default_enabled: false diff --git a/config/feature_flags/wip/stop_writing_ci_builds_metadata.yml b/config/feature_flags/wip/stop_writing_ci_builds_metadata.yml new file mode 100644 index 00000000000000..5cb05013c69f23 --- /dev/null +++ b/config/feature_flags/wip/stop_writing_ci_builds_metadata.yml @@ -0,0 +1,10 @@ +--- +name: stop_writing_ci_builds_metadata +description: +feature_issue_url: +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/193948 +rollout_issue_url: +milestone: '18.1' +group: group::ci platform +type: wip +default_enabled: false diff --git a/config/feature_flags/wip/ci_job_prototypes.yml b/config/feature_flags/wip/write_to_ci_job_prototypes.yml similarity index 82% rename from config/feature_flags/wip/ci_job_prototypes.yml rename to config/feature_flags/wip/write_to_ci_job_prototypes.yml index e77480c66518dd..63c5bcb6af746c 100644 --- a/config/feature_flags/wip/ci_job_prototypes.yml +++ b/config/feature_flags/wip/write_to_ci_job_prototypes.yml @@ -1,5 +1,5 @@ --- -name: ci_job_prototypes +name: write_to_ci_job_prototypes description: feature_issue_url: introduced_by_url: diff --git a/db/migrate/20250609194050_create_p_ci_job_infos.rb b/db/migrate/20250609194050_create_p_ci_job_infos.rb deleted file mode 100644 index 147679dde84bd7..00000000000000 --- a/db/migrate/20250609194050_create_p_ci_job_infos.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -# See https://docs.gitlab.com/ee/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. - -class CreatePCiJobInfos < Gitlab::Database::Migration[2.3] - # When using the methods "add_concurrent_index" or "remove_concurrent_index" - # you must disable the use of transactions - # as these methods can not run in an existing transaction. - # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure - # that either of them is the _only_ method called in the migration, - # any other changes should go in a separate migration. - # This ensures that upon failure _only_ the index creation or removing fails - # and can be retried or reverted easily. - # - # To disable transactions uncomment the following line and remove these - # comments: - disable_ddl_transaction! - milestone '18.1' - - TABLE_NAME = :p_ci_job_infos - - def up - # TODO: there may be more columns to add to this table. - create_table TABLE_NAME, primary_key: [:id, :partition_id], - options: 'PARTITION BY LIST (partition_id)', if_not_exists: true do |t| - t.bigserial :id, null: false - t.bigint :job_id, null: false - t.bigint :partition_id, null: false - t.bigint :project_id, null: false - t.boolean :interruptible, index: true - t.boolean :debug_trace_enabled - t.boolean :enqueue_immediately - t.integer :timeout - t.integer :timeout_source, limit: 2 - t.integer :exit_code, limit: 2 - t.timestamps_with_timezone null: false - end - - # TODO: need to do the proper partitioning steps - with_lock_retries do - connection.execute(<<~SQL) - LOCK TABLE ONLY #{TABLE_NAME} IN ACCESS EXCLUSIVE MODE; - SQL - - connection.execute(<<~SQL) - CREATE TABLE IF NOT EXISTS gitlab_partitions_dynamic.ci_job_infos_100 - PARTITION OF #{TABLE_NAME} - FOR VALUES IN (100); - SQL - end - end - - def down - drop_table TABLE_NAME - end -end diff --git a/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb b/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb index ef080a6017f685..50f9a55fadacb3 100644 --- a/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb +++ b/db/migrate/20250620121504_add_intrinsic_data_to_p_ci_builds.rb @@ -9,7 +9,9 @@ def up add_column :p_ci_builds, :debug_trace_enabled, :boolean add_column :p_ci_builds, :timeout, :integer add_column :p_ci_builds, :timeout_source, :integer, limit: 2 - add_column :p_ci_builds, :exit_code, :integer, limit: 2 + add_column :p_ci_builds, :exit_code, :integer, limit: 2 + # TODO: we need to handle limit by truncating new data and existing migrated data. + add_column :p_ci_builds, :downstream_errors, :text, limit: 255 # rubocop:enable Migration/PreventAddingColumns end end @@ -18,5 +20,6 @@ def down remove_column :p_ci_builds, :timeout remove_column :p_ci_builds, :timeout_source remove_column :p_ci_builds, :exit_code + remove_column :p_ci_builds, :downstream_errors end end diff --git a/db/schema_migrations/20250609194050 b/db/schema_migrations/20250609194050 deleted file mode 100644 index 02dd2da56c000f..00000000000000 --- a/db/schema_migrations/20250609194050 +++ /dev/null @@ -1 +0,0 @@ -89137f98d833f3edfec262e8a6311bfcefccf31047d2ee94c28f7f057d663e04 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 355be497740c92..9bc9c3ace5ebab 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -4653,6 +4653,7 @@ CREATE TABLE p_ci_builds ( timeout integer, timeout_source smallint, exit_code smallint, + downstream_errors character varying, CONSTRAINT check_1e2fbd1b39 CHECK ((lock_version IS NOT NULL)), CONSTRAINT check_9aa9432137 CHECK ((project_id IS NOT NULL)) ) @@ -4733,22 +4734,6 @@ CREATE TABLE p_ci_job_artifacts ( ) PARTITION BY LIST (partition_id); -CREATE TABLE p_ci_job_infos ( - id bigint NOT NULL, - job_id bigint NOT NULL, - partition_id bigint NOT NULL, - project_id bigint NOT NULL, - interruptible boolean, - debug_trace_enabled boolean, - enqueue_immediately boolean, - timeout integer, - timeout_source smallint, - exit_code smallint, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL -) -PARTITION BY LIST (partition_id); - CREATE TABLE p_ci_job_prototypes ( id bigint NOT NULL, partition_id bigint NOT NULL, @@ -30602,9 +30587,6 @@ ALTER TABLE ONLY p_ci_job_artifact_reports ALTER TABLE ONLY p_ci_job_artifacts ADD CONSTRAINT p_ci_job_artifacts_pkey PRIMARY KEY (id, partition_id); -ALTER TABLE ONLY p_ci_job_infos - ADD CONSTRAINT p_ci_job_infos_pkey PRIMARY KEY (job_id, partition_id); - ALTER TABLE ONLY p_ci_job_prototypes ADD CONSTRAINT p_ci_job_prototypes_pkey PRIMARY KEY (id, partition_id); @@ -36527,8 +36509,6 @@ CREATE INDEX index_p_ci_job_annotations_on_project_id ON ONLY p_ci_job_annotatio CREATE INDEX index_p_ci_job_artifact_reports_on_project_id ON ONLY p_ci_job_artifact_reports USING btree (project_id); -CREATE INDEX index_p_ci_job_infos_on_interruptible ON ONLY p_ci_job_infos USING btree (interruptible); - CREATE INDEX index_p_ci_job_prototypes_on_interruptible ON ONLY p_ci_job_prototypes USING btree (interruptible); CREATE INDEX index_p_ci_pipeline_variables_on_project_id ON ONLY p_ci_pipeline_variables USING btree (project_id); diff --git a/ee/app/models/concerns/ee/ci/metadatable.rb b/ee/app/models/concerns/ee/ci/metadatable.rb index cfed3cb2349076..11c82935ca7859 100644 --- a/ee/app/models/concerns/ee/ci/metadatable.rb +++ b/ee/app/models/concerns/ee/ci/metadatable.rb @@ -6,11 +6,7 @@ module Metadatable extend ActiveSupport::Concern def secrets - if prototype.present? - prototype.config&.dig(:secrets) - else - metadata&.secrets - end + (use_prototype? && prototype&.config&.dig(:secrets)) || metadata&.secrets end def secrets? @@ -18,9 +14,7 @@ def secrets? end def secrets=(value) - raise "prototype is immutable" if prototype.present? # TODO: do we need feature flag check? - - ensure_metadata.secrets = value + ensure_metadata.secrets = value if can_write_metadata? end end end diff --git a/lib/gitlab/ci/pipeline/seed/build.rb b/lib/gitlab/ci/pipeline/seed/build.rb index 6a7a418c3bfe27..df5b8ad90872d4 100644 --- a/lib/gitlab/ci/pipeline/seed/build.rb +++ b/lib/gitlab/ci/pipeline/seed/build.rb @@ -235,25 +235,27 @@ def calculate_yaml_variables! # TODO: this currently fires N+1 queries when checking for existing prototypes. # We should eventually look into querying prototypes in bulk. def add_prototype_association(attrs) - return attrs if Feature.disabled?(:ci_job_prototypes, @pipeline.project) - - # TODO: need to decide on a structure and stick to it long term - # we can consider adding a version number for each record in the future. - # POC NOTES: we delete these because we want this data to move to new home - config = { - options: attrs.delete(:options), - id_tokens: attrs.delete(:id_tokens), - yaml_variables: attrs.delete(:yaml_variables), - secrets: attrs.delete(:secrets), - interruptible: attrs.delete(:interruptible) - } - - attrs.merge( - prototype: @context.find_or_build_prototype( - partition_id: attrs.fetch(:partition_id), - project_id: attrs.fetch(:project).id, - config: config) - ) + return attrs if Feature.disabled?(:write_to_ci_job_prototypes, @pipeline.project) + + prototype_attributes = ::Ci::JobPrototype::CONFIG_ATTRIBUTES + config = if Feature.enabled?(:stop_writing_ci_builds_metadata, @pipeline.project) + # To stop writing to metadata we need to delete the attributes being moved to prototypes + # Once stop_writing_ci_builds_metadata is deleted we shoul encapsulate CONFIG_ATTRIBUTES in + # a method inside Ci::JobPrototype#extract_attributes! + prototype_attributes.each_with_object({}) do |attribute, hash| + hash[attribute] = attrs.delete(attribute) + end + else + # copy attributes from metadata without deleting original ones + attrs.slice(*prototype_attributes) + end + + prototype = @context.find_or_build_prototype( + partition_id: attrs.fetch(:partition_id), + project_id: attrs.fetch(:project).id, + config: config) + + attrs.merge(prototype: prototype) end end end -- GitLab From 87a2925247cd337eedc01d59ebd8a2098a02c3d7 Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Tue, 24 Jun 2025 17:15:05 +0100 Subject: [PATCH 07/10] Example of how to fix specs --- app/models/concerns/ci/metadatable.rb | 6 +++--- spec/factories/ci/processable.rb | 4 +++- spec/models/ci/build_spec.rb | 17 +++++------------ 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/app/models/concerns/ci/metadatable.rb b/app/models/concerns/ci/metadatable.rb index 1ecfb693a3a9c5..e0004ffaba8702 100644 --- a/app/models/concerns/ci/metadatable.rb +++ b/app/models/concerns/ci/metadatable.rb @@ -169,7 +169,7 @@ def timeout_with_highest_precedence end def project_timeout - BuildTimeout.new(project&.build_timeout, :project_timeout_source) + Ci::BuildMetadata::BuildTimeout.new(project&.build_timeout, :project_timeout_source) end strong_memoize_attr :project_timeout @@ -179,14 +179,14 @@ def job_timeout timeout_from_options = options[:job_timeout] return unless timeout_from_options - BuildTimeout.new(timeout_from_options, :job_timeout_source) if timeout_from_options + Ci::BuildMetadata::BuildTimeout.new(timeout_from_options, :job_timeout_source) if timeout_from_options end strong_memoize_attr :job_timeout def runner_timeout return unless runner_timeout_set? - BuildTimeout.new(runner.maximum_timeout, :runner_timeout_source) + Ci::BuildMetadata::BuildTimeout.new(runner.maximum_timeout, :runner_timeout_source) end strong_memoize_attr :runner_timeout diff --git a/spec/factories/ci/processable.rb b/spec/factories/ci/processable.rb index 976dac061fd30e..e1f5df51fc2ed1 100644 --- a/spec/factories/ci/processable.rb +++ b/spec/factories/ci/processable.rb @@ -11,11 +11,13 @@ scheduling_type { 'stage' } partition_id { pipeline.partition_id } + interruptible { false } + prototype do ::Ci::JobPrototype.find_or_initialize_for_seed( partition_id: partition_id, project_id: project.id, - config: { options: options, yaml_variables: yaml_variables }) + config: { options: options, yaml_variables: yaml_variables, interruptible: interruptible }) end options do diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb index 04dab9db0b9a3c..9afe3305ab4150 100644 --- a/spec/models/ci/build_spec.rb +++ b/spec/models/ci/build_spec.rb @@ -5333,23 +5333,19 @@ def run_job_without_exception context 'when metadata has debug_trace_enabled true' do before do - build.metadata.update!(debug_trace_enabled: true) + build.enable_debug_trace! end it { is_expected.to eq true } end context 'when metadata has debug_trace_enabled false' do - before do - build.metadata.update!(debug_trace_enabled: false) - end - it { is_expected.to eq false } end context 'when metadata does not exist' do before do - build.metadata.destroy! + build.metadata&.destroy! end it { is_expected.to eq false } @@ -5360,10 +5356,7 @@ def run_job_without_exception let(:exit_code) { 1 } let(:options) { {} } - before do - build.options.merge!(options) - build.save! - end + let!(:build) { create(:ci_build, options: options) } subject(:drop_with_exit_code) do build.drop_with_exit_code!(:unknown_failure, exit_code) @@ -5371,7 +5364,7 @@ def run_job_without_exception it 'correctly sets the exit code' do expect { drop_with_exit_code } - .to change { build.reload.metadata&.exit_code }.from(nil).to(1) + .to change { build.reload.exit_code }.from(nil).to(1) end shared_examples 'drops the build without changing allow_failure' do @@ -5467,7 +5460,7 @@ def run_job_without_exception it 'wraps around to max size of a signed smallint' do expect { drop_with_exit_code } - .to change { build.reload.metadata&.exit_code }.from(nil).to(32767) + .to change { build.reload.exit_code }.from(nil).to(32767) end end end -- GitLab From c5c6e1fb101970418737344152eca783fffca670 Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Thu, 26 Jun 2025 13:03:46 +0100 Subject: [PATCH 08/10] Fix failing spec --- ee/spec/services/ci/register_job_service_spec.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ee/spec/services/ci/register_job_service_spec.rb b/ee/spec/services/ci/register_job_service_spec.rb index 156ad8bddc9ba0..564a0156c9a145 100644 --- a/ee/spec/services/ci/register_job_service_spec.rb +++ b/ee/spec/services/ci/register_job_service_spec.rb @@ -235,9 +235,7 @@ rsa_key = OpenSSL::PKey::RSA.generate(3072).to_s stub_application_setting(ci_jwt_signing_key: rsa_key) - pending_build.metadata.update!( - id_tokens: { 'TEST_ID_TOKEN' => { aud: 'https://client.test' } } - ) + allow(pending_build).to receive(:id_tokens).and_return('TEST_ID_TOKEN' => { aud: 'https://client.test' }) create(:ci_variable, project: project, key: 'VAULT_SERVER_URL', value: 'https://vault.example.com') end -- GitLab From 6dbf7de132e716b0f84d0e02ca66b2174d031606 Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Thu, 26 Jun 2025 14:11:23 +0100 Subject: [PATCH 09/10] Refactor JobPrototype --- app/models/ci/job_prototype.rb | 47 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/app/models/ci/job_prototype.rb b/app/models/ci/job_prototype.rb index 56a3693a06cbf4..ba3a5d0973533d 100644 --- a/app/models/ci/job_prototype.rb +++ b/app/models/ci/job_prototype.rb @@ -9,6 +9,10 @@ class JobPrototype < Ci::ApplicationRecord # IMPORTANT: append new attributes at the end of this list. Do not change the order! # Order is important for the checksum calculation. + # TODO: consider including a lot more data initially and scale back later if needed. + # E.g. `when`, `allow_failure`, `coverage_regex`, and other attributes outside `options` + # could be unique across many jobs. Later we could consider dropping columns from `p_ci_builds` + # while retaining the unique data. CONFIG_ATTRIBUTES = [ :options, :yaml_variables, @@ -17,7 +21,6 @@ class JobPrototype < Ci::ApplicationRecord :interruptible ].freeze - # POC NOTES: copied from Pipeline. There should be 1 unique prototype per partition. partitionable scope: ->(_) { Ci::Pipeline.current_partition_value }, partitioned: true # TODO: add schema that includes `options` and `id_tokens`, `secrets`, etc. @@ -35,34 +38,32 @@ class JobPrototype < Ci::ApplicationRecord end def self.find_or_initialize_for_seed(partition_id:, project_id:, config:) - job_checksum = calculate_checksum_for_seed(config) + config, checksum = sanitize_and_checksum(config) finder_attributes = { partition_id: partition_id, project_id: project_id, - checksum: job_checksum + checksum: checksum } prototype = find_by(**finder_attributes) - return prototype if prototype - - new(finder_attributes.merge(config: config, interruptible: config[:interruptible])) + prototype || new(finder_attributes.merge(config: config, interruptible: config[:interruptible])) end - # Used when creating a pipeline. Job datastructure does not match - # the datastructure used for persistence. - def self.calculate_checksum_for_seed(attrs) - # TODO: we need to filter out some attributes such as: - # enqueue_immediately, scoped_user_id, downstream_errors, environment (?) - # TODO: Use SHA256 digest and convert to json before conversion. - attrs.hash.to_s - - # TODO: consider including a lot more data initially and scale back later if needed. - # E.g. `when`, `allow_failure`, `coverage_regex`, and other attributes outside `options` - # could be unique across many jobs. Later we could consider dropping columns from `p_ci_builds` - # while retaining the unique data. + def self.sanitize_and_checksum(config) + sanitized_config = config.symbolize_keys.slice(*CONFIG_ATTRIBUTES) + + checksum = saitized_config + .then { |data| Gitlab::Json.dump(data) } + .then { |data| Digest::SHA256.hexdigest(data) } + + [sanitized_config, checksum] end + # POC NOTES: commenting these to see if we can fix specs without having these methods. + # The prototype must be immutable so these should not be used. + # Consider maybe adding a spec helper that implements `update_config` + # # def options=(value) # update_config(options: value) # end @@ -73,11 +74,13 @@ def self.calculate_checksum_for_seed(attrs) private + # POC NOTES: this should only be used in specs, if necessary. def update_config(attrs) - # TODO: this should eventually raise an exception. Allowing only because the specs - # contain a lot of factory code that uses the setter methods. - self.config = (config || {}).merge(attrs) - self.checksum = self.class.calculate_checksum_for_seed(config) + new_attrs = (config || {}).merge(attrs) + new_config, new_checksum = sanitize_and_checksum(new_attrs) + + self.config = new_config + self.checksum = new_checksum end end end -- GitLab From c70b5a94f31b5dfc84e78b33b427f9b06f4fd32a Mon Sep 17 00:00:00 2001 From: Fabio Pitino Date: Fri, 27 Jun 2025 15:32:00 +0100 Subject: [PATCH 10/10] Add refactoring note --- app/services/ci/pipelines/add_job_service.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/services/ci/pipelines/add_job_service.rb b/app/services/ci/pipelines/add_job_service.rb index dfbb37cf0dc175..2da6e8a8cbdcba 100644 --- a/app/services/ci/pipelines/add_job_service.rb +++ b/app/services/ci/pipelines/add_job_service.rb @@ -43,6 +43,7 @@ def assign_pipeline_attributes(job) # update metadata since it might have been lazily initialised before this call # metadata is present on `Ci::Processable` + # POC NOTES: remove this when removing `stop_writing_ci_builds_metadata` feature flag if job.respond_to?(:metadata) && job.metadata job.metadata.project = pipeline.project job.metadata.partition_id = pipeline.partition_id -- GitLab