diff --git a/app/models/project_repository.rb b/app/models/project_repository.rb index c11ceb039b8f1b9dc43477d6bd7f350ac2547e30..4d634bbfa316c1d576ccfbe3aaabe67fb80892ba 100644 --- a/app/models/project_repository.rb +++ b/app/models/project_repository.rb @@ -16,3 +16,5 @@ def find_project(disk_path) end end end + +ProjectRepository.prepend_mod diff --git a/db/migrate/20250825100342_create_project_repository_states.rb b/db/migrate/20250825100342_create_project_repository_states.rb index deadc7b45cfd4afff9d263c72f52c5d1239438af..70833908ad3c8f5e0b250e3c6a37240707b68b8c 100644 --- a/db/migrate/20250825100342_create_project_repository_states.rb +++ b/db/migrate/20250825100342_create_project_repository_states.rb @@ -10,7 +10,7 @@ class CreateProjectRepositoryStates < Gitlab::Database::Migration[2.3] NEEDS_VERIFICATION_INDEX_NAME = "index_project_repository_states_needs_verification" def up - create_table :project_repository_states do |t| # rubocop:disable Migration/EnsureFactoryForTable -- will be added via https://gitlab.com/gitlab-org/gitlab/-/merge_requests/194051 + create_table :project_repository_states do |t| t.datetime_with_timezone :verification_started_at t.datetime_with_timezone :verification_retry_at t.datetime_with_timezone :verified_at diff --git a/ee/app/models/ee/project_repository.rb b/ee/app/models/ee/project_repository.rb new file mode 100644 index 0000000000000000000000000000000000000000..a8a04e631ddfa08ba65f195b330793dee2b3cc4f --- /dev/null +++ b/ee/app/models/ee/project_repository.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +module EE + # ProjectRepository EE mixin + # + # This module is intended to encapsulate EE-specific model logic + # and be prepended in the `ProjectRepository` model + module ProjectRepository # rubocop:disable Gitlab/BoundedContexts -- EE module for existing model + extend ActiveSupport::Concern + + prepended do + include ::Geo::ReplicableModel + include ::Geo::VerifiableModel + + with_replicator Geo::ProjectRepositoryReplicator + + has_one :project_repository_state, + autosave: false, + inverse_of: :project_repository, + foreign_key: :project_repository_id, + class_name: 'Geo::ProjectRepositoryState' + + # Delegate repository-related methods to the associated project + delegate( + :repository, + :repository_storage, + to: :project) + + delegate(*::Geo::VerificationState::VERIFICATION_METHODS, + to: :project_repository_state) + + scope :available_replicables, -> { all } + + scope :with_verification_state, ->(state) do + joins(:project_repository_state) + .where(project_repository_states: { + verification_state: verification_state_value(state) + }) + end + + def verification_state_object + project_repository_state + end + end + + class_methods do + extend ::Gitlab::Utils::Override + + # @return [ActiveRecord::Relation] scope observing selective sync + # settings of the given node + def selective_sync_scope(node, **params) + return all unless node.selective_sync? + + project_scope = ::Project.selective_sync_scope(node, **params) + where(project_id: project_scope.select(:id)) + end + + override :verification_state_model_key + def verification_state_model_key + :project_repository_id + end + + override :verification_state_table_class + def verification_state_table_class + ::Geo::ProjectRepositoryState + end + end + + def project_repository_state + super || build_project_repository_state + end + end # rubocop:enable Gitlab/BoundedContexts +end diff --git a/ee/app/models/geo/project_repository_state.rb b/ee/app/models/geo/project_repository_state.rb new file mode 100644 index 0000000000000000000000000000000000000000..03bd7535f50da272bd8c1e0081905e890a3c1346 --- /dev/null +++ b/ee/app/models/geo/project_repository_state.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Geo + class ProjectRepositoryState < ApplicationRecord + include ::Geo::VerificationStateDefinition + include ::Gitlab::Geo::ProjectRepositoryFkHelper + + before_validation :populate_missing_foreign_keys + + self.primary_key = :project_repository_id + + belongs_to :project_repository, inverse_of: :project_repository_state + belongs_to :project + + validates :verification_state, :project_repository, presence: true + + def populate_missing_foreign_keys + model_class = + Gitlab::Geo.geo_project_repository_replication_v2_enabled? ? ::ProjectRepository : ::Project + + populate_missing_foreign_keys_for_model(model_klass: model_class) + end + end +end diff --git a/ee/app/replicators/geo/project_repository_replicator.rb b/ee/app/replicators/geo/project_repository_replicator.rb index 632b0a0c7a759de7caa8e8b59b4083eb655905fe..cc601b64a5c075f43c29d222048a0102ddbbc2a3 100644 --- a/ee/app/replicators/geo/project_repository_replicator.rb +++ b/ee/app/replicators/geo/project_repository_replicator.rb @@ -28,6 +28,19 @@ def repository model_record.repository end + override :should_publish_replication_event? + def should_publish_replication_event? + return false unless super + + if ::Gitlab::Geo.geo_project_repository_replication_v2_enabled? + # V2: Only create events for ProjectRepository objects, not Projects + model_record.is_a?(::ProjectRepository) + else + # V1: Only create events for Project objects, not ProjectRepositories + model_record.is_a?(::Project) + end + end + private def pool_repository diff --git a/ee/config/feature_flags/ops/geo_project_repository_replication_v2.yml b/ee/config/feature_flags/ops/geo_project_repository_replication_v2.yml new file mode 100644 index 0000000000000000000000000000000000000000..ff75ab3b40e38de16927f73c4b0f735b8cf697d3 --- /dev/null +++ b/ee/config/feature_flags/ops/geo_project_repository_replication_v2.yml @@ -0,0 +1,8 @@ +--- +name: geo_project_repository_replication_v2 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/194051 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/549772 +milestone: '18.6' +type: ops +group: group::geo +default_enabled: false diff --git a/ee/lib/gitlab/geo.rb b/ee/lib/gitlab/geo.rb index 4fbab4c1ad4a98ba3a9aa84980406dd587e3b43e..39849af9624955f1d77a9f24a70bf7bbe69285c6 100644 --- a/ee/lib/gitlab/geo.rb +++ b/ee/lib/gitlab/geo.rb @@ -364,5 +364,9 @@ def self.org_mover_extend_selective_sync_to_primary_checksumming? def self.geo_selective_sync_by_organizations_enabled? ::Feature.enabled?(:geo_selective_sync_by_organizations, :instance) end + + def self.geo_project_repository_replication_v2_enabled? + ::Feature.enabled?(:geo_project_repository_replication_v2, :instance) + end end end diff --git a/ee/lib/gitlab/geo/project_repository_fk_helper.rb b/ee/lib/gitlab/geo/project_repository_fk_helper.rb new file mode 100644 index 0000000000000000000000000000000000000000..62a9e8e1569fd44cbde0393557ef77869477807b --- /dev/null +++ b/ee/lib/gitlab/geo/project_repository_fk_helper.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Gitlab + module Geo + module ProjectRepositoryFkHelper + # While switching between the legacy replication and replication V2, + # it is possible that one of the foreign keys in ProjectRepositoryState or + # ProjectRepositoryRegistry is populated while the other is not. + # This method ensures that both foreign keys are populated if possible, + # by using the existing populated foreign key to find the missing one. + # For example, if `project_id` is present but `project_repository_id` is not, + # it will try to find the `project_repository_id` from the `project`. + def populate_missing_foreign_keys_for_model(model_klass:, remove_invalid_record: true) + return unless should_populate_fk? + + # When a corresponding Git repository doesn't exist + # we don't need to replicate it, so we don't need the registry/state object any more + if should_remove_record?(model_klass, remove_invalid_record) + log_info("Destroying ProjectRepositoryRegistry##{id} without a corresponding project repository") + + return destroy + end + + self.project_repository ||= project&.project_repository + self.project ||= project_repository&.project + end + + def should_populate_fk? + # Skip population if we don't have access to the necessary attributes + # This can happen during batch operations that only select specific columns + return false unless has_attribute?(:project_id) && has_attribute?(:project_repository_id) + + false if project.present? && project_repository.present? + end + + def should_remove_record?(model_klass, remove_invalid_record) + remove_invalid_record && + model_klass == ::ProjectRepository && + project && + !project&.project_repository.present? + end + end + end +end diff --git a/ee/lib/gitlab/geo/replicator.rb b/ee/lib/gitlab/geo/replicator.rb index 29a584c889ac7d9556565a323db39d22fa10388f..1a83864f4194ebcb65dfec9b0633c95dfdacc144 100644 --- a/ee/lib/gitlab/geo/replicator.rb +++ b/ee/lib/gitlab/geo/replicator.rb @@ -353,6 +353,8 @@ def resource_exists? protected + # This method can be overriden in a replicator class to add other + # conditionals for accepting events def should_publish_replication_event? self.class.replication_enabled? end diff --git a/ee/spec/factories/geo/project_repository_states.rb b/ee/spec/factories/geo/project_repository_states.rb new file mode 100644 index 0000000000000000000000000000000000000000..f8376a22354bb6b5c4f29d22ffe55883ceeb7e5d --- /dev/null +++ b/ee/spec/factories/geo/project_repository_states.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :geo_project_repository_state, class: 'Geo::ProjectRepositoryState' do + project_repository + + trait :checksummed do + verification_checksum { 'abc' } + end + + trait :checksum_failure do + verification_failure { 'Could not calculate the checksum' } + end + end +end diff --git a/ee/spec/factories/project_repositories.rb b/ee/spec/factories/project_repositories.rb new file mode 100644 index 0000000000000000000000000000000000000000..6d74bc18f46cad987a5fd4f21a52fa20e082f0a8 --- /dev/null +++ b/ee/spec/factories/project_repositories.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +FactoryBot.modify do + factory :project_repository do + project + + trait :verification_succeeded do + repository + verification_checksum { 'abc' } + verification_state { ProjectRepository.verification_state_value(:verification_succeeded) } + end + + trait :verification_failed do + repository + verification_failure { 'Could not calculate the checksum' } + verification_state { ProjectRepository.verification_state_value(:verification_failed) } + + # Geo::VerifiableReplicator#after_verifiable_update tries to verify the replicable async and + # marks it as verification pending when the model record is created/updated. + # + # Tip: You must set current node to primary, or else you can get a PG::ForeignKeyViolation + # because save_verification_details is returning early. + after(:create) do |instance, evaluator| + instance.verification_failure = evaluator.verification_failure + instance.verification_failed! + end + end + end +end diff --git a/ee/spec/models/ee/project_repository_spec.rb b/ee/spec/models/ee/project_repository_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..831e50e40f4a2cfb5554b3aa19786d17f74ecb6b --- /dev/null +++ b/ee/spec/models/ee/project_repository_spec.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ProjectRepository, feature_category: :geo_replication do + include EE::GeoHelpers + + describe 'associations' do + it 'has one project_repository_state association' do + is_expected + .to have_one(:project_repository_state) + .class_name('Geo::ProjectRepositoryState') + .inverse_of(:project_repository) + .autosave(false) + end + end + + include_examples 'a verifiable model for verification state' do + let(:skip_unverifiable_model_record_tests) { true } + + let(:verifiable_model_record) { build(:project_repository) } + let(:unverifiable_model_record) { nil } + end +end diff --git a/ee/spec/models/geo/project_repository_state_spec.rb b/ee/spec/models/geo/project_repository_state_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..7044226b077a9bdf124d24506957af93c4268830 --- /dev/null +++ b/ee/spec/models/geo/project_repository_state_spec.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Geo::ProjectRepositoryState, feature_category: :geo_replication do + include EE::GeoHelpers + + let_it_be(:project) { create(:project_with_repo) } + let_it_be(:project_repository) { project.project_repository } + + before do + stub_current_geo_node(create(:geo_node, :primary)) + end + + describe 'associations' do + it { is_expected.to belong_to(:project_repository).inverse_of(:project_repository_state) } + it { is_expected.to belong_to(:project) } + end + + describe 'validations' do + it { is_expected.to validate_presence_of(:verification_state) } + it { is_expected.to validate_presence_of(:project_repository) } + end + + describe 'primary key' do + it 'uses project_repository_id as primary key' do + expect(described_class.primary_key).to eq('project_repository_id') + end + end + + describe '#populate_missing_foreign_keys' do + context 'when project_id is missing but project_repository is present' do + it 'populates project_id from project_repository' do + state = build(:geo_project_repository_state, + project_repository: project_repository, + project: nil) + + # Trigger validation which calls populate_missing_foreign_keys + state.valid? + + expect(state.project).to eq(project) + expect(state.project_id).to eq(project.id) + end + + it 'saves successfully after foreign key population' do + state = build(:geo_project_repository_state, + project_repository: project_repository, + project: nil) + + expect(state.save).to be_truthy + expect(state.reload.project_id).to eq(project.id) + end + end + + context 'when project_repository_id is missing but project is present' do + it 'populates project_repository from project' do + state = build(:geo_project_repository_state, + project: project, + project_repository: nil) + + # Trigger validation which calls populate_missing_foreign_keys + state.valid? + + expect(state.project_repository).to eq(project_repository) + expect(state.project_repository_id).to eq(project_repository.id) + end + + it 'saves successfully after foreign key population' do + state = build(:geo_project_repository_state, + project: project, + project_repository: nil) + + expect(state.save).to be_truthy + expect(state.reload.project_repository_id).to eq(project_repository.id) + end + end + + context 'when both foreign keys are present' do + it 'does not modify existing values' do + state = build(:geo_project_repository_state, + project: project, + project_repository: project_repository) + + original_project_id = state.project_id + original_repo_id = state.project_repository_id + + state.valid? + + expect(state.project_id).to eq(original_project_id) + expect(state.project_repository_id).to eq(original_repo_id) + end + end + + # This is an edge case that shouldn't happen in practice + # If there is no project_repository record there should be no project_repository_state record + context 'when project does not have a project_repository' do + let(:project_without_repo) { create(:project) } + + it 'handles gracefully when project has no project_repository' do + state = build(:geo_project_repository_state, + project: project_without_repo, + project_repository: nil) + + expect(state.valid?).to be_falsey + + expect(state.project_repository).to be_nil + expect(state.project_repository_id).to be_nil + end + end + + context 'when project_repository has no project (edge case)' do + it 'handles orphaned project_repository gracefully' do + # This is an edge case that shouldn't happen in practice + state = build(:geo_project_repository_state, + project: nil, + project_repository_id: 99999) + + expect { state.valid? }.not_to raise_error + expect(state.project).to be_nil + end + end + end + + describe 'verification state behavior' do + let(:state) { create(:geo_project_repository_state, project_repository: project_repository) } + + it 'inherits verification state methods from VerificationStateDefinition' do + expect(state).to respond_to(:verification_pending!) + expect(state).to respond_to(:verification_succeeded!) + expect(state).to respond_to(:verification_failed!) + end + + it 'can transition between verification states' do + expect(state.verification_pending?).to be_truthy + + # Use the proper method with checksum for succeeded state + state.update!(verification_checksum: 'abc123') + state.verification_started! + state.verification_succeeded! + expect(state.verification_succeeded?).to be_truthy + + state.verification_started! + state.before_verification_failed + state.update!(verification_failure: "Verification failed") + state.verification_failed! + expect(state.verification_failed?).to be_truthy + end + + it 'includes VerificationStateDefinition module' do + expect(described_class.included_modules).to include(Geo::VerificationStateDefinition) + end + end +end diff --git a/ee/spec/replicators/geo/project_repository_replicator_spec.rb b/ee/spec/replicators/geo/project_repository_replicator_spec.rb index d543a83aff5d8025588f995932f5721af7fa2e19..564aa35fa1fb5ee6ccfb71ed353162693c985c25 100644 --- a/ee/spec/replicators/geo/project_repository_replicator_spec.rb +++ b/ee/spec/replicators/geo/project_repository_replicator_spec.rb @@ -3,9 +3,27 @@ require 'spec_helper' RSpec.describe Geo::ProjectRepositoryReplicator, feature_category: :geo_replication do - let(:model_record) { create(:project, :repository) } + include EE::GeoHelpers + + let(:secondary) { create(:geo_node) } + let(:primary) { create(:geo_node, :primary) } + + before do + stub_current_geo_node(secondary) + end + + context 'with legacy project repository replication (V1)' do + let(:model_record) { create(:project_with_repo) } + let(:project) { model_record } + + subject(:replicator) { model_record.replicator } + + before do + stub_feature_flags(geo_project_repository_replication_v2: false) + end + + it_behaves_like 'a repository replicator' - include_examples 'a repository replicator' do describe 'housekeeping implementation' do let_it_be(:pool_repository) { create(:pool_repository) } let_it_be(:model_record) { create(:project, pool_repository: pool_repository) } @@ -21,8 +39,15 @@ expect(service).to receive(:execute) end + replicator = described_class.new(model_record_id: model_record.id) replicator.before_housekeeping end end + + include_examples 'a verifiable replicator' do + before do + stub_current_geo_node(primary) + end + end end end diff --git a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb index 93b51b47c00381dab63d8598a65eaf93502f6e88..96a242f6687e4f53787815c7630426365e04b3c8 100644 --- a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb +++ b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb @@ -18,6 +18,11 @@ subject(:replicator) { model_record.replicator } before do + # These tests create projects and expect the Geo::Event counts to change + # including project creation related events. + # Disabling this flag will allow legacy project repository replication to + # work, as these tests expect. + stub_feature_flags(geo_project_repository_replication_v2: false) stub_current_geo_node(primary) end diff --git a/spec/factories/project_repositories.rb b/spec/factories/project_repositories.rb index 39e8ea2e11eeba93cc04daad1d3200d7485b9c05..de2a5cc5f5bea55fc88fd90f5e71155687b586e0 100644 --- a/spec/factories/project_repositories.rb +++ b/spec/factories/project_repositories.rb @@ -2,11 +2,12 @@ FactoryBot.define do factory :project_repository do - project + project { association(:project_with_repo) } + + sequence(:disk_path) { |n| "@hashed/unique_#{n}_#{SecureRandom.hex(8)}" } after(:build) do |project_repository, _| project_repository.shard_name = project_repository.project.repository_storage - project_repository.disk_path = project_repository.project.disk_path end end end