diff --git a/ee/app/models/ci/instance_runner_failed_jobs.rb b/ee/app/models/ci/instance_runner_failed_jobs.rb new file mode 100644 index 0000000000000000000000000000000000000000..b7e8c6a4fd6cae9e93d04d161abd41490c00a478 --- /dev/null +++ b/ee/app/models/ci/instance_runner_failed_jobs.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Ci + class InstanceRunnerFailedJobs + # Safety margin for situations where there is a mismatch between the async insert order and the finished_at value + JOB_LIMIT_MARGIN = 10 + JOB_LIMIT = 100 + + class << self + def track(build) + return unless track_job?(build) + + with_redis do |redis| + redis.pipelined do |pipeline| + pipeline.lpush(key, build.id) + pipeline.ltrim(key, 0, max_admissible_job_count - 1) + end + end + end + + def recent_jobs + return Ci::Build.none unless License.feature_available?(:runner_performance_insights) + + job_ids = with_redis do |redis| + # Fetch a few more jobs in case there is a mismatch between the async insert order and the finished_at value + redis.lrange(key, 0, max_admissible_job_count - 1) + end + + Ci::Build.id_in(job_ids).order(finished_at: :desc, id: :desc).limit(JOB_LIMIT) + end + + private + + def key + self.class.name + end + + def with_redis(&block) + Gitlab::Redis::SharedState.with(&block) + end + + def max_admissible_job_count + JOB_LIMIT + JOB_LIMIT_MARGIN + end + + def track_job?(build) + License.feature_available?(:runner_performance_insights) && + build.failure_reason == 'runner_system_failure' && + build.runner.instance_type? + end + end + end +end diff --git a/ee/app/workers/ee/ci/build_finished_worker.rb b/ee/app/workers/ee/ci/build_finished_worker.rb index 7be7444f250e4d360f5132b8ccd7a59cb9d4ef73..a27e7894731419342e9574b5051274fafaacdd01 100644 --- a/ee/app/workers/ee/ci/build_finished_worker.rb +++ b/ee/app/workers/ee/ci/build_finished_worker.rb @@ -16,6 +16,8 @@ def process_build(build) if ::Gitlab.com? && build.has_security_reports? ::Security::TrackSecureScansWorker.perform_async(build.id) end + + ::Ci::InstanceRunnerFailedJobs.track(build) if build.failed? end private diff --git a/ee/spec/models/ci/instance_runner_failed_jobs_spec.rb b/ee/spec/models/ci/instance_runner_failed_jobs_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..99df1913b090f8f5e7a6acd68bd1f9e2b76f614d --- /dev/null +++ b/ee/spec/models/ci/instance_runner_failed_jobs_spec.rb @@ -0,0 +1,150 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ci::InstanceRunnerFailedJobs, :freeze_time, :clean_gitlab_redis_shared_state, + feature_category: :runner_fleet do + before do + stub_licensed_features(runner_performance_insights: runner_performance_insights) + end + + describe '.track' do + subject(:track) { described_class.track(job) } + + let_it_be(:instance_runner) { create(:ci_runner, :instance) } + + let(:job) { create(:ci_build, runner: runner) } + + before do + job.drop!(failure_reason) + end + + context 'when job fails with runner_system_failure' do + let(:failure_reason) { :runner_system_failure } + + context 'with runner_performance_insights licensed feature' do + let(:runner_performance_insights) { true } + + context 'when job is executed in an instance runner' do + let(:runner) { instance_runner } + + it 'saves job id on redis cache' do + track + + expect(redis_stored_job_ids).to match_array(formatted_job_ids_for(job)) + end + + context 'when job fails with different failure_reason' do + let(:failure_reason) { :stuck_or_timeout_failure } + + it 'does not save job' do + expect { track }.not_to change { redis_stored_job_ids } + end + end + end + + context 'when job is executed in a project runner' do + let_it_be(:project_runner) { create(:ci_runner, :project) } + + let(:runner) { project_runner } + + it 'does not save job' do + expect { track }.not_to change { redis_stored_job_ids } + end + end + end + + context 'without runner_performance_insights licensed feature' do + let(:runner_performance_insights) { false } + let(:runner) { instance_runner } + + it 'does not save job' do + expect { track }.not_to change { redis_stored_job_ids } + end + end + end + end + + describe '.recent_jobs' do + subject(:recent_jobs) { described_class.recent_jobs } + + let_it_be(:runner) { create(:ci_runner, :instance) } + let_it_be(:job_args) { { runner: runner, failure_reason: :runner_system_failure } } + let_it_be(:job) { create(:ci_build, created_at: 10.minutes.ago, finished_at: 3.minutes.ago, **job_args) } + let_it_be(:job2) { create(:ci_build, created_at: 5.minutes.ago, finished_at: 4.minutes.ago, **job_args) } + let_it_be(:job3) { create(:ci_build, created_at: 5.minutes.ago, finished_at: 5.minutes.ago, **job_args) } + + context 'with runner_performance_insights licensed feature' do + let(:runner_performance_insights) { true } + + context 'when content is not set' do + it { is_expected.to be_empty } + end + + context 'when jobs are added' do + before do + described_class.track(job) + end + + it 'returns 3 most recently finished jobs' do + expect(described_class.recent_jobs).to contain_exactly(an_object_having_attributes(id: job.id)) + + described_class.track(job2) + described_class.track(job3) + + expect(described_class.recent_jobs).to match([ + an_object_having_attributes(id: job.id), + an_object_having_attributes(id: job2.id), + an_object_having_attributes(id: job3.id) + ]) + end + + context 'when jobs are added in different order' do + it 'returns 3 most recently finished jobs' do + expect(described_class.recent_jobs).to contain_exactly(an_object_having_attributes(id: job.id)) + + described_class.track(job3) + described_class.track(job2) + + expect(described_class.recent_jobs).to match([ + an_object_having_attributes(id: job.id), + an_object_having_attributes(id: job2.id), + an_object_having_attributes(id: job3.id) + ]) + end + end + + context 'when trimming is required' do + before do + stub_const("#{described_class}::JOB_LIMIT", 1) + stub_const("#{described_class}::JOB_LIMIT_MARGIN", 1) + end + + it 'returns 2 most recently finished jobs and purges the rest', :aggregate_failures do + described_class.track(job3) + described_class.track(job2) + + # Only the last 2 jobs saved will be retained + expect(redis_stored_job_ids).to eq(formatted_job_ids_for(job2, job3)) + # and of those 2, the most recently finished will be returned (JOB_LIMIT) + expect(described_class.recent_jobs).to contain_exactly(an_object_having_attributes(id: job2.id)) + end + end + end + end + + context 'without runner_performance_insights licensed feature' do + let(:runner_performance_insights) { false } + + it { is_expected.to be_empty } + end + end + + def redis_stored_job_ids + Gitlab::Redis::SharedState.with { |redis| redis.lrange(described_class.class.name, 0, -1) } + end + + def formatted_job_ids_for(*builds) + builds.map(&:id).map(&:to_s) + end +end diff --git a/ee/spec/workers/ee/ci/build_finished_worker_spec.rb b/ee/spec/workers/ee/ci/build_finished_worker_spec.rb index 8a0eab2b3990ec67d82c37ed7342e0cf111b372c..5700f28bad6c5e015d9a5c40ddedc298a2a0646a 100644 --- a/ee/spec/workers/ee/ci/build_finished_worker_spec.rb +++ b/ee/spec/workers/ee/ci/build_finished_worker_spec.rb @@ -8,10 +8,6 @@ let_it_be(:project) { build.project } let_it_be(:namespace) { project.shared_runners_limit_namespace } - subject do - described_class.new.perform(build.id) - end - def namespace_stats namespace.namespace_statistics || namespace.create_namespace_statistics end @@ -21,6 +17,10 @@ def project_stats end describe '#perform' do + subject(:perform) do + described_class.new.perform(build.id) + end + context 'when on .com' do before do allow(Gitlab).to receive(:com?).and_return(true) @@ -30,7 +30,7 @@ def project_stats it 'tracks secure scans' do expect(::Security::TrackSecureScansWorker).to receive(:perform_async) - subject + perform end context 'when exception is raised in `super`' do @@ -38,7 +38,7 @@ def project_stats allow(Ci::Build).to receive(:find_by_id).with(build.id).and_return(build) allow(build).to receive(:execute_hooks).and_raise(ArgumentError) - expect { subject }.to raise_error(ArgumentError) + expect { perform }.to raise_error(ArgumentError) expect(::Security::TrackSecureScansWorker).not_to receive(:perform_async) end @@ -50,9 +50,15 @@ def project_stats it 'does not track secure scans' do expect(::Security::TrackSecureScansWorker).not_to receive(:perform_async) - subject + perform end end + + it 'does not track job on InstanceRunnerFailedJobs' do + expect(Ci::InstanceRunnerFailedJobs).not_to receive(:track) + + perform + end end context 'when not on .com' do @@ -63,13 +69,19 @@ def project_stats it 'does not notify the owners of Groups' do expect(::Ci::Minutes::EmailNotificationService).not_to receive(:new) - subject + perform end it 'does not track secure scans' do expect(::Security::TrackSecureScansWorker).not_to receive(:perform_async) - subject + perform + end + + it 'does not track job on InstanceRunnerFailedJobs' do + expect(Ci::InstanceRunnerFailedJobs).not_to receive(:track) + + perform end end @@ -83,14 +95,14 @@ def project_stats it 'does not scan security reports for token revocation' do expect(ScanSecurityReportSecretsWorker).not_to receive(:perform_async) - subject + perform end end it 'does not schedule processing of requirement reports by default' do expect(RequirementsManagement::ProcessRequirementsReportsWorker).not_to receive(:perform_async) - subject + perform end context 'with requirements' do @@ -106,7 +118,7 @@ def project_stats it do expect(RequirementsManagement::ProcessRequirementsReportsWorker).not_to receive(:perform_async) - subject + perform end end @@ -118,7 +130,7 @@ def project_stats it 'schedules processing of requirement reports' do expect(RequirementsManagement::ProcessRequirementsReportsWorker).to receive(:perform_async) - subject + perform end context 'when user has insufficient permissions to create test reports' do @@ -138,5 +150,21 @@ def project_stats it_behaves_like 'does not schedule processing of requirement reports' end end + + it 'does not save job on Ci::InstanceRunnerFailedJobs by default' do + expect(Ci::InstanceRunnerFailedJobs).not_to receive(:track) + + perform + end + + context 'when job failed', feature_category: :runner_fleet do + let(:build) { create(:ee_ci_build, :sast, :failed, runner: ci_runner) } + + it 'tracks job on InstanceRunnerFailedJobs' do + expect(Ci::InstanceRunnerFailedJobs).to receive(:track).once + + perform + end + end end end