diff --git a/db/post_migrate/20250115095728_idx_vulnerability_statistics_on_traversal_ids_and_letter_grade.rb b/db/post_migrate/20250115095728_idx_vulnerability_statistics_on_traversal_ids_and_letter_grade.rb new file mode 100644 index 0000000000000000000000000000000000000000..5c87bfaa2f46bd18b45ae1fb79f4c30ef9c04769 --- /dev/null +++ b/db/post_migrate/20250115095728_idx_vulnerability_statistics_on_traversal_ids_and_letter_grade.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class IdxVulnerabilityStatisticsOnTraversalIdsAndLetterGrade < Gitlab::Database::Migration[2.2] + disable_ddl_transaction! + milestone '17.9' + + INDEX_NAME = 'idx_vulnerability_statistics_on_traversal_ids_and_letter_grade' + + def up + add_concurrent_index :vulnerability_statistics, %i[traversal_ids letter_grade], name: INDEX_NAME, + where: 'archived = FALSE' + end + + def down + remove_concurrent_index_by_name :vulnerability_statistics, INDEX_NAME + end +end diff --git a/db/schema_migrations/20250115095728 b/db/schema_migrations/20250115095728 new file mode 100644 index 0000000000000000000000000000000000000000..bbf058d30bdf2195771c86633e87c5fae9d3015e --- /dev/null +++ b/db/schema_migrations/20250115095728 @@ -0,0 +1 @@ +992aaca50dcf7bd21dabd4d0cd5f534b0c514e2a6b3392ef58cc9155966632e1 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 8d01aa9c25cee2f39994e818ad9b5fec9462d30f..2a42e8e150b9285467b2f243d2d75b7822c148d6 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -30217,6 +30217,8 @@ CREATE INDEX idx_vulnerability_reads_for_traversal_ids_queries_srt_severity ON v CREATE INDEX idx_vulnerability_reads_project_id_scanner_id_vulnerability_id ON vulnerability_reads USING btree (project_id, scanner_id, vulnerability_id); +CREATE INDEX idx_vulnerability_statistics_on_traversal_ids_and_letter_grade ON vulnerability_statistics USING btree (traversal_ids, letter_grade) WHERE (archived = false); + CREATE UNIQUE INDEX idx_wi_type_custom_fields_on_ns_id_wi_type_id_custom_field_id ON work_item_type_custom_fields USING btree (namespace_id, work_item_type_id, custom_field_id); CREATE INDEX index_p_ci_builds_on_execution_config_id ON ONLY p_ci_builds USING btree (execution_config_id) WHERE (execution_config_id IS NOT NULL); diff --git a/ee/app/models/vulnerabilities/projects_grade.rb b/ee/app/models/vulnerabilities/projects_grade.rb index 995ba2264deedcd080903cd45559d02baf563fff..b715a46b7cd6ca22bb1d9328ec59a8b43f7445d0 100644 --- a/ee/app/models/vulnerabilities/projects_grade.rb +++ b/ee/app/models/vulnerabilities/projects_grade.rb @@ -2,8 +2,6 @@ module Vulnerabilities class ProjectsGrade - BATCH_SIZE = 500 - attr_reader :vulnerable, :grade, :project_ids, :include_subgroups # project_ids can contain IDs from projects that do not belong to vulnerable, they will be filtered out in `projects` method @@ -24,62 +22,53 @@ def projects projects.with_vulnerability_statistics.inc_routes.where(id: project_ids) end + # rubocop:disable Metrics/PerceivedComplexity -- temporary until we remove the feature flag and refactor + # rubocop:disable Metrics/CyclomaticComplexity -- temporary until we remove the feature flag and refactor def self.grades_for(vulnerables, filter: nil, include_subgroups: false) - if vulnerables.all? { |v| v.is_a?(Group) && Feature.enabled?(:remove_cross_join_from_vulnerabilities_projects_grade, v) } - return grades_for_vulnerables_no_cross_join(vulnerables, filter: filter, include_subgroups: include_subgroups) - end - - projects = vulnerables.map do |v| - collection = include_subgroups ? v.all_projects : v.projects - collection.non_archived + # collect vulnerability statistics as relations + relations = vulnerables.map do |vulnerable| + if vulnerable.is_a?(Group) && Feature.enabled?(:remove_cross_join_from_vulnerabilities_projects_grade, vulnerable) + if include_subgroups + ::Vulnerabilities::Statistic.by_group(vulnerable).unarchived + else + ::Vulnerabilities::Statistic.by_group_excluding_subgroups(vulnerable).unarchived + end + else + collection = include_subgroups ? vulnerable.all_projects : vulnerable.projects + collection = collection.non_archived + ::Vulnerabilities::Statistic.for_project([collection].reduce(&:or)) + .allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/503387') + end end - relation = ::Vulnerabilities::Statistic.for_project(projects.reduce(&:or)) - relation = relation.by_grade(filter) if filter - relation = relation.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/503387') - - relation.group(:letter_grade) - .select(:letter_grade, 'array_agg(project_id) project_ids') - .then do |statistics| - vulnerables.index_with do |vulnerable| - statistics.map { |statistic| new(vulnerable, statistic.letter_grade, statistic.project_ids, include_subgroups: include_subgroups) } + # collect hashes that map letter grades to project IDs + grades_maps = relations.map do |relation| + relation = relation.by_grade(filter) if filter + relation.group(:letter_grade) + .select(:letter_grade, 'array_agg(project_id) project_ids') + .then do |rows| + rows.each_with_object({}) do |row, statistics| + statistics[row.letter_grade] ||= [] + statistics[row.letter_grade] += row.project_ids + end end - end - end - - def self.grades_for_vulnerables_no_cross_join(vulnerables, filter: nil, include_subgroups: false) - statistics = {} - - vulnerables.each do |group| - iterator = if include_subgroups - cursor = { current_id: group.id, depth: [group.id] } - Gitlab::Database::NamespaceEachBatch.new(namespace_class: Group, cursor: cursor) - else - Group.where(id: group.id) - end - - iterator.each_batch do |sub_group_ids| - Project.in_namespace(sub_group_ids).non_archived.each_batch do |projects| - project_ids = projects.pluck_primary_key - relation = ::Vulnerabilities::Statistic.for_project(project_ids) - relation = relation.by_grade(filter) if filter + end - relation.group(:letter_grade) - .select(:letter_grade, 'array_agg(project_id) project_ids') - .each do |row| - statistics[row.letter_grade] ||= [] - statistics[row.letter_grade].concat(row.project_ids) - end - end + # map letter grades to project IDs across all vulnerables + grades_to_project_ids = grades_maps.each_with_object({}) do |result, stats| + result.each do |grade, project_ids| + stats[grade] ||= [] + stats[grade] += project_ids end end # Currently all vulnerables get the same grades, but this behavior should be changed # as part of https://gitlab.com/gitlab-org/gitlab/-/issues/507992. vulnerables.index_with do |vulnerable| - statistics.map { |letter_grade, project_ids| new(vulnerable, letter_grade, project_ids, include_subgroups: include_subgroups) } + grades_to_project_ids.map { |letter_grade, project_ids| new(vulnerable, letter_grade, project_ids, include_subgroups: include_subgroups) } end end - private_class_method :grades_for_vulnerables_no_cross_join + # rubocop:enable Metrics/CyclomaticComplexity + # rubocop:enable Metrics/PerceivedComplexity end end diff --git a/ee/app/models/vulnerabilities/statistic.rb b/ee/app/models/vulnerabilities/statistic.rb index 260fec2e90e34a5235c5868e9783bc8322824b88..c5a3947bf7c298ddbfc98fbc4d506843b291a1e6 100644 --- a/ee/app/models/vulnerabilities/statistic.rb +++ b/ee/app/models/vulnerabilities/statistic.rb @@ -24,6 +24,14 @@ class Statistic < Gitlab::Database::SecApplicationRecord scope :by_projects, ->(values) { where(project_id: values) } scope :by_grade, ->(grade) { where(letter_grade: grade) } + scope :by_group_excluding_subgroups, ->(group) { where(traversal_ids: group.traversal_ids) } + scope :by_group, ->(group) do + traversal_ids_gteq(group.traversal_ids).traversal_ids_lt(group.next_traversal_ids) + end + scope :traversal_ids_gteq, ->(traversal_ids) { where(arel_table[:traversal_ids].gteq(traversal_ids)) } + scope :traversal_ids_lt, ->(traversal_ids) { where(arel_table[:traversal_ids].lt(traversal_ids)) } + scope :unarchived, -> { where(archived: false) } + class << self # Takes an object which responds to `#[]` method call # like an instance of ActiveRecord::Base or a Hash and diff --git a/ee/spec/factories/vulnerabilities/statistics.rb b/ee/spec/factories/vulnerabilities/statistics.rb index 34e87f0f4fec2b0a24b53864539ac97d0f7129cd..2f14abb23789d252b7bb8e2a7002311211063b3f 100644 --- a/ee/spec/factories/vulnerabilities/statistics.rb +++ b/ee/spec/factories/vulnerabilities/statistics.rb @@ -5,6 +5,11 @@ project initialize_with { Vulnerabilities::Statistic.find_or_initialize_by(project_id: project.id) } + after(:build) do |vulnerability_statistic, _| + vulnerability_statistic.archived = vulnerability_statistic.project&.archived + vulnerability_statistic.traversal_ids = vulnerability_statistic.project&.namespace&.traversal_ids + end + trait :grade_a do info { 1 } end diff --git a/ee/spec/models/vulnerabilities/projects_grade_spec.rb b/ee/spec/models/vulnerabilities/projects_grade_spec.rb index bdc8c1f59d2116a69f6a3b84fd09ff996152a5fb..5f1b50652d81903ef58358e972094cb4704adb45 100644 --- a/ee/spec/models/vulnerabilities/projects_grade_spec.rb +++ b/ee/spec/models/vulnerabilities/projects_grade_spec.rb @@ -118,26 +118,6 @@ expect(projects_grades[vulnerable].map(&compare_key)).to match_array(expected_projects_grades[vulnerable].map(&compare_key)) end end - - context 'when multiple batches are required' do - before do - stub_const("#{described_class}::BATCH_SIZE", 2) - end - - it 'returns the letter grades for given vulnerable' do - expect(projects_grades[vulnerable].map(&compare_key)).to match_array(expected_projects_grades[vulnerable].map(&compare_key)) - end - - context 'when remove_cross_join_from_vulnerabilities_projects_grade is disabled' do - before do - stub_feature_flags(remove_cross_join_from_vulnerabilities_projects_grade: false) - end - - it 'returns the letter grades for given vulnerable' do - expect(projects_grades[vulnerable].map(&compare_key)).to match_array(expected_projects_grades[vulnerable].map(&compare_key)) - end - end - end end context 'when the filter is given' do diff --git a/ee/spec/models/vulnerabilities/statistic_spec.rb b/ee/spec/models/vulnerabilities/statistic_spec.rb index 9ec72e3ba419ba199c5914fd31d9c3c72957237e..0c6d3c2a821376fdbe4fa6a641c5bda438e4fab3 100644 --- a/ee/spec/models/vulnerabilities/statistic_spec.rb +++ b/ee/spec/models/vulnerabilities/statistic_spec.rb @@ -46,6 +46,53 @@ it { is_expected.to match_array([statistic_grade_a]) } end + describe '.by_group' do + let_it_be(:group_1) { create(:group) } + let_it_be(:group_2) { create(:group) } + let_it_be(:group_1_1) { create(:group, parent: group_1) } + let_it_be(:project_1) { create(:project, group: group_1) } + let_it_be(:project_1_1) { create(:project, group: group_1_1) } + let_it_be(:project_2) { create(:project, group: group_2) } + let_it_be(:vulnerability_statistic_1) { create(:vulnerability_statistic, project: project_1) } + let_it_be(:vulnerability_statistic_1_1) { create(:vulnerability_statistic, project: project_1_1) } + let_it_be(:vulnerability_statistic_2) { create(:vulnerability_statistic, project: project_2) } + + subject { described_class.by_group(group_1) } + + it 'returns all records within the group hierarchy' do + is_expected.to contain_exactly(vulnerability_statistic_1, vulnerability_statistic_1_1) + end + end + + describe '.by_group_excluding_subgroups' do + let_it_be(:group_1) { create(:group) } + let_it_be(:group_2) { create(:group) } + let_it_be(:group_1_1) { create(:group, parent: group_1) } + let_it_be(:project_1) { create(:project, group: group_1) } + let_it_be(:project_1_1) { create(:project, group: group_1_1) } + let_it_be(:project_2) { create(:project, group: group_2) } + let_it_be(:vulnerability_statistic_1) { create(:vulnerability_statistic, project: project_1) } + let_it_be(:vulnerability_statistic_1_1) { create(:vulnerability_statistic, project: project_1_1) } + let_it_be(:vulnerability_statistic_2) { create(:vulnerability_statistic, project: project_2) } + + subject { described_class.by_group_excluding_subgroups(group_1) } + + it 'returns all records within the group hierarchy' do + is_expected.to contain_exactly(vulnerability_statistic_1) + end + end + + describe '.unarchived' do + let_it_be(:active_project) { create(:project) } + let_it_be(:archived_project) { create(:project, :archived) } + let_it_be(:archived_vulnerability_statistic) { create(:vulnerability_statistic, project: archived_project) } + let_it_be(:unarchived_vulnerability_statistic) { create(:vulnerability_statistic, project: active_project) } + + subject(:unarchived) { described_class.unarchived } + + it { is_expected.to contain_exactly(unarchived_vulnerability_statistic) } + end + describe '.letter_grade_for' do subject { described_class.letter_grade_for(object) }