diff --git a/config/initializers/postgres_partitioning.rb b/config/initializers/postgres_partitioning.rb index a26ca2cf2472b250ef082ab96c5a92b35db249de..cdfea5754c1e6c04ff19c8856bd66dc5a6bfd685 100644 --- a/config/initializers/postgres_partitioning.rb +++ b/config/initializers/postgres_partitioning.rb @@ -49,6 +49,7 @@ Analytics::ValueStreamDashboard::Count, Ci::FinishedBuildChSyncEvent, Search::Zoekt::Task, + Ai::UsageEvent, Ai::CodeSuggestionEvent, Ai::DuoChatEvent, Ai::TroubleshootJobEvent, diff --git a/db/click_house/main.sql b/db/click_house/main.sql index 9c1462ec8ae723b3b03bd5b4871e7d4d6b534cc5..3fc8c7eb0e9a0ebf981c36736ed0d437b38147c3 100644 --- a/db/click_house/main.sql +++ b/db/click_house/main.sql @@ -1,3 +1,16 @@ +CREATE TABLE ai_usage_events +( + `user_id` UInt64, + `event` UInt16, + `timestamp` DateTime64(6, 'UTC'), + `namespace_path` String DEFAULT '0/', + `extras` String DEFAULT '{}' +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(timestamp) +ORDER BY (namespace_path, event, timestamp, user_id) +SETTINGS index_granularity = 8192; + CREATE TABLE ci_finished_builds ( `id` UInt64 DEFAULT 0, diff --git a/db/click_house/migrate/main/20250611061212_create_ai_usage_events_table.rb b/db/click_house/migrate/main/20250611061212_create_ai_usage_events_table.rb new file mode 100644 index 0000000000000000000000000000000000000000..983d59e576389371a9437e7220d45d377182997d --- /dev/null +++ b/db/click_house/migrate/main/20250611061212_create_ai_usage_events_table.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +class CreateAiUsageEventsTable < ClickHouse::Migration + def up + execute <<~SQL + CREATE TABLE IF NOT EXISTS ai_usage_events + ( + user_id UInt64, + event UInt16, + timestamp DateTime64(6, 'UTC'), + namespace_path String DEFAULT '0/', + extras String DEFAULT '{}' + ) ENGINE = ReplacingMergeTree + PARTITION BY toYYYYMM(timestamp) + ORDER BY (namespace_path, event, timestamp, user_id) + SQL + end + + def down + execute <<~SQL + DROP TABLE IF 
EXISTS ai_usage_events + SQL + end +end diff --git a/db/click_house/migrate/main/20250612061212_migrate_code_suggestion_usage_events.rb b/db/click_house/migrate/main/20250612061212_migrate_code_suggestion_usage_events.rb new file mode 100644 index 0000000000000000000000000000000000000000..9b1593c3c7eba2cc46c09c52fd841e70f931c2c3 --- /dev/null +++ b/db/click_house/migrate/main/20250612061212_migrate_code_suggestion_usage_events.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +# Backfills ai_usage_events from the legacy code_suggestion_events table, one INSERT per week of data. +# ai_usage_events.extras is a String column holding JSON, so the legacy columns are wrapped in a named +# Tuple and serialized with toJSONString (with ClickHouse defaults a named Tuple is rendered as a JSON object). +class MigrateCodeSuggestionUsageEvents < ClickHouse::Migration + QUERY = <<~SQL + INSERT INTO ai_usage_events + SELECT user_id, event, timestamp, namespace_path, + toJSONString(CAST( + (unique_tracking_id, language, suggestion_size, branch_name) AS + Tuple( + unique_tracking_id String, + language String, + suggestion_size UInt64, + branch_name String + ) + )) as extras + FROM code_suggestion_events + SQL + + # Emit UInt64 values as JSON numbers rather than quoted strings. + JSON_SETTINGS = 'SETTINGS output_format_json_quote_64bit_integers = 0' + + def up + from = connection.select("SELECT minOrNull(timestamp) as min_timestamp FROM code_suggestion_events") + .first.fetch("min_timestamp", nil) + return unless from + + from = from.beginning_of_month + to = DateTime.current.end_of_month + 1.month + + each_week(from, to) do |month_start, month_end| + execute(query_for(month_start, month_end)) + end + + execute(query_for(to, nil)) + end + + def down + # no-op + end + + private + + def each_week(from, to) + current = from + while current <= to + yield(current, [current + 1.week, to].min) + current += 1.week + end + end + + def query_for(month_start, month_end = nil) + full_query = if month_end + "#{QUERY} WHERE timestamp >= %{start} AND timestamp < %{end}" + else + "#{QUERY} WHERE timestamp >= %{start}" + end + + "#{full_query} #{JSON_SETTINGS}" % { start: month_start.to_f, end: month_end&.to_f }.compact + end +end diff --git a/db/click_house/migrate/main/20250612061213_migrate_duo_chat_usage_events.rb b/db/click_house/migrate/main/20250612061213_migrate_duo_chat_usage_events.rb new file mode 100644 index
0000000000000000000000000000000000000000..7bf09d15cefdf90fcfec433ff12ea68af13f21b4 --- /dev/null +++ b/db/click_house/migrate/main/20250612061213_migrate_duo_chat_usage_events.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +class MigrateDuoChatUsageEvents < ClickHouse::Migration + QUERY = <<~SQL + INSERT INTO ai_usage_events + SELECT user_id, 6 as event, timestamp, namespace_path, map() as extras + FROM duo_chat_events + SQL + + def up + from = connection.select("SELECT minOrNull(timestamp) as min_timestamp FROM duo_chat_events") + .first.fetch("min_timestamp", nil) + return unless from + + from = from.beginning_of_month + to = DateTime.current.end_of_month + 1.month + + each_week(from, to) do |month_start, month_end| + execute(query_for(month_start, month_end)) + end + + execute(query_for(to, nil)) + end + + def down + # no-op + end + + private + + def each_week(from, to) + current = from + while current <= to + yield(current, [current + 1.week, to].min) + current += 1.week + end + end + + def query_for(month_start, month_end = nil) + full_query = if month_end + "#{QUERY} WHERE timestamp >= %{start} AND timestamp < %{end}" + else + "#{QUERY} WHERE timestamp >= %{start}" + end + + full_query % { start: month_start.to_f, end: month_end&.to_f }.compact + end +end diff --git a/db/click_house/migrate/main/20250612061214_migrate_troubleshoot_job_usage_events.rb b/db/click_house/migrate/main/20250612061214_migrate_troubleshoot_job_usage_events.rb new file mode 100644 index 0000000000000000000000000000000000000000..d6dd4a8bd601405aed8ee390076d0e3cf1a105f6 --- /dev/null +++ b/db/click_house/migrate/main/20250612061214_migrate_troubleshoot_job_usage_events.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +# Backfills ai_usage_events from the legacy troubleshoot_job_events table, one INSERT per week of data. +# ai_usage_events.extras is a String column holding JSON, so the legacy columns are wrapped in a named +# Tuple and serialized with toJSONString (with ClickHouse defaults a named Tuple is rendered as a JSON object). +class MigrateTroubleshootJobUsageEvents < ClickHouse::Migration + QUERY = <<~SQL + INSERT INTO ai_usage_events + SELECT user_id, 7 as event, timestamp, namespace_path, + toJSONString(CAST( + (job_id, project_id, pipeline_id, merge_request_id) AS + Tuple( + job_id UInt64, +
project_id UInt64, + pipeline_id UInt64, + merge_request_id UInt64 + ) + )) as extras + FROM troubleshoot_job_events + SQL + + # Emit UInt64 ids as JSON numbers rather than quoted strings. + JSON_SETTINGS = 'SETTINGS output_format_json_quote_64bit_integers = 0' + + def up + from = connection.select("SELECT minOrNull(timestamp) as min_timestamp FROM troubleshoot_job_events") + .first.fetch("min_timestamp", nil) + return unless from + + from = from.beginning_of_month + to = DateTime.current.end_of_month + 1.month + + each_week(from, to) do |month_start, month_end| + execute(query_for(month_start, month_end)) + end + + execute(query_for(to, nil)) + end + + def down + # no-op + end + + private + + def each_week(from, to) + current = from + while current <= to + yield(current, [current + 1.week, to].min) + current += 1.week + end + end + + def query_for(month_start, month_end = nil) + full_query = if month_end + "#{QUERY} WHERE timestamp >= %{start} AND timestamp < %{end}" + else + "#{QUERY} WHERE timestamp >= %{start}" + end + + "#{full_query} #{JSON_SETTINGS}" % { start: month_start.to_f, end: month_end&.to_f }.compact + end +end diff --git a/db/click_house/schema_migrations/main/20250611061212 b/db/click_house/schema_migrations/main/20250611061212 new file mode 100644 index 0000000000000000000000000000000000000000..fb7a9e5e02d3616d0660713d47042fdc120c4f98 --- /dev/null +++ b/db/click_house/schema_migrations/main/20250611061212 @@ -0,0 +1 @@ +8a3f178abb90ad07956fc5722a7943fa93f8269b38d7dd5ea66a0ff9c64b63f9 \ No newline at end of file diff --git a/db/click_house/schema_migrations/main/20250612061212 b/db/click_house/schema_migrations/main/20250612061212 new file mode 100644 index 0000000000000000000000000000000000000000..cd789166a89b509dbee72b5ea97ad559ee503037 --- /dev/null +++ b/db/click_house/schema_migrations/main/20250612061212 @@ -0,0 +1 @@ +109b5f0ccc2b0a3db4cb4e907a10ebd15f242f70c7e34a01d57531caeb2f084e \ No newline at end of file diff --git a/db/click_house/schema_migrations/main/20250612061213 b/db/click_house/schema_migrations/main/20250612061213 new file mode 100644 index
0000000000000000000000000000000000000000..34c4807317a046746d6c868a32d6ffbdf9938037 --- /dev/null +++ b/db/click_house/schema_migrations/main/20250612061213 @@ -0,0 +1 @@ +026186c2c271d5fb896d85887962749503b536c09e26ce7f0f3f26dcd6b7423c \ No newline at end of file diff --git a/db/click_house/schema_migrations/main/20250612061214 b/db/click_house/schema_migrations/main/20250612061214 new file mode 100644 index 0000000000000000000000000000000000000000..f52d14283288bbf8af13fa22d36829b5e2e2bfa8 --- /dev/null +++ b/db/click_house/schema_migrations/main/20250612061214 @@ -0,0 +1 @@ +b33cf52b7b6f85f739ad0424fd49feeb0241eb39f3b465b23e3a263fe943a0da \ No newline at end of file diff --git a/db/docs/ai_usage_events.yml b/db/docs/ai_usage_events.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d8bfa28f6ce30b9e0c3144166632b0f25947f22 --- /dev/null +++ b/db/docs/ai_usage_events.yml @@ -0,0 +1,13 @@ +--- +table_name: ai_usage_events +classes: +- Ai::UsageEvent +feature_categories: +- value_stream_management +description: Database storage for raw AI usage events. Partitioned by month. +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/163514 +milestone: '18.2' +gitlab_schema: gitlab_main_cell +sharding_key: + organization_id: organizations +table_size: small diff --git a/db/migrate/20250613075331_create_ai_usage_events.rb b/db/migrate/20250613075331_create_ai_usage_events.rb new file mode 100644 index 0000000000000000000000000000000000000000..9e765977aa6c82dcda37f3e011f47c809d773ebb --- /dev/null +++ b/db/migrate/20250613075331_create_ai_usage_events.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class CreateAiUsageEvents < Gitlab::Database::Migration[2.3] + disable_ddl_transaction! 
+ milestone '18.2' + + def change + # rubocop:disable Migration/Datetime -- "timestamp" is a column name + create_table :ai_usage_events, + options: 'PARTITION BY RANGE (timestamp)', + primary_key: [:id, :timestamp] do |t| + t.bigserial :id, null: false + t.datetime_with_timezone :timestamp, null: false + t.belongs_to :user, null: false + t.references :organization, foreign_key: { on_delete: :cascade }, null: false + t.datetime_with_timezone :created_at, null: false + t.integer :event, null: false, limit: 5 + t.text :namespace_path, limit: 255 + t.jsonb :extras, default: {}, null: false + end + # rubocop:enable Migration/Datetime + end +end diff --git a/db/schema_migrations/20250613075331 b/db/schema_migrations/20250613075331 new file mode 100644 index 0000000000000000000000000000000000000000..540ce5e69224dac4ac837b8fd7d9b6a089febee0 --- /dev/null +++ b/db/schema_migrations/20250613075331 @@ -0,0 +1 @@ +6e25611a83aa1fb903d8fd38dcd28815b36867bb2ddae224d1c52f1fe3b9b210 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 9d3409cc42a53e2c3e69c65dbb6eab0ecc678177..97547d6936239837004caf41df673913e6a7fc00 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -4527,6 +4527,19 @@ CREATE TABLE ai_troubleshoot_job_events ( ) PARTITION BY RANGE ("timestamp"); +CREATE TABLE ai_usage_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + user_id bigint NOT NULL, + organization_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + event bigint NOT NULL, + namespace_path text, + extras jsonb DEFAULT '{}'::jsonb NOT NULL, + CONSTRAINT check_ed9abb9565 CHECK ((char_length(namespace_path) <= 255)) +) +PARTITION BY RANGE ("timestamp"); + CREATE TABLE audit_events ( id bigint NOT NULL, author_id bigint NOT NULL, @@ -8156,6 +8169,15 @@ CREATE SEQUENCE ai_troubleshoot_job_events_id_seq ALTER SEQUENCE ai_troubleshoot_job_events_id_seq OWNED BY ai_troubleshoot_job_events.id; +CREATE SEQUENCE ai_usage_events_id_seq 
+ START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE ai_usage_events_id_seq OWNED BY ai_usage_events.id; + CREATE TABLE ai_user_metrics ( user_id bigint NOT NULL, last_duo_activity_on date NOT NULL @@ -27027,6 +27049,8 @@ ALTER TABLE ONLY ai_settings ALTER COLUMN id SET DEFAULT nextval('ai_settings_id ALTER TABLE ONLY ai_troubleshoot_job_events ALTER COLUMN id SET DEFAULT nextval('ai_troubleshoot_job_events_id_seq'::regclass); +ALTER TABLE ONLY ai_usage_events ALTER COLUMN id SET DEFAULT nextval('ai_usage_events_id_seq'::regclass); + ALTER TABLE ONLY ai_vectorizable_files ALTER COLUMN id SET DEFAULT nextval('ai_vectorizable_files_id_seq'::regclass); ALTER TABLE ONLY alert_management_alert_assignees ALTER COLUMN id SET DEFAULT nextval('alert_management_alert_assignees_id_seq'::regclass); @@ -29069,6 +29093,9 @@ ALTER TABLE ONLY ai_testing_terms_acceptances ALTER TABLE ONLY ai_troubleshoot_job_events ADD CONSTRAINT ai_troubleshoot_job_events_pkey PRIMARY KEY (id, "timestamp"); +ALTER TABLE ONLY ai_usage_events + ADD CONSTRAINT ai_usage_events_pkey PRIMARY KEY (id, "timestamp"); + ALTER TABLE ONLY ai_user_metrics ADD CONSTRAINT ai_user_metrics_pkey PRIMARY KEY (user_id); @@ -34066,6 +34093,10 @@ CREATE INDEX index_ai_troubleshoot_job_events_on_project_id ON ONLY ai_troublesh CREATE INDEX index_ai_troubleshoot_job_events_on_user_id ON ONLY ai_troubleshoot_job_events USING btree (user_id); +CREATE INDEX index_ai_usage_events_on_organization_id ON ONLY ai_usage_events USING btree (organization_id); + +CREATE INDEX index_ai_usage_events_on_user_id ON ONLY ai_usage_events USING btree (user_id); + CREATE INDEX index_ai_vectorizable_files_on_project_id ON ai_vectorizable_files USING btree (project_id); CREATE INDEX index_alert_assignees_on_alert_id ON alert_management_alert_assignees USING btree (alert_id); @@ -45286,6 +45317,9 @@ ALTER TABLE ONLY audit_events_amazon_s3_configurations ALTER TABLE ONLY boards_epic_user_preferences ADD 
CONSTRAINT fk_rails_851fe1510a FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE; +ALTER TABLE ai_usage_events + ADD CONSTRAINT fk_rails_852725a860 FOREIGN KEY (organization_id) REFERENCES organizations(id) ON DELETE CASCADE; + ALTER TABLE ONLY value_stream_dashboard_aggregations ADD CONSTRAINT fk_rails_859b4f86f3 FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE; diff --git a/doc/api/graphql/reference/_index.md b/doc/api/graphql/reference/_index.md index 4cad1bf71a17bca8c6204a5f3ee41ff15eabb8b6..701f61aa1f2f9be8a6a67dd5a0a15d51f4bacd7d 100644 --- a/doc/api/graphql/reference/_index.md +++ b/doc/api/graphql/reference/_index.md @@ -13688,6 +13688,29 @@ The edge type for [`AiSelfHostedModel`](#aiselfhostedmodel). | `cursor` | [`String!`](#string) | A cursor for use in pagination. | | `node` | [`AiSelfHostedModel`](#aiselfhostedmodel) | The item at the end of the edge. | +#### `AiUsageEventConnection` + +The connection type for [`AiUsageEvent`](#aiusageevent). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `edges` | [`[AiUsageEventEdge]`](#aiusageeventedge) | A list of edges. | +| `nodes` | [`[AiUsageEvent]`](#aiusageevent) | A list of nodes. | +| `pageInfo` | [`PageInfo!`](#pageinfo) | Information to aid in pagination. | + +#### `AiUsageEventEdge` + +The edge type for [`AiUsageEvent`](#aiusageevent). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `cursor` | [`String!`](#string) | A cursor for use in pagination. | +| `node` | [`AiUsageEvent`](#aiusageevent) | The item at the end of the edge. | + #### `AiUserMetricsConnection` The connection type for [`AiUserMetrics`](#aiusermetrics). @@ -21625,8 +21648,20 @@ Usage data for events stored in the default PostgreSQL database. Data retained f | Name | Type | Description | | ---- | ---- | ----------- | +| `all` | [`AiUsageEventConnection`](#aiusageeventconnection) | All Duo usage events. 
(see [Connections](#connections)) | | `codeSuggestionEvents` | [`CodeSuggestionEventConnection`](#codesuggestioneventconnection) | Events related to code suggestions. (see [Connections](#connections)) | +### `AiUsageEvent` + +#### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `event` | [`AiUsageEventType!`](#aiusageeventtype) | Type of the event. | +| `id` | [`ID!`](#id) | ID of the code suggestion event. | +| `timestamp` | [`Time!`](#time) | When the event happened. | +| `user` | [`UserCore!`](#usercore) | User associated with the event. | + ### `AiUserMetrics` Pre-aggregated per-user metrics for GitLab Code Suggestions and GitLab Duo Chat. Require ClickHouse to be enabled and GitLab Ultimate with the Duo Enterprise add-on. @@ -43711,6 +43746,18 @@ Type of code suggestion event. | `CODE_SUGGESTION_REJECTED_IN_IDE` | Code suggestion rejected. | | `CODE_SUGGESTION_SHOWN_IN_IDE` | Code suggestion shown. | +### `AiUsageEventType` + +Type of code suggestion event. + +| Value | Description | +| ----- | ----------- | +| `CODE_SUGGESTION_ACCEPTED_IN_IDE` | Code suggestion accepted in ide. | +| `CODE_SUGGESTION_REJECTED_IN_IDE` | Code suggestion rejected in ide. | +| `CODE_SUGGESTION_SHOWN_IN_IDE` | Code suggestion shown in ide. | +| `REQUEST_DUO_CHAT_RESPONSE` | Request duo chat response. | +| `TROUBLESHOOT_JOB` | Troubleshoot job. | + ### `AlertManagementAlertSort` Values for sorting alerts. 
diff --git a/ee/app/finders/ai/usage_events_finder.rb b/ee/app/finders/ai/usage_events_finder.rb new file mode 100644 index 0000000000000000000000000000000000000000..10c891c9cfd4ea024b2a386e1b30aa3d08575d07 --- /dev/null +++ b/ee/app/finders/ai/usage_events_finder.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Ai + class UsageEventsFinder + include Gitlab::Utils::StrongMemoize + + attr_reader :resource, :current_user + + def initialize(current_user, resource:) + @current_user = current_user + @resource = resource + end + + def execute + return ::Ai::UsageEvent.none unless Ability.allowed?(current_user, :read_enterprise_ai_analytics, + resource) + + # rubocop: disable CodeReuse/ActiveRecord -- TODO + ::Ai::UsageEvent.where("namespace_path LIKE(?)", "#{resource.traversal_path}%") + # rubocop: enable CodeReuse/ActiveRecord + end + end +end diff --git a/ee/app/graphql/resolvers/analytics/ai_usage/usage_events_resolver.rb b/ee/app/graphql/resolvers/analytics/ai_usage/usage_events_resolver.rb new file mode 100644 index 0000000000000000000000000000000000000000..f11371f5e0b892865013f32b66cd0bc13e9f2a29 --- /dev/null +++ b/ee/app/graphql/resolvers/analytics/ai_usage/usage_events_resolver.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Resolvers + module Analytics + module AiUsage + class UsageEventsResolver < BaseResolver + type ::Types::Analytics::AiUsage::AiUsageEventType.connection_type, null: true + + def ready?(**args) + return super unless should_raise_error? + + raise Gitlab::Graphql::Errors::ArgumentError, 'Not available for this resource.' + end + + def resolve + ::Ai::UsageEventsFinder.new(current_user, resource: object).execute + end + + private + + # In this first iteration this endpoint is limited + # only to top-level groups because still there is no + # way to filter data in a reliable way. 
+ # We can remove this check after namespace_path is populated into ai_code_suggestion_events table, + # for more information check https://gitlab.com/gitlab-org/gitlab/-/issues/490601#note_2122055518. + def should_raise_error? + return true if object.is_a?(Project) + return true unless object.root? + + false + end + end + end + end +end diff --git a/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb b/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb index 3b9fbb3d292593567645285f48b1453a8127b4dc..9d3f195b358f73228c54b99dbf230705d0c74a59 100644 --- a/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb +++ b/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb @@ -14,6 +14,10 @@ class AiUsageDataType < BaseObject field :code_suggestion_events, description: 'Events related to code suggestions.', resolver: ::Resolvers::Analytics::AiUsage::CodeSuggestionEventsResolver + + field :all, + description: 'All Duo usage events.', + resolver: ::Resolvers::Analytics::AiUsage::UsageEventsResolver end end end diff --git a/ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type.rb b/ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type.rb new file mode 100644 index 0000000000000000000000000000000000000000..6ebd51e7bf42c817c8eec9a4ba38cf3ccdb4afea --- /dev/null +++ b/ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Types + module Analytics + module AiUsage + # rubocop:disable Graphql/AuthorizeTypes -- authorized in parent type. + class AiUsageEventType < BaseObject + graphql_name 'AiUsageEvent' + + field :id, GraphQL::Types::ID, + null: false, description: "ID of the code suggestion event." + + field :timestamp, + Types::TimeType, + null: false, + description: 'When the event happened.' + + field :event, + AiUsageEventTypeEnum, + null: false, + description: 'Type of the event.' 
+ + field :user, + Types::UserType, + null: false, + description: 'User associated with the event.' + end + # rubocop:enable Graphql/AuthorizeTypes + end + end +end diff --git a/ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type_enum.rb b/ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type_enum.rb new file mode 100644 index 0000000000000000000000000000000000000000..1c2cfa5400b1a9dfe5ab16bed48ae4ec6596dc5c --- /dev/null +++ b/ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type_enum.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Types + module Analytics + module AiUsage + class AiUsageEventTypeEnum < BaseEnum + graphql_name 'AiUsageEventType' + description 'Type of code suggestion event' + + type_map = Gitlab::Tracking::AiTracking::UnifiedApproach.registered_events.keys.map do |name| + { + key: name, + value: name, + description: name.humanize + } + end + + type_map.each do |type| + value type[:key].upcase, description: type[:description], value: type[:value] + end + end + end + end +end diff --git a/ee/app/models/ai/base_usage_event.rb b/ee/app/models/ai/base_usage_event.rb new file mode 100644 index 0000000000000000000000000000000000000000..69d6a478e8a7aa65c6a05c800c3139ac9133ca03 --- /dev/null +++ b/ee/app/models/ai/base_usage_event.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +module Ai + # Deprecated in favor of Ai::UsageEvent + module BaseUsageEvent + extend ActiveSupport::Concern + include ClickHouseModel + include PartitionedTable + + class_methods do + def related_event?(event_name) + events.key?(event_name) + end + + def payload_attributes + schema_validator = validators_on(:payload).detect { |v| v.is_a?(JsonSchemaValidator) } + schema_validator.schema.value['properties'].keys + end + + def permitted_attributes + %w[user user_id organization organization_id personal_namespace_id namespace_path timestamp event].freeze + end + end + + included do + belongs_to :user + + attribute :timestamp, :datetime, default: -> 
{ DateTime.current } + + partitioned_by :timestamp, strategy: :monthly, retain_for: 3.months + self.primary_key = :id + + validates :timestamp, :user_id, presence: true + validate :validate_recent_timestamp, on: :create + + before_validation :floor_timestamp + + validates :payload, json_schema: { filename: "#{model_name.singular}_payload" }, allow_blank: true # rubocop:disable Database/JsonbSizeLimit -- deprecated code. will be removed soon. + end + + def to_clickhouse_csv_row + { + event: self.class.events[event], + # we round to 3 digits here to avoid floating number inconsistencies. + # until https://gitlab.com/gitlab-org/gitlab/-/issues/527129 + # is resolved + timestamp: Time.zone.parse(timestamp.as_json).to_f.round(3), + user_id: user&.id, + namespace_path: namespace_path + } + end + + # Default to empty hash if payload is empty + def payload + super || {} + end + + def store_to_pg + return false unless valid? + + Ai::UsageEventWriteBuffer.add(self.class.name, attributes.compact) + end + + private + + def floor_timestamp + # we floor to 3 digits here to match current JSON rounding used in Write Buffers. 
+ # That creates consistency between PG and CH until https://gitlab.com/gitlab-org/gitlab/-/issues/527129 + # is resolved + self.timestamp = timestamp&.floor(3) + end + + def validate_recent_timestamp + return unless timestamp && timestamp < self.class.partitioning_strategy.retain_for.ago + + errors.add(:timestamp, _('must be 3 months old at the most')) + end + end +end diff --git a/ee/app/models/ai/code_suggestion_event.rb b/ee/app/models/ai/code_suggestion_event.rb index 99c97cfb698d1de4fbaaa5e4e889598d85f1f381..fcb926be6ec7701cee69061ee1b31b29ae41bc83 100644 --- a/ee/app/models/ai/code_suggestion_event.rb +++ b/ee/app/models/ai/code_suggestion_event.rb @@ -1,9 +1,10 @@ # frozen_string_literal: true module Ai + # Deprecated in favor of Ai::UsageEvent class CodeSuggestionEvent < ApplicationRecord include EachBatch - include UsageEvent + include BaseUsageEvent self.table_name = "ai_code_suggestion_events" self.clickhouse_table_name = "code_suggestion_events" diff --git a/ee/app/models/ai/duo_chat_event.rb b/ee/app/models/ai/duo_chat_event.rb index 348eb782775332126dc1f0f8a98eef885922172c..9e11a1e89aa04be5b39b036cfeae8df863562293 100644 --- a/ee/app/models/ai/duo_chat_event.rb +++ b/ee/app/models/ai/duo_chat_event.rb @@ -1,9 +1,10 @@ # frozen_string_literal: true module Ai + # Deprecated in favor of Ai::UsageEvent class DuoChatEvent < ApplicationRecord include EachBatch - include UsageEvent + include BaseUsageEvent self.table_name = "ai_duo_chat_events" self.clickhouse_table_name = "duo_chat_events" diff --git a/ee/app/models/ai/troubleshoot_job_event.rb b/ee/app/models/ai/troubleshoot_job_event.rb index b298da543618991d5bdf55a3a39e4faba5e19527..b49adb55c94cbe8f088bab06dadc13c34a22f2f4 100644 --- a/ee/app/models/ai/troubleshoot_job_event.rb +++ b/ee/app/models/ai/troubleshoot_job_event.rb @@ -1,8 +1,9 @@ # frozen_string_literal: true module Ai + # Deprecated in favor of Ai::UsageEvent class TroubleshootJobEvent < ApplicationRecord - include UsageEvent + include 
BaseUsageEvent self.table_name = "ai_troubleshoot_job_events" self.clickhouse_table_name = "troubleshoot_job_events" diff --git a/ee/app/models/ai/usage_event.rb b/ee/app/models/ai/usage_event.rb index 58601b9e9e06e658bce31114fbbf74720e6458dc..8535f1262fef02ed9ab68d3f3327c12309cc8b4a 100644 --- a/ee/app/models/ai/usage_event.rb +++ b/ee/app/models/ai/usage_event.rb @@ -1,41 +1,30 @@ # frozen_string_literal: true module Ai - module UsageEvent - extend ActiveSupport::Concern + class UsageEvent < ApplicationRecord + include EachBatch include ClickHouseModel include PartitionedTable - class_methods do - def related_event?(event_name) - events.key?(event_name) - end + self.table_name = "ai_usage_events" + self.clickhouse_table_name = "ai_usage_events" - def payload_attributes - schema_validator = validators_on(:payload).detect { |v| v.is_a?(JsonSchemaValidator) } - schema_validator.schema.value['properties'].keys - end + partitioned_by :timestamp, strategy: :monthly, retain_for: 3.months + self.primary_key = :id - def permitted_attributes - %w[user user_id organization organization_id personal_namespace_id namespace_path timestamp event].freeze - end - end - - included do - belongs_to :user - - attribute :timestamp, :datetime, default: -> { DateTime.current } + populate_sharding_key(:organization_id) { Gitlab::Current::Organization.new(user: user).organization&.id } - partitioned_by :timestamp, strategy: :monthly, retain_for: 3.months - self.primary_key = :id + belongs_to :user + belongs_to :organization, class_name: 'Organizations::Organization' + attribute :timestamp, :datetime, default: -> { DateTime.current } - validates :timestamp, :user_id, presence: true - validate :validate_recent_timestamp, on: :create + enum :event, ::Gitlab::Tracking::AiTracking::UnifiedApproach.registered_events - before_validation :floor_timestamp + validates :timestamp, :user_id, :organization_id, presence: true + validates :extras, json_schema: { filename: "ai_usage_event_extras", 
size_limit: 16.kilobytes } + validate :validate_recent_timestamp, on: :create - validates :payload, json_schema: { filename: "#{model_name.singular}_payload" }, allow_blank: true - end + before_validation :floor_timestamp def to_clickhouse_csv_row { @@ -45,15 +34,11 @@ def to_clickhouse_csv_row # is resolved timestamp: Time.zone.parse(timestamp.as_json).to_f.round(3), user_id: user&.id, - namespace_path: namespace_path + namespace_path: namespace_path, + extras: extras.to_json } end - # Default to empty hash if payload is empty - def payload - super || {} - end - def store_to_pg return false unless valid? diff --git a/ee/app/validators/json_schemas/ai_usage_event_extras.json b/ee/app/validators/json_schemas/ai_usage_event_extras.json new file mode 100644 index 0000000000000000000000000000000000000000..d97109a70130f454331b0f454529ac8dabda8786 --- /dev/null +++ b/ee/app/validators/json_schemas/ai_usage_event_extras.json @@ -0,0 +1,7 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "AI usage event extra properties", + "type": "object", + "properties": {}, + "additionalProperties": true +} diff --git a/ee/app/workers/click_house/dump_all_write_buffers_cron_worker.rb b/ee/app/workers/click_house/dump_all_write_buffers_cron_worker.rb index fb3b3124cbfba409f6e1313ea3297261d90f42fe..0d6d0aa91f2693396d1a7a9633624a6e0082d21b 100644 --- a/ee/app/workers/click_house/dump_all_write_buffers_cron_worker.rb +++ b/ee/app/workers/click_house/dump_all_write_buffers_cron_worker.rb @@ -12,7 +12,8 @@ class DumpAllWriteBuffersCronWorker TABLES = [ Ai::CodeSuggestionEvent, Ai::DuoChatEvent, - Ai::TroubleshootJobEvent + Ai::TroubleshootJobEvent, + Ai::UsageEvent ].map(&:clickhouse_table_name).freeze def perform diff --git a/ee/app/workers/usage_events/dump_write_buffer_cron_worker.rb b/ee/app/workers/usage_events/dump_write_buffer_cron_worker.rb index d446770e0ec9c8d073fbce9c02cc807a1c7147ea..8428dc486c1fb97bc01652ff88bbeea8fc23c87e 100644 --- 
a/ee/app/workers/usage_events/dump_write_buffer_cron_worker.rb +++ b/ee/app/workers/usage_events/dump_write_buffer_cron_worker.rb @@ -13,7 +13,7 @@ class DumpWriteBufferCronWorker MAX_RUNTIME = 200.seconds BATCH_SIZE = 1000 - MODELS = [Ai::DuoChatEvent, Ai::CodeSuggestionEvent, Ai::TroubleshootJobEvent].freeze + MODELS = [Ai::DuoChatEvent, Ai::CodeSuggestionEvent, Ai::TroubleshootJobEvent, Ai::UsageEvent].freeze def perform total_inserted_rows = 0 diff --git a/ee/config/events/troubleshoot_job.yml b/ee/config/events/troubleshoot_job.yml new file mode 100644 index 0000000000000000000000000000000000000000..51eeb34d5b5c063a0037376787136c3b6e7c76cc --- /dev/null +++ b/ee/config/events/troubleshoot_job.yml @@ -0,0 +1,19 @@ +--- +description: TODO +internal_events: true +action: troubleshoot_job +additional_properties: + label: + description: not actually used - to be fixed in https://gitlab.com/gitlab-org/gitlab/-/issues/501387 + property: + description: not actually used - to be fixed in https://gitlab.com/gitlab-org/gitlab/-/issues/501387 +identifiers: + - user +product_group: code_creation +milestone: '17.1' +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/153881 +tiers: + - premium + - ultimate +extra_trackers: + - tracking_class: Gitlab::Tracking::AiTracking diff --git a/ee/config/feature_flags/gitlab_com_derisk/ai_events_unified_approach.yml b/ee/config/feature_flags/gitlab_com_derisk/ai_events_unified_approach.yml new file mode 100644 index 0000000000000000000000000000000000000000..e1f00ab07c59b8de84b7491796db9ee80da21077 --- /dev/null +++ b/ee/config/feature_flags/gitlab_com_derisk/ai_events_unified_approach.yml @@ -0,0 +1,10 @@ +--- +name: ai_events_unified_approach +description: +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/538343 +introduced_by_url: +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/549508 +milestone: '18.2' +group: group::optimize +type: gitlab_com_derisk +default_enabled: false 
diff --git a/ee/lib/gitlab/tracking/ai_tracking.rb b/ee/lib/gitlab/tracking/ai_tracking.rb index eac3f380c9c7b47d0cd2a8191ff65cd1fb0f5d23..017a9b5f2f2b2d38ebe4cbd687c4b26b76c0b662 100644 --- a/ee/lib/gitlab/tracking/ai_tracking.rb +++ b/ee/lib/gitlab/tracking/ai_tracking.rb @@ -3,59 +3,18 @@ module Gitlab module Tracking module AiTracking - POSSIBLE_MODELS = [::Ai::CodeSuggestionEvent, ::Ai::DuoChatEvent, ::Ai::TroubleshootJobEvent].freeze - class << self def track_event(event_name, **context_hash) - event = build_event_model(event_name, context_hash) - - return unless event + OldApproach.track_event(event_name, **context_hash) - store_to_clickhouse(event) - store_to_postgres(event) + return unless Feature.enabled?(:ai_events_unified_approach, context_hash[:user]) - track_user_activity(context_hash[:user]) + UnifiedApproach.track_event(event_name, **context_hash) end def track_user_activity(user) ::Ai::UserMetrics.refresh_last_activity_on(user) end - - private - - def build_event_model(event_name, context_hash = {}) - matched_model = POSSIBLE_MODELS.detect { |model| model.related_event?(event_name) } - return unless matched_model - - context_hash = context_hash.with_indifferent_access - - context_hash[:event] = event_name - context_hash[:project] ||= ::Project.find(context_hash[:project_id]) if context_hash[:project_id] - context_hash[:namespace] ||= ::Namespace.find(context_hash[:namespace_id]) if context_hash[:namespace_id] - - context_hash[:namespace_path] ||= build_traversal_path(context_hash) - - basic_attributes = context_hash.slice(*matched_model.permitted_attributes) - payload_attributes = context_hash.slice(*matched_model.payload_attributes) - - matched_model.new(basic_attributes.merge(payload: payload_attributes)) - end - - def store_to_clickhouse(event) - return unless ::Gitlab::ClickHouse.globally_enabled_for_analytics? 
-
-          event.store_to_clickhouse
-        end
-
-        def store_to_postgres(event)
-          return unless event.respond_to?(:store_to_pg)
-
-          event.store_to_pg
-        end
-
-        def build_traversal_path(context_hash)
-          context_hash[:project]&.project_namespace&.traversal_path || context_hash[:namespace]&.traversal_path
-        end
       end
     end
   end
diff --git a/ee/lib/gitlab/tracking/ai_tracking/old_approach.rb b/ee/lib/gitlab/tracking/ai_tracking/old_approach.rb
new file mode 100644
index 0000000000000000000000000000000000000000..f42bd1205abd1d24a59316103213783f2e1223b6
--- /dev/null
+++ b/ee/lib/gitlab/tracking/ai_tracking/old_approach.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Tracking
+    module AiTracking
+      # Tracks AI events through the per-feature event models listed in POSSIBLE_MODELS.
+      module OldApproach
+        POSSIBLE_MODELS = [::Ai::CodeSuggestionEvent, ::Ai::DuoChatEvent, ::Ai::TroubleshootJobEvent].freeze
+
+        class << self
+          # Builds the event model matching +event_name+, stores it and refreshes the
+          # user's last AI activity. Does nothing when no model relates to the event.
+          def track_event(event_name, **context_hash)
+            event = build_event_model(event_name, context_hash)
+
+            return unless event
+
+            store_to_clickhouse(event)
+            store_to_postgres(event)
+
+            ::Ai::UserMetrics.refresh_last_activity_on(context_hash[:user])
+          end
+
+          # Refreshes the user's last AI activity without recording an event.
+          def track_user_activity(user)
+            ::Ai::UserMetrics.refresh_last_activity_on(user)
+          end
+
+          private
+
+          # Returns a new instance of the first model in POSSIBLE_MODELS related to
+          # +event_name+, or nil when none is.
+          def build_event_model(event_name, context_hash = {})
+            matched_model = POSSIBLE_MODELS.detect { |model| model.related_event?(event_name) }
+            return unless matched_model
+
+            context_hash = context_hash.with_indifferent_access
+
+            context_hash[:event] = event_name
+            context_hash[:project] ||= ::Project.find(context_hash[:project_id]) if context_hash[:project_id]
+            context_hash[:namespace] ||= ::Namespace.find(context_hash[:namespace_id]) if context_hash[:namespace_id]
+
+            # An explicitly passed namespace_path wins over the derived one.
+            context_hash[:namespace_path] ||= build_traversal_path(context_hash)
+
+            basic_attributes = context_hash.slice(*matched_model.permitted_attributes)
+            payload_attributes = context_hash.slice(*matched_model.payload_attributes)
+
+            matched_model.new(basic_attributes.merge(payload: payload_attributes))
+          end
+
+          def store_to_clickhouse(event)
+            return unless ::Gitlab::ClickHouse.globally_enabled_for_analytics?
+
+            event.store_to_clickhouse
+          end
+
+          # Events that do not respond to `store_to_pg` are skipped.
+          def store_to_postgres(event)
+            return unless event.respond_to?(:store_to_pg)
+
+            event.store_to_pg
+          end
+
+          # The project's namespace path takes precedence over the namespace's.
+          def build_traversal_path(context_hash)
+            context_hash[:project]&.project_namespace&.traversal_path || context_hash[:namespace]&.traversal_path
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/ee/lib/gitlab/tracking/ai_tracking/unified_approach.rb b/ee/lib/gitlab/tracking/ai_tracking/unified_approach.rb
new file mode 100644
index 0000000000000000000000000000000000000000..40b78819c0888b5b1aa23eaa5d5b8f254a1b8648
--- /dev/null
+++ b/ee/lib/gitlab/tracking/ai_tracking/unified_approach.rb
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Tracking
+    module AiTracking
+      # Tracks every registered AI event as a single ::Ai::UsageEvent record.
+      # Event names, their numeric ids and per-event transformations are declared
+      # in the `register` block below.
+      module UnifiedApproach
+        extend AiUsageEventsRegistryDsl
+
+        register do
+          events(
+            # code_suggestions_requested: 1, old data https://gitlab.com/gitlab-org/gitlab/-/issues/462809
+            code_suggestion_shown_in_ide: 2,
+            code_suggestion_accepted_in_ide: 3,
+            code_suggestion_rejected_in_ide: 4
+            # code_suggestion_direct_access_token_refresh: 5 old data https://gitlab.com/gitlab-org/gitlab/-/issues/462809
+          ) do |context|
+            context.slice(*%w[unique_tracking_id suggestion_size language branch_name])
+          end
+
+          events(request_duo_chat_response: 6)
+
+          # NOTE(review): `merge_request` holds a merge request id; confirm whether the
+          # key should be `merge_request_id` before relying on the stored extras.
+          events(troubleshoot_job: 7) do |context|
+            {
+              job_id: context['job'].id,
+              project_id: context['job'].project_id,
+              pipeline_id: context['job'].pipeline.id,
+              merge_request: context['job'].pipeline.merge_request_id
+            }
+          end
+        end
+
+        class << self
+          # Records +event_name+ in PostgreSQL and, when ClickHouse analytics is enabled,
+          # in ClickHouse, then refreshes the user's last AI activity.
+          # Events that are not registered above are ignored.
+          def track_event(event_name, **context_hash)
+            return unless registered_events.key?(event_name.to_s)
+
+            event = build_event_model(event_name, context_hash)
+
+            store_to_postgres(event)
+            store_to_clickhouse(event)
+
+            ::Ai::UserMetrics.refresh_last_activity_on(context_hash[:user])
+          end
+
+          private
+
+          # Keys passed to ::Ai::UsageEvent as top-level attributes; every other
+          # transformed attribute ends up in `extras`.
+          def base_attributes
+            %w[user timestamp event namespace_path].freeze
+          end
+
+          def build_event_model(event_name, context_hash = {})
+            context_hash = context_hash.with_indifferent_access
+
+            attributes = apply_transformations(event_name, context_hash)
+
+            # Transformed values override the ones passed in by the caller.
+            basic_attributes = context_hash.slice(*base_attributes).merge(attributes.slice(*base_attributes))
+            extra_attributes = attributes.except(*base_attributes)
+
+            ::Ai::UsageEvent.new(basic_attributes.merge(event: event_name, extras: extra_attributes))
+          end
+
+          def store_to_clickhouse(event)
+            return unless ::Gitlab::ClickHouse.globally_enabled_for_analytics?
+
+            event.store_to_clickhouse
+          end
+
+          def store_to_postgres(event)
+            event.store_to_pg
+          end
+
+          # Runs the event's transformations in registration order. Each block sees the
+          # context merged with the attributes produced so far; nil values are dropped
+          # from the result.
+          def apply_transformations(event_name, context_hash)
+            event_transformations = registered_transformations(event_name)
+
+            attributes = event_transformations.inject({}.with_indifferent_access) do |acc, block|
+              acc.merge(block.call(context_hash.merge(acc)))
+            end.compact
+
+            # An explicit namespace_path from the caller must win over the guessed one;
+            # otherwise the guess (possibly nil) would overwrite it in build_event_model.
+            attributes[:namespace_path] ||=
+              context_hash[:namespace_path] || guess_namespace_path(context_hash.merge(attributes))
+            attributes
+          end
+
+          def guess_namespace_path(context_hash)
+            related_namespace(context_hash)&.traversal_path
+          end
+
+          def related_namespace(context_hash)
+            # Order matters. project should take precedence over namespace
+            project = if context_hash[:project]
+                        context_hash[:project]
+                      elsif context_hash[:project_id]
+                        ::Project.find_by_id(context_hash[:project_id])
+                      end
+
+            return project.project_namespace if project
+
+            if context_hash[:namespace]
+              context_hash[:namespace]
+            elsif context_hash[:namespace_id]
+              ::Namespace.find_by_id(context_hash[:namespace_id])
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/ee/lib/gitlab/tracking/ai_usage_events_registry_dsl.rb b/ee/lib/gitlab/tracking/ai_usage_events_registry_dsl.rb
new file mode 100644
index 0000000000000000000000000000000000000000..65b9f050ac9df0ed008a2e647fb73634c58faf9a
--- /dev/null
+++ b/ee/lib/gitlab/tracking/ai_usage_events_registry_dsl.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Tracking
+    # rubocop:disable Gitlab/ModuleWithInstanceVariables -- it's a class level DSL. It's intended to be a module.
+    # Class-level DSL for declaring AI usage events. `extend` it, then declare events
+    # and their transformations inside a `register` block.
+    module AiUsageEventsRegistryDsl
+      def register(&block)
+        @registered_events ||= {}.with_indifferent_access
+        instance_eval(&block)
+      end
+
+      # Registers each name => id pair. The optional block becomes the first
+      # transformation of every event listed.
+      # Raises when an event is not defined in InternalEvents or its name or id is taken.
+      def events(names_with_ids, &event_transformation)
+        names_with_ids.each do |name, id|
+          guard_internal_event_existence!(name)
+          guard_duplicated_event!(name, id)
+          @registered_events[name] ||= { id: id, transformations: [] }
+          transformation(name, &event_transformation) if event_transformation
+        end
+      end
+
+      # Appends +block+ to the transformations of each named event.
+      # Raises when a name has not been registered through `events`.
+      def transformation(*names, &block)
+        return unless block
+
+        names.each do |name|
+          event = @registered_events && @registered_events[name]
+          raise "Event `#{name}` is not registered" unless event
+
+          event[:transformations] << block
+        end
+      end
+
+      # Returns a hash of event name => id.
+      def registered_events
+        return {} unless @registered_events
+
+        @registered_events.transform_values { |options| options[:id] }
+      end
+
+      # Returns the transformation blocks of +event_name+, or an empty array when the
+      # event is unknown, so callers can always iterate over the result.
+      def registered_transformations(event_name)
+        return [] unless @registered_events
+
+        @registered_events[event_name]&.fetch(:transformations) || []
+      end
+
+      private
+
+      def guard_internal_event_existence!(event_name)
+        return if Gitlab::Tracking::EventDefinition.internal_event_exists?(event_name.to_s)
+
+        raise "Event #{event_name} is not defined in InternalEvents"
+      end
+
+      def guard_duplicated_event!(name, id)
+        return unless @registered_events
+        raise "Event with name `#{name}` was already registered" if @registered_events[name]
+        raise "Event with id `#{id}` was already registered" if @registered_events.any? { |_n, e| e[:id] == id }
+      end
+    end
+    # rubocop:enable Gitlab/ModuleWithInstanceVariables
+  end
+end
diff --git a/ee/spec/factories/ai/usage_events.rb b/ee/spec/factories/ai/usage_events.rb
new file mode 100644
index 0000000000000000000000000000000000000000..6978743347e38b0aacdfefe836b7bd89205908d9
--- /dev/null
+++ b/ee/spec/factories/ai/usage_events.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+FactoryBot.define do
+  factory :ai_usage_event, class: '::Ai::UsageEvent' do
+    event { 'request_duo_chat_response' }
+    association :user, :with_namespace
+    # NOTE(review): the tracker builds this model with `extras:`; confirm it also accepts `payload`.
+    payload { {} }
+  end
+end