# frozen_string_literal: true

module Gitlab
  module Llm
    module AiGateway
      # Thin HTTP client for the AI Gateway documentation-search endpoint
      # (POST {Gitlab::AiGateway.url}/v1/search/docs).
      #
      # Returns the raw Gitlab::HTTP response from #search, or nil when no
      # Cloud Connector access token is available (self-managed instance not
      # entitled, token expired, etc.).
      class DocsClient
        include ::Gitlab::Llm::Concerns::ExponentialBackoff
        include ::Gitlab::Llm::Concerns::EventTracking
        include ::Gitlab::Utils::StrongMemoize
        include ::API::Helpers::CloudConnector

        DEFAULT_TIMEOUT = 30.seconds
        DEFAULT_TYPE = 'search-docs'
        DEFAULT_SOURCE = 'GitLab EE'

        # user             - User the request is made on behalf of; forwarded in
        #                    Cloud Connector headers.
        # tracking_context - optional metadata (e.g. request_id, action) used by
        #                    the EventTracking concern.
        def initialize(user, tracking_context: {})
          @user = user
          @tracking_context = tracking_context
          @logger = Gitlab::Llm::Logger.build
        end

        # Searches GitLab documentation through the AI Gateway.
        #
        # query   - String question/terms to search for.
        # options - forwarded to the request; :timeout is honoured, :stream is
        #           stripped because this endpoint returns a plain JSON body,
        #           not SSE events.
        #
        # Returns the HTTP response object, or nil when the client is disabled.
        def search(query:, **options)
          return unless enabled?

          perform_search_request(query: query, options: options.except(:stream))
        end

        private

        attr_reader :user, :logger, :tracking_context

        # Performs the POST against the AI Gateway and logs request/response.
        # Note: :timeout is deleted (not read) from options so it is not
        # forwarded twice.
        def perform_search_request(query:, options:)
          logger.info(message: "Searching docs from AI Gateway", options: options)
          timeout = options.delete(:timeout) || DEFAULT_TIMEOUT

          response = Gitlab::HTTP.post(
            "#{Gitlab::AiGateway.url}/v1/search/docs",
            headers: request_headers,
            body: request_body(query: query).to_json,
            timeout: timeout,
            allow_local_requests: true # AI Gateway may be a local/self-hosted URL
          )

          logger.info_or_debug(user, message: "Searched docs from AI Gateway", response: response)

          response
        end

        # The client is usable only when a documentation_search access token
        # can be minted.
        def enabled?
          access_token.present?
        end

        # Standard AI Gateway auth/tracing headers plus Cloud Connector
        # instance/user identification headers.
        def request_headers
          {
            'X-Gitlab-Host-Name' => Gitlab.config.gitlab.host,
            'X-Gitlab-Authentication-Type' => 'oidc',
            'Authorization' => "Bearer #{access_token}",
            'Content-Type' => 'application/json',
            'X-Request-ID' => Labkit::Correlation::CorrelationId.current_or_new_id
          }.merge(cloud_connector_headers(user))
        end

        # Memoized: the token is fetched once per client instance.
        def access_token
          Gitlab::Llm::AiGateway::Client.access_token(scopes: [:documentation_search])
        end
        strong_memoize_attr :access_token

        # Request envelope expected by the /v1/search/docs endpoint.
        def request_body(query:)
          {
            type: DEFAULT_TYPE,
            metadata: {
              source: DEFAULT_SOURCE,
              version: Gitlab.version_info.to_s
            },
            payload: {
              query: query
            }
          }
        end
      end
    end
  end
end
def initialize(ai_response, current_user, search_documents: nil)
  @current_user = current_user
  # Documents pre-fetched from the AI Gateway docs-search endpoint
  # (ai_gateway_docs_search flow). Keys are normalized for indifferent
  # access so lookups below work with either string or symbol keys.
  @search_documents = search_documents&.map(&:with_indifferent_access)
  super(ai_response)
end

private

attr_reader :current_user, :search_documents

# Splits the completion into the answer text and the trailing content-ID
# list, then resolves those IDs to documentation sources.
#
# Returns { content: String, extras: { sources: Array<Hash> } }.
def parsed_response
  text = ai_response&.dig(:completion).to_s.strip
  message, source_ids = text.split("#{CONTENT_ID_FIELD}:")
  message.strip!

  # No sources when the model listed none, or when it explicitly answered
  # "I don't know" (the two empty branches are equivalent and merged here).
  sources =
    if source_ids.blank? || message.match?(NO_ANSWER_REGEX)
      []
    elsif search_documents
      find_sources_with_search_documents(source_ids)
    else
      find_sources(source_ids)
    end

  {
    content: message,
    extras: {
      sources: sources
    }
  }
end
strong_memoize_attr :parsed_response

# Legacy path: resolve numeric content IDs against the embeddings table.
def find_sources(source_ids)
  ids = source_ids.match(CONTENT_ID_REGEX).captures.map(&:to_i)
  documents = ::Embedding::Vertex::GitlabDocumentation.id_in(ids).select(:url, :metadata)
  documents.map do |doc|
    { source_url: doc.url }.merge(doc.metadata)
  end.uniq
end

# AI Gateway path: resolve alphanumeric content IDs against the documents
# supplied at construction time.
def find_sources_with_search_documents(source_ids)
  # Fix: the capture group must carry a name — a bare `(?...)` group with no
  # name or option is a Regexp syntax error in Ruby; the `<id>` name was
  # dropped from the source.
  ids = source_ids.scan(/CNT-IDX-(?<id>[0-9a-z]+)/).flatten

  search_documents
    .select { |doc| ids.include?(doc[:id]) }
    .map { |doc| { source_url: doc[:metadata]['filename'] }.merge(doc[:metadata]).symbolize_keys }
    .uniq
end
# Queries the AI Gateway docs-search endpoint for documents relevant to the
# question. `|| {}` guards against a nil response when the client is
# disabled (no access token), so the dig below is always safe.
#
# Returns an Array of documents with indifferent access, or nil when the
# response carried no 'response' => 'results' payload.
def get_search_results(question)
  response = Gitlab::Llm::AiGateway::DocsClient.new(current_user)
    .search(query: question) || {}

  response.dig('response', 'results')&.map(&:with_indifferent_access)
end

private

# Memoized AI Gateway completion client for the streaming final-prompt call.
def ai_gateway_client
  @ai_gateway_client ||= ::Gitlab::Llm::AiGateway::Client.new(current_user, tracking_context: tracking_context)
end

# Builds the final Anthropic-style prompt from the search documents and
# streams the completion from the AI Gateway, yielding each streamed chunk
# to the caller's block (callers pass &block through #execute).
#
# Returns a ResponseModifiers::TanukiBot wrapping the full completion.
def get_completions_ai_gateway(search_documents)
  final_prompt = Gitlab::Llm::Anthropic::Templates::TanukiBot
    .final_prompt(question: question, documents: search_documents)

  final_prompt_result = ai_gateway_client.stream(
    prompt: final_prompt[:prompt]
  ) do |data|
    yield data if block_given?
  end

  logger.info_or_debug(current_user,
    message: "Got Final Result", prompt: final_prompt[:prompt], response: final_prompt_result)

  # NOTE(review): the completion is wrapped in a hash and serialized to JSON
  # here, while the modifier reads `ai_response&.dig(:completion)` — this
  # assumes the base modifier JSON-parses string responses with symbol keys;
  # confirm against Gitlab::Llm::BaseResponseModifier.
  Gitlab::Llm::Anthropic::ResponseModifiers::TanukiBot.new(
    { completion: final_prompt_result }.to_json,
    current_user,
    search_documents: search_documents
  )
end

# Canned "no documentation found" reply used for every early-exit path.
def empty_response
  Gitlab::Llm::ResponseModifiers::EmptyResponseModifier.new(
    _("I'm sorry, I was not able to find any documentation to answer your question.")
  )
end
end + + let(:default_body_params) do + { + type: described_class::DEFAULT_TYPE, + metadata: { + source: described_class::DEFAULT_SOURCE, + version: Gitlab.version_info.to_s + }, + payload: { + query: "anything" + } + } + end + + let(:expected_response) do + { "foo" => "bar" } + end + + let(:request_url) { "#{Gitlab::AiGateway.url}/v1/search/docs" } + let(:tracking_context) { { request_id: 'uuid', action: 'chat' } } + let(:response_body) { expected_response.to_json } + let(:http_status) { 200 } + let(:response_headers) { { 'Content-Type' => 'application/json' } } + + include StubRequests + + describe '#search' do + before do + stub_request(:post, request_url) + .with( + body: expected_request_body, + headers: expected_request_headers + ) + .to_return( + status: http_status, + body: response_body, + headers: response_headers + ) + end + + subject(:result) do + described_class.new(user, tracking_context: tracking_context).search(query: 'anything', **options) + end + + it 'returns response' do + expect(Gitlab::HTTP).to receive(:post).with( + anything, + hash_including(timeout: described_class::DEFAULT_TIMEOUT) + ).and_call_original + expect(result.parsed_response).to eq(expected_response) + end + + context 'when passing stream: true' do + let(:options) { { stream: true } } + + it 'does not pass stream: true as we do not want to retrieve SSE events' do + expect(Gitlab::HTTP).to receive(:post).with( + anything, + hash_excluding(:stream_body) + ).and_call_original + expect(result.parsed_response).to eq(expected_response) + end + end + + context 'when token is expired' do + before do + token.update!(expires_at: 1.day.ago) + end + + it 'returns empty hash' do + expect(Gitlab::HTTP).not_to receive(:post) + expect(result).to eq(nil) + end + end + end +end diff --git a/ee/spec/lib/gitlab/llm/anthropic/response_modifiers/tanuki_bot_spec.rb b/ee/spec/lib/gitlab/llm/anthropic/response_modifiers/tanuki_bot_spec.rb index 
0068ca568de8f717935e2d757a9769dd1021ca1d..34a63f01e2cf0384a835ff8c2372acdfcc209540 100644 --- a/ee/spec/lib/gitlab/llm/anthropic/response_modifiers/tanuki_bot_spec.rb +++ b/ee/spec/lib/gitlab/llm/anthropic/response_modifiers/tanuki_bot_spec.rb @@ -56,4 +56,55 @@ end end end + + describe '#extras with search_documents' do + subject(:result) { described_class.new(ai_response, current_user, search_documents: search_documents).extras } + + let(:metadata) { { foo: 'bar', 'filename' => 'baz.md' } } + let(:search_documents) do + [ + { id: "abc123", content: '', metadata: metadata }, + { id: "efg456", content: '', metadata: metadata } + ] + end + + let(:ai_response) do + { completion: "#{text} ATTRS: CNT-IDX-abc123 ATTRS: CNT-IDX-efg456 #{text}" }.to_json + end + + context 'when the ids match existing documents' do + it 'fills sources' do + expect(result).to eq(sources: [{ source_url: 'baz.md', foo: 'bar', filename: 'baz.md' }]) + end + end + + context "when the ids don't match any documents" do + let(:search_documents) do + [ + { id: "xyz789", content: '', metadata: metadata } + ] + end + + it 'sets extras as empty' do + expect(subject).to eq(sources: []) + end + end + + context "when the there are no ids" do + let(:ai_response) { { completion: "#{text} ATTRS:" }.to_json } + + it 'sets extras as empty' do + expect(subject).to eq(sources: []) + end + end + + context "when the message contains the text I don't know" do + let(:text) { "I don't know the answer to your question" } + let(:record_id) { non_existing_record_id } + + it 'sets extras as empty' do + expect(subject).to eq(sources: []) + end + end + end end diff --git a/ee/spec/lib/gitlab/llm/tanuki_bot_spec.rb b/ee/spec/lib/gitlab/llm/tanuki_bot_spec.rb index 0b888d900c9520910f7cc748df1ee1abd78096de..a3cd98d7ad7e9bac21da7f1cbffa412b8f1e31e5 100644 --- a/ee/spec/lib/gitlab/llm/tanuki_bot_spec.rb +++ b/ee/spec/lib/gitlab/llm/tanuki_bot_spec.rb @@ -3,6 +3,7 @@ require 'spec_helper' RSpec.describe Gitlab::Llm::TanukiBot, 
feature_category: :duo_chat do + # rubocop:disable RSpec/MultipleMemoizedHelpers -- after ai_gateway_docs_search flag removal many let can be removed describe '#execute' do let_it_be(:user) { create(:user) } let_it_be(:embeddings) { create_list(:vertex_gitlab_documentation, 2) } @@ -21,6 +22,7 @@ let(:vertex_model) { ::Embedding::Vertex::GitlabDocumentation } let(:vertex_args) { { content: question } } let(:vertex_client) { ::Gitlab::Llm::VertexAi::Client.new(user) } + let(:ai_gateway_client) { ::Gitlab::Llm::AiGateway::Client.new(user) } let(:anthropic_client) { ::Gitlab::Llm::Anthropic::Client.new(user) } let(:embedding) { Array.new(1536, 0.5) } let(:vertex_embedding) { Array.new(768, 0.5) } @@ -31,6 +33,22 @@ let(:attrs) { embeddings.map(&:id).map { |x| "CNT-IDX-#{x}" }.join(", ") } let(:completion_response) { "#{answer} ATTRS: #{attrs}" } + let(:docs_search_client) { ::Gitlab::Llm::AiGateway::DocsClient.new(user) } + let(:docs_search_args) { { query: question } } + let(:docs_search_response) do + { + 'response' => { + 'results' => [ + { + 'id' => 1, + 'content' => 'content', + 'metadata' => 'metadata' + } + ] + } + } + end + let(:status_code) { 200 } let(:success) { true } @@ -122,6 +140,7 @@ before do allow(License).to receive(:feature_available?).and_return(true) allow(logger).to receive(:info_or_debug) + stub_feature_flags(ai_gateway_docs_search: false) end context 'when on Gitlab.com' do @@ -286,5 +305,81 @@ end end end + + describe 'execute with ai_gateway_docs_search enabled' do + before do + stub_feature_flags(ai_gateway_docs_search: true) + allow(License).to receive(:feature_available?).and_return(true) + allow(logger).to receive(:info_or_debug) + + allow(described_class).to receive(:enabled_for?).and_return(true) + + allow(::Gitlab::Llm::AiGateway::Client).to receive(:new).and_return(ai_gateway_client) + allow(::Gitlab::Llm::AiGateway::DocsClient).to receive(:new).and_return(docs_search_client) + + allow(ai_gateway_client).to 
receive(:stream).and_return(completion_response) + allow(docs_search_client).to receive(:search).with(**docs_search_args).and_return(docs_search_response) + end + + it 'executes calls and returns ResponseModifier' do + expect(ai_gateway_client).to receive(:stream).once.and_return(completion_response) + expect(docs_search_client).to receive(:search).with(**docs_search_args).and_return(docs_search_response) + + expect(execute).to be_an_instance_of(::Gitlab::Llm::Anthropic::ResponseModifiers::TanukiBot) + end + + it 'yields the streamed response to the given block' do + expect(ai_gateway_client) + .to receive(:stream).once + .and_yield(answer) + .and_return(completion_response) + + expect(docs_search_client).to receive(:search).with(**docs_search_args).and_return(docs_search_response) + + expect { |b| instance.execute(&b) }.to yield_with_args(answer) + end + + it 'raises an error when request failed' do + expect(docs_search_client).to receive(:search).with(**docs_search_args).and_return(docs_search_response) + allow(ai_gateway_client).to receive(:stream).once.and_yield({ "error" => { "message" => "some error" } }) + + execute + end + + context 'when user has AI features disabled' do + before do + allow(described_class).to receive(:enabled_for?).with(user: user).and_return(false) + end + + it 'returns an empty response message' do + expect(execute.response_body).to eq(empty_response_message) + end + end + + context 'when the question is not provided' do + let(:question) { nil } + + it 'returns an empty response message' do + expect(execute.response_body).to eq(empty_response_message) + end + end + + context 'when no documents are found' do + let(:docs_search_response) { {} } + + it 'returns an empty response message' do + expect(execute.response_body).to eq(empty_response_message) + end + end + + context 'when DocsClient returns nil' do + let(:docs_search_response) { nil } + + it 'returns an empty response message' do + expect(execute.response_body).to 
eq(empty_response_message) + end + end + end end + # rubocop:enable RSpec/MultipleMemoizedHelpers end