diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example
index bda4f90e7e91ff045908ddb9bb712a221087c88f..a3b3493e2e5847cf7aad89caab6be801021138f4 100644
--- a/config/gitlab.yml.example
+++ b/config/gitlab.yml.example
@@ -1315,6 +1315,15 @@ production: &base
     # Default is '.gitlab_workhorse_secret' relative to Rails.root (i.e. root of the GitLab app).
     # secret_file: /home/git/gitlab/.gitlab_workhorse_secret
 
+  database_traffic_capture:
+    config:
+      storage:
+        connector:
+          provider: Gcs
+          project_id: 'my-project'
+          credentials: '/path/to/keyfile.json'
+          bucket: 'my-bucket'
+
   cell:
     # enabled: false
     # id: null
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index a2ff67a533f5eeb22fe766d750dea5b70efcccd3..8a7eaa7d971c7a247fed939bad5feb6f135abe94 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -1138,6 +1138,15 @@
 Settings['workhorse'] ||= {}
 Settings.workhorse['secret_file'] ||= Rails.root.join('.gitlab_workhorse_secret')
 
+#
+# Database Traffic Capture Settings
+#
+
+Settings['database_traffic_capture'] ||= {}
+Settings.database_traffic_capture['config'] ||= {}
+Settings.database_traffic_capture.config['storage'] ||= {}
+Settings.database_traffic_capture.config.storage['connector'] ||= {}
+
 #
 # Cells
 #
diff --git a/lib/gitlab/database/capture/storage.rb b/lib/gitlab/database/capture/storage.rb
new file mode 100644
index 0000000000000000000000000000000000000000..f897f9751aee300588e92f2ec34f8bfecf183d43
--- /dev/null
+++ b/lib/gitlab/database/capture/storage.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module Capture
+      # Uploads database captures through the connector selected by the
+      # +database_traffic_capture.config.storage.connector+ settings.
+      class Storage
+        # Maps the configured +provider+ value to the connector class handling it.
+        CONNECTORS = {
+          'Gcs' => StorageConnectors::Gcs
+        }.freeze
+
+        # Shortcut that builds an instance and forwards all arguments to #upload.
+        def self.upload(...)
+          new.upload(...)
+        end
+
+        # Uploads +data+ as +filename+ and returns the connector's result.
+        #
+        # The request and its completion (with the elapsed time) are logged. When a
+        # StandardError is raised, the failure is logged and the error is re-raised.
+        def upload(filename, data)
+          log("Upload request for database capture", filename)
+          start_monotonic_time = ::Gitlab::Metrics::System.monotonic_time
+
+          result = connector.upload(filename, data)
+
+          duration_s = ::Gitlab::Metrics::System.monotonic_time - start_monotonic_time
+          log("Database capture upload completed", filename, duration_s)
+
+          result
+        rescue StandardError => error
+          log("Database capture upload failed: #{error}", filename)
+
+          raise
+        end
+
+        private
+
+        # Fetches the configured provider or uses +StorageConnectors::Local+ as fallback connector.
+        def connector
+          CONNECTORS.fetch(connector_provider, StorageConnectors::Local).new(connector_settings)
+        end
+
+        # Provider name read from the connector settings; it is the lookup key for +CONNECTORS+.
+        def connector_provider
+          connector_settings.try(:provider)
+        end
+
+        def connector_settings
+          Settings.database_traffic_capture.config.storage.connector
+        end
+
+        # Writes a structured entry to the application log; +nil+ values (e.g. a missing
+        # duration) are left out.
+        def log(message, filename, duration = nil)
+          info = { message: message, connector: connector_provider, filename: filename, duration: duration }
+
+          Gitlab::AppLogger.info(info.compact)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/capture/storage_connectors/gcs.rb b/lib/gitlab/database/capture/storage_connectors/gcs.rb
new file mode 100644
index 0000000000000000000000000000000000000000..8a1d20a92938813bf7bd7e643d40202e43fd6e5f
--- /dev/null
+++ b/lib/gitlab/database/capture/storage_connectors/gcs.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+require 'google/cloud/storage'
+
+module Gitlab
+  module Database
+    module Capture
+      module StorageConnectors
+        # Google Cloud Storage Connector
+        # https://cloud.google.com/ruby/docs/reference/google-cloud-storage/latest
+        class Gcs
+          # +settings+ must respond to +project_id+, +credentials+ and +bucket+.
+          def initialize(settings)
+            @settings = settings
+          end
+
+          # Creates a file named after the escaped +filename+ in the configured bucket, keeping
+          # the original name in the file metadata.
+          #
+          # We have to escape "/" in the filename to prevent GCS from interpreting it as a subfolder.
+          # This matters if the primary write location is used to compose the filename, because it
+          # can include an address like +"1F/4BE69098"+.
+          def upload(filename, data)
+            bucket.create_file(
+              StringIO.new(data),
+              CGI.escape(filename),
+              metadata: {
+                original_filename: filename,
+                encoded: true
+              }
+            )
+          end
+
+          private
+
+          attr_reader :settings
+
+          def client
+            @client ||= Google::Cloud::Storage.new(project_id: settings.project_id, credentials: settings.credentials)
+          end
+
+          # Permission 'storage.buckets.get' must be granted to access the Google Cloud Storage bucket
+          def bucket
+            @bucket ||= client.bucket(settings.bucket)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/capture/storage_connectors/local.rb b/lib/gitlab/database/capture/storage_connectors/local.rb
new file mode 100644
index 0000000000000000000000000000000000000000..1b3077e850334d235adb90071c68cba772a8b39f
--- /dev/null
+++ b/lib/gitlab/database/capture/storage_connectors/local.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module Capture
+      module StorageConnectors
+        # Filesystem connector that stores captures under +TEMP_PATH+.
+        # It exists to simplify testing and development only.
+        class Local
+          TEMP_PATH = Rails.root.join('tmp/database-traffic-capture/storage')
+
+          # Refuses to be instantiated outside the development and test environments.
+          # NOTE(review): +ConfigurationError+ is resolved through the enclosing namespaces;
+          # confirm that it is defined there.
+          def initialize(settings)
+            unless Rails.env.development? || Rails.env.test?
+              raise ConfigurationError, "Local connector provider it's not intended to be used in production"
+            end
+
+            @settings = settings
+          end
+
+          # Writes +data+ to a file inside +TEMP_PATH+ and returns what +File.write+ returns.
+          #
+          # The filename is escaped the same way the Gcs connector does it: names can contain "/"
+          # (e.g. a write location like +"1F/4BE69098"+), and keeping only the basename would drop
+          # everything before the slash, so different captures could overwrite each other.
+          def upload(filename, data)
+            Gitlab::PathTraversal.check_path_traversal!(filename)
+
+            filepath = TEMP_PATH.join(CGI.escape(filename))
+
+            Gitlab::PathTraversal.check_allowed_absolute_path!(File.dirname(filepath), [TEMP_PATH.to_s])
+
+            FileUtils.mkdir_p(File.dirname(filepath))
+
+            File.write(filepath, data)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/capture/storage_connectors/gcs_spec.rb b/spec/lib/gitlab/database/capture/storage_connectors/gcs_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..7e1a6b978142de29878d56ba4979e6936bd430bb
--- /dev/null
+++ b/spec/lib/gitlab/database/capture/storage_connectors/gcs_spec.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Capture::StorageConnectors::Gcs, feature_category: :database do
+  let(:connector) { described_class.new(settings) }
+  let(:client) { instance_double(Google::Cloud::Storage::Project) }
+  let(:bucket) { instance_double(Google::Cloud::Storage::Bucket) }
+  let(:settings) do
+    GitlabSettings::Options.build(
+      provider: 'Gcs',
+      project_id: 'my-project',
+      credentials: '/path/to/keyfile.json',
+      bucket: 'my-bucket'
+    )
+  end
+
+  let(:data) do
+    <<~NDJSON
+      {"id": 1, "sql": "SELECT 1 FROM \"public\".\"users\" LIMIT 1;"}
+      {"id": 2, "sql": "SELECT * FROM \"public\".\"projects\" WHERE \"projects\".\"id\" = 1;"}
+      {"id": 3, "sql": "DELETE FROM \"public\".\"users\" WHERE \"users\".\"id\" = 1;"}
+    NDJSON
+  end
+
+  before do
+    allow(Google::Cloud::Storage).to receive(:new).and_return(client)
+    allow(client).to receive(:bucket).and_return(bucket)
+  end
+
+  describe '#upload' do
+    let(:filename) { "v1-main-cadf8f5a--1F/4BEAABE0" }
+    let(:encoded_filename) { "v1-main-cadf8f5a--1F%2F4BEAABE0" }
+    let(:metadata) { { original_filename: filename, encoded: true } }
+
+    it 'uploads data to the specified bucket' do
+      expect(bucket).to receive(:create_file).with(instance_of(StringIO), encoded_filename, metadata: metadata)
+
+      connector.upload(filename, data)
+    end
+
+    context 'when upload fails' do
+      let(:error) { ArgumentError.new('Upload failed') }
+
+      before do
+        allow(bucket).to receive(:create_file).and_raise(error)
+      end
+
+      it 'propagates the error' do
+        expect { connector.upload(filename, data) }.to raise_error(error)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/capture/storage_spec.rb b/spec/lib/gitlab/database/capture/storage_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..1804b523e13d9719341ea9cb7ddbd95ee69c9ab1
--- /dev/null
+++ b/spec/lib/gitlab/database/capture/storage_spec.rb
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::Capture::Storage, feature_category: :database do
+  let(:filename) { 'wal_capture' }
+  let(:storage) { described_class.new }
+  let(:local_connector) { instance_double(Gitlab::Database::Capture::StorageConnectors::Local, upload: true) }
+  let(:gcs_connector) { instance_double(Gitlab::Database::Capture::StorageConnectors::Gcs, upload: true) }
+  let(:configured_settings) { {} }
+  let(:data) do
+    <<~NDJSON
+      {"id": 1, "sql": "SELECT 1 FROM \"public\".\"users\" LIMIT 1;"}
+      {"id": 2, "sql": "SELECT * FROM \"public\".\"projects\" WHERE \"projects\".\"id\" = 1;"}
+      {"id": 3, "sql": "DELETE FROM \"public\".\"users\" WHERE \"users\".\"id\" = 1;"}
+    NDJSON
+  end
+
+  before do
+    allow(Gitlab::AppLogger).to receive(:info)
+    allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(0.0, 1.5)
+    allow(Gitlab::Database::Capture::StorageConnectors::Local).to receive(:new).and_return(local_connector)
+    allow(Gitlab::Database::Capture::StorageConnectors::Gcs).to receive(:new).and_return(gcs_connector)
+
+    allow(Settings).to(
+      receive_message_chain(:database_traffic_capture, :config, :storage, :connector).and_return(
+        GitlabSettings::Options.build(configured_settings)
+      )
+    )
+  end
+
+  describe '#upload' do
+    context 'when using Google connector' do
+      let(:configured_settings) do
+        {
+          provider: 'Gcs',
+          project_id: 'my-project',
+          credentials: '/path/to/keyfile.json',
+          bucket: 'my-bucket'
+        }
+      end
+
+      it 'uses the GCS connector' do
+        expect(Gitlab::Database::Capture::StorageConnectors::Gcs).to(
+          receive(:new).with(Settings.database_traffic_capture.config.storage.connector).and_return(gcs_connector)
+        )
+        expect(gcs_connector).to receive(:upload).with(filename, data)
+
+        storage.upload(filename, data)
+      end
+
+      it 'logs the upload request and completion' do
+        expect(Gitlab::AppLogger).to receive(:info).with(
+          {
+            message: 'Upload request for database capture',
+            connector: 'Gcs',
+            filename: filename,
+            duration: nil
+          }.compact
+        )
+
+        expect(Gitlab::AppLogger).to receive(:info).with(
+          {
+            message: 'Database capture upload completed',
+            connector: 'Gcs',
+            filename: filename,
+            duration: 1.5
+          }.compact
+        )
+
+        storage.upload(filename, data)
+      end
+    end
+
+    context 'when Settings are not configured' do
+      it 'falls back to Local connector' do
+        expect(Gitlab::Database::Capture::StorageConnectors::Local).to(
+          receive(:new).with(Settings.database_traffic_capture.config.storage.connector).and_return(local_connector)
+        )
+        expect(local_connector).to receive(:upload).with(filename, data)
+
+        storage.upload(filename, data)
+      end
+    end
+
+    context 'when the connection fails to upload the file' do
+      let(:configured_settings) do
+        {
+          provider: 'Gcs',
+          project_id: 'my-project',
+          credentials: '/path/to/keyfile.json',
+          bucket: 'my-bucket'
+        }
+      end
+
+      before do
+        allow(gcs_connector).to receive(:upload).and_raise(StandardError, 'GCS unavailable.')
+      end
+
+      it 'logs the message and re-raise the error' do
+        expect(Gitlab::AppLogger).to receive(:info).with(
+          {
+            message: 'Database capture upload failed: GCS unavailable.',
+            connector: 'Gcs',
+            filename: filename,
+            duration: nil
+          }.compact
+        )
+
+        expect { storage.upload(filename, data) }.to raise_error('GCS unavailable.')
+      end
+    end
+  end
+end