diff --git a/app/controllers/concerns/send_file_upload.rb b/app/controllers/concerns/send_file_upload.rb index 5e44679dbfa8ffdc21039afb90b8be0d4c8f3a9b..634255dff2757f8fce3e7bc20b9fba45979b7642 100644 --- a/app/controllers/concerns/send_file_upload.rb +++ b/app/controllers/concerns/send_file_upload.rb @@ -3,8 +3,8 @@ module SendFileUpload def send_upload( file_upload, send_params: {}, redirect_params: {}, attachment: nil, proxy: false, - disposition: 'attachment', ssrf_params: {}) - content_type = content_type_for(attachment) + disposition: 'attachment', ssrf_params: {}, log_enabled: false) + content_type = content_type_for(attachment, log_enabled) if attachment response_disposition = ActionDispatch::Http::ContentDisposition.format(disposition: disposition, @@ -41,20 +41,10 @@ def send_upload( end end - def content_type_for(attachment) + def content_type_for(attachment, log_enabled) return '' unless attachment - guess_content_type(attachment) - end - - def guess_content_type(filename) - types = MIME::Types.type_for(filename) - - if types.present? - types.first.content_type - else - "application/octet-stream" - end + ::Gitlab::Utils::MimeType.from_filename(attachment, log_enabled: log_enabled) end private diff --git a/app/controllers/projects/packages/package_files_controller.rb b/app/controllers/projects/packages/package_files_controller.rb index 0b3ab1ae1ad99968e51fbf94bf829f741b167436..e8de61f9b98543f0090071de97d5860d497d078f 100644 --- a/app/controllers/projects/packages/package_files_controller.rb +++ b/app/controllers/projects/packages/package_files_controller.rb @@ -13,8 +13,10 @@ def download package_file.package.touch_last_downloaded_at + log_enabled = package_file.package.generic? && Feature.enabled?(:packages_generic_package_content_type, project) + send_upload(package_file.file, attachment: package_file.file_name_for_download, - ssrf_params: ::Packages::SsrfProtection.params_for(package_file.package)) + ssrf_params: ::Packages::SsrfProtection.params_for(package_file.package), log_enabled: log_enabled) end end end diff --git a/app/services/lfs/file_transformer.rb b/app/services/lfs/file_transformer.rb index ef227b6e20f732a43590c861d10372961615bc5b..477c17dcecea0d9a24b47f3a200c686da027a9d4 100644 --- a/app/services/lfs/file_transformer.rb +++ b/app/services/lfs/file_transformer.rb @@ -72,7 +72,7 @@ def cached_attributes # rubocop: disable CodeReuse/ActiveRecord def create_lfs_object!(lfs_pointer_file, file_content, detect_content_type) LfsObject.find_or_create_by(oid: lfs_pointer_file.sha256, size: lfs_pointer_file.size) do |lfs_object| - lfs_object.file = if detect_content_type && (content_type = Gitlab::Utils::MimeType.from_string(file_content)) + lfs_object.file = if detect_content_type && (content_type = ::Gitlab::Utils::MimeType.from_string(file_content)) CarrierWaveStringFile.new_file( file_content: file_content, filename: '', diff --git a/app/uploaders/content_type_whitelist.rb b/app/uploaders/content_type_whitelist.rb index 82c6b9b3a6114d08a7fb6befdbc4cc4c51dea5b0..4c9d180dab4966c23262f0f4feeacecd3b81ea80 100644 --- a/app/uploaders/content_type_whitelist.rb +++ b/app/uploaders/content_type_whitelist.rb @@ -43,7 +43,7 @@ def whitelisted_content_type?(content_type) def mime_magic_content_type(path) if path File.open(path) do |file| - Gitlab::Utils::MimeType.from_io(file) || 'invalid/invalid' + ::Gitlab::Utils::MimeType.from_io(file) || 'invalid/invalid' end end rescue Errno::ENOENT diff --git a/config/feature_flags/gitlab_com_derisk/packages_generic_package_content_type.yml b/config/feature_flags/gitlab_com_derisk/packages_generic_package_content_type.yml new file mode 100644 index 0000000000000000000000000000000000000000..8ebb9a583fc39dd067e9757122fe9556535a082c --- /dev/null +++ b/config/feature_flags/gitlab_com_derisk/packages_generic_package_content_type.yml @@ -0,0 +1,10 @@ +--- +name: packages_generic_package_content_type +description: Return generic package file with corresponding content type +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/444768 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/196427 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/547424 +milestone: '18.2' +group: group::package registry +type: gitlab_com_derisk +default_enabled: false diff --git a/lib/api/generic_packages.rb b/lib/api/generic_packages.rb index f77750cdfb1a71868167584b4faec32edeae873e..309394eb1fe3752463f7f11bbe2607dddadf338a 100644 --- a/lib/api/generic_packages.rb +++ b/lib/api/generic_packages.rb @@ -147,6 +147,7 @@ class GenericPackages < ::API::Base package = ::Packages::Generic::PackageFinder.new(project).execute!(params[:package_name], params[:package_version]) package_file = ::Packages::PackageFileFinder.new(package, encoded_file_name).execute! + determine_content_type(package_file) track_package_event('pull_package', :generic, project: project, namespace: project.namespace) present_package_file!(package_file, content_disposition: :attachment) @@ -177,6 +178,12 @@ def encoded_file_name file_name = [declared_params[:path], declared_params[:file_name]].compact.join('/') declared_params[:path].present? ? URI.encode_uri_component(file_name) : file_name end + + def determine_content_type(package_file) + return unless Feature.enabled?(:packages_generic_package_content_type, package_file.project) + + ::Gitlab::Utils::MimeType.from_filename(package_file.file_name, log_enabled: true) + end end end end diff --git a/lib/gitlab/sanitizers/exif.rb b/lib/gitlab/sanitizers/exif.rb index dee9e5f3d0e42f0997d2d628f67563d910171723..08fc9f90ac0ed9dd410ea5287051fdc8bcd048cc 100644 --- a/lib/gitlab/sanitizers/exif.rb +++ b/lib/gitlab/sanitizers/exif.rb @@ -169,7 +169,7 @@ def fetch_upload_to_file(uploader, dir) end def check_for_allowed_types(contents, raise_error: true) - mime_type = Gitlab::Utils::MimeType.from_string(contents) + mime_type = ::Gitlab::Utils::MimeType.from_string(contents) allowed = ALLOWED_MIME_TYPES.include?(mime_type) if !allowed && raise_error diff --git a/lib/gitlab/utils/mime_type.rb b/lib/gitlab/utils/mime_type.rb index d82092a2d2c12d21fcf7697ce5e12a509d9b9328..9a2ec61dfa64e485ca7c2b651b7277fd97b38641 100644 --- a/lib/gitlab/utils/mime_type.rb +++ b/lib/gitlab/utils/mime_type.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true require 'magic' +require 'mime/types' # This wraps calls to a gem which support mime type detection. # We use the `ruby-magic` gem instead of `mimemagic` due to licensing issues @@ -19,6 +20,22 @@ def from_string(string) string.type end + + def from_filename(filename, default: 'application/octet-stream', log_enabled: false) + return default unless filename.is_a?(String) + + types = ::MIME::Types.type_for(filename) + content_type = types.any? ? types.first.content_type : default + + if log_enabled + # Temporarily log the detected content types as JSON to build the allowed content type list + # When we remove this line, `log_enabled` needs to be removed as well + # https://gitlab.com/gitlab-org/gitlab/-/issues/444768 + ::Gitlab::AppJsonLogger.info(determined_content_type: content_type) + end + + content_type + end end end end diff --git a/spec/lib/gitlab/utils/mime_type_spec.rb b/spec/lib/gitlab/utils/mime_type_spec.rb index e8b369362243ccca0a0802ef3001dc8bb27489e3..3b7d600acb0953b3f092b2ffc6547783ce4211cd 100644 --- a/spec/lib/gitlab/utils/mime_type_spec.rb +++ b/spec/lib/gitlab/utils/mime_type_spec.rb @@ -1,57 +1,119 @@ # frozen_string_literal: true -require "fast_spec_helper" -require "rspec/parameterized" +require 'spec_helper' +require 'rspec/parameterized' -RSpec.describe Gitlab::Utils::MimeType do - describe ".from_io" do +RSpec.describe Gitlab::Utils::MimeType, feature_category: :shared do + describe '.from_io' do subject { described_class.from_io(io) } - context "input isn't an IO" do - let(:io) { "test" } + context 'input is not an IO' do + let(:io) { 'test' } - it "returns nil" do + it 'returns nil' do expect(subject).to be_nil end end - context "input is a file" do + context 'input is a file' do using RSpec::Parameterized::TableSyntax where(:fixture, :mime_type) do - "banana_sample.gif" | "image/gif" - "rails_sample.jpg" | "image/jpeg" - "rails_sample.png" | "image/png" - "rails_sample.bmp" | "image/bmp" - "rails_sample.tif" | "image/tiff" - "sample.ico" | "image/vnd.microsoft.icon" - "sample_doc.md" | "text/plain" - "csv_empty.csv" | "application/x-empty" + 'banana_sample.gif' | 'image/gif' + 'rails_sample.jpg' | 'image/jpeg' + 'rails_sample.png' | 'image/png' + 'rails_sample.bmp' | 'image/bmp' + 'rails_sample.tif' | 'image/tiff' + 'sample.ico' | 'image/vnd.microsoft.icon' + 'sample_doc.md' | 'text/plain' + 'csv_empty.csv' | 'application/x-empty' end with_them do - let(:io) { File.open(File.join(__dir__, "../../../fixtures", fixture)) } + let(:io) { File.open(File.join(__dir__, '../../../fixtures', fixture)) } it { is_expected.to eq(mime_type) } end end end - describe ".from_string" do + describe '.from_string' do subject { described_class.from_string(str) } - context "input isn't a string" do + context 'input is not a string' do let(:str) { nil } - it "returns nil" do - expect(subject).to be_nil - end + it { is_expected.to be_nil } end - context "input is a string" do - let(:str) { "plain text" } + context 'input is a string' do + let(:str) { 'plain text' } it { is_expected.to eq('text/plain') } end end + + describe '.from_filename' do + using RSpec::Parameterized::TableSyntax + + shared_examples 'log determination' do + it 'logs the determination' do + expect(Gitlab::AppJsonLogger).to receive(:info).with( + determined_content_type: mime_type + ) + + subject + end + end + + shared_examples 'not log determination' do + it 'does not log the determination' do + expect(Gitlab::AppJsonLogger).not_to receive(:info) + + subject + end + end + + context 'when default value is not given' do + where(:filename, :mime_type, :log_enabled, :log_example) do + 1 | 'application/octet-stream' | false | 'not log determination' + 'test.tf' | 'application/octet-stream' | false | 'not log determination' + 'test.css' | 'text/css' | false | 'not log determination' + 'test.js' | 'text/javascript' | false | 'not log determination' + 1 | 'application/octet-stream' | true | 'not log determination' + 'test.tf' | 'application/octet-stream' | true | 'log determination' + 'test.css' | 'text/css' | true | 'log determination' + 'test.js' | 'text/javascript' | true | 'log determination' + end + + with_them do + subject { described_class.from_filename(filename, log_enabled: log_enabled) } + + it { is_expected.to eq(mime_type) } + + it_behaves_like params[:log_example] + end + end + + context 'when default value is given' do + where(:filename, :default, :mime_type, :log_enabled, :log_example) do + 1 | nil | nil | false | 'not log determination' + 'test.tf' | nil | nil | false | 'not log determination' + 'test.tf' | 'text/plain' | 'text/plain' | false | 'not log determination' + 'test.css' | 'text/plain' | 'text/css' | false | 'not log determination' + 1 | nil | nil | true | 'not log determination' + 'test.tf' | nil | nil | true | 'log determination' + 'test.tf' | 'text/plain' | 'text/plain' | true | 'log determination' + 'test.css' | 'text/plain' | 'text/css' | true | 'log determination' + end + + with_them do + subject { described_class.from_filename(filename, default: default, log_enabled: log_enabled) } + + it { is_expected.to eq(mime_type) } + + it_behaves_like params[:log_example] + end + end + end end diff --git a/spec/requests/api/generic_packages_spec.rb b/spec/requests/api/generic_packages_spec.rb index d08fdbd4c2825ee3c5ca62ce22ee843733a43034..ea361bced0784493b21f4f16e6f07e669b393664 100644 --- a/spec/requests/api/generic_packages_spec.rb +++ b/spec/requests/api/generic_packages_spec.rb @@ -1121,6 +1121,28 @@ def upload_file( end end + shared_examples 'log content type determination' do + it 'logs content type determination' do + expect(Gitlab::AppJsonLogger).to receive(:info).with( + determined_content_type: 'application/gzip' + ) + + subject + end + + context 'when feature flag is disabled' do + before do + stub_feature_flags(packages_generic_package_content_type: false) + end + + it 'does not log content type determination' do + expect(Gitlab::AppJsonLogger).not_to receive(:info) + + subject + end + end + end + context 'when object storage is enabled' do let(:package_file) { create(:package_file, :generic, :object_storage, package: package) } @@ -1139,6 +1161,8 @@ def upload_file( stub_package_file_object_storage end + it_behaves_like 'log content type determination' + it 'includes response-content-disposition and filename in the redirect file URL' do download @@ -1167,6 +1191,8 @@ def upload_file( stub_package_file_object_storage(proxy_download: true) end + it_behaves_like 'log content type determination' + it 'sends a file with response-content-disposition and filename' do expect(::Gitlab::Workhorse).to receive(:send_url) .with(instance_of(String), expected_headers) diff --git a/spec/requests/projects/packages/package_files_controller_spec.rb b/spec/requests/projects/packages/package_files_controller_spec.rb index a68866de8931c5f48af6dbef888d85a2fd9d3b6f..2f832c9e987a4fbbfd03fc622cbf74ba0e7c80f4 100644 --- a/spec/requests/projects/packages/package_files_controller_spec.rb +++ b/spec/requests/projects/packages/package_files_controller_spec.rb @@ -25,6 +25,38 @@ .to eq(%(attachment; filename="#{filename}"; filename*=UTF-8''#{filename})) end + it 'logs content type determination' do + expect(Gitlab::AppJsonLogger).to receive(:info).with( + determined_content_type: 'application/zip' + ) + + subject + end + + shared_examples 'not log' do + it 'does not enable log' do + expect(Gitlab::AppJsonLogger).not_to receive(:info) + + subject + end + end + + context 'when package type is not generic' do + before do + package.update!(package_type: 'npm') + end + + it_behaves_like 'not log' + end + + context 'when feature flag is disabled' do + before do + stub_feature_flags(packages_generic_package_content_type: false) + end + + it_behaves_like 'not log' + end + context 'with remote object storage' do let(:package_file) { create(:package_file, :object_storage, package: package, file_name: filename) }