diff --git a/app/models/blob.rb b/app/models/blob.rb index 6a502997de4157370213ffb850fcca653bef18bb..d6f080b78a815581c6dfbafa6557d5aaa6b69e8a 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -119,7 +119,7 @@ def inspect # If the blob is a text based blob the content is converted to UTF-8 and any # invalid byte sequences are replaced. def data - if binary_in_repo? + if defined?(@binary) ? @binary : binary_in_repo? super else @data ||= super.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) @@ -162,11 +162,17 @@ def raw_size end end + def mark_as_non_binary! + @binary = false + end + # Returns whether the file that this blob represents is binary. If this blob is # an LFS pointer, we assume the file stored in LFS is binary, unless a # text-based rich blob viewer matched on the file's extension. Otherwise, this # depends on the type of the blob itself. def binary? + return @binary if defined?(@binary) + if stored_externally? if rich_viewer rich_viewer.binary? diff --git a/app/models/concerns/blob_like.rb b/app/models/concerns/blob_like.rb index dc80f8d62f4d2786ec6da06ba2b36d43ad7dfd07..ec15ebcf31c76f435390a19c84718970e9a6ca2c 100644 --- a/app/models/concerns/blob_like.rb +++ b/app/models/concerns/blob_like.rb @@ -32,6 +32,10 @@ def binary_in_repo? false end + def binary? + binary_in_repo? + end + def load_all_data!(repository) # No-op end diff --git a/app/models/diff_note.rb b/app/models/diff_note.rb index cdba51e3a0e7c00b6229191e69d0edf5795de3ff..b89eb1a0e4d5dd4b781d3dedc57e8f94db195dae 100644 --- a/app/models/diff_note.rb +++ b/app/models/diff_note.rb @@ -78,7 +78,7 @@ def create_diff_file end creation_params = diff_file.diff.to_hash - .except(:too_large, :generated, :encoded_file_path) + .except(:too_large, :generated, :encoded_file_path, :binary) .merge(diff: diff_file.diff_hunk(diff_line)) create_note_diff_file(creation_params) diff --git a/lib/gitlab/blob_helper.rb b/lib/gitlab/blob_helper.rb index 25d79c1085a7033905d0f0882f5f308da6fab2f8..950129689f474aa8169c4778ae207416c2663c0a 100644 --- a/lib/gitlab/blob_helper.rb +++ b/lib/gitlab/blob_helper.rb @@ -14,7 +14,7 @@ def known_extension? end def viewable? - !large? && text_in_repo? + !large? || !binary? end MEGABYTE = 1024 * 1024 @@ -137,8 +137,11 @@ def encoded_newlines_re end def ruby_encoding - if hash = find_encoding - hash[:ruby_encoding] + strong_memoize(:ruby_encoding) do + next unless data + + result = Gitlab::EncodingHelper.detect_encoding(data, limit: 128) + result[:ruby_encoding] if result end end diff --git a/lib/gitlab/diff/file.rb b/lib/gitlab/diff/file.rb index e5a9f002fa9c695304c972ea02671886d417d0e9..ae079f59690aedfb3a2550a35ab98fc2197f2732 100644 --- a/lib/gitlab/diff/file.rb +++ b/lib/gitlab/diff/file.rb @@ -138,13 +138,13 @@ def old_content_sha def new_blob strong_memoize(:new_blob) do - new_blob_lazy&.itself + decorate_blob new_blob_lazy&.itself end end def old_blob strong_memoize(:old_blob) do - old_blob_lazy&.itself + decorate_blob old_blob_lazy&.itself end end @@ -259,7 +259,11 @@ def file_identifier_hash end def diffable? - diffable_by_attribute? && !text_with_binary_notice? + # NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as binary by git, + # but they are recognized as text files during encoding detection. + # These files have 'Binary files a/filename and b/filename differ' as their raw diff content which cannot be used. + # We need to handle this special case and avoid displaying incorrect diff. + diffable_by_attribute? && (text? ? !has_binary_notice? : true) end def binary_in_repo? @@ -308,7 +312,7 @@ def empty? def binary? strong_memoize(:is_binary) do - try_blobs(:binary?) + diff.binary? end end @@ -419,13 +423,16 @@ def image_diff? private - def diffable_by_attribute? - repository.attributes(file_path).fetch('diff', true) + def decorate_blob(the_blob) + # 'binary' check is coming from a Gitaly diff response which checks both blobs + # we can't tell if both files are binary when diff is 'binary: true' + # but we can guarantee that both files are text when diff is 'binary: false' + the_blob&.mark_as_non_binary! unless binary? + the_blob end - # NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as binary by git, but they are recognized as text files during encoding detection. These files have `Binary files a/filename and b/filename differ' as their raw diff content which cannot be used. We need to handle this special case and avoid displaying incorrect diff. - def text_with_binary_notice? - text? && has_binary_notice? + def diffable_by_attribute? + repository.attributes(file_path).fetch('diff', true) end def fetch_blob(sha, path) @@ -439,9 +446,10 @@ def fetch_blob(sha, path) end def total_blob_lines(blob) + blob_lines = blob.lines @total_lines ||= begin - line_count = blob.lines.size - line_count -= 1 if line_count > 0 && blob.lines.last.blank? + line_count = blob_lines.size + line_count -= 1 if line_count > 0 && blob_lines.last.blank? line_count end end diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index e6cbf3b1dff0e61042883945361f6aa8ba7619d0..9eb2463a952dfd697611098728209dbac61ac8ab 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -43,7 +43,7 @@ def encode!(message) def detect_encoding(data, limit: CharlockHolmes::EncodingDetector::DEFAULT_BINARY_SCAN_LEN) return if data.nil? - CharlockHolmes::EncodingDetector.new(limit).detect(data) + CharlockHolmes::EncodingDetector.new(limit).detect(data[0..limit]) end def detect_binary?(data, detect = nil) diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index 39d7a60ff79772adc2a6b54a0250a93adcedb591..f779b4407af1e457afeffbcc8873df823b5276a4 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -142,6 +142,7 @@ def initialize(options) def binary_in_repo? @binary.nil? ? super : @binary == true end + alias_method :binary?, :binary_in_repo? def data encode! @data diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index 2323b489e4d3a2678d2f66cda61cadf82dfb4668..69ea264d146f42f1aa87d9078162131ac94f277a 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -10,7 +10,7 @@ class Diff attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff # Stats properties - attr_accessor :new_file, :renamed_file, :deleted_file, :generated, :encoded_file_path + attr_accessor :new_file, :renamed_file, :deleted_file, :generated, :encoded_file_path, :binary alias_method :new_file?, :new_file alias_method :deleted_file?, :deleted_file @@ -44,6 +44,7 @@ class Diff too_large generated encoded_file_path + binary ].freeze BINARY_NOTICE_PATTERN = %r{Binary files (.*) and (.*) differ} @@ -202,7 +203,7 @@ def submodule? def unidiff return diff if diff.blank? - return json_safe_diff if detect_binary?(@diff) || has_binary_notice? + return json_safe_diff if binary? || has_binary_notice? old_path_header = new_file? ? '/dev/null' : "a/#{old_path}" new_path_header = deleted_file? ? '/dev/null' : "b/#{new_path}" @@ -260,16 +261,23 @@ def overflow? end def json_safe_diff - return @diff unless detect_binary?(@diff) + return @diff unless binary? + + old = new_file? ? '/dev/null' : "a/#{@old_path}" + new = deleted_file? ? '/dev/null' : "b/#{@new_path}" # the diff is binary, let's make a message for it - Diff.binary_message(@old_path, @new_path) + Diff.binary_message(old, new) end def has_binary_notice? self.class.has_binary_notice?(@diff) end + def binary? + binary.nil? ? detect_binary?(@diff) : binary + end + private def collapse_generated_file? @@ -283,7 +291,7 @@ def encode_diff_to_utf8(replace_invalid_utf8_chars) end def diff_should_be_converted? - !detect_binary?(@diff) || !@diff&.valid_encoding? + !binary? || !@diff&.valid_encoding? end def init_from_hash(hash) @@ -313,6 +321,7 @@ def init_from_gitaly(gitaly_diff) # Diffs exceeding limits returned from gitaly when "collect_all_paths" are enabled # are already pruned, but should be "collapsed" as they have no content @collapsed = true if @overflow + @binary = gitaly_diff.binary if gitaly_diff.respond_to?(:binary) end def prune_diff_if_eligible diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb index b4e3490796096e02cefbadcb256c2b9e06cbff76..1f06d8faabea0062e3d401f49ccb24a128472554 100644 --- a/lib/gitlab/git/diff_collection.rb +++ b/lib/gitlab/git/diff_collection.rb @@ -160,13 +160,17 @@ def over_safe_limits?(files) @collapsed_safe_files || @collapsed_safe_lines || @collapsed_safe_bytes end - def expand_diff?(raw) + def expand_diff?(raw, binary = nil) # For binary files only check @enforce_limits and @expanded, # for non-binary files also auto-expand when it's a single file diff allow_expansion = !@enforce_limits || @expanded - diff_content = raw.is_a?(Hash) ? raw[:diff] : raw.patch - return allow_expansion if detect_binary?(diff_content) || Gitlab::Git::Diff.has_binary_notice?(diff_content) + if binary.nil? + diff_content = raw.is_a?(Hash) ? raw[:diff] : raw.patch + return allow_expansion if detect_binary?(diff_content) || Gitlab::Git::Diff.has_binary_notice?(diff_content) + elsif binary + return allow_expansion + end @iterator.size == 1 || allow_expansion end @@ -175,7 +179,7 @@ def each_gitaly_patch @iterator.each_with_index do |raw, iterator_index| @empty = false - options = { expanded: expand_diff?(raw) } + options = { expanded: expand_diff?(raw, raw.binary) } options[:generated] = @generated_files.include?(raw.from_path) if @generated_files diff = Gitlab::Git::Diff.new(raw, **options) @@ -213,7 +217,7 @@ def each_serialized_patch break end - expand_diff = expand_diff?(raw) + expand_diff = expand_diff?(raw, raw[:binary]) diff = Gitlab::Git::Diff.new(raw, expanded: expand_diff) if !expand_diff && over_safe_limits?(i) && diff.line_count > 0 diff --git a/lib/gitlab/gitaly_client/diff.rb b/lib/gitlab/gitaly_client/diff.rb index c30b8ab1bc8fd0300e85ad0e2811589c69b047a8..f6840f87e0d9cf7cc0598fd8c3193198912f8712 100644 --- a/lib/gitlab/gitaly_client/diff.rb +++ b/lib/gitlab/gitaly_client/diff.rb @@ -3,7 +3,9 @@ module Gitlab module GitalyClient class Diff - ATTRS = %i[from_path to_path old_mode new_mode from_id to_id patch overflow_marker collapsed too_large].freeze + ATTRS = %i[ + from_path to_path old_mode new_mode from_id to_id patch overflow_marker collapsed too_large binary + ].freeze include AttributesBag end diff --git a/spec/lib/gitlab/blob_helper_spec.rb b/spec/lib/gitlab/blob_helper_spec.rb index 6d1e6ea59a3caa37aa3ea8765f9dc9a2850316fb..0e0ff6e7a9e8a1837e05b01d3e4f842170e37655 100644 --- a/spec/lib/gitlab/blob_helper_spec.rb +++ b/spec/lib/gitlab/blob_helper_spec.rb @@ -5,68 +5,88 @@ RSpec.describe Gitlab::BlobHelper do include FakeBlobHelpers - let(:project) { create(:project) } - let(:blob) { fake_blob(path: 'file.txt') } + let_it_be(:project) { create(:project) } + let(:utf8_text) { "some UTF-8 text with a snowman ☃" } + let(:text_blob) { fake_blob(path: 'file.txt', data: utf8_text) } + let(:binary_blob) { fake_blob(path: 'file.bin', binary: true) } + let(:large_text_blob) { fake_blob(path: 'file.txt', size: 2.megabytes, data: 'a' * 2.megabytes) } + let(:large_binary_blob) { fake_blob(path: 'test.pdf', size: 2.megabytes, binary: true) } + let(:nil_data_blob) { fake_blob(path: 'file.txt', data: nil) } let(:bmp_blob) { fake_blob(path: 'file.bmp') } let(:webp_blob) { fake_blob(path: 'file.webp') } - let(:large_blob) { fake_blob(path: 'test.pdf', size: 2.megabytes, binary: true) } describe '#extname' do it 'returns the extension' do - expect(blob.extname).to eq('.txt') + expect(text_blob.extname).to eq('.txt') end end describe '#known_extension?' do it 'returns true' do - expect(blob.known_extension?).to be_truthy + expect(text_blob.known_extension?).to be_truthy end end - describe '#viewable' do - it 'returns true' do - expect(blob.viewable?).to be_truthy + describe '#viewable?' do + context 'with a small text blob' do + it 'returns true' do + expect(text_blob.viewable?).to be_truthy + end end - it 'returns false' do - expect(large_blob.viewable?).to be_falsey + context 'with a large binary blob' do + it 'returns false' do + expect(large_binary_blob.viewable?).to be_falsey + end + end + + context 'with a large text blob' do + it 'returns true' do + expect(large_text_blob.viewable?).to be_truthy + end + end + + context 'with a small binary blob' do + it 'returns true' do + expect(binary_blob.viewable?).to be_truthy + end end end describe '#large?' do it 'returns false' do - expect(blob.large?).to be_falsey + expect(text_blob.large?).to be_falsey end it 'returns true' do - expect(large_blob.large?).to be_truthy + expect(large_binary_blob.large?).to be_truthy end end describe '#binary?' do it 'returns true' do - expect(large_blob.binary?).to be_truthy + expect(large_binary_blob.binary?).to be_truthy end it 'returns false' do - expect(blob.binary?).to be_falsey + expect(text_blob.binary?).to be_falsey end end - describe '#text?' do + describe '#text_in_repo?' do it 'returns true' do - expect(blob.text_in_repo?).to be_truthy + expect(text_blob.text_in_repo?).to be_truthy end it 'returns false' do - expect(large_blob.text_in_repo?).to be_falsey + expect(large_binary_blob.text_in_repo?).to be_falsey end end describe '#image?' do context 'with a .txt file' do it 'returns false' do - expect(blob.image?).to be_falsey + expect(text_blob.image?).to be_falsey end end @@ -85,57 +105,70 @@ describe '#mime_type' do it 'returns text/plain' do - expect(blob.mime_type).to eq('text/plain') + expect(text_blob.mime_type).to eq('text/plain') end it 'returns application/pdf' do - expect(large_blob.mime_type).to eq('application/pdf') + expect(large_binary_blob.mime_type).to eq('application/pdf') end end describe '#binary_mime_type?' do it 'returns false' do - expect(blob.binary_mime_type?).to be_falsey + expect(text_blob.binary_mime_type?).to be_falsey end end describe '#lines' do it 'returns the payload in an Array' do - expect(blob.lines).to eq(['foo']) + expect(text_blob.lines).to eq([utf8_text]) end end describe '#content_type' do it 'returns text/plain' do - expect(blob.content_type).to eq('text/plain; charset=utf-8') + expect(text_blob.content_type).to eq('text/plain; charset=utf-8') end it 'returns text/plain' do - expect(large_blob.content_type).to eq('application/pdf') + expect(large_binary_blob.content_type).to eq('application/pdf') end end describe '#encoded_newlines_re' do it 'returns a regular expression' do - expect(blob.encoded_newlines_re).to eq(/\r\n|\r|\n/) + expect(text_blob.encoded_newlines_re).to eq(/\r\n|\r|\n/) end end describe '#ruby_encoding' do it 'returns UTF-8' do - expect(blob.ruby_encoding).to eq('UTF-8') + expect(text_blob.ruby_encoding).to eq('UTF-8') + end + + context 'when blob data is nil' do + it 'returns nil' do + expect(nil_data_blob.ruby_encoding).to be_nil + end + end + + it 'memoizes the result' do + expect(Gitlab::EncodingHelper).to receive(:detect_encoding).once.and_call_original + + text_blob.ruby_encoding + text_blob.ruby_encoding end end describe '#encoding' do it 'returns UTF-8' do - expect(blob.ruby_encoding).to eq('UTF-8') + expect(text_blob.ruby_encoding).to eq('UTF-8') end end describe '#empty?' do it 'returns false' do - expect(blob.empty?).to be_falsey + expect(text_blob.empty?).to be_falsey end end end diff --git a/spec/lib/gitlab/diff/file_spec.rb b/spec/lib/gitlab/diff/file_spec.rb index 4c44779a4bd3269d25bd7a00b283a2a6141ee57d..aaa72ee94d46218ccb85de75871f632dc66b716d 100644 --- a/spec/lib/gitlab/diff/file_spec.rb +++ b/spec/lib/gitlab/diff/file_spec.rb @@ -324,10 +324,19 @@ def delete_file(file_name) expect(old_data).to include('raise "System commands must be given as an array of strings"') expect(data).to include('raise RuntimeError, "System commands must be given as an array of strings"') end + + it 'marks text as non binary' do + expect(diff_file.old_blob).not_to receive(:binary_in_repo?) + expect(diff_file.new_blob).not_to receive(:binary_in_repo?) + expect(diff_file.old_blob.binary?).to be(false) + expect(diff_file.new_blob.binary?).to be(false) + end end describe '#diffable?' do context 'when attributes exist' do + # this spec is not properly isolated, we need to create a fresh project to avoid stale state + let_it_be(:project) { create(:project, :repository) } let(:commit) { project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863') } let(:diffs) { commit.diffs } @@ -356,6 +365,7 @@ def delete_file(file_name) let(:diff_file) { commit.diffs.diff_file_with_new_path('VERSION') } it "returns false" do + diff_file.diff.binary = false expect(diff_file.diffable?).to be_falsey end end @@ -857,9 +867,15 @@ def popen(cmd, path=nil) end describe '#binary?' do - it 'returns false' do + it 'returns false for text' do + expect(diff_file.diff).to receive(:binary?) expect(diff_file).not_to be_binary end + + it 'returns true for binary diff' do + allow(diff_file.diff).to receive(:binary?).and_return(true) + expect(diff_file).to be_binary + end end describe '#size' do @@ -900,7 +916,7 @@ def popen(cmd, path=nil) let(:diff_file) { commit.diffs.diff_file_with_new_path('VERSION') } it 'returns a Not Diffable viewer' do - expect(diff_file.simple_viewer).to be_a(DiffViewer::NotDiffable) + expect(diff_file.simple_viewer).to be_a(DiffViewer::NoPreview) end it { expect(diff_file.highlighted_diff_lines).to eq([]) } diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb index db7961fc0c9adf646c0f6a6bea6bc71a8c011919..04fff601e602e431e2925849be8477714cfeef79 100644 --- a/spec/lib/gitlab/encoding_helper_spec.rb +++ b/spec/lib/gitlab/encoding_helper_spec.rb @@ -249,28 +249,27 @@ let(:data) { binary_string } let(:kwargs) { {} } - context 'detects encoding' do - it { is_expected.to be_a(Hash) } + it { is_expected.to be_a(Hash) } - it 'correctly detects the binary' do - expect(subject[:type]).to eq(:binary) - end + it 'correctly detects the binary' do + expect(subject[:type]).to eq(:binary) + end - context 'data is nil' do - let(:data) { nil } + it 'calls the detector with a substring up to the specified limit' do + limit = 10 + data = 'a' * 20 + detector_double = instance_double(CharlockHolmes::EncodingDetector) - it { is_expected.to be_nil } - end + allow(CharlockHolmes::EncodingDetector).to receive(:new).with(limit).and_return(detector_double) + expect(detector_double).to receive(:detect).with(data[0..limit]) - context 'limit is provided' do - let(:kwargs) do - { limit: 10 } - end + ext_class.detect_encoding(data, limit: limit) + end - it 'correctly detects the binary' do - expect(subject[:type]).to eq(:binary) - end - end + context 'when data is nil' do + let(:data) { nil } + + it { is_expected.to be_nil } end end diff --git a/spec/lib/gitlab/git/diff_collection_spec.rb b/spec/lib/gitlab/git/diff_collection_spec.rb index e9deee12b62849eb99ae128f47b0b4ae4d186f7d..48308c8193f8f70c208d4fded6bc9f78f56763da 100644 --- a/spec/lib/gitlab/git/diff_collection_spec.rb +++ b/spec/lib/gitlab/git/diff_collection_spec.rb @@ -742,6 +742,23 @@ def each context 'with binary files' do let(:iterator) { [{ diff: "Binary files /dev/null and b/test.bin differ\n" }] } + before do + allow(described_class).to receive(:default_limits) + .and_return({ max_files: 0, max_lines: max_lines, safe_max_bytes: 1 }) + end + + it 'prunes binary diffs even in single file case' do + diff = nil + subject.each do |d| + diff = d + end + expect(diff.diff).to eq('') + end + end + + context 'with binary files when binary flag is provided' do + let(:iterator) { [{ diff: 'arbitrary content', binary: true }] } + before do allow(described_class) .to receive(:default_limits) diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index def52bd16c1cd531eef4195bb4f75c8cb46eca9b..26f337cc1d8ff063412aebf561ef04af82f2fa5f 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -51,7 +51,7 @@ let(:diff) { described_class.new(@raw_diff_hash) } it 'initializes the diff' do - expect(diff.to_hash).to eq(@raw_diff_hash.merge(generated: nil)) + expect(diff.to_hash).to eq(@raw_diff_hash.merge(generated: nil, binary: nil)) end it 'does not prune the diff' do @@ -88,7 +88,7 @@ let(:raw_patch) { @raw_diff_hash[:diff] } it 'initializes the diff' do - expect(diff.to_hash).to eq(@raw_diff_hash.merge(generated: nil)) + expect(diff.to_hash).to eq(@raw_diff_hash.merge(generated: nil, binary: nil)) end it 'does not prune the diff' do @@ -96,6 +96,19 @@ end end + context 'when gitaly diff is binary' do + before do + allow(gitaly_diff).to receive(:binary).and_return(true) + end + + let(:raw_patch) { 'binary' } + + it 'marks the diff as binary' do + expect(diff).to be_binary + expect(diff.to_hash[:binary]).to be(true) + end + end + context 'using a diff that is too large' do let(:raw_patch) { 'a' * 204800 } @@ -241,7 +254,7 @@ let(:diff) { described_class.new(commit_delta) } it 'initializes the diff' do - expect(diff.to_hash).to eq(@raw_diff_hash.merge(diff: '', generated: nil)) + expect(diff.to_hash).to eq(@raw_diff_hash.merge(diff: '', generated: nil, binary: nil)) end it 'is not too large' do @@ -254,6 +267,7 @@ it 'is not a binary' do expect(diff).not_to have_binary_notice + expect(diff).not_to be_binary end end @@ -278,11 +292,9 @@ end context 'when the diff is binary' do - let(:project) { create(:project, :repository) } - it 'will not try to replace characters' do - expect(Gitlab::EncodingHelper).not_to receive(:encode_utf8_with_replacement_character?) - expect(binary_diff(project).diff).not_to be_empty + expect(Gitlab::EncodingHelper).not_to receive(:encode_utf8_with_replacement_character) + expect(binary_diff.diff).not_to be_empty end end end @@ -292,7 +304,7 @@ let(:not_replaced_diff_two) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_two, replace_invalid_utf8_chars: false })) } it 'will not try to convert invalid characters' do - expect(Gitlab::EncodingHelper).not_to receive(:encode_utf8_with_replacement_character?) + expect(Gitlab::EncodingHelper).not_to receive(:encode_utf8_with_replacement_character) end end end @@ -386,17 +398,79 @@ end end + describe '#binary?' do + subject(:diff) { described_class.new(@raw_diff_hash) } + + context 'when binary attribute is set' do + it 'returns true when binary is true' do + diff.binary = true + expect(diff).not_to receive(:detect_binary?) + expect(diff.binary?).to be(true) + end + + it 'returns false when binary is false' do + diff.binary = false + expect(diff).not_to receive(:detect_binary?) + expect(diff.binary?).to be(false) + end + end + + context 'when binary attribute is nil' do + before do + diff.binary = nil + end + + it 'falls back to detect_binary? and returns true' do + allow(diff).to receive(:detect_binary?).with(diff.diff).and_return(true) + expect(diff.binary?).to be(true) + end + + it 'falls back to detect_binary? and returns false' do + allow(diff).to receive(:detect_binary?).with(diff.diff).and_return(false) + expect(diff.binary?).to be(false) + end + end + end + describe '#json_safe_diff' do let(:project) { create(:project, :repository) } - it 'fake binary message when it detects binary' do - diff_message = "Binary files files/images/icn-time-tracking.pdf and files/images/icn-time-tracking.pdf differ\n" + it 'creates a binary message for a modified file' do + diff_message = "Binary files a/files/images/icn-time-tracking.pdf and b/files/images/icn-time-tracking.pdf differ\n" - diff = binary_diff(project) + diff = binary_diff expect(diff.diff).not_to be_empty expect(diff.json_safe_diff).to eq(diff_message) end + it 'creates a binary message for a new file' do + diff_message = "Binary files /dev/null and b/files/images/icn-time-tracking.pdf differ\n" + diff = described_class.new( + @raw_diff_hash.merge( + old_path: 'files/images/icn-time-tracking.pdf', + new_path: 'files/images/icn-time-tracking.pdf', + binary: true, + new_file: true + ) + ) + + expect(diff.json_safe_diff).to eq(diff_message) + end + + it 'creates a binary message for a deleted file' do + diff_message = "Binary files a/files/images/icn-time-tracking.pdf and /dev/null differ\n" + diff = described_class.new( + @raw_diff_hash.merge( + old_path: 'files/images/icn-time-tracking.pdf', + new_path: 'files/images/icn-time-tracking.pdf', + binary: true, + deleted_file: true + ) + ) + + expect(diff.json_safe_diff).to eq(diff_message) + end + it 'leave non-binary diffs as-is' do diff = described_class.new(gitaly_diff) @@ -624,9 +698,13 @@ end end - def binary_diff(project) - # rugged will not detect this as binary, but we can fake it - described_class.between(project.repository, 'add-pdf-text-binary', 'add-pdf-text-binary^').first + def binary_diff + described_class.new(@raw_diff_hash.merge({ + diff: 'binary-content', + old_path: 'files/images/icn-time-tracking.pdf', + new_path: 'files/images/icn-time-tracking.pdf', + binary: true + })) end def create_commit(project, user, params) diff --git a/spec/lib/gitlab/gitaly_client/diff_spec.rb b/spec/lib/gitlab/gitaly_client/diff_spec.rb index 2c1f684c0c5883c45d1251f9ae491c6c0856a6ba..2965c7fa11fbf3cad870e06f158ac66785f83c19 100644 --- a/spec/lib/gitlab/gitaly_client/diff_spec.rb +++ b/spec/lib/gitlab/gitaly_client/diff_spec.rb @@ -13,7 +13,8 @@ to_id: '8e5177d718c561d36efde08bad36b43687ee6bf0', patch: 'a' * 100, collapsed: false, - too_large: false + too_large: false, + binary: false } end @@ -28,6 +29,7 @@ it { is_expected.to respond_to(:patch) } it { is_expected.to respond_to(:collapsed) } it { is_expected.to respond_to(:too_large) } + it { is_expected.to respond_to(:binary) } describe '#==' do it { expect(subject).to eq(described_class.new(diff_fields)) } diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index f5c2ac09da5361499b9be64965a412e70f8ae005..7701b63f5964ff5ef1c11375e0c8a95ebac4a860 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -156,6 +156,16 @@ it_behaves_like '#data checks' end + + context 'when marked as non binary' do + it 'converts the data to UTF-8' do + blob = fake_blob(data: "\n\xFF\xB9\xC3", container: project) + blob.mark_as_non_binary! + + expect(blob).not_to receive(:binary_in_repo?) + expect(blob.data).to eq("\n���") + end + end end describe '#external_storage_error?' do @@ -239,6 +249,29 @@ expect(blob.binary?).to eq(:result) end end + + context 'when marked as non binary' do + let(:blob) { fake_blob(path: 'anything', container: project) } + + before do + blob.mark_as_non_binary! + end + + it 'uses internal value' do + expect(blob).not_to receive(:binary_in_repo?) + expect(blob.binary?).to eq(false) + end + end + end + + describe '#mark_as_non_binary!' do + let(:blob) { fake_blob(path: 'anything', container: project) } + + it 'marks the file as non binary' do + blob.mark_as_non_binary! + expect(blob).not_to receive(:binary_in_repo?) + expect(blob.binary?).to eq(false) + end end describe '#symlink?' do diff --git a/spec/support/helpers/fake_blob_helpers.rb b/spec/support/helpers/fake_blob_helpers.rb index 47fb9a345a39fce2d90f1681c3e3b8276de145e2..1093fa487fda33b5858c24488f104622cae68dcd 100644 --- a/spec/support/helpers/fake_blob_helpers.rb +++ b/spec/support/helpers/fake_blob_helpers.rb @@ -34,6 +34,10 @@ def binary_in_repo? @binary end + def binary? + @binary + end + def external_storage :lfs if @lfs_pointer end