diff --git a/.rubocop_todo/performance/string_identifier_argument.yml b/.rubocop_todo/performance/string_identifier_argument.yml index 047a32c35f726cfa2409fd24206766ee501cf8d6..d067d03315a966bc65495a84bc0a35f374a22e03 100644 --- a/.rubocop_todo/performance/string_identifier_argument.yml +++ b/.rubocop_todo/performance/string_identifier_argument.yml @@ -184,12 +184,12 @@ Performance/StringIdentifierArgument: - 'spec/support/helpers/redis_helpers.rb' - 'spec/support/redis.rb' - 'spec/support/shared_contexts/requests/api/debian_repository_shared_context.rb' - - 'spec/support/shared_examples/banzai/filters/reference_filter_shared_examples.rb' - 'spec/support/shared_examples/ci/jwt_shared_examples.rb' - 'spec/support/shared_examples/controllers/application_settings_shared_examples.rb' - 'spec/support/shared_examples/controllers/githubish_import_controller_shared_examples.rb' - 'spec/support/shared_examples/database_health_status_indicators/prometheus_alert_based_shared_examples.rb' - 'spec/support/shared_examples/graphql/container_expiration_policy_shared_examples.rb' + - 'spec/support/shared_examples/lib/banzai/filters/reference_filter_shared_examples.rb' - 'spec/support/shared_examples/models/active_record_enum_shared_examples.rb' - 'spec/support/shared_examples/models/application_setting_shared_examples.rb' - 'spec/support/shared_examples/models/concerns/cascading_namespace_setting_shared_examples.rb' diff --git a/ee/lib/banzai/filter/references/iteration_reference_filter.rb b/ee/lib/banzai/filter/references/iteration_reference_filter.rb index 91a29bc31bdc48656cf2c28869b56332cb83de96..cb0c4b9c5b5891d0a553ecc0bf848aa74b6949ca 100644 --- a/ee/lib/banzai/filter/references/iteration_reference_filter.rb +++ b/ee/lib/banzai/filter/references/iteration_reference_filter.rb @@ -51,7 +51,10 @@ def parse_symbol(symbol, match_data) # holds the id { iteration_id: symbol.to_i, iteration_name: nil } else - { iteration_id: match_data[:iteration_id]&.to_i, iteration_name: match_data[:iteration_name]&.tr('"', '') } + { + iteration_id: match_data[:iteration_id]&.to_i, + iteration_name: match_data[:iteration_name]&.tr('"', '') + } end end @@ -80,23 +83,10 @@ def references_in(text, pattern = ::Iteration.reference_pattern) # default implementation. return super(text, pattern) if pattern != ::Iteration.reference_pattern - iterations = {} - - unescaped_html = unescape_html_entities(text).gsub(pattern).with_index do |match, index| - ident = identifier($~) - iteration = yield match, ident, $~[:project], $~[:namespace], $~ - - if iteration != match - iterations[index] = iteration - "#{::Banzai::Filter::References::AbstractReferenceFilter::REFERENCE_PLACEHOLDER}#{index}" - else - match - end + replace_references_in_text_with_html(text.gsub(pattern)) do |match_data| + ident = identifier(match_data) + yield match_data[0], ident, match_data[:project], match_data[:namespace], match_data end - - return text if iterations.empty? - - escape_with_placeholders(unescaped_html, iterations) end def find_iterations(parent, ids: nil, names: nil) diff --git a/ee/lib/banzai/filter/references/iterations_cadence_reference_filter.rb b/ee/lib/banzai/filter/references/iterations_cadence_reference_filter.rb index 50ccf109e82de62e17bdce86aa7b5d7addb44378..789988e90f07032866032da98e2e749c50b145ab 100644 --- a/ee/lib/banzai/filter/references/iterations_cadence_reference_filter.rb +++ b/ee/lib/banzai/filter/references/iterations_cadence_reference_filter.rb @@ -46,7 +46,10 @@ def parent_records(parent, ids) def parse_symbol(symbol, match_data) return { cadence_id: symbol.to_i, cadence_title: nil } if symbol - { cadence_id: match_data[:cadence_id]&.to_i, cadence_title: match_data[:cadence_title]&.tr('"', '') } + { + cadence_id: match_data[:cadence_id]&.to_i, + cadence_title: match_data[:cadence_title]&.tr('"', '') + } end # This method has the contract that if a string `ref` refers to a @@ -57,22 +60,10 @@ def record_identifier(record) end def references_in(text, pattern = object_class.reference_pattern) - cadences = {} - - unescaped_html = unescape_html_entities(text).gsub(pattern).with_index do |match, index| - ident = identifier($~) - cadence = yield match, ident, nil, nil, $~ - - next match if cadence == match - - cadences[index] = cadence - - "#{::Banzai::Filter::References::AbstractReferenceFilter::REFERENCE_PLACEHOLDER}#{index}" + replace_references_in_text_with_html(text.gsub(pattern)) do |match_data| + ident = identifier(match_data) + yield match_data[0], ident, nil, nil, match_data end - - return text if cadences.empty? - - escape_with_placeholders(unescaped_html, cadences) end def url_for_object(cadence, group) diff --git a/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb b/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb index 80ad20809a91b6597414f93ff246b01012f07b6d..a235456503a7f59f9ab70b0d2217e32298b816e4 100644 --- a/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb +++ b/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb @@ -12,13 +12,11 @@ module EpicReferenceFilter extend ActiveSupport::Concern def references_in(text, pattern = object_class.reference_pattern) - ::Gitlab::Utils::Gsub - .gsub_with_limit(text, pattern, limit: ::Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| + replace_references_in_text_with_html(::Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, + limit: ::Banzai::Filter::FILTER_ITEM_LIMIT)) do |match_data| symbol = match_data[object_sym] if object_class.reference_valid?(symbol) yield match_data[0], symbol.to_i, nil, match_data[:group], match_data - else - match_data[0] end end end @@ -32,9 +30,9 @@ def reference_class(object_sym, tooltip: false) super end - def data_attributes_for(text, group, object, link_content: false, link_reference: false) + def data_attributes_for(original, group, object, link_content: false, link_reference: false) { - original: text, + original: original, link: link_content, link_reference: link_reference, group: group.id, diff --git a/ee/lib/ee/banzai/filter/references/label_reference_filter.rb b/ee/lib/ee/banzai/filter/references/label_reference_filter.rb index eee57295f27cb1cc1498cd4a007e9722ea1777f2..6cdab6d09621e090253d4ac4553d97ea0da5ee05 100644 --- a/ee/lib/ee/banzai/filter/references/label_reference_filter.rb +++ b/ee/lib/ee/banzai/filter/references/label_reference_filter.rb @@ -8,7 +8,7 @@ module LabelReferenceFilter extend ::Gitlab::Utils::Override override :data_attributes_for - def data_attributes_for(text, parent, object, link_content: false, link_reference: false) + def data_attributes_for(original, parent, object, link_content: false, link_reference: false) return super unless object.scoped_label? # Enabling HTML tooltips for scoped labels here. diff --git a/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb b/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb index 85611f630856ca93ec39e3e20a6f67b2b58afab4..5b893e7172a59f393ab7e838c19f7265a0e45d6b 100644 --- a/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb +++ b/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb @@ -12,30 +12,22 @@ module VulnerabilityReferenceFilter extend ActiveSupport::Concern def references_in(text, pattern = object_class.reference_pattern) - text.gsub(pattern) do |match| - symbol = $~[object_sym] + replace_references_in_text_with_html(text.gsub(pattern)) do |match_data| + symbol = match_data[object_sym] if object_class.reference_valid?(symbol) - yield match, symbol.to_i, $~[:project], $~[:namespace], $~ - else - match + yield match_data[0], symbol.to_i, match_data[:project], match_data[:namespace], match_data end end end - def unescape_link(href) - return href if object_class.reference_pattern.match?(href) - - super - end - def url_for_object(vulnerability, project) urls = ::Gitlab::Routing.url_helpers urls.project_security_vulnerability_url(project, vulnerability, only_path: context[:only_path]) end - def data_attributes_for(text, project, object, link_content: false, link_reference: false) + def data_attributes_for(original, project, object, link_content: false, link_reference: false) { - original: text, + original: original, link: link_content, link_reference: link_reference, project: project.id, diff --git a/ee/spec/lib/banzai/filter/references/epic_reference_filter_spec.rb b/ee/spec/lib/banzai/filter/references/epic_reference_filter_spec.rb index 81324772abf176154aefe2b3929e8a04c5284b56..549cffe43e9fddfdc1425a57d0ddfe96e70da0ea 100644 --- a/ee/spec/lib/banzai/filter/references/epic_reference_filter_spec.rb +++ b/ee/spec/lib/banzai/filter/references/epic_reference_filter_spec.rb @@ -74,7 +74,7 @@ def doc(reference = nil) link = doc(reference).css('a').first expect(link).to have_attribute('data-original') - expect(link.attr('data-original')).to eq(CGI.escapeHTML(reference)) + expect(link.attr('data-original')).to eq_html(reference) end it 'includes a data-reference-format attribute' do @@ -102,13 +102,13 @@ def doc(reference = nil) it 'ignores invalid epic IIDs' do text = "Check &#{non_existing_record_iid}" - expect(doc(text).to_s).to include(ERB::Util.html_escape_once(text)) + expect(doc(text).to_s).to include_html(text) end it 'ignores out of range epic IDs' do text = "Check &1161452270761535925900804973910297" - expect(doc(text).to_s).to include(ERB::Util.html_escape_once(text)) + expect(doc(text).to_s).to include_html(text) end it 'does not process links containing epic numbers followed by text' do @@ -139,7 +139,7 @@ def doc(reference = nil) it 'ignores invalid epic IIDs' do text = "Check &#{non_existing_record_iid}" - expect(doc(text).to_s).to include(ERB::Util.html_escape_once(text)) + expect(doc(text).to_s).to include(text) end end @@ -203,13 +203,13 @@ def doc(reference = nil) it 'ignores a shorthand reference from another group' do text = "Check &#{epic.iid}" - expect(doc(text).to_s).to include(ERB::Util.html_escape_once(text)) + expect(doc(text).to_s).to include_html(text) end it 'ignores reference with incomplete group path' do text = "Check @#{epic.group.path}&#{epic.iid}" - expect(doc(text).to_s).to include(ERB::Util.html_escape_once(text)) + expect(doc(text).to_s).to include_html(text) end it 'links to a valid reference for full reference' do @@ -354,4 +354,9 @@ def doc(reference = nil) let(:filter_result) { reference_filter(text, project: nil, group: group) } let(:ends_with) { " #{CGI.escapeHTML(epic.to_reference(group))}

" } end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:reference) { epic.to_reference(group) } + let(:filter_instance) { described_class.new(nil, { project: nil, group: group }) } + end end diff --git a/ee/spec/lib/banzai/filter/references/iteration_reference_filter_spec.rb b/ee/spec/lib/banzai/filter/references/iteration_reference_filter_spec.rb index 8d2307e51c54f33b93106e09822d055da57b8772..27c338e245d40c0d6451fbacc916641a2de1274f 100644 --- a/ee/spec/lib/banzai/filter/references/iteration_reference_filter_spec.rb +++ b/ee/spec/lib/banzai/filter/references/iteration_reference_filter_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Banzai::Filter::References::IterationReferenceFilter do +RSpec.describe Banzai::Filter::References::IterationReferenceFilter, feature_category: :markdown do include FilterSpecHelper let(:parent_group) { create(:group, :public) } @@ -331,4 +331,23 @@ end.not_to exceed_all_query_limit(max_count) end end + + it_behaves_like 'a reference which does not unescape its content in data-original' do + let(:context) { { project: nil, group: group } } + let(:iteration_title) { "x<script>alert('xss');</script>" } + let(:resource) { create(:iteration, title: iteration_title, group: group) } + let(:reference) { %(#{resource.class.reference_prefix}"#{iteration_title}") } + + # This is probably bad. + let(:expected_resource_title) { "x" } + + let(:expected_href) { urls.iteration_url(resource) } + let(:expected_replacement_text) { resource.display_text } + end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:iteration) { create(:iteration, :with_title, group: group) } + let(:reference) { %(#{Iteration.reference_prefix}"#{iteration.name}") } + let(:filter_instance) { described_class.new(nil, { project: nil }) } + end end diff --git a/ee/spec/lib/banzai/filter/references/iterations_cadence_reference_filter_spec.rb b/ee/spec/lib/banzai/filter/references/iterations_cadence_reference_filter_spec.rb index e7686c03cda94d146418f6bc97776b919bfd8e66..f0715d8a86b412c6be964c6269d2889d44f8d64f 100644 --- a/ee/spec/lib/banzai/filter/references/iterations_cadence_reference_filter_spec.rb +++ b/ee/spec/lib/banzai/filter/references/iterations_cadence_reference_filter_spec.rb @@ -39,7 +39,7 @@ link = doc(text).css('a').first expect(link).to have_attribute('data-original') - expect(link.attr('data-original')).to eq(reference) + expect(link.attr('data-original')).to eq_html(reference) end end @@ -119,4 +119,21 @@ def doc(text) expect { reference_filter(markdown, context) }.not_to exceed_all_query_limit(1) end end + + it_behaves_like 'a reference which does not unescape its content in data-original' do + let(:context) { { project: nil, group: group } } + let(:cadence_title) { "x" } + let(:cadence_title_escaped) { "x<script>alert('xss');</script>" } + let(:resource) { create(:iterations_cadence, title: cadence_title, group: group) } + let(:reference) { %(#{resource.class.reference_prefix}"#{cadence_title_escaped}"#{resource.class.reference_postfix}) } + + let(:expected_resource_title) { cadence_title } + let(:expected_href) { urls.group_iteration_cadences_url(group, resource) } + let(:expected_replacement_text) { "#{resource.class.reference_prefix}#{resource.id}#{resource.class.reference_postfix}" } + end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:reference) { cadence.to_reference } + let(:filter_instance) { described_class.new(nil, { project: nil }) } + end end diff --git a/ee/spec/lib/banzai/filter/references/vulnerability_reference_filters_spec.rb b/ee/spec/lib/banzai/filter/references/vulnerability_reference_filters_spec.rb index d5fcf17b5e59cf6c7f68021860fd798eef51700d..d67a2971f8825c57970c1be07f890daeb5b594b1 100644 --- a/ee/spec/lib/banzai/filter/references/vulnerability_reference_filters_spec.rb +++ b/ee/spec/lib/banzai/filter/references/vulnerability_reference_filters_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Banzai::Filter::References::VulnerabilityReferenceFilter do +RSpec.describe Banzai::Filter::References::VulnerabilityReferenceFilter, feature_category: :markdown do include FilterSpecHelper let(:urls) { Gitlab::Routing.url_helpers } @@ -198,4 +198,9 @@ def doc(reference = nil) expect { reference_filter(multiple_references).to_html }.not_to exceed_query_limit(control) end end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:reference) { "[vulnerability:#{vulnerability.id}]" } + let(:filter_instance) { described_class.new(nil, { project: }) } + end end diff --git a/lib/banzai/filter/playable_link_filter.rb b/lib/banzai/filter/playable_link_filter.rb index a56961a0c26cdd3fdc9a5697c81213a8af133fa4..77d64965ef2b11592d263a717c9cb64f7c1ace1c 100644 --- a/lib/banzai/filter/playable_link_filter.rb +++ b/lib/banzai/filter/playable_link_filter.rb @@ -55,14 +55,14 @@ def media_element(doc, element) end def download_link(doc, element) - link_content = element['title'] || element['alt'] + link_text = element['title'] || element['alt'] link_element_attrs = { class: 'gl-text-sm gl-text-subtle gl-mb-1', href: element['src'], target: '_blank', rel: 'noopener noreferrer', - title: "Download '#{link_content}'" + title: "Download '#{link_text}'" } # make sure the original non-proxied src carries over @@ -70,7 +70,7 @@ def download_link(doc, element) link_element_attrs['data-canonical-src'] = element['data-canonical-src'] end - doc.document.create_element('a', link_content, link_element_attrs) + doc.document.create_element('a', link_text, link_element_attrs) end def media_node(doc, element) diff --git a/lib/banzai/filter/references/abstract_reference_filter.rb b/lib/banzai/filter/references/abstract_reference_filter.rb index dc21b224b1d3a767297cbcc3eff2c6ba699374cb..b5a0a0d7dfe9c3d67bc195c55c0cb5836ae43268 100644 --- a/lib/banzai/filter/references/abstract_reference_filter.rb +++ b/lib/banzai/filter/references/abstract_reference_filter.rb @@ -16,47 +16,31 @@ def initialize(doc, context = nil, result = nil) @reference_cache = ReferenceCache.new(self, context, result) end - # REFERENCE_PLACEHOLDER is used for re-escaping HTML text except found - # reference (which we replace with placeholder during re-scaping). The - # random number helps ensure it's pretty close to unique. Since it's a - # transitory value (it never gets saved) we can initialize once, and it - # doesn't matter if it changes on a restart. - REFERENCE_PLACEHOLDER = "_reference_#{SecureRandom.hex(16)}_" - REFERENCE_PLACEHOLDER_PATTERN = %r{#{REFERENCE_PLACEHOLDER}(\d+)} - - # Public: Find references in text (like `!123` for merge requests) + # See ReferenceFilter#references_in for requirements on the input, block, + # and return value of this function. # - # references_in(text) do |match, id, project_ref, matches| - # object = find_object(project_ref, id) - # "#{object.to_reference}" - # end - # - # text - String text to search. - # - # Yields the String match, the Integer referenced object ID, an optional String - # of the external project reference, and all of the matchdata. - # - # Returns a String replaced with the return of the block. + # This function yields the String match, the Integer referenced object ID, an optional String + # of the external project reference, an optional String of the namespace reference, + # and the full MatchData. def references_in(text, pattern = object_class.reference_pattern) - Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| + replace_references_in_text_with_html(Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, + limit: Banzai::Filter::FILTER_ITEM_LIMIT)) do |match_data| if ident = identifier(match_data) - yield match_data[0], ident, match_data.named_captures['project'], match_data.named_captures['namespace'], - match_data - else - match_data[0] + yield match_data[0], ident, match_data.named_captures['project'], + match_data.named_captures['namespace'], match_data end end end def identifier(match_data) - symbol = symbol_from_match(match_data) + symbol = symbol_from_match_data(match_data) parse_symbol(symbol, match_data) if object_class.reference_valid?(symbol) end - def symbol_from_match(match) + def symbol_from_match_data(match_data) key = object_sym - match[key] if match.names.include?(key.to_s) + match_data[key] if match_data.names.include?(key.to_s) end # Transform a symbol extracted from the text to a meaningful value @@ -133,34 +117,32 @@ def call nodes.each_with_index do |node, index| if text_node?(node) && ref_pattern - replace_text_when_pattern_matches(node, index, ref_pattern) do |content| + replace_node_when_text_matches(node, index, ref_pattern) do |content| object_link_filter(content, ref_pattern) end elsif element_node?(node) - yield_valid_link(node) do |link, inner_html| + yield_valid_link(node) do |link, text, inner_html| if ref_pattern && link =~ ref_pattern_anchor - replace_link_node_with_href(node, index, link) do - object_link_filter(link, ref_pattern_anchor, link_content: inner_html) - end + html = object_link_filter(link, ref_pattern_anchor, link_content_html: inner_html) + replace_node_with_html(node, index, html) if html next end next unless link_pattern - if link == inner_html && inner_html =~ link_pattern_start - replace_link_node_with_text(node, index) do - object_link_filter(inner_html, link_pattern_start, link_reference: true) - end + if link == text && text =~ link_pattern_start + html = object_link_filter(text, link_pattern_start, link_reference: true) + replace_node_with_html(node, index, html) if html next end if link_pattern_anchor.match?(link) - replace_link_node_with_href(node, index, link) do - object_link_filter(link, link_pattern_anchor, link_content: inner_html, link_reference: true) - end + html = object_link_filter(link, link_pattern_anchor, link_content_html: inner_html, + link_reference: true) + replace_node_with_html(node, index, html) if html next end @@ -176,14 +158,21 @@ def call # # text - String text to replace references in. # pattern - Reference pattern to match against. - # link_content - Original content of the link being replaced. + # link_content_html - Original HTML content of the link being replaced. # link_reference - True if this was using the link reference pattern, # false otherwise. # - # Returns a String with references replaced with links. All links - # have `gfm` and `gfm-OBJECT_NAME` class names attached for styling. - def object_link_filter(text, pattern, link_content: nil, link_reference: false) - references_in(text, pattern) do |match, id, project_ref, namespace_ref, matches| + # Returns String HTML with references replaced with links, or nil if no + # replacements were made. All links have `gfm` and `gfm-OBJECT_NAME` + # class names attached for styling. + # + # Note carefully: + # + # * `text` is text, not HTML. + # * `link_content_html`, if provided, is HTML. + # * The return value is HTML (or nil). + def object_link_filter(text, pattern, link_content_html: nil, link_reference: false) + references_in(text, pattern) do |match_text, id, project_ref, namespace_ref, matches| parent_path = if parent_type == :group reference_cache.full_group_path(namespace_ref) elsif parent_type == :namespace @@ -193,54 +182,51 @@ def object_link_filter(text, pattern, link_content: nil, link_reference: false) end parent = from_ref_cached(parent_path) + next unless parent - if parent - object = - if link_reference - find_object_from_link_cached(parent, id) - else - find_object_cached(parent, id) - end - end + object = + if link_reference + find_object_from_link_cached(parent, id) + else + find_object_cached(parent, id) + end - if object - title = object_link_title(object, matches) - klass = reference_class(object_sym) - - data_attributes = data_attributes_for( - link_content || match, - parent, - object, - link_content: !!link_content, - link_reference: link_reference - ) - data_attributes[:reference_format] = matches[:format] if matches.names.include?("format") - data_attributes.merge!(additional_object_attributes(object)) - - data = data_attribute(data_attributes) - - url = - if matches.names.include?("url") && matches[:url] - matches[:url] - else - url_for_object_cached(object, parent) - end + next unless object - url.chomp!(matches[:format]) if matches.names.include?("format") + title = object_link_title(object, matches) + klass = reference_class(object_sym) - content = context[:link_text] || link_content || object_link_text(object, matches) + data_attributes = data_attributes_for( + link_content_html || CGI.escapeHTML(match_text), + parent, + object, + link_content: !!link_content_html, + link_reference: link_reference + ) + data_attributes[:reference_format] = matches[:format] if matches.names.include?("format") + data_attributes.merge!(additional_object_attributes(object)) - link = write_opening_tag("a", { - "href" => url, - "title" => title, - "class" => klass, - **data - }) << content.to_s << "" + data = data_attribute(data_attributes) - wrap_link(link, object) - else - match - end + url = + if matches.names.include?("url") && matches[:url] + matches[:url] + else + url_for_object_cached(object, parent) + end + + url.chomp!(matches[:format]) if matches.names.include?("format") + + content = context[:link_text] || link_content_html || object_link_text(object, matches) + + link = write_opening_tag("a", { + "href" => url, + "title" => title, + "class" => klass, + **data + }) << content.to_s << "" + + wrap_link(link, object) end end @@ -248,7 +234,9 @@ def wrap_link(link, object) link end - def data_attributes_for(text, parent, object, link_content: false, link_reference: false) + # "link_content" is true when "original" is the inner HTML content, or false when "original" + # is HTML-escaped plain text representing the link as written. + def data_attributes_for(original, parent, object, link_content: false, link_reference: false) parent_id = case parent when Group { group: parent.id, namespace: parent.id } @@ -259,7 +247,7 @@ def data_attributes_for(text, parent, object, link_content: false, link_referenc end { - original: text, + original: original, link: link_content, link_reference: link_reference, object_sym => object.id @@ -306,14 +294,6 @@ def parent attr_accessor :reference_cache - def escape_with_placeholders(text, placeholder_data) - escaped = escape_html_entities(text) - - escaped.gsub(REFERENCE_PLACEHOLDER_PATTERN) do |match| - placeholder_data[Regexp.last_match(1).to_i] - end - end - def additional_object_attributes(object) {} end diff --git a/lib/banzai/filter/references/commit_range_reference_filter.rb b/lib/banzai/filter/references/commit_range_reference_filter.rb index 34ba4f7d4edb03361e85f0c1e7e063c026bc1720..38506972b34dba8049dda9c26ba2bd2a6710546d 100644 --- a/lib/banzai/filter/references/commit_range_reference_filter.rb +++ b/lib/banzai/filter/references/commit_range_reference_filter.rb @@ -11,7 +11,9 @@ class CommitRangeReferenceFilter < AbstractReferenceFilter self.object_class = CommitRange def references_in(text, pattern = object_reference_pattern) - Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| + replace_references_in_text_with_html( + Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, + limit: Banzai::Filter::FILTER_ITEM_LIMIT)) do |match_data| yield match_data[0], match_data[:commit_range], match_data[:project], match_data[:namespace], match_data end diff --git a/lib/banzai/filter/references/commit_reference_filter.rb b/lib/banzai/filter/references/commit_reference_filter.rb index 27d1fb52a3bf5683da20d27df233c9c8c315b9cc..685c40c280daaa7e1fa8d89fd8c2f8312b11b65c 100644 --- a/lib/banzai/filter/references/commit_reference_filter.rb +++ b/lib/banzai/filter/references/commit_reference_filter.rb @@ -11,7 +11,8 @@ class CommitReferenceFilter < AbstractReferenceFilter self.object_class = Commit def references_in(text, pattern = object_reference_pattern) - Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| + replace_references_in_text_with_html(Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, + limit: Banzai::Filter::FILTER_ITEM_LIMIT)) do |match_data| yield match_data[0], match_data[:commit], match_data[:project], match_data[:namespace], match_data end diff --git a/lib/banzai/filter/references/design_reference_filter.rb b/lib/banzai/filter/references/design_reference_filter.rb index db736b7328a20b59a472b350b1163dd101fa032f..edc84fe0a800cfa8057129bccf339634e27d495e 100644 --- a/lib/banzai/filter/references/design_reference_filter.rb +++ b/lib/banzai/filter/references/design_reference_filter.rb @@ -66,7 +66,7 @@ def url_for_object(design, project) Gitlab::Routing.url_helpers.designs_project_issue_path(project, design.issue, path_options) end - def data_attributes_for(_text, _project, design, **_kwargs) + def data_attributes_for(_original, _project, design, **_kwargs) super.merge(issue: design.issue_id) end diff --git a/lib/banzai/filter/references/external_issue_reference_filter.rb b/lib/banzai/filter/references/external_issue_reference_filter.rb index 6a4ee2e6a06174491fb01685736d3a2df91d10b5..d376356a05dabf09f0b0a3bae8a643af271a24a4 100644 --- a/lib/banzai/filter/references/external_issue_reference_filter.rb +++ b/lib/banzai/filter/references/external_issue_reference_filter.rb @@ -24,25 +24,30 @@ def self.default_issues_tracker?(project) # Public: Find `JIRA-123` issue references in text # - # references_in(text, pattern) do |match, issue| + # references_in(text, pattern) do |match_text, issue| # "##{issue}" # end # # text - String text to search. # - # Yields the String match and the String issue reference. + # Yields the String text match and the String issue reference. # - # Returns a String replaced with the return of the block. + # Returns a HTML String replaced with the return of the block. + # + # See ReferenceFilter#references_in for a detailed discussion. def references_in(text, pattern = object_reference_pattern) - case pattern - when Regexp - Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| - yield match_data[0], match_data[:issue] - end - when Gitlab::UntrustedRegexp - pattern.replace_gsub(text, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match| - yield match, match[:issue] + enumerator = + case pattern + when Regexp + Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) + when Gitlab::UntrustedRegexp + pattern.replace_gsub(text, limit: Banzai::Filter::FILTER_ITEM_LIMIT) + else + raise ArgumentError, "#{self.class.name} given #{pattern.class.name} pattern; should be Regexp or Gitlab::UntrustedRegexp" end + + replace_references_in_text_with_html(enumerator) do |match_data| + yield match_data[0], match_data[:issue] end end @@ -59,15 +64,17 @@ def call # issue's details page. # # text - String text to replace references in. - # link_content - Original content of the link being replaced. + # link_content_html - Original HTML content of the link being replaced. # - # Returns a String with `JIRA-123` references replaced with links. All + # Returns a HTML String with `JIRA-123` references replaced with links. All # links have `gfm` and `gfm-issue` class names attached for styling. - def object_link_filter(text, pattern, link_content: nil, link_reference: false) - references_in(text) do |match, id| + # + # Returns nil if no replacements are made. + def object_link_filter(text, pattern, link_content_html: nil, link_reference: false) + references_in(text) do |match_text, id| klass = reference_class(:issue) data = data_attribute(project: project.id, external_issue: id) - content = link_content || match + content = link_content_html || CGI.escapeHTML(match_text) write_opening_tag("a", { "title" => issue_title, diff --git a/lib/banzai/filter/references/issue_reference_filter.rb b/lib/banzai/filter/references/issue_reference_filter.rb index 3222f4eea9181b1b7071a97f7a7bbb2ef7f6e610..bb7b48cd79e8ce7e7c04960e26a4b4393ae0e5ae 100644 --- a/lib/banzai/filter/references/issue_reference_filter.rb +++ b/lib/banzai/filter/references/issue_reference_filter.rb @@ -38,7 +38,7 @@ def reference_class(object_sym, tooltip: false) super end - def data_attributes_for(text, parent, object, **data) + def data_attributes_for(original, parent, object, **data) additional_attributes = { iid: object.iid, namespace_path: parent.full_path } if parent.is_a?(Namespaces::ProjectNamespace) || parent.is_a?(Project) additional_attributes[:project_path] = parent.full_path diff --git a/lib/banzai/filter/references/label_reference_filter.rb b/lib/banzai/filter/references/label_reference_filter.rb index 10399f05631430c84def37f3ee495dad77e94a12..07ad15247528d04daeabcbe1c13db0f1b8b6ff86 100644 --- a/lib/banzai/filter/references/label_reference_filter.rb +++ b/lib/banzai/filter/references/label_reference_filter.rb @@ -70,23 +70,10 @@ def record_identifier(record) end def references_in(text, pattern = Label.reference_pattern) - labels = {} - - unescaped_html = unescape_html_entities(text).gsub(pattern).with_index do |match, index| - ident = identifier($~) - label = yield match, ident, $~[:project], $~[:namespace], $~ - - if label != match - labels[index] = label - "#{REFERENCE_PLACEHOLDER}#{index}" - else - match - end + replace_references_in_text_with_html(text.gsub(pattern)) do |match_data| + ident = identifier(match_data) + yield match_data[0], ident, match_data[:project], match_data[:namespace], match_data end - - return text if labels.empty? - - escape_with_placeholders(unescaped_html, labels) end def find_labels(parent, absolute_path: false) diff --git a/lib/banzai/filter/references/merge_request_reference_filter.rb b/lib/banzai/filter/references/merge_request_reference_filter.rb index 271c34fe4067481ea9a4c0734a62877b4ce7d0bb..9f11c5134ca8959e46034fec2a958f713a5347d2 100644 --- a/lib/banzai/filter/references/merge_request_reference_filter.rb +++ b/lib/banzai/filter/references/merge_request_reference_filter.rb @@ -52,7 +52,7 @@ def reference_class(object_sym, tooltip: false) super end - def data_attributes_for(text, parent, object, **data) + def data_attributes_for(original, parent, object, **data) super.merge(project_path: parent.full_path, iid: object.iid) end diff --git a/lib/banzai/filter/references/milestone_reference_filter.rb b/lib/banzai/filter/references/milestone_reference_filter.rb index fa1fd9baaf916ac011108011c157c0120a213686..df1d4595ce5530452218e4170ceee7f7a3c00488 100644 --- a/lib/banzai/filter/references/milestone_reference_filter.rb +++ b/lib/banzai/filter/references/milestone_reference_filter.rb @@ -58,7 +58,11 @@ def parse_symbol(symbol, match_data) # holds the iid { milestone_iid: symbol.to_i, milestone_name: nil, absolute_path: absolute_path } else - { milestone_iid: match_data[:milestone_iid]&.to_i, milestone_name: match_data[:milestone_name]&.tr('"', ''), absolute_path: absolute_path } + { + milestone_iid: match_data[:milestone_iid]&.to_i, + milestone_name: match_data[:milestone_name]&.tr('"', ''), + absolute_path: absolute_path + } end end @@ -87,23 +91,10 @@ def references_in(text, pattern = Milestone.reference_pattern) # default implementation. return super(text, pattern) if pattern != Milestone.reference_pattern - milestones = {} - - unescaped_html = unescape_html_entities(text).gsub(pattern).with_index do |match, index| - ident = identifier($~) - milestone = yield match, ident, $~[:project], $~[:namespace], $~ - - if milestone != match - milestones[index] = milestone - "#{REFERENCE_PLACEHOLDER}#{index}" - else - match - end + replace_references_in_text_with_html(text.gsub(pattern)) do |match_data| + ident = identifier(match_data) + yield match_data[0], ident, match_data[:project], match_data[:namespace], match_data end - - return text if milestones.empty? - - escape_with_placeholders(unescaped_html, milestones) end def find_milestones(parent, find_by_iid = false, absolute_path: false) @@ -159,7 +150,7 @@ def requires_unescaping? true end - def data_attributes_for(text, parent, object, link_content: false, link_reference: false) + def data_attributes_for(original, parent, object, link_content: false, link_reference: false) object_parent = object.resource_parent return super unless object_parent.is_a?(Group) diff --git a/lib/banzai/filter/references/project_reference_filter.rb b/lib/banzai/filter/references/project_reference_filter.rb index 52688a15245b75d3babd7496dbce8d39fc38680c..3805ac20ee0a8889dede3de5c7ad3079e30fd435 100644 --- a/lib/banzai/filter/references/project_reference_filter.rb +++ b/lib/banzai/filter/references/project_reference_filter.rb @@ -10,17 +10,20 @@ class ProjectReferenceFilter < ReferenceFilter # Public: Find `namespace/project>` project references in text # - # references_in(text) do |match, project| + # references_in(text) do |match_text, project| # "#{project}>" # end # # text - String text to search. # - # Yields the String match, and the String project name. + # Yields the String text match, and the String project name. # - # Returns a String replaced with the return of the block. + # Returns a HTML String with replacements made, or nil if no replacements were made. + # + # See ReferenceFilter#references_in for a detailed discussion. def references_in(text, pattern = object_reference_pattern) - Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| + replace_references_in_text_with_html(Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, + limit: Banzai::Filter::FILTER_ITEM_LIMIT)) do |match_data| yield match_data[0], "#{match_data[:namespace]}/#{match_data[:project]}" end end @@ -35,17 +38,15 @@ def object_reference_pattern # project page. # # text - String text to replace references in. - # link_content - Original content of the link being replaced. + # link_content_html - Original HTML content of the link being replaced. # - # Returns a String with `namespace/project>` references replaced with links. All links - # have `gfm` and `gfm-project` class names attached for styling. - def object_link_filter(text, pattern, link_content: nil, link_reference: false) - references_in(text) do |match, project_path| - cached_call(:banzai_url_for_object, match, path: [Project, project_path.downcase]) do + # Returns a String HTML with `namespace/project>` references replaced with links, or nil + # if no replacements were made. All links have `gfm` and `gfm-project` class names attached for styling. + def object_link_filter(text, pattern, link_content_html: nil, link_reference: false) + references_in(text) do |match_text, project_path| + cached_call(:banzai_url_for_object, match_text, path: [Project, project_path.downcase]) do if project = projects_hash[project_path.downcase] - link_to_project(project, link_content: link_content) || match - else - match + link_to_project(project, link_content_html: link_content_html) end end end @@ -85,10 +86,10 @@ def link_class reference_class(:project) end - def link_to_project(project, link_content: nil) + def link_to_project(project, link_content_html: nil) url = urls.project_url(project, only_path: context[:only_path]) data = data_attribute(project: project.id) - content = link_content || project.to_reference + content = link_content_html || CGI.escapeHTML(project.to_reference) write_opening_tag("a", { "href" => url, diff --git a/lib/banzai/filter/references/reference_cache.rb b/lib/banzai/filter/references/reference_cache.rb index 97bb24376c00fa6e21342a832aed56445e483000..82525a978f1ca2d49e9ab80706eebc3482ec0fcb 100644 --- a/lib/banzai/filter/references/reference_cache.rb +++ b/lib/banzai/filter/references/reference_cache.rb @@ -203,16 +203,12 @@ def refs_cache end def prepare_doc_for_scan - filter.requires_unescaping? ? unescape_html_entities(html_content) : html_content + filter.requires_unescaping? ? CGI.unescapeHTML(html_content) : html_content end def html_content result[:rendered_html] ||= filter.doc.to_html end - - def unescape_html_entities(text) - CGI.unescapeHTML(text.to_s) - end end end end diff --git a/lib/banzai/filter/references/reference_filter.rb b/lib/banzai/filter/references/reference_filter.rb index 07916f69e60f01a16d9b59fa598d7bcd10871a99..41d2220498df59ca646fb87b5b0a511b48dfe5f0 100644 --- a/lib/banzai/filter/references/reference_filter.rb +++ b/lib/banzai/filter/references/reference_filter.rb @@ -20,6 +20,13 @@ class ReferenceFilter < HTML::Pipeline::Filter REFERENCE_TYPE_ATTRIBUTE = :reference_type REFERENCE_TYPE_DATA_ATTRIBUTE_NAME = "data-#{REFERENCE_TYPE_ATTRIBUTE.to_s.dasherize}".freeze + # REFERENCE_PLACEHOLDER is used for replacement HTML to be inserted during + # reference matching. The random string helps ensure it's pretty close to unique. + # Since it's a transitory value (it never leaves the filter) we can initialize once, + # and it doesn't matter if it changes on a restart. + REFERENCE_PLACEHOLDER = "_reference_#{SecureRandom.hex(16)}_".freeze + REFERENCE_PLACEHOLDER_PATTERN = %r{#{REFERENCE_PLACEHOLDER}(\d+)} + class << self # Implement in child class # Example: self.reference_type = :merge_request @@ -50,15 +57,14 @@ def call nodes.each_with_index do |node, index| if text_node?(node) - replace_text_when_pattern_matches(node, index, object_reference_pattern) do |content| + replace_node_when_text_matches(node, index, object_reference_pattern) do |content| object_link_filter(content, object_reference_pattern) end elsif element_node?(node) - yield_valid_link(node) do |link, inner_html| + yield_valid_link(node) do |link, _text, inner_html| if ref_pattern_start.match?(link) - replace_link_node_with_href(node, index, link) do - object_link_filter(link, ref_pattern_start, link_content: inner_html) - end + html = object_link_filter(link, ref_pattern_start, link_content_html: inner_html) + replace_node_with_html(node, index, html) if html end end end @@ -67,23 +73,107 @@ def call doc end - # Public: Find references in text (like `!123` for merge requests) + # Public: Find references in text (like `!123` for merge requests). + # + # Simplified usage (see AbstractReferenceFilter#object_link_filter for a full example): # - # references_in(text) do |match, id, project_ref, matches| - # object = find_object(project_ref, id) - # "#{object.to_reference}" + # references_in(text) do |match_text, id, project_ref, namespace_ref, matches| + # object = find_object(namespace_ref, project_ref, id) + # if object + # write_opening_tag("a", { + # "href" => "...", + # # ... other attributes ... + # }) << CGI.escapeHTML(object.to_reference) << "" + # else + # CGI.escapeHTML(match_text) + # end # end # - # text - String text to search. + # text - String text to make replacements in. This is the content of a DOM text + # node, and *cannot* be returned without modification. + # + # Yields each String text match as the first argument; the remaining arguments + # depend on the particular subclass of ReferenceFilter being used. The block + # must return HTML, and not text. + # + # Returns a HTML String replaced with replacements made, or nil if no replacements + # were made. + # + # Notes: + # + # * The input is text -- we expect to match e.g. &1 or %"Git 2.5" without having to + # deal with HTML entities, as our object reference patterns are not compatible + # with entities. # - # Yields the String match, the Integer referenced object ID, an optional String - # of the external project reference, and all of the matchdata. + # * Likewise, any strings yielded to the block are text, but the block must return + # HTML, whether it's a successful resolution or not. Any text input used in the + # block must be escaped for return. # - # Returns a String replaced with the return of the block. + # * The return value is HTML, as the whole point is to create links (and possibly other + # elements). Care must be taken that any input text is escaped before reaching + # the output. This must be respected in all subclasses of ReferenceFilter. + # + # See AbstractReferenceFilter#references_in for a reference implementation that safely + # handles user input. def references_in(text, pattern = object_reference_pattern) raise NotImplementedError, "#{self.class} must implement method: #{__callee__}" end + # A helper to make it easier to implement #references_in correctly. + # + # Pass in the replacement Enumerator of choice, on *plain text* input. + # This will often look like this: + # + # Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) + # + # Or the simpler: + # + # text.gsub(pattern) + # + # In both cases, the subject we're performing replacements on is text, not HTML. + # + # This function will yield MatchData from the underlying enumerator -- $~ if it's available, + # otherwise the yield from the enumerator itself is asserted to be MatchData or RE2::MatchData. + # The given block should return HTML (!) to substitute in place, or nil if no replacement + # is to be made. + # + # The function returns HTML, safely substituting the block results in, while escaping all + # other text. The supplied block MUST escape any input text returned within it, if any. + # + # If no replacements were made --- that is, the enumerator didn't yield anything, or the block + # given to this function only returned nil --- nil is returned. + def replace_references_in_text_with_html(enumerator) + replacements = {} + + # We operate on text, yield text, and substitute back in only text. + replaced_text = enumerator.with_index do |enumerator_yielded, index| + match_data = $~ || enumerator_yielded + unless match_data.is_a?(MatchData) || match_data.is_a?(RE2::MatchData) + raise ArgumentError, "enumerator didn't yield MatchData (is #{match_data.inspect}) and $~ is unavailable" + end + + replacement = yield match_data + if replacement + # The yield returns HTML to us, but we can't substitute it back in yet --- there + # remains unescaped, textual content in unmatched parts of the string which we + # need to escape without affecting the block yields. Instead, store the result, + # and substitute back into the text a placeholder we can replace after escaping. + replacements[index] = replacement + "#{REFERENCE_PLACEHOLDER}#{index}" + else + match_data.to_s + end + end + + return unless replacements.any? + + # Escape the replaced_text --- which doesn't change the placeholders --- and only then + # replace the placeholders with the yielded HTML. + CGI.escapeHTML(replaced_text).gsub(REFERENCE_PLACEHOLDER_PATTERN) do + replacements[Regexp.last_match(1).to_i] + end + end + # Iterates over all and text() nodes in a document. # # Nodes are skipped whenever their ancestor is one of the nodes returned @@ -184,50 +274,44 @@ def reference_class(type, tooltip: true) "#{gfm_klass} has-tooltip" end - # Yields the link's URL and inner HTML whenever the node is a valid tag. + # Yields the link's URL, inner text, and inner HTML whenever the node is a valid tag. + # + # We expect that the URL and inner *text* will be equal when the link was a result of + # an autolink in Markdown. For example: + # + # ```markdown + # + # ``` + # + # produces the HTML: + # + # ```html + # https://gitlab.com/x?y="z"&fox + # ``` + # + # The href DOM attribute, after percent-decoding, is 'https://gitlab.com/x?y="z"&fox'. + # The node's inner text is identical: 'https://gitlab.com/x?y="z"&fox'. + # The node's inner HTML is 'https://gitlab.com/x?y="z"&fox'. def yield_valid_link(node) - link = unescape_link(node.attr('href').to_s) - inner_html = node.inner_html - - return unless link.force_encoding('UTF-8').valid_encoding? - - yield link, inner_html - end - - def unescape_link(href) # We cannot use CGI.unescape here because it also converts `+` to spaces. - # We need to keep the `+` for expanded reference formats. - Addressable::URI.unescape(href) - end - - def unescape_html_entities(text) - CGI.unescapeHTML(text.to_s) - end - - def escape_html_entities(text) - CGI.escapeHTML(text.to_s) - end + # We need to keep the `+` for expanded reference formats; we want to only + # percent-decode here. + link = Addressable::URI.unescape(node.attr('href').to_s) - def replace_text_when_pattern_matches(node, index, pattern) - return if pattern.is_a?(Gitlab::UntrustedRegexp) && !pattern.match?(node.text) - return if pattern.is_a?(Regexp) && !(pattern =~ node.text) - - content = node.to_html - html = yield content + return unless link.force_encoding('UTF-8').valid_encoding? - replace_text_with_html(node, index, html) unless html == content + yield link, node.text, node.inner_html end - def replace_link_node_with_text(node, index) - html = yield + # Replaces a node with HTML obtained by yielding node.text, iff node.text matches the given pattern. + # Doesn't replace anything if the block returns nil. + def replace_node_when_text_matches(node, index, pattern) + node_text = node.text - replace_text_with_html(node, index, html) unless html == node.text - end + return unless pattern.match?(node_text) - def replace_link_node_with_href(node, index, link) - html = yield - - replace_text_with_html(node, index, html) unless html == link + html = yield node_text + replace_node_with_html(node, index, html) if html end def text_node?(node) @@ -250,7 +334,7 @@ def object_sym @object_sym ||= object_name.to_sym end - def object_link_filter(text, pattern, link_content: nil, link_reference: false) + def object_link_filter(text, pattern, link_content_html: nil, link_reference: false) raise NotImplementedError, "#{self.class} must implement method: #{__callee__}" end @@ -261,11 +345,9 @@ def query ]} end - def replace_text_with_html(node, index, html) - replace_and_update_new_nodes(node, index, html) - end + def replace_node_with_html(node, index, html) + return if node.to_html == html - def replace_and_update_new_nodes(node, index, html) previous_node = node.previous next_node = node.next parent_node = node.parent diff --git a/lib/banzai/filter/references/user_reference_filter.rb b/lib/banzai/filter/references/user_reference_filter.rb index f6ea1713bb9f36e573856502bc955f13d76b2efd..6e96fceccc0c13bcb572f2a8ad109681c8a99df1 100644 --- a/lib/banzai/filter/references/user_reference_filter.rb +++ b/lib/banzai/filter/references/user_reference_filter.rb @@ -14,18 +14,21 @@ class UserReferenceFilter < ReferenceFilter # Public: Find `@user` user references in text # - # references_in(text) do |match, username| + # references_in(text) do |match_text, username| # "@#{user}" # end # # text - String text to search. # - # Yields the String match, and the String user name. + # Yields the String match text, and the String user name. # - # Returns a String replaced with the return of the block. + # Returns a HTML String with replacements made, or nil if no replacements were made. + # + # See ReferenceFilter#references_in for a detailed discussion. def references_in(text, pattern = object_reference_pattern) - Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, limit: Banzai::Filter::FILTER_ITEM_LIMIT) do |match_data| - yield match_data[0], match_data['user'] + replace_references_in_text_with_html(Gitlab::Utils::Gsub.gsub_with_limit(text, pattern, + limit: Banzai::Filter::FILTER_ITEM_LIMIT)) do |match_data| + yield match_data[0], match_data[:user] end end @@ -41,23 +44,23 @@ def call # user's profile page. # # text - String text to replace references in. - # link_content - Original content of the link being replaced. + # link_content_html - Original HTML content of the link being replaced. # - # Returns a String with `@user` references replaced with links. All links + # Returns a HTML String with `@user` references replaced with links. All links # have `gfm` and `gfm-project_member` class names attached for styling. - def object_link_filter(text, pattern, link_content: nil, link_reference: false) - references_in(text, pattern) do |match, username| + # + # Returns nil if no replacements were made. + def object_link_filter(text, pattern, link_content_html: nil, link_reference: false) + references_in(text, pattern) do |match_text, username| if Feature.disabled?(:disable_all_mention) && username == 'all' && !skip_project_check? - link_to_all(link_content: link_content) + link_to_all(link_content_html: link_content_html) else - cached_call(:banzai_url_for_object, match, path: [User, username.downcase]) do + cached_call(:banzai_url_for_object, match_text, path: [User, username.downcase]) do # order is important: per-organization usernames should be checked before global namespace if org_user_detail = org_user_details[username.downcase] link_to_org_user_detail(org_user_detail) elsif namespace = namespaces[username.downcase] - link_to_namespace(namespace, link_content: link_content) || match - else - match + link_to_namespace(namespace, link_content_html: link_content_html) end end end @@ -111,56 +114,56 @@ def link_class [reference_class(:project_member, tooltip: false), "js-user-link"].join(" ") end - def link_to_all(link_content: nil) + def link_to_all(link_content_html: nil) author = context[:author] if author && !team_member?(author) - link_content + link_content_html else - parent_url(link_content, author) + parent_url(link_content_html, author) end end - def link_to_namespace(namespace, link_content: nil) + def link_to_namespace(namespace, link_content_html: nil) if namespace.is_a?(Group) - link_to_group(namespace.full_path, namespace, link_content: link_content) + link_to_group(namespace.full_path, namespace, link_content_html: link_content_html) else - link_to_user(namespace.path, namespace, link_content: link_content) + link_to_user(namespace.path, namespace, link_content_html: link_content_html) end end - def link_to_group(group, namespace, link_content: nil) + def link_to_group(group, namespace, link_content_html: nil) url = urls.group_url(group, only_path: context[:only_path]) data = data_attribute(group: namespace.id) - content = link_content || (Group.reference_prefix + group) + html_content = link_content_html || CGI.escapeHTML(Group.reference_prefix + group) - link_tag(url, data, content, namespace.full_name) + link_tag(url, data, html_content, namespace.full_name) end - def link_to_user(user, namespace, link_content: nil) + def link_to_user(user, namespace, link_content_html: nil) url = urls.user_url(user, only_path: context[:only_path]) data = data_attribute(user: namespace.owner_id) - content = link_content || (User.reference_prefix + user) + html_content = link_content_html || CGI.escapeHTML(User.reference_prefix + user) - link_tag(url, data, content, namespace.owner_name) + link_tag(url, data, html_content, namespace.owner_name) end - def link_to_org_user_detail(org_user_detail, link_content: nil) + def link_to_org_user_detail(org_user_detail, link_content_html: nil) user = org_user_detail.user url = urls.user_url(user, only_path: context[:only_path]) data = data_attribute(user: user.id) - content = link_content || org_user_detail.to_reference + html_content = link_content_html || CGI.escapeHTML(org_user_detail.to_reference) - link_tag(url, data, content, org_user_detail.username) + link_tag(url, data, html_content, org_user_detail.username) end - def link_tag(url, data, link_content, title) + def link_tag(url, data, link_content_html, title) write_opening_tag("a", { "href" => url, "title" => title, "class" => link_class, **data - }) << link_content << "" + }) << link_content_html << "" end def organization @@ -182,7 +185,7 @@ def team_member?(user) parent.member?(user) end - def parent_url(link_content, author) + def parent_url(link_content_html, author) if parent_group? url = urls.group_url(parent, only_path: context[:only_path]) data = data_attribute(group: group.id, author: author.try(:id)) @@ -191,8 +194,8 @@ def parent_url(link_content, author) data = data_attribute(project: project.id, author: author.try(:id)) end - content = link_content || (User.reference_prefix + 'all') - link_tag(url, data, content, 'All Project and Group Members') + html_content = link_content_html || CGI.escapeHTML(User.reference_prefix + 'all') + link_tag(url, data, html_content, 'All Project and Group Members') end end end diff --git a/lib/banzai/reference_redactor.rb b/lib/banzai/reference_redactor.rb index 48000185c9bf6903bdc8bcc32f704deea3472004..8a8b7714533d81094bd726fae591d7d188bcfa4e 100644 --- a/lib/banzai/reference_redactor.rb +++ b/lib/banzai/reference_redactor.rb @@ -65,7 +65,6 @@ def redact_document_nodes(all_document_nodes) # def redacted_node_content(node) original_content = node.attr('data-original') - original_content = CGI.escape_html(original_content) if original_content # Build the raw tag just with a link as href and content if # it's originally a link pattern. We shouldn't return a plain text href. diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb index 07c7ab31050a7812729a75d3bd69c51983eb411a..006681654f315bf3a9be9048f99c81341ccd148a 100644 --- a/lib/gitlab/untrusted_regexp.rb +++ b/lib/gitlab/untrusted_regexp.rb @@ -40,6 +40,8 @@ def replace_all(text, rewrite) # There is no built-in replace with block support (like `gsub`). We can accomplish # the same thing by parsing and rebuilding the string with the substitutions. def replace_gsub(text, limit: 0) + return enum_for(:replace_gsub, text, limit:) unless block_given? + new_text = +'' remainder = text count = 0 diff --git a/lib/gitlab/utils/gsub.rb b/lib/gitlab/utils/gsub.rb index 29fcf31968d0dc260028da068834d0c228b11b77..4c59b39bac3ad81038db54536573ef7a2948738c 100644 --- a/lib/gitlab/utils/gsub.rb +++ b/lib/gitlab/utils/gsub.rb @@ -9,6 +9,8 @@ module Gsub # allows us to break out of the replacement loop when the limit is reached. # This is the same algorithm used for Gitlab::UntrustedRegexp.replace_gsub def gsub_with_limit(text, pattern, limit:) + return enum_for(:gsub_with_limit, text, pattern, limit:) unless block_given? + new_text = +'' remainder = text count = 0 diff --git a/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb b/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb index a2e0d92328d7294547fec9be182c3f85d4c20c9a..cbbb34c93fa5f47210bf3fc232734d2251cd0361 100644 --- a/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb @@ -29,7 +29,7 @@ it 'uses gsub_with_limit' do allow(described_class).to receive(:object_class).and_return(Issue) - expect(Gitlab::Utils::Gsub).to receive(:gsub_with_limit).with(anything, anything, limit: Banzai::Filter::FILTER_ITEM_LIMIT).and_call_original + expect(Gitlab::Utils::Gsub).to receive(:gsub_with_limit).with(anything, anything, limit: Banzai::Filter::FILTER_ITEM_LIMIT).twice.and_call_original filter_instance.references_in('text') end @@ -52,4 +52,33 @@ it_behaves_like 'a filter timeout' do let(:text) { 'text' } end + + describe '#references_in' do + let(:reference) { %(~"my #{range.to_reference}" } end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:reference) { range.to_reference } + let(:filter_instance) { described_class.new(nil, { project: nil }) } + end end diff --git a/spec/lib/banzai/filter/references/commit_reference_filter_spec.rb b/spec/lib/banzai/filter/references/commit_reference_filter_spec.rb index c08da88f51efe3a4680081fefd8c909559c41ea8..031ce55baf398ff1b43bf0a113e7329301c43e40 100644 --- a/spec/lib/banzai/filter/references/commit_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/commit_reference_filter_spec.rb @@ -345,4 +345,9 @@ let(:text) { "#{commit.id} #{commit.id} #{commit.id}" } let(:ends_with) { " #{commit.id}" } end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:reference) { commit.id } + let(:filter_instance) { described_class.new(nil, { project: }) } + end end diff --git a/spec/lib/banzai/filter/references/external_issue_reference_filter_spec.rb b/spec/lib/banzai/filter/references/external_issue_reference_filter_spec.rb index 2238d1e5408e8a741f7dd8428ad80a5fba3da846..b8d6fab27a5b6fa844a8843aa475f977710221d4 100644 --- a/spec/lib/banzai/filter/references/external_issue_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/external_issue_reference_filter_spec.rb @@ -373,4 +373,33 @@ def filter(markdown, context = {}) expect { reference_filter(multiple_references).to_html }.not_to exceed_query_limit(control) end end + + context 'ReferenceFilter#references_in' do + let(:issue) { ExternalIssue.new("T-123", project) } + let(:reference) { issue.to_reference } + + let(:filter_instance) { described_class.new(nil, { project: }) } + + context "pattern is Regexp" do + it_behaves_like 'ReferenceFilter#references_in' do + let_it_be(:integration) { create(:redmine_integration, project: project) } + end + end + + context "pattern is Gitlab::UntrustedRegexp" do + it_behaves_like 'ReferenceFilter#references_in' do + let_it_be(:integration) { create(:redmine_integration, project: project) } + + before do + allow_any_instance_of(described_class).to receive(:object_reference_pattern).and_return(Gitlab::UntrustedRegexp.new("\\b[A-Z][A-Z0-9_]*-(?\\d{1,20})")) + end + end + end + + context "pattern is unknown type" do + it 'raises ArgumentError' do + expect { filter_instance.references_in("some text", Object.new) }.to raise_error(ArgumentError) + end + end + end end diff --git a/spec/lib/banzai/filter/references/label_reference_filter_spec.rb b/spec/lib/banzai/filter/references/label_reference_filter_spec.rb index 4568522641fc9cf60cd93b7d92ecf3b7903dcccb..55b1a1da07c22673f85b4cbfa4033f03673c79c3 100644 --- a/spec/lib/banzai/filter/references/label_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/label_reference_filter_spec.rb @@ -923,4 +923,22 @@ end.not_to exceed_all_query_limit(max_count) end end + + it_behaves_like 'a reference which does not unescape its content in data-original' do + let(:context) { { project: project } } + let(:label_title) { "x<script>alert('xss');</script>" } + let(:resource) { create(:label, title: label_title, project: project) } + let(:reference) { %(#{resource.class.reference_prefix}"#{label_title}") } + + # This is probably bad (why are we unescaping entities in the input title like this?), + # but it is the case today, and we need to be safe in spite of this. + let(:expected_resource_title) { "x" } + + let(:expected_href) { urls.project_issues_url(project, label_name: expected_resource_title) } + let(:expected_replacement_text) { expected_resource_title } + end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:filter_instance) { described_class.new(nil, { project: }) } + end end diff --git a/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb b/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb index 37269a0c675a3e0f9f4fb2a123466ce9fe0b6023..7675eacf0cd6f1b7b593db9d2a57c389f02ce1ce 100644 --- a/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb @@ -340,7 +340,7 @@ result = reference_filter("See #{absolute_reference}") expect(result.css('a').first.attr('href')).to eq(urls.milestone_url(milestone)) - expect(result.css('a').first.attr('data-original')).to eq(absolute_reference) + expect(result.css('a').first.attr('data-original')).to eq_html(absolute_reference) expect(result.content).to eq "See %#{milestone.title}" end end @@ -627,4 +627,24 @@ end.not_to exceed_all_query_limit(control_count) end end + + it_behaves_like 'a reference which does not unescape its content in data-original' do + let(:context) { { project: project } } + let(:milestone_title) { "x<script>alert('xss');</script>" } + let(:resource) { create(:milestone, title: milestone_title, project: project) } + let(:reference) { %(#{resource.class.reference_prefix}"#{milestone_title}") } + + # This is probably bad (why are we unescaping entities in the input title like this?), + # but it is the case today, and we need to be safe in spite of this. + let(:expected_resource_title) { "x" } + + let(:expected_href) { urls.milestone_url(resource) } + let(:expected_replacement_text) { %(#{resource.class.reference_prefix}#{expected_resource_title}) } + end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:milestone) { create(:milestone, project: project) } + let(:reference) { milestone.to_reference } + let(:filter_instance) { described_class.new(nil, { project: }) } + end end diff --git a/spec/lib/banzai/filter/references/project_reference_filter_spec.rb b/spec/lib/banzai/filter/references/project_reference_filter_spec.rb index 1539af7d32d5bca38a3e8f8841d9f69d1f399435..5814689d970eeab21d65dcd7808ecbb9749d1640 100644 --- a/spec/lib/banzai/filter/references/project_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/project_reference_filter_spec.rb @@ -133,4 +133,8 @@ def get_reference(project) let(:text) { "#{reference} #{reference} #{reference}" } let(:ends_with) { " #{CGI.escapeHTML(reference)}" } end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:filter_instance) { described_class.new(nil, { project: nil }) } + end end diff --git a/spec/lib/banzai/filter/references/reference_filter_spec.rb b/spec/lib/banzai/filter/references/reference_filter_spec.rb index 075a2e56851c7ab2664f0ae76556a7833803db52..7065d70160b8b65afcd78d0420eba910d3ec2c0a 100644 --- a/spec/lib/banzai/filter/references/reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/reference_filter_spec.rb @@ -81,13 +81,15 @@ RSpec.shared_examples 'replaces text' do |method_name, index| let(:args) { [filter.nodes[index], index, ref_pattern || href_link].compact } + def call_method(method_name, replacement) + filter.send(method_name, *args) { replacement } + end + context 'when content didnt change' do it 'does not replace link node with html' do - filter.send(method_name, *args) do - existing_content - end + call_method(method_name, existing_content) - expect(filter).not_to receive(:replace_text_with_html) + expect(filter).not_to receive(:replace_node_with_html) end end @@ -95,25 +97,19 @@ let(:html) { %(text Reference) } it 'replaces reference node' do - filter.send(method_name, *args) do - html - end + call_method(method_name, html) expect(document.css('a').length).to eq 1 end - it 'calls replace_and_update_new_nodes' do - expect(filter).to receive(:replace_and_update_new_nodes).with(filter.nodes[index], index, html) + it 'calls replace_node_with_html' do + expect(filter).to receive(:replace_node_with_html).with(filter.nodes[index], index, html) - filter.send(method_name, *args) do - html - end + call_method(method_name, html) end it 'stores filtered new nodes' do - filter.send(method_name, *args) do - html - end + call_method(method_name, html) expect(filter.instance_variable_get(:@new_nodes)).to eq({ index => [filter.each_node.to_a[index]] }) end @@ -151,7 +147,7 @@ end end - describe '#replace_text_when_pattern_matches' do + describe '#replace_node_when_text_matches' do include_context 'document nodes' let(:node) { Nokogiri::HTML.fragment('text @reference') } @@ -162,34 +158,15 @@ let(:nodes) { [node] } it 'skips node' do - expect { |b| filter.send(:replace_text_when_pattern_matches, filter.nodes[0], 0, ref_pattern, &b) }.not_to yield_control + expect { |b| filter.send(:replace_node_when_text_matches, filter.nodes[0], 0, ref_pattern, &b) }.not_to yield_control end end - it_behaves_like 'replaces document node', :replace_text_when_pattern_matches do + it_behaves_like 'replaces document node', :replace_node_when_text_matches do let(:existing_content) { node.to_html } end end - describe '#replace_link_node_with_text' do - include_context 'document nodes' - let(:node) { Nokogiri::HTML.fragment('end text') } - - it_behaves_like 'replaces document node', :replace_link_node_with_text do - let(:existing_content) { node.text } - end - end - - describe '#replace_link_node_with_href' do - include_context 'document nodes' - let(:node) { Nokogiri::HTML.fragment('end text') } - let(:href_link) { CGI.unescape(node.attr('href').to_s) } - - it_behaves_like 'replaces document node', :replace_link_node_with_href do - let(:existing_content) { href_link } - end - end - describe '#call_and_update_nodes' do include_context 'new nodes' let(:document) { Nokogiri::HTML.fragment('foo') } @@ -230,7 +207,20 @@ end end - describe '.nodes?' do + describe '#replace_references_in_text_with_html' do + let(:document) { Nokogiri::HTML.fragment('foo') } + let(:filter) { described_class.new(document, project:) } + + context 'enumerator yields non-MatchData' do + let(:weird_enumerator) { ["weh"].each } + + it 'raises ArgumentError' do + expect { filter.replace_references_in_text_with_html(weird_enumerator) }.to raise_error(ArgumentError) + end + end + end + + describe '#nodes?' do let_it_be(:document) { Nokogiri::HTML.fragment('foo') } let(:filter) { described_class.new(document, project: project) } diff --git a/spec/lib/banzai/filter/references/user_reference_filter_spec.rb b/spec/lib/banzai/filter/references/user_reference_filter_spec.rb index b9ca92dc9b91d0c32159b507a6b100e68ea4996b..1d9af606723dbe765d7c7f7bc04132b48854490b 100644 --- a/spec/lib/banzai/filter/references/user_reference_filter_spec.rb +++ b/spec/lib/banzai/filter/references/user_reference_filter_spec.rb @@ -322,4 +322,8 @@ def get_reference(user) let(:text) { "#{reference} #{reference} #{reference}" } let(:ends_with) { " #{reference}" } end + + it_behaves_like 'ReferenceFilter#references_in' do + let(:filter_instance) { described_class.new(nil, { project: nil }) } + end end diff --git a/spec/lib/banzai/pipeline/full_pipeline_spec.rb b/spec/lib/banzai/pipeline/full_pipeline_spec.rb index 73225d8fd261ced89ad97e2aa33fb01019dd6bdd..22a8cd0ce7e63ecac9b3fcef4f2c308767c3731f 100644 --- a/spec/lib/banzai/pipeline/full_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/full_pipeline_spec.rb @@ -43,9 +43,45 @@ markdown.delete!("\n") - result = described_class.to_html(markdown, project: project) + # Part of understanding this spec means knowing how the above is actually treated + # by the FullPipeline. The PlainMarkdownPipeline runs first and produces the DOM + # which is actually operated on by the GfmPipeline. + # + # Here we recreate the PlainMarkdownPipeline's effects by themselves and assert the + # result, to clarify what the malicious input is interpreted as: + + html_after_plain_markdown_pipeline = Banzai::Pipeline::PlainMarkdownPipeline.to_html(markdown, project:) + expect(html_after_plain_markdown_pipeline).to eq_html(<<-HTML, trim_text_nodes: true) +
+ + http://localhost/namespace1/project-1/-/issues/1 + + + + +
+ HTML + result = described_class.to_html(markdown, project: project) expect(result).to include "" + + # As above, we assert the full result to clarify the full result. We used to interpret + # this input as a valid reference link, which eventually lead to the original XSS whose + # fix this spec initially accompanied. + # + # We no longer do; we do not compare the full inner HTML of the link with the href + # attribute, but only its textual contents. + expect(result).to eq_html(<<-HTML, trim_text_nodes: true) +
+ + http://localhost/namespace1/project-1/-/issues/1 + + + + +
+ HTML end it 'escapes the data-original attribute on a reference' do diff --git a/spec/lib/banzai/reference_redactor_spec.rb b/spec/lib/banzai/reference_redactor_spec.rb index b7d991553dcf4ced29cf1ef46bb5b8ef7b9e8ce4..137e93f3b7aa848abfa7eb771827e2d659405cfc 100644 --- a/spec/lib/banzai/reference_redactor_spec.rb +++ b/spec/lib/banzai/reference_redactor_spec.rb @@ -35,14 +35,18 @@ end context 'when data-original attribute provided' do - let(:original_content) { '<script>alert(1);</script>' } - - it 'replaces redacted reference with original content' do - doc = Nokogiri::HTML.fragment("bar") - redactor.redact([doc]) - expect(doc.to_html).to eq(original_content) + RSpec.shared_examples 'substitutes original content back in as-is' do |original_content| + it 'replaces redacted reference with original content' do + doc = Nokogiri::HTML.fragment("bar") + doc.css('a')[0]["data-original"] = original_content + redactor.redact([doc]) + expect(doc.to_html).to eq(original_content) + end end + it_behaves_like 'substitutes original content back in as-is', '<script>alert(1);</script>' + it_behaves_like 'substitutes original content back in as-is', 'foo' + it 'does not replace redacted reference with original content if href is given' do html = "Marge" doc = Nokogiri::HTML.fragment(html) diff --git a/spec/mailers/emails/service_desk_spec.rb b/spec/mailers/emails/service_desk_spec.rb index e12a3b61001a77f95c2314a6550d89531cd66452..c967e8657a4295c87d2ca6a725b8beceb21ef904 100644 --- a/spec/mailers/emails/service_desk_spec.rb +++ b/spec/mailers/emails/service_desk_spec.rb @@ -342,12 +342,11 @@ context 'with template' do context 'with all-user reference in a an external author comment' do let_it_be(:note) { create(:note_on_issue, noteable: issue, project: project, note: "Hey @all, just a ping", author: Users::Internal.support_bot) } + let(:expected_template_html) { 'Hey @all, just a ping' } let(:template_content) { 'some text %{ NOTE_TEXT }' } context 'when `disable_all_mention` is disabled' do - let(:expected_template_html) { 'Hey , just a ping' } - before do stub_feature_flags(disable_all_mention: false) end @@ -356,8 +355,6 @@ end context 'when `disable_all_mention` is enabled' do - let(:expected_template_html) { 'Hey @all, just a ping' } - before do stub_feature_flags(disable_all_mention: true) end diff --git a/spec/requests/projects/incident_management/timeline_events_spec.rb b/spec/requests/projects/incident_management/timeline_events_spec.rb index 8b866dcea5dda73f8e8844bc7ef502d049f9f301..587eb558d7959c42028517874df6d2f086959826 100644 --- a/spec/requests/projects/incident_management/timeline_events_spec.rb +++ b/spec/requests/projects/incident_management/timeline_events_spec.rb @@ -38,8 +38,9 @@ params: { text: timeline_text } expect(response).to have_gitlab_http_status(:ok) + expect(json_response["body"]).to eq_html(expected_body) expect(json_response).to eq({ - body: expected_body, + body: json_response["body"], references: { commands: '', suggestions: [], diff --git a/spec/support/matchers/eq_html.rb b/spec/support/matchers/eq_html.rb new file mode 100644 index 0000000000000000000000000000000000000000..ab1c9dcf846cc2c106a52d5e334a8154c440e8c1 --- /dev/null +++ b/spec/support/matchers/eq_html.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +# Assert that the two values are the same in HTML context, modulo escaping operations that +# do not change the meaning of the value --- i.e. do not change how a web browser or HTML parser +# would treat them. +# +# This means, e.g. '"' and '"' are the same when encountered outside an HTML tag, +# but '<b>' and '' are not. +# +# Likewise, '' and +# "" are considered the same. +# +# This matcher helps tests be less brittle or flakey in the face of changing attribute orders, +# and less repetitive in the face of comparing essentially unchanged values that nonetheless +# are represented differently. +# +# Note that it uses a real HTML parser internally so as not to introduce security issues +# through accidental de-escaping or any of the other myriad ways HTML can be messed with. +# +# --- +# +# The option "trim_text_nodes: true" can be given. This trims whitespace at the start and +# end of the text content of text nodes on each side. This should be used with care, but can +# aid in spec clarity by allowing expected DOMs to be written "prettified". +RSpec::Matchers.define :eq_html do |expected, **normalize_opts| + include EqHtmlMatcher + + match do |actual| + raise ArgumentError, "expected should be a String, not #{expected.class}" unless expected.is_a?(String) + raise ArgumentError, "actual should be a String, not #{actual.class}" unless actual.is_a?(String) + + normalize_html(actual, **normalize_opts) == normalize_html(expected, **normalize_opts) + end + + failure_message do |actual| + "Expected that\n\n #{actual}\n\nwould normalize to the same HTML as\n\n #{expected}\n\nbut it didn't.\n\n " \ + "#{normalize_html(actual, **normalize_opts).inspect}\n!=\n " \ + "#{normalize_html(expected, **normalize_opts).inspect}" + end + + failure_message_when_negated do |actual| + "Expected that\n\n #{actual}\n\nwould NOT normalize to the same HTML as\n\n #{expected}\n\nbut it did!\n\n " \ + "#{normalize_html(actual, **normalize_opts).inspect}\n==\n " \ + "#{normalize_html(expected, **normalize_opts).inspect}" + end +end + +# Assert the right-hand value is contained in the left-hand value after normalising HTML on both sides per eq_html. +# See eq_html's docstring for details on what this means. +# +# Note this implies you can only check for inclusion of bare text (outside tags) and whole tags; a +# partial tag will normalise to text ("" +# includes the HTML "node inside' } - let(:reference_with_element) { %(#{inner_html}) } - - it 'does not escape inner html' do - doc = reference_filter(reference_with_element, try(:context) || {}) - - expect(doc.children.first.children.first.inner_html).to eq(inner_html) - end -end - -# Requires a reference, subject and subject_name: -# subject { create(:user) } -# let(:reference) { subject.to_reference } -# let(:subject_name) { 'user' } -RSpec.shared_examples 'user reference or project reference' do - shared_examples 'it contains a data- attribute' do - it 'includes a data- attribute' do - doc = reference_filter("Hey #{reference}") - link = doc.css('a').first - - expect(link).to have_attribute("data-#{subject_name}") - expect(link.attr("data-#{subject_name}")).to eq subject.id.to_s - end - end - - context 'when mentioning a resource' do - it_behaves_like 'a reference containing an element node' - it_behaves_like 'it contains a data- attribute' - - it "links to a resource" do - doc = reference_filter("Hey #{reference}") - expect(doc.css('a').first.attr('href')).to eq urls.send("#{subject_name}_url", subject) - end - - it 'links to a resource with a period' do - subject = create(subject_name.to_sym, name: 'alphA.Beta') - - doc = reference_filter("Hey #{get_reference(subject)}") - expect(doc.css('a').length).to eq 1 - end - - it 'links to a resource with an underscore' do - subject = create(subject_name.to_sym, name: 'ping_pong_king') - - doc = reference_filter("Hey #{get_reference(subject)}") - expect(doc.css('a').length).to eq 1 - end - - it 'links to a resource with different case-sensitivity' do - subject = create(subject_name.to_sym, name: 'RescueRanger') - reference = get_reference(subject) - - doc = reference_filter("Hey #{reference.upcase}") - expect(doc.css('a').length).to eq 1 - expect(doc.css('a').text).to eq(reference) - end - end - - it 'supports an :only_path context' do - doc = reference_filter("Hey #{reference}", only_path: true) - link = doc.css('a').first.attr('href') - - expect(link).not_to match %r{https?://} - expect(link).to eq urls.send "#{subject_name}_path", subject - end - - describe 'referencing a resource in a link href' do - let(:reference) { %(Some text) } - - it_behaves_like 'it contains a data- attribute' - - it 'links to the resource' do - doc = reference_filter("Hey #{reference}") - expect(doc.css('a').first.attr('href')).to eq urls.send "#{subject_name}_url", subject - end - - it 'links with adjacent text' do - doc = reference_filter("Mention me (#{reference}.)") - expect(doc.to_html).to match(%r{\(Some text\.\)}) - end - end -end diff --git a/spec/support/shared_examples/banzai/filters/emoji_shared_examples.rb b/spec/support/shared_examples/lib/banzai/filters/emoji_shared_examples.rb similarity index 100% rename from spec/support/shared_examples/banzai/filters/emoji_shared_examples.rb rename to spec/support/shared_examples/lib/banzai/filters/emoji_shared_examples.rb diff --git a/spec/support/shared_examples/banzai/filters/filter_timeout_shared_examples.rb b/spec/support/shared_examples/lib/banzai/filters/filter_timeout_shared_examples.rb similarity index 100% rename from spec/support/shared_examples/banzai/filters/filter_timeout_shared_examples.rb rename to spec/support/shared_examples/lib/banzai/filters/filter_timeout_shared_examples.rb diff --git a/spec/support/shared_examples/lib/banzai/filters/reference_filter_shared_examples.rb b/spec/support/shared_examples/lib/banzai/filters/reference_filter_shared_examples.rb index ac96034f7e021f5d83af6ec927dc868cc68e2f8d..d6163b19dac9ea426a100ac8185a7c1eeae31a10 100644 --- a/spec/support/shared_examples/lib/banzai/filters/reference_filter_shared_examples.rb +++ b/spec/support/shared_examples/lib/banzai/filters/reference_filter_shared_examples.rb @@ -1,5 +1,93 @@ # frozen_string_literal: true +# Specs for reference links containing HTML. +# +# Requires a reference: +# let(:reference) { '#42' } +RSpec.shared_examples 'a reference containing an element node' do + let(:inner_html) { 'element node inside' } + let(:reference_with_element) { %(#{inner_html}) } + + it 'does not escape inner html' do + doc = reference_filter(reference_with_element, try(:context) || {}) + + expect(doc.children.first.children.first.inner_html).to eq(inner_html) + end +end + +# Requires a reference, subject and subject_name: +# subject { create(:user) } +# let(:reference) { subject.to_reference } +# let(:subject_name) { 'user' } +RSpec.shared_examples 'user reference or project reference' do + shared_examples 'it contains a data- attribute' do + it 'includes a data- attribute' do + doc = reference_filter("Hey #{reference}") + link = doc.css('a').first + + expect(link).to have_attribute("data-#{subject_name}") + expect(link.attr("data-#{subject_name}")).to eq subject.id.to_s + end + end + + context 'when mentioning a resource' do + it_behaves_like 'a reference containing an element node' + it_behaves_like 'it contains a data- attribute' + + it "links to a resource" do + doc = reference_filter("Hey #{reference}") + expect(doc.css('a').first.attr('href')).to eq urls.send(:"#{subject_name}_url", subject) + end + + it 'links to a resource with a period' do + subject = create(subject_name.to_sym, name: 'alphA.Beta') + + doc = reference_filter("Hey #{get_reference(subject)}") + expect(doc.css('a').length).to eq 1 + end + + it 'links to a resource with an underscore' do + subject = create(subject_name.to_sym, name: 'ping_pong_king') + + doc = reference_filter("Hey #{get_reference(subject)}") + expect(doc.css('a').length).to eq 1 + end + + it 'links to a resource with different case-sensitivity' do + subject = create(subject_name.to_sym, name: 'RescueRanger') + reference = get_reference(subject) + + doc = reference_filter("Hey #{reference.upcase}") + expect(doc.css('a').length).to eq 1 + expect(doc.css('a').text).to eq(reference) + end + end + + it 'supports an :only_path context' do + doc = reference_filter("Hey #{reference}", only_path: true) + link = doc.css('a').first.attr('href') + + expect(link).not_to match %r{https?://} + expect(link).to eq urls.send :"#{subject_name}_path", subject + end + + describe 'referencing a resource in a link href' do + let(:reference) { %(Some text) } + + it_behaves_like 'it contains a data- attribute' + + it 'links to the resource' do + doc = reference_filter("Hey #{reference}") + expect(doc.css('a').first.attr('href')).to eq urls.send :"#{subject_name}_url", subject + end + + it 'links with adjacent text' do + doc = reference_filter("Mention me (#{reference}.)") + expect(doc.to_html).to match(%r{\(Some text\.\)}) + end + end +end + RSpec.shared_examples 'HTML text with references' do let(:markdown_prepend) { "<img src=\"\" onerror=alert('bug')>" } @@ -22,3 +110,69 @@ expect(doc.to_html).to include text end end + +RSpec.shared_examples 'a reference which does not unescape its content in data-original' do + it 'does not remove a layer of escaping in data-original' do + # We assert the expected resource.title so calling `it_behaves_like` blocks can better describe + # the known behaviour around the resource. Some classes process their title in ways that are + # particularly relevant to XSS; the big example is Labels, which unescape HTML entities in + # their input. It's helpful to call out when this happens (and when this doesn't) so the rest + # of the behaviours are clear. + expect(resource.title).to eq(expected_resource_title) + + result = reference_filter("See #{reference}", context) + + expect(result.css('a').first.attr('href')).to eq(expected_href) + + # This is the important part. + expect(result.css('a').first.attr('data-original')).to eq_html(reference) + + expect(result.content).to eq "See #{expected_replacement_text}" + end +end + +# Expects an input reference, and a filter instance to call #references_in on: +# let(:reference) { range.to_reference } +# let(:filter_instance) { described_class.new(nil, { project: nil }) } +# +# Optionally, you can override the expected replacement by setting `:expected_replacement`. +# In particular, if no replacement is expected, say: +# let(:expected_replacement) { nil } +# +# See the doc comment on ReferenceFilter#references_in for a detailed discussion on +# this function's input, block, and output. +RSpec.shared_examples_for 'ReferenceFilter#references_in' do + let(:replacement) do + doc = Nokogiri::HTML.fragment("link to label") + doc.css('a').first['href'] = '/destination' + doc.to_html + end + + let(:expected_replacement) { replacement } + + it "does not return input text unescaped" do + text = "don't unescape #{reference} <anything>" + + html = filter_instance.references_in(text) do |match| + # Special case: ProjectReferenceFilter will match "b>" and "/b>" from the above text. + # This is correct. Let it pass through, translating text to HTML, like it normally would. + # + # Extremely special case: it'll also match "anything>", though hopefully this will + # become less necessary in the future. This is incorrect, but currently the case. + # See Project.reference_postfix_escaped. + next CGI.escapeHTML(match) if match == "b>" || match == "/b>" || match == "anything>" + + expect(match).to eq(reference) + replacement + end + + case expected_replacement + when NilClass + expect(html).to be_nil + when String + expect(html).to eq_html("don't <b>unescape</b> #{expected_replacement} &lt;anything&gt;") + else + raise ArgumentError, "expected_replacement should be nil or a String, not #{expected_replacement.class}" + end + end +end diff --git a/spec/support_specs/matchers/match_html_spec.rb b/spec/support_specs/matchers/match_html_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..a8d42ad3023b36064dc334765ef35fa90317077d --- /dev/null +++ b/spec/support_specs/matchers/match_html_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' +require_relative '../../support/matchers/eq_html' + +# rubocop: disable RSpec/ExpectActual -- these *are* the actual values we're testing. +RSpec.describe "eq_html", feature_category: :shared do + it "normalizes entities" do + expect(%( x < y + "1"&'2' )).to eq_html(%( x < y + "1"&'2' )) + end + + it "doesn't mix up entities" do + expect(%( no )).not_to eq_html(%( <em>no</em> )) + end + + it "normalizes attributes" do + expect(%( Click )).to \ + eq_html(%( Click )) + end + + it "doesn't confuse similar attributes" do + expect(%( Click )).not_to eq_html(%( Click )) + end +end +# rubocop: enable RSpec/ExpectActual