From 4f2599c017a39e0f21beee5c399425627cd561c6 Mon Sep 17 00:00:00 2001
From: Asherah Connor
Date: Thu, 9 Oct 2025 16:13:53 +1100
Subject: [PATCH 1/4] Compose HTML consistently in Banzai reference filters
---
bench.rb | 54 +++++++++++++++++++
.../references/epic_reference_filter.rb | 2 +-
.../references/label_reference_filter.rb | 13 +----
.../vulnerability_reference_filter.rb | 2 +-
.../references/label_reference_filter_spec.rb | 2 +-
lib/banzai/filter/concerns/html_writer.rb | 46 ++++++++++++++++
.../references/abstract_reference_filter.rb | 11 ++--
.../external_issue_reference_filter.rb | 9 ++--
.../references/project_reference_filter.rb | 11 ++--
.../filter/references/reference_filter.rb | 27 +++++-----
.../references/user_reference_filter.rb | 7 ++-
lib/gitlab/gfm/reference_rewriter.rb | 4 +-
.../abstract_reference_filter_spec.rb | 2 +-
.../commit_range_reference_filter_spec.rb | 2 +-
.../milestone_reference_filter_spec.rb | 2 +-
spec/lib/banzai/reference_redactor_spec.rb | 7 ++-
16 files changed, 152 insertions(+), 49 deletions(-)
create mode 100644 bench.rb
create mode 100644 lib/banzai/filter/concerns/html_writer.rb
diff --git a/bench.rb b/bench.rb
new file mode 100644
index 00000000000000..46d891b32bfe34
--- /dev/null
+++ b/bench.rb
@@ -0,0 +1,54 @@
+require 'benchmark/ips'
+require 'memory_profiler'
+
+tag_name = "a"
+attributes = {
+ "href" => "http://localhost/namespace36/project-36/-/issues?label_name=g.fm+%26+references%3F",
+ "title" => "",
+ "class" => "gfm gfm-label has-tooltip gl-link gl-label-link",
+ "data-original" => "~\"g.fm & references?\"",
+ "data-link" => false,
+ "data-link-reference" => false,
+ "data-label" => 365,
+ "data-project" => 3155,
+ "data-container" => "body",
+ "data-placement" => "top",
+ "data-reference-type" => :label,
+}
+
+class Filter
+ include Banzai::Filter::Concerns::HtmlWriter
+
+ def initialize
+ @doc = Nokogiri::HTML.fragment("")
+ end
+
+ attr_reader :doc
+end
+
+filter = Filter.new
+N = 300_000
+
+puts "<<>>"
+puts "<<>>"
+puts "<<>>"
+
+report = MemoryProfiler.report do
+ N.times { filter.write_opening_tag_nokogiri(tag_name, attributes) }
+end
+report.pretty_print
+
+puts "<<>>"
+puts "<<>>"
+puts "<<>>"
+
+filter = Filter.new
+report = MemoryProfiler.report do
+ N.times { filter.write_opening_tag(tag_name, attributes) }
+end
+report.pretty_print
+
+Benchmark.ips do |x|
+ x.report("nokogiri") { filter.write_opening_tag_nokogiri(tag_name, attributes) }
+ x.report("direct") { filter.write_opening_tag(tag_name, attributes) }
+end
diff --git a/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb b/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb
index e8edd2586831e5..80ad20809a91b6 100644
--- a/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb
+++ b/ee/lib/ee/banzai/filter/references/epic_reference_filter.rb
@@ -34,7 +34,7 @@ def reference_class(object_sym, tooltip: false)
def data_attributes_for(text, group, object, link_content: false, link_reference: false)
{
- original: escape_html_entities(text),
+ original: text,
link: link_content,
link_reference: link_reference,
group: group.id,
diff --git a/ee/lib/ee/banzai/filter/references/label_reference_filter.rb b/ee/lib/ee/banzai/filter/references/label_reference_filter.rb
index 6dedb186f55857..eee57295f27cb1 100644
--- a/ee/lib/ee/banzai/filter/references/label_reference_filter.rb
+++ b/ee/lib/ee/banzai/filter/references/label_reference_filter.rb
@@ -11,17 +11,8 @@ module LabelReferenceFilter
def data_attributes_for(text, parent, object, link_content: false, link_reference: false)
return super unless object.scoped_label?
- # Enabling HTML tooltips for scoped labels here and additional escaping is done in `object_link_title`
- super.merge!(
- html: true
- )
- end
-
- override :object_link_title
- def object_link_title(object, matches)
- return super unless object.scoped_label?
-
- ERB::Util.html_escape(super)
+ # Enabling HTML tooltips for scoped labels here.
+ super.merge!(html: true)
end
end
end
diff --git a/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb b/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb
index d8bdb3080e1da0..85611f630856ca 100644
--- a/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb
+++ b/ee/lib/ee/banzai/filter/references/vulnerability_reference_filter.rb
@@ -35,7 +35,7 @@ def url_for_object(vulnerability, project)
def data_attributes_for(text, project, object, link_content: false, link_reference: false)
{
- original: escape_html_entities(text),
+ original: text,
link: link_content,
link_reference: link_reference,
project: project.id,
diff --git a/ee/spec/lib/banzai/filter/references/label_reference_filter_spec.rb b/ee/spec/lib/banzai/filter/references/label_reference_filter_spec.rb
index be45da7c74753f..fc1aa60fd3904c 100644
--- a/ee/spec/lib/banzai/filter/references/label_reference_filter_spec.rb
+++ b/ee/spec/lib/banzai/filter/references/label_reference_filter_spec.rb
@@ -26,7 +26,7 @@
expect(doc.at_css('.gl-label-scoped a').attr('data-html')).to eq('true')
end
- it "escapes HTML in the label's title" do
+ it "doesn't unescape HTML in the label's title" do
expect(doc.at_css('.gl-label-scoped a').attr('title')).to include('xss <svg id="svgId">')
end
end
diff --git a/lib/banzai/filter/concerns/html_writer.rb b/lib/banzai/filter/concerns/html_writer.rb
new file mode 100644
index 00000000000000..cf97432eab93ba
--- /dev/null
+++ b/lib/banzai/filter/concerns/html_writer.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Banzai
+ module Filter
+ module Concerns
+ module HtmlWriter
+ extend ActiveSupport::Concern
+
+ # Write an opening tag with the given tag name and attributes.
+ #
+ # The tag name and attribute names must not be user-specified; they are not validated.
+ #
+ # The attribute values are always escaped in the appropriate manner; the contract is that whatever value goes in
+ # is the logical DOM value of the attribute that comes out.
+ #
+ # i.e.:
+ #
+ # ```ruby
+ # html = write_opening_tag(tag, attrs)
+ # doc = Nokogiri::HTML.fragment(html)
+ # element = doc.child
+ # expect(element.to_h).to match(attrs)
+ # ```
+ #
+ # This function should *never be changed*.
+ def write_opening_tag(tag_name, attributes)
+ s = +"<" << tag_name
+ attributes.each do |attr_name, attr_value|
+ next if attr_value.nil?
+ s << " " << attr_name << "="
+ s << '"' << CGI.escapeHTML(attr_value.to_s) << '"'
+ end
+ s << ">"
+ end
+
+ # This is the most sure-fire correct way to implement the above function, relying on Nokogiri's DOM to correctly
+ # serialise attributes for us. Comparing the two with benchmark-ips and memory_profiler shows this method to be
+ # 7x slower and adds 3x memory pressure compared to the above. It is included here for reference.
+ def write_opening_tag_nokogiri(tag_name, attributes)
+ el = doc.document.create_element(tag_name, attributes.compact)
+ el.to_html.chomp("#{tag_name}>")
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/filter/references/abstract_reference_filter.rb b/lib/banzai/filter/references/abstract_reference_filter.rb
index 205bab5582eb96..dc21b224b1d3a7 100644
--- a/lib/banzai/filter/references/abstract_reference_filter.rb
+++ b/lib/banzai/filter/references/abstract_reference_filter.rb
@@ -230,9 +230,12 @@ def object_link_filter(text, pattern, link_content: nil, link_reference: false)
content = context[:link_text] || link_content || object_link_text(object, matches)
- link = %(#{content})
+ link = write_opening_tag("a", {
+ "href" => url,
+ "title" => title,
+ "class" => klass,
+ **data
+ }) << content.to_s << ""
wrap_link(link, object)
else
@@ -256,7 +259,7 @@ def data_attributes_for(text, parent, object, link_content: false, link_referenc
end
{
- original: escape_html_entities(text),
+ original: text,
link: link_content,
link_reference: link_reference,
object_sym => object.id
diff --git a/lib/banzai/filter/references/external_issue_reference_filter.rb b/lib/banzai/filter/references/external_issue_reference_filter.rb
index 7d7e7c86cbd4fc..ff62f44efe1429 100644
--- a/lib/banzai/filter/references/external_issue_reference_filter.rb
+++ b/lib/banzai/filter/references/external_issue_reference_filter.rb
@@ -60,9 +60,12 @@ def object_link_filter(text, pattern, link_content: nil, link_reference: false)
data = data_attribute(project: project.id, external_issue: id)
content = link_content || match
- %(#{content})
+ write_opening_tag("a", {
+ "href" => url,
+ "title" => issue_title,
+ "class" => klass,
+ **data
+ }) << content.to_s << ""
end
end
diff --git a/lib/banzai/filter/references/project_reference_filter.rb b/lib/banzai/filter/references/project_reference_filter.rb
index 9b6c6fedb06c3d..52688a15245b75 100644
--- a/lib/banzai/filter/references/project_reference_filter.rb
+++ b/lib/banzai/filter/references/project_reference_filter.rb
@@ -90,11 +90,12 @@ def link_to_project(project, link_content: nil)
data = data_attribute(project: project.id)
content = link_content || project.to_reference
- link_tag(url, data, content, project.name)
- end
-
- def link_tag(url, data, link_content, title)
- %(#{link_content})
+ write_opening_tag("a", {
+ "href" => url,
+ "title" => project.name,
+ "class" => link_class,
+ **data
+ }) << content << ""
end
end
end
diff --git a/lib/banzai/filter/references/reference_filter.rb b/lib/banzai/filter/references/reference_filter.rb
index 12b3b6e591898f..3bc48cf59b1b25 100644
--- a/lib/banzai/filter/references/reference_filter.rb
+++ b/lib/banzai/filter/references/reference_filter.rb
@@ -15,8 +15,10 @@ class ReferenceFilter < HTML::Pipeline::Filter
include RequestStoreReferenceCache
include Concerns::OutputSafety
prepend Concerns::PipelineTimingCheck
+ include Concerns::HtmlWriter
- REFERENCE_TYPE_DATA_ATTRIBUTE = 'data-reference-type='
+ REFERENCE_TYPE_ATTRIBUTE = :reference_type
+ REFERENCE_TYPE_DATA_ATTRIBUTE_NAME = "data-#{REFERENCE_TYPE_ATTRIBUTE.to_s.dasherize}"
class << self
# Implement in child class
@@ -122,7 +124,7 @@ def requires_unescaping?
private
- # Returns a data attribute String to attach to a reference link
+ # Returns a Hash of attributes to attach to a reference link
#
# attributes - Hash, where the key becomes the data attribute name and the
# value is the data attribute value
@@ -130,28 +132,23 @@ def requires_unescaping?
# Examples:
#
# data_attribute(project: 1, issue: 2)
- # # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"1\" data-issue=\"2\""
+ # # => {"data-reference-type" => "SomeReferenceFilter", "data-container" => "body",
+ # "data-placement" => "top", "data-project" => 1, "data-issue" => 2}
#
# data_attribute(project: 3, merge_request: 4)
- # # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"3\" data-merge-request=\"4\""
+ # # => {"data-reference-type" => "SomeReferenceFilter", "data-container" => "body",
+ # "data-placement" => "top", "data-project" => 3, "data-merge-request" => 4}
#
- # Returns a String
+ # Returns a Hash
def data_attribute(attributes = {})
- attributes = attributes.reject { |_, v| v.nil? }
-
- # "data-reference-type=" attribute got moved into a constant because we need
- # to use it on ReferenceRewriter class to detect if the markdown contains any reference
- reference_type_attribute = "#{REFERENCE_TYPE_DATA_ATTRIBUTE}#{escape_once(self.class.reference_type)} "
+ attributes = attributes.compact
attributes[:container] ||= 'body'
attributes[:placement] ||= 'top'
+ attributes[REFERENCE_TYPE_ATTRIBUTE] = self.class.reference_type
attributes.delete(:original) if context[:no_original_data]
- attributes.map do |key, value|
- %(data-#{key.to_s.dasherize}="#{escape_once(value)}")
- end
- .join(' ')
- .prepend(reference_type_attribute)
+ attributes.transform_keys { |k| "data-#{k.to_s.dasherize}" }
end
def ignore_ancestor_query
diff --git a/lib/banzai/filter/references/user_reference_filter.rb b/lib/banzai/filter/references/user_reference_filter.rb
index cbfdcf037e568f..f6ea1713bb9f36 100644
--- a/lib/banzai/filter/references/user_reference_filter.rb
+++ b/lib/banzai/filter/references/user_reference_filter.rb
@@ -155,7 +155,12 @@ def link_to_org_user_detail(org_user_detail, link_content: nil)
end
def link_tag(url, data, link_content, title)
- %(#{link_content})
+ write_opening_tag("a", {
+ "href" => url,
+ "title" => title,
+ "class" => link_class,
+ **data
+ }) << link_content << ""
end
def organization
diff --git a/lib/gitlab/gfm/reference_rewriter.rb b/lib/gitlab/gfm/reference_rewriter.rb
index 606c63e5001cc0..d4fec5e89752de 100644
--- a/lib/gitlab/gfm/reference_rewriter.rb
+++ b/lib/gitlab/gfm/reference_rewriter.rb
@@ -56,9 +56,9 @@ def rewrite(target_parent)
def needs_rewrite?
strong_memoize(:needs_rewrite) do
reference_type_attribute =
- Banzai::Filter::References::ReferenceFilter::REFERENCE_TYPE_DATA_ATTRIBUTE
+ Banzai::Filter::References::ReferenceFilter::REFERENCE_TYPE_DATA_ATTRIBUTE_NAME
- @text_html.include?(reference_type_attribute)
+ @text_html.include?("#{reference_type_attribute}=")
end
end
diff --git a/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb b/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb
index 74c7c89c629da9..a2e0d92328d729 100644
--- a/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb
+++ b/spec/lib/banzai/filter/references/abstract_reference_filter_spec.rb
@@ -16,7 +16,7 @@
data_attributes = filter_instance.data_attributes_for('xss <img onerror=alert(1) src=x>', project, issue, link_content: true)
- expect(data_attributes[:original]).to eq('xss <img onerror=alert(1) src=x>')
+ expect(data_attributes[:original]).to eq('xss <img onerror=alert(1) src=x>')
end
end
diff --git a/spec/lib/banzai/filter/references/commit_range_reference_filter_spec.rb b/spec/lib/banzai/filter/references/commit_range_reference_filter_spec.rb
index 5179c2fedd3273..1e1fec8b542d7d 100644
--- a/spec/lib/banzai/filter/references/commit_range_reference_filter_spec.rb
+++ b/spec/lib/banzai/filter/references/commit_range_reference_filter_spec.rb
@@ -68,7 +68,7 @@
it 'includes no title attribute' do
doc = reference_filter("See #{reference}")
- expect(doc.css('a').first.attr('title')).to eq ""
+ expect(doc.css('a').first.attr('title')).to be_nil
end
it 'includes default classes' do
diff --git a/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb b/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb
index 8eaf629c24c651..37269a0c675a3e 100644
--- a/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb
+++ b/spec/lib/banzai/filter/references/milestone_reference_filter_spec.rb
@@ -340,7 +340,7 @@
result = reference_filter("See #{absolute_reference}")
expect(result.css('a').first.attr('href')).to eq(urls.milestone_url(milestone))
- expect(result.css('a').first.attr('data-original')).to eq absolute_reference
+ expect(result.css('a').first.attr('data-original')).to eq(absolute_reference)
expect(result.content).to eq "See %#{milestone.title}"
end
end
diff --git a/spec/lib/banzai/reference_redactor_spec.rb b/spec/lib/banzai/reference_redactor_spec.rb
index e3c66d8075180f..b7d991553dcf4c 100644
--- a/spec/lib/banzai/reference_redactor_spec.rb
+++ b/spec/lib/banzai/reference_redactor_spec.rb
@@ -68,7 +68,8 @@
end
it 'redacts an issue attached' do
- doc = Nokogiri::HTML.fragment("foo")
+ doc = Nokogiri::HTML.fragment("foo")
+ doc.css('a').first["data-issue"] = issue.id
redactor.redact([doc])
@@ -76,7 +77,9 @@
end
it 'redacts an external issue' do
- doc = Nokogiri::HTML.fragment("foo")
+ doc = Nokogiri::HTML.fragment("foo")
+ doc.css('a').first["data-external-issue"] = issue.id
+ doc.css('a').first["data-project"] = project.id
redactor.redact([doc])
--
GitLab
From 891df93b8a649a63aa8676ff1161759719c80ed3 Mon Sep 17 00:00:00 2001
From: Asherah Connor
Date: Thu, 9 Oct 2025 19:28:32 +1100
Subject: [PATCH 2/4] Remove benchmark and comment reference method
---
bench.rb | 54 -----------------------
lib/banzai/filter/concerns/html_writer.rb | 9 ++--
2 files changed, 5 insertions(+), 58 deletions(-)
delete mode 100644 bench.rb
diff --git a/bench.rb b/bench.rb
deleted file mode 100644
index 46d891b32bfe34..00000000000000
--- a/bench.rb
+++ /dev/null
@@ -1,54 +0,0 @@
-require 'benchmark/ips'
-require 'memory_profiler'
-
-tag_name = "a"
-attributes = {
- "href" => "http://localhost/namespace36/project-36/-/issues?label_name=g.fm+%26+references%3F",
- "title" => "",
- "class" => "gfm gfm-label has-tooltip gl-link gl-label-link",
- "data-original" => "~\"g.fm & references?\"",
- "data-link" => false,
- "data-link-reference" => false,
- "data-label" => 365,
- "data-project" => 3155,
- "data-container" => "body",
- "data-placement" => "top",
- "data-reference-type" => :label,
-}
-
-class Filter
- include Banzai::Filter::Concerns::HtmlWriter
-
- def initialize
- @doc = Nokogiri::HTML.fragment("")
- end
-
- attr_reader :doc
-end
-
-filter = Filter.new
-N = 300_000
-
-puts "<<>>"
-puts "<<>>"
-puts "<<>>"
-
-report = MemoryProfiler.report do
- N.times { filter.write_opening_tag_nokogiri(tag_name, attributes) }
-end
-report.pretty_print
-
-puts "<<>>"
-puts "<<>>"
-puts "<<>>"
-
-filter = Filter.new
-report = MemoryProfiler.report do
- N.times { filter.write_opening_tag(tag_name, attributes) }
-end
-report.pretty_print
-
-Benchmark.ips do |x|
- x.report("nokogiri") { filter.write_opening_tag_nokogiri(tag_name, attributes) }
- x.report("direct") { filter.write_opening_tag(tag_name, attributes) }
-end
diff --git a/lib/banzai/filter/concerns/html_writer.rb b/lib/banzai/filter/concerns/html_writer.rb
index cf97432eab93ba..5023937588629b 100644
--- a/lib/banzai/filter/concerns/html_writer.rb
+++ b/lib/banzai/filter/concerns/html_writer.rb
@@ -36,10 +36,11 @@ def write_opening_tag(tag_name, attributes)
# This is the most sure-fire correct way to implement the above function, relying on Nokogiri's DOM to correctly
# serialise attributes for us. Comparing the two with benchmark-ips and memory_profiler shows this method to be
# 7x slower and adds 3x memory pressure compared to the above. It is included here for reference.
- def write_opening_tag_nokogiri(tag_name, attributes)
- el = doc.document.create_element(tag_name, attributes.compact)
- el.to_html.chomp("#{tag_name}>")
- end
+ #
+ # def write_opening_tag_nokogiri(tag_name, attributes)
+ # el = doc.document.create_element(tag_name, attributes.compact)
+ # el.to_html.chomp("#{tag_name}>")
+ # end
end
end
end
--
GitLab
From 0a7f414e40850c2f30c2e01ea1e66340e032664a Mon Sep 17 00:00:00 2001
From: Asherah Connor
Date: Fri, 10 Oct 2025 12:32:31 +1100
Subject: [PATCH 3/4] Appease the Rubocop
---
lib/banzai/filter/concerns/html_writer.rb | 1 +
lib/banzai/filter/references/reference_filter.rb | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/banzai/filter/concerns/html_writer.rb b/lib/banzai/filter/concerns/html_writer.rb
index 5023937588629b..cbb085ea003ac7 100644
--- a/lib/banzai/filter/concerns/html_writer.rb
+++ b/lib/banzai/filter/concerns/html_writer.rb
@@ -27,6 +27,7 @@ def write_opening_tag(tag_name, attributes)
s = +"<" << tag_name
attributes.each do |attr_name, attr_value|
next if attr_value.nil?
+
s << " " << attr_name << "="
s << '"' << CGI.escapeHTML(attr_value.to_s) << '"'
end
diff --git a/lib/banzai/filter/references/reference_filter.rb b/lib/banzai/filter/references/reference_filter.rb
index 3bc48cf59b1b25..07916f69e60f01 100644
--- a/lib/banzai/filter/references/reference_filter.rb
+++ b/lib/banzai/filter/references/reference_filter.rb
@@ -18,7 +18,7 @@ class ReferenceFilter < HTML::Pipeline::Filter
include Concerns::HtmlWriter
REFERENCE_TYPE_ATTRIBUTE = :reference_type
- REFERENCE_TYPE_DATA_ATTRIBUTE_NAME = "data-#{REFERENCE_TYPE_ATTRIBUTE.to_s.dasherize}"
+ REFERENCE_TYPE_DATA_ATTRIBUTE_NAME = "data-#{REFERENCE_TYPE_ATTRIBUTE.to_s.dasherize}".freeze
class << self
# Implement in child class
--
GitLab
From 9677587dc676778bbb05b0c5a8ead1f0ccfa5797 Mon Sep 17 00:00:00 2001
From: Asherah Connor
Date: Tue, 14 Oct 2025 09:39:40 +1100
Subject: [PATCH 4/4] Some missed CI adjustments
---
.../incident_management/timeline_events_spec.rb | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/spec/requests/projects/incident_management/timeline_events_spec.rb b/spec/requests/projects/incident_management/timeline_events_spec.rb
index fee3ef6e322c57..8b866dcea5dda7 100644
--- a/spec/requests/projects/incident_management/timeline_events_spec.rb
+++ b/spec/requests/projects/incident_management/timeline_events_spec.rb
@@ -16,12 +16,12 @@
end
let(:expected_reference) do
- "##{incident.iid}"
+ "#1"
end
let(:expected_body) do
--
GitLab