diff --git a/app/assets/stylesheets/pages/search.scss b/app/assets/stylesheets/pages/search.scss index b9818ffcf4241b23999677a9c809768a2ea5e214..9b4d4f96339d5ca4b339a8e57a9d7be020cd7079 100644 --- a/app/assets/stylesheets/pages/search.scss +++ b/app/assets/stylesheets/pages/search.scss @@ -1,4 +1,8 @@ .search-results { + strong.highlighted-terms { + background-color: yellow; + } + .search-result-row { border-bottom: 1px solid $border-color; padding-bottom: $gl-padding; diff --git a/app/helpers/blob_helper.rb b/app/helpers/blob_helper.rb index e964d7a5e1690ba8d822855486f769174b645ad6..5eef82acac3b618ba3d4d912c031379c7d794dd7 100644 --- a/app/helpers/blob_helper.rb +++ b/app/helpers/blob_helper.rb @@ -1,6 +1,13 @@ module BlobHelper - def highlight(blob_name, blob_content, repository: nil, plain: false) + def highlight(blob_name, blob_content, repository: nil, plain: false, highlighted_terms: []) highlighted = Gitlab::Highlight.highlight(blob_name, blob_content, plain: plain, repository: repository) + + if highlighted_terms.any? + escaped_terms = highlighted_terms.map {|term| Regexp.escape(term) } + regexp = Regexp.new("(#{escaped_terms.join('|')})") + highlighted.gsub!(regexp, '\1').html_safe + end + raw %(
#{highlighted}
) end diff --git a/app/models/blob.rb b/app/models/blob.rb index 954d4e4d779c95fec71d61248293032dad720b4a..c07231a8e0d1f8177e3b7295efd3c9ded936f911 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -102,6 +102,10 @@ def no_highlighting? raw_size && raw_size > MAXIMUM_TEXT_HIGHLIGHT_SIZE end + def highlighted_terms + [] + end + def empty? raw_size == 0 end diff --git a/app/views/search/results/_blob.html.haml b/app/views/search/results/_blob.html.haml index b90ee509ece12a1d2eab5ba4972756563d0997a8..c1827dbfbe403cc15150b6876becab80251a76a7 100644 --- a/app/views/search/results/_blob.html.haml +++ b/app/views/search/results/_blob.html.haml @@ -1,23 +1,2 @@ -- project = @project || find_project_for_blob(blob) - -- if blob.is_a?(Array) - - file_name, blob = blob -- else - - blob = parse_search_result(blob) - - file_name = blob.filename - -- blob_link = project_blob_path(project, tree_join(blob.ref, file_name)) -.blob-result - .file-holder - .js-file-title.file-title - = link_to blob_link do - = icon('fa-file') - %strong - - if @project - = file_name - - else - #{project.name_with_namespace}: - %i= file_name - - if blob.data - .file-content.code.term - = render 'shared/file_highlight', blob: blob, first_line_number: blob.startline, blob_link: blob_link +- partial_name = current_application_settings.elasticsearch_search? ? 'blob_es' : 'blob_regular' += render "search/results/#{partial_name}", blob: blob diff --git a/app/views/search/results/_blob_es.html.haml b/app/views/search/results/_blob_es.html.haml new file mode 100644 index 0000000000000000000000000000000000000000..e6b8fb5b0ae0655f7106342254b32a4de2e459a8 --- /dev/null +++ b/app/views/search/results/_blob_es.html.haml @@ -0,0 +1,23 @@ +- project = @project || find_project_for_blob(blob) +- blobs = parse_search_result(blob) +- blob_link = namespace_project_blob_path(project.namespace, project, tree_join(blobs[:ref], blobs[:filename])) + +.blob-result + .file-holder + .js-file-title.file-title + = link_to blob_link do + = icon('fa-file') + %strong + - if @project + = blobs[:filename] + - else + #{project.name_with_namespace}: + %i= blobs[:filename] + + - blobs[:blobs].each do |blob| + - if blob.data + .file-content.code.term + = render 'shared/file_highlight', blob: blob, first_line_number: blob.startline, blob_link: blob_link + - if blob != blobs[:blobs].last + .file-content.code.term + \--- diff --git a/app/views/search/results/_blob_regular.html.haml b/app/views/search/results/_blob_regular.html.haml new file mode 100644 index 0000000000000000000000000000000000000000..b90ee509ece12a1d2eab5ba4972756563d0997a8 --- /dev/null +++ b/app/views/search/results/_blob_regular.html.haml @@ -0,0 +1,23 @@ +- project = @project || find_project_for_blob(blob) + +- if blob.is_a?(Array) + - file_name, blob = blob +- else + - blob = parse_search_result(blob) + - file_name = blob.filename + +- blob_link = project_blob_path(project, tree_join(blob.ref, file_name)) +.blob-result + .file-holder + .js-file-title.file-title + = link_to blob_link do + = icon('fa-file') + %strong + - if @project + = file_name + - else + #{project.name_with_namespace}: + %i= file_name + - if blob.data + .file-content.code.term + = render 'shared/file_highlight', blob: blob, first_line_number: blob.startline, blob_link: blob_link diff --git a/app/views/shared/_file_highlight.html.haml b/app/views/shared/_file_highlight.html.haml index 8d64cb5d698b81b1da27c8dd91782f46d6ab1f0f..bd08005e888f87d57d1f2e56624b9566e24260df 100644 --- a/app/views/shared/_file_highlight.html.haml +++ b/app/views/shared/_file_highlight.html.haml @@ -13,4 +13,8 @@ = link_icon = i .blob-content{ data: { blob_id: blob.id } } - = highlight(blob.path, blob.data, repository: repository, plain: blob.no_highlighting?) + = highlight(blob.path, + blob.data, + repository: repository, + plain: blob.no_highlighting?, + highlighted_terms: blob.highlighted_terms) diff --git a/lib/gitlab/elastic/code_snippet.rb b/lib/gitlab/elastic/code_snippet.rb new file mode 100644 index 0000000000000000000000000000000000000000..6c187f2ce05d78a9f40759254c93125a4a80880c --- /dev/null +++ b/lib/gitlab/elastic/code_snippet.rb @@ -0,0 +1,42 @@ +module Gitlab + module Elastic + class CodeSnippet + def initialize(document, snippet) + @document = document + @snippet = snippet + end + + def startline + index = @document.rindex(clean_snippet) + @document[0, index].count("\n") + 1 + end + + def line_range + startline..last_line_number + end + + def last_line_number + startline + snippet_processed.count("\n") + 1 + end + + # Returns the origin snippet without any highlight tags + def clean_snippet + snippet_processed.gsub(/gitlabelasticsearch→|←gitlabelasticsearch/, '') + end + + # Returns highlighted terms + def highlighted_terms + snippet_processed.scan(/gitlabelasticsearch→(.*?)←gitlabelasticsearch/).flatten.uniq + end + + private + + # Removes a first line if it does not have the highlight tags + # It's needed because ES can return non-complite code line which is + # unthinkable to keep as is. We should treat code lines as atomic entities. + def snippet_processed + @snippet_processed ||= @snippet.sub(/\A((?!gitlabelasticsearch→).)*[\n\r]/, '') + end + end + end +end diff --git a/lib/gitlab/elastic/code_snippet_collection.rb b/lib/gitlab/elastic/code_snippet_collection.rb new file mode 100644 index 0000000000000000000000000000000000000000..75ac62cd4074b1d163493427c53362633d642107 --- /dev/null +++ b/lib/gitlab/elastic/code_snippet_collection.rb @@ -0,0 +1,30 @@ +module Gitlab + module Elastic + class CodeSnippetCollection + SNIPPETS_MAX = 3 + + def initialize(code_snippets) + @code_snippets = code_snippets + end + + def order + @code_snippets.sort!{ |a, b| a.startline <=> b.startline } + self + end + + def remove_overlapped_snippets + filtered = [] + + @code_snippets.each do |snippet| + break if filtered.size == SNIPPETS_MAX + + unless filtered.any?{ |s| snippet.line_range.overlaps? s.line_range } + filtered << snippet + end + end + + filtered + end + end + end +end diff --git a/lib/gitlab/elastic/search_results.rb b/lib/gitlab/elastic/search_results.rb index ad061eccb3a8325021e6d2f0c3744dfe49e0c22c..7c7d5bd911046c37ab41bd9000deb87f6f162d2f 100644 --- a/lib/gitlab/elastic/search_results.rb +++ b/lib/gitlab/elastic/search_results.rb @@ -1,6 +1,12 @@ module Gitlab module Elastic class SearchResults + # There is probability that we have one huge line where there is a huge number of + # highlighted snippets. To not have any performance problems we need to limit the number + # of snippets we will consider as all the calculation related to snippets is + # pretty expensive. + MAX_SNIPPETS_TO_CONSIDER = 10 + attr_reader :current_user, :query, :public_and_internal_projects # Limit search results by passed project ids @@ -68,46 +74,45 @@ def single_commit_result? end def self.parse_search_result(result) - ref = result["_source"]["blob"]["commit_sha"] - filename = result["_source"]["blob"]["path"] + ref = result['_source']['blob']['commit_sha'] + filename = result['_source']['blob']['path'] extname = File.extname(filename) basename = filename.sub(/#{extname}$/, '') - content = result["_source"]["blob"]["content"] - total_lines = content.lines.size - - highlighted_content = result["highlight"]["blob.content"] - term = highlighted_content && highlighted_content[0].match(/gitlabelasticsearch→(.*?)←gitlabelasticsearch/)[1] - - found_line_number = 0 - - content.each_line.each_with_index do |line, index| - if term && line.include?(term) - found_line_number = index - break + content = result['_source']['blob']['content'] + + highlighted_content = result['highlight']['blob.content'] + blobs_found_by_filename = result['highlight']['blob.file_name'] + found_file = { filename: filename, ref: ref, blobs: []} + + if highlighted_content + # byebug + snippets = highlighted_content.take(MAX_SNIPPETS_TO_CONSIDER).map{ |s| ::Gitlab::Elastic::CodeSnippet.new(content, s)} + + snippets = ::Gitlab::Elastic::CodeSnippetCollection.new(snippets).order.remove_overlapped_snippets + + found_file[:blobs] = snippets.map do |snippet| + ::Gitlab::SearchResults::FoundBlob.new( + filename: filename, + basename: basename, + ref: ref, + startline: snippet.startline, + data: snippet.clean_snippet, + highlighted_terms: snippet.highlighted_terms + ) end end - from = if found_line_number >= 2 - found_line_number - 2 - else - found_line_number - end - - to = if (total_lines - found_line_number) > 3 - found_line_number + 2 - else - found_line_number - end - - data = content.lines[from..to] - - ::Gitlab::SearchResults::FoundBlob.new( - filename: filename, - basename: basename, - ref: ref, - startline: from + 1, - data: data.join - ) + if blobs_found_by_filename + found_file[:blobs] << ::Gitlab::SearchResults::FoundBlob.new( + filename: filename, + basename: basename, + ref: ref, + startline: 1, + data: content.lines[0..3].join + ) + end + + found_file end private diff --git a/lib/gitlab/search_results.rb b/lib/gitlab/search_results.rb index efe8095beea31e194705dc2bedbe01472ed3b09b..f4f4a2ede305337a95559246be40f4e662d15d30 100644 --- a/lib/gitlab/search_results.rb +++ b/lib/gitlab/search_results.rb @@ -1,7 +1,7 @@ module Gitlab class SearchResults class FoundBlob - attr_reader :id, :filename, :basename, :ref, :startline, :data + attr_reader :id, :filename, :basename, :ref, :startline, :data, :highlighted_terms def initialize(opts = {}) @id = opts.fetch(:id, nil) @@ -10,12 +10,13 @@ def initialize(opts = {}) @ref = opts.fetch(:ref, nil) @startline = opts.fetch(:startline, nil) @data = opts.fetch(:data, nil) + @highlighted_terms = opts.fetch(:highlighted_terms, []) end def path filename end - + def no_highlighting? false end