Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jan Provaznik <jprovaznik@gitlab.com> 2018-12-03 00:47:33 +0300
committer: Jan Provaznik <jprovaznik@gitlab.com> 2018-12-06 11:25:09 +0300
commit: 58bfd733310effa94af0e1f1f19e53e34235cffc (patch)
tree: e793b8f8b2669034e80b7668304f3fc75dc23deb /lib/gitlab/search
parent: 00acef434031b5dc0bf39576a9e83802c7806842 (diff)
Optimized file search to work without limits
* removed the 100 limit on file search results because we load all results anyway
* expensive processing (parsing match content, UTF encoding) is done only for the selected page in paginated output
Diffstat (limited to 'lib/gitlab/search')
-rw-r--r-- lib/gitlab/search/found_blob.rb 162
-rw-r--r-- lib/gitlab/search/query.rb 6
2 files changed, 167 insertions, 1 deletions
diff --git a/lib/gitlab/search/found_blob.rb b/lib/gitlab/search/found_blob.rb
new file mode 100644
index 00000000000..a62ab1521a7
--- /dev/null
+++ b/lib/gitlab/search/found_blob.rb
@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Search
+    # A single file hit in search results.
+    #
+    # As #parsed_content shows, the attributes of an instance come from one
+    # of three places: a raw search match (`content_match`), a blob that is
+    # looked up by name (`blob_filename`), or values handed straight to the
+    # constructor. Parsing and UTF-8 encoding happen only when an attribute
+    # is first read, and the results are memoized.
+    class FoundBlob
+      include EncodingHelper
+      include Presentable
+      include BlobLanguageFromGitAttributes
+      include Gitlab::Utils::StrongMemoize
+
+      attr_reader :project, :content_match, :blob_filename
+
+      # "<ref>:<filename>\0" at the very beginning of a match.
+      FILENAME_REGEXP = /\A(?<ref>[^:]*):(?<filename>[^\x00]*)\x00/.freeze
+      # "<ref>:<filename>\0<line number>\0" at the beginning of a line.
+      CONTENT_REGEXP = /^(?<ref>[^:]*):(?<filename>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze
+
+      # Calls #fetch_blob on every element of `blobs` that is a FoundBlob
+      # with a blob_filename; all other elements are ignored.
+      # NOTE(review): presumably this registers the lookups with BatchLoader
+      # up front so they resolve together - confirm against BatchLoader docs.
+      #
+      # Returns the array of blobs that were selected.
+      def self.preload_blobs(blobs)
+        to_fetch = blobs.select { |blob| blob.is_a?(self) && blob.blob_filename }
+
+        to_fetch.each(&:fetch_blob)
+      end
+
+      # opts - Hash of optional attributes. Every key defaults to nil except
+      #        :per_page, which defaults to 20. Recognized keys: :id,
+      #        :filename, :basename, :ref, :startline, :data, :per_page,
+      #        :project, :project_id, :content_match, :blob_filename,
+      #        :repository.
+      def initialize(opts = {})
+        @id = opts.fetch(:id, nil)
+        @binary_filename = opts.fetch(:filename, nil)
+        @binary_basename = opts.fetch(:basename, nil)
+        @ref = opts.fetch(:ref, nil)
+        @startline = opts.fetch(:startline, nil)
+        @binary_data = opts.fetch(:data, nil)
+        @per_page = opts.fetch(:per_page, 20)
+        @project = opts.fetch(:project, nil)
+        # Some callers do not have a project object (e.g. elastic search),
+        # yet they can trigger many calls in one go,
+        # causing duplicated queries.
+        # Allow those to just pass project_id instead.
+        @project_id = opts.fetch(:project_id, nil)
+        @content_match = opts.fetch(:content_match, nil)
+        @blob_filename = opts.fetch(:blob_filename, nil)
+        @repository = opts.fetch(:repository, nil)
+      end
+
+      # The :id given to the constructor, otherwise the parsed one.
+      def id
+        @id ||= parsed_content[:id]
+      end
+
+      # The :ref given to the constructor, otherwise the parsed one.
+      def ref
+        @ref ||= parsed_content[:ref]
+      end
+
+      # The :startline given to the constructor, otherwise the parsed one.
+      def startline
+        @startline ||= parsed_content[:startline]
+      end
+
+      # binary_filename is used for running filters on all matches.
+      # For grepped results (which use content_match), we get the
+      # filename from the beginning of the grepped result, which is faster
+      # than parsing the whole snippet.
+      def binary_filename
+        @binary_filename ||= content_match ? search_result_filename : parsed_content[:binary_filename]
+      end
+
+      # File name passed through encode_utf8.
+      def filename
+        @filename ||= encode_utf8(@binary_filename || parsed_content[:binary_filename])
+      end
+
+      # Base name passed through encode_utf8.
+      def basename
+        @basename ||= encode_utf8(@binary_basename || parsed_content[:binary_basename])
+      end
+
+      # Blob/match content passed through encode_utf8.
+      def data
+        @data ||= encode_utf8(@binary_data || parsed_content[:binary_data])
+      end
+
+      # Alias for #filename.
+      def path
+        filename
+      end
+
+      # The explicitly supplied project id, falling back to the id of the
+      # project object (nil when neither was given).
+      def project_id
+        @project_id || @project&.id
+      end
+
+      # Presents this object with BlobPresenter.
+      def present
+        super(presenter_class: BlobPresenter)
+      end
+
+      # Requests the blob identified by [ref, blob_filename] through
+      # BatchLoader and returns whatever BatchLoader hands back for that key.
+      # The value registered for a loaded blob is a Hash with the keys :id,
+      # :binary_filename, :binary_basename, :ref, :startline, :binary_data
+      # and :project.
+      def fetch_blob
+        path = [ref, blob_filename]
+        # If the blob couldn't be fetched for some reason,
+        # show at least the blob filename.
+        missing_blob = { binary_filename: blob_filename }
+
+        BatchLoader.for(path).batch(default_value: missing_blob) do |refs, loader|
+          Gitlab::Git::Blob.batch(repository, refs, blob_size_limit: 1024).each do |blob|
+            data = {
+              id: blob.id,
+              binary_filename: blob.path,
+              # Keep the directory part, exactly as parse_search_result does,
+              # so #basename does not depend on how the blob was obtained.
+              binary_basename: path_without_extension(blob.path),
+              ref: ref,
+              startline: 1,
+              binary_data: blob.data,
+              project: project
+            }
+
+            loader.call([ref, blob.path], data)
+          end
+        end
+      end
+
+      private
+
+      # File name captured by FILENAME_REGEXP from the start of
+      # content_match, or nil when the match does not begin with the
+      # expected prefix.
+      def search_result_filename
+        content_match.match(FILENAME_REGEXP) { |matches| matches[:filename] }
+      end
+
+      # Memoized source of the raw (not yet UTF-8 encoded) attributes:
+      # the parsed content_match when there is one, else the result of
+      # #fetch_blob when a blob_filename is known, else an empty Hash.
+      def parsed_content
+        strong_memoize(:parsed_content) do
+          if content_match
+            parse_search_result
+          elsif blob_filename
+            fetch_blob
+          else
+            {}
+          end
+        end
+      end
+
+      # Splits content_match into its per-line prefix and the file content.
+      #
+      # Returns a Hash with :binary_filename, :binary_basename, :ref,
+      # :startline, :binary_data and :project. When no line carries the
+      # CONTENT_REGEXP prefix, the name and ref entries are nil and
+      # :startline is 0.
+      def parse_search_result
+        ref = nil
+        filename = nil
+        basename = nil
+
+        data = []
+        startline = 0
+
+        content_match.each_line.each_with_index do |line, index|
+          # The prefix contains the line number, so it differs from line to
+          # line and has to be matched again for every line.
+          prefix = line.match(CONTENT_REGEXP)&.tap do |matches|
+            ref = matches[:ref]
+            filename = matches[:filename]
+            # Line number of this line minus its offset within the match.
+            startline = matches[:startline].to_i - index
+            basename = path_without_extension(filename)
+          end
+
+          # A line without the prefix is kept unchanged (nil.to_s is '').
+          data << line.sub(prefix.to_s, '')
+        end
+
+        {
+          binary_filename: filename,
+          binary_basename: basename,
+          ref: ref,
+          startline: startline,
+          binary_data: data.join,
+          project: project
+        }
+      end
+
+      # Returns a new String: `path` without the trailing extension reported
+      # by File.extname. Directory components are kept.
+      # Plain string slicing is used instead of a regexp so that names whose
+      # bytes are not valid in their encoding cannot raise while matching.
+      def path_without_extension(path)
+        extname = File.extname(path)
+        return path.dup if extname.empty? || !path.end_with?(extname)
+
+        path[0...(path.length - extname.length)]
+      end
+
+      # The repository given to the constructor, otherwise the project's.
+      # NOTE(review): raises NoMethodError when neither a repository nor a
+      # project object was supplied (e.g. only :project_id) - confirm that
+      # callers on the blob_filename path always pass one of them.
+      def repository
+        @repository ||= project.repository
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/search/query.rb b/lib/gitlab/search/query.rb
index 7f69083a492..ba0e16607a6 100644
--- a/lib/gitlab/search/query.rb
+++ b/lib/gitlab/search/query.rb
@@ -3,6 +3,8 @@
module Gitlab
module Search
class Query < SimpleDelegator
+ include EncodingHelper
+
def initialize(query, filter_opts = {}, &block)
@raw_query = query.dup
@filters = []
@@ -50,7 +52,9 @@ module Gitlab
end
def parse_filter(filter, input)
- filter[:parser].call(input)
+ result = filter[:parser].call(input)
+
+ @filter_options[:encode_binary] ? encode_binary(result) : result
end
end
end