diff options
author | Jan Provaznik <jprovaznik@gitlab.com> | 2018-12-03 00:47:33 +0300 |
---|---|---|
committer | Jan Provaznik <jprovaznik@gitlab.com> | 2018-12-06 11:25:09 +0300 |
commit | 58bfd733310effa94af0e1f1f19e53e34235cffc (patch) | |
tree | e793b8f8b2669034e80b7668304f3fc75dc23deb /lib/gitlab/file_finder.rb | |
parent | 00acef434031b5dc0bf39576a9e83802c7806842 (diff) |
Optimized file search to work without limits
* removed the 100-result limit on file search results because we
load all results anyway
* expensive processing (parsing match content, UTF encoding)
is done only for the selected page of the paginated output
Diffstat (limited to 'lib/gitlab/file_finder.rb')
-rw-r--r-- | lib/gitlab/file_finder.rb | 57 |
1 files changed, 15 insertions, 42 deletions
diff --git a/lib/gitlab/file_finder.rb b/lib/gitlab/file_finder.rb index b4db3f93c9c..3958814208c 100644 --- a/lib/gitlab/file_finder.rb +++ b/lib/gitlab/file_finder.rb @@ -4,8 +4,6 @@ # the result is joined and sorted by file name module Gitlab class FileFinder - BATCH_SIZE = 100 - attr_reader :project, :ref delegate :repository, to: :project @@ -16,60 +14,35 @@ module Gitlab end def find(query) - query = Gitlab::Search::Query.new(query) do - filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i } - filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i } - filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i } + query = Gitlab::Search::Query.new(query, encode_binary: true) do + filter :filename, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}$/i } + filter :path, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}/i } + filter :extension, matcher: ->(filter, blob) { blob.binary_filename =~ /\.#{filter[:regex_value]}$/i } end - by_content = find_by_content(query.term) - - already_found = Set.new(by_content.map(&:filename)) - by_filename = find_by_filename(query.term, except: already_found) + files = find_by_filename(query.term) + find_by_content(query.term) - files = (by_content + by_filename) - .sort_by(&:filename) + files = query.filter_results(files) if query.filters.any? 
- query.filter_results(files).map { |blob| [blob.filename, blob] } + files end private def find_by_content(query) - results = repository.search_files_by_content(query, ref).first(BATCH_SIZE) - results.map { |result| Gitlab::ProjectSearchResults.parse_search_result(result, project) } - end - - def find_by_filename(query, except: []) - filenames = search_filenames(query, except) - - blobs(filenames).map do |blob| - Gitlab::SearchResults::FoundBlob.new( - id: blob.id, - filename: blob.path, - basename: File.basename(blob.path, File.extname(blob.path)), - ref: ref, - startline: 1, - data: blob.data, - project: project - ) + repository.search_files_by_content(query, ref).map do |result| + Gitlab::Search::FoundBlob.new(content_match: result, project: project, ref: ref, repository: repository) end end - def search_filenames(query, except) - filenames = repository.search_files_by_name(query, ref).first(BATCH_SIZE) - - filenames.delete_if { |filename| except.include?(filename) } unless except.empty? - - filenames - end - - def blob_refs(filenames) - filenames.map { |filename| [ref, filename] } + def find_by_filename(query) + search_filenames(query).map do |filename| + Gitlab::Search::FoundBlob.new(blob_filename: filename, project: project, ref: ref, repository: repository) + end end - def blobs(filenames) - Gitlab::Git::Blob.batch(repository, blob_refs(filenames), blob_size_limit: 1024) + def search_filenames(query) + repository.search_files_by_name(query, ref) end end end |