Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jan Provaznik <jprovaznik@gitlab.com>, 2018-12-03 00:47:33 +0300
committer: Jan Provaznik <jprovaznik@gitlab.com>, 2018-12-06 11:25:09 +0300
commit: 58bfd733310effa94af0e1f1f19e53e34235cffc (patch)
tree: e793b8f8b2669034e80b7668304f3fc75dc23deb /lib/gitlab/file_finder.rb
parent: 00acef434031b5dc0bf39576a9e83802c7806842 (diff)
Optimized file search to work without limits
* removed the 100-result limit on file search results, because we load all results anyway
* expensive processing (parsing match content, UTF encoding) is done only for the selected page in paginated output
Diffstat (limited to 'lib/gitlab/file_finder.rb')
-rw-r--r-- lib/gitlab/file_finder.rb 57
1 files changed, 15 insertions, 42 deletions
diff --git a/lib/gitlab/file_finder.rb b/lib/gitlab/file_finder.rb
index b4db3f93c9c..3958814208c 100644
--- a/lib/gitlab/file_finder.rb
+++ b/lib/gitlab/file_finder.rb
@@ -4,8 +4,6 @@
# the result is joined and sorted by file name
module Gitlab
class FileFinder
- BATCH_SIZE = 100
-
attr_reader :project, :ref
delegate :repository, to: :project
@@ -16,60 +14,35 @@ module Gitlab
end
def find(query)
- query = Gitlab::Search::Query.new(query) do
- filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i }
- filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i }
- filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i }
+ query = Gitlab::Search::Query.new(query, encode_binary: true) do
+ filter :filename, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}$/i }
+ filter :path, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}/i }
+ filter :extension, matcher: ->(filter, blob) { blob.binary_filename =~ /\.#{filter[:regex_value]}$/i }
end
- by_content = find_by_content(query.term)
-
- already_found = Set.new(by_content.map(&:filename))
- by_filename = find_by_filename(query.term, except: already_found)
+ files = find_by_filename(query.term) + find_by_content(query.term)
- files = (by_content + by_filename)
- .sort_by(&:filename)
+ files = query.filter_results(files) if query.filters.any?
- query.filter_results(files).map { |blob| [blob.filename, blob] }
+ files
end
private
def find_by_content(query)
- results = repository.search_files_by_content(query, ref).first(BATCH_SIZE)
- results.map { |result| Gitlab::ProjectSearchResults.parse_search_result(result, project) }
- end
-
- def find_by_filename(query, except: [])
- filenames = search_filenames(query, except)
-
- blobs(filenames).map do |blob|
- Gitlab::SearchResults::FoundBlob.new(
- id: blob.id,
- filename: blob.path,
- basename: File.basename(blob.path, File.extname(blob.path)),
- ref: ref,
- startline: 1,
- data: blob.data,
- project: project
- )
+ repository.search_files_by_content(query, ref).map do |result|
+ Gitlab::Search::FoundBlob.new(content_match: result, project: project, ref: ref, repository: repository)
end
end
- def search_filenames(query, except)
- filenames = repository.search_files_by_name(query, ref).first(BATCH_SIZE)
-
- filenames.delete_if { |filename| except.include?(filename) } unless except.empty?
-
- filenames
- end
-
- def blob_refs(filenames)
- filenames.map { |filename| [ref, filename] }
+ def find_by_filename(query)
+ search_filenames(query).map do |filename|
+ Gitlab::Search::FoundBlob.new(blob_filename: filename, project: project, ref: ref, repository: repository)
+ end
end
- def blobs(filenames)
- Gitlab::Git::Blob.batch(repository, blob_refs(filenames), blob_size_limit: 1024)
+ def search_filenames(query)
+ repository.search_files_by_name(query, ref)
end
end
end