Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sean McGivern <sean@mcgivern.me.uk>, 2017-11-29 12:10:43 +0300
committer: Sean McGivern <sean@mcgivern.me.uk>, 2017-11-29 12:10:43 +0300
commit: a4f8dddc212fcd91f6a4a09e92b2de6117a21305 (patch)
tree: eed31a76289f865cdb1d392c6bad3f2bf96ebbc4
parent: 3659327d71a033703451baba01873d5663755739 (diff)
parent: da42dfb3cf4a2fb0cdcc1a3b41438516a0bed0e5 (diff)
Merge branch 'dm-search-pattern' into 'master'
Use fuzzy search with minimum length of 3 characters where appropriate

Closes #40512

See merge request gitlab-org/gitlab-ce!15592
-rw-r--r--  app/finders/notes_finder.rb | 3
-rw-r--r--  app/models/ci/runner.rb | 6
-rw-r--r--  app/models/concerns/issuable.rb | 9
-rw-r--r--  app/models/email.rb | 1
-rw-r--r--  app/models/group.rb | 14
-rw-r--r--  app/models/milestone.rb | 6
-rw-r--r--  app/models/namespace.rb | 6
-rw-r--r--  app/models/note.rb | 5
-rw-r--r--  app/models/project.rb | 10
-rw-r--r--  app/models/snippet.rb | 11
-rw-r--r--  app/models/user.rb | 24
-rw-r--r--  changelogs/unreleased/dm-search-pattern.yml | 5
-rw-r--r--  lib/gitlab/sql/pattern.rb | 25
-rw-r--r--  spec/lib/gitlab/sql/pattern_spec.rb | 30
-rw-r--r--  spec/models/ci/runner_spec.rb | 2
-rw-r--r--  spec/models/concerns/issuable_spec.rb | 10
-rw-r--r--  spec/models/snippet_spec.rb | 2
17 files changed, 84 insertions, 85 deletions
diff --git a/app/finders/notes_finder.rb b/app/finders/notes_finder.rb
index 02eb983bf55..12157818bcd 100644
--- a/app/finders/notes_finder.rb
+++ b/app/finders/notes_finder.rb
@@ -104,8 +104,7 @@ class NotesFinder
query = @params[:search]
return notes unless query
- pattern = "%#{query}%"
- notes.where(Note.arel_table[:note].matches(pattern))
+ notes.search(query)
end
# Notes changed since last fetch
diff --git a/app/models/ci/runner.rb b/app/models/ci/runner.rb
index c6509f89117..d39610a8995 100644
--- a/app/models/ci/runner.rb
+++ b/app/models/ci/runner.rb
@@ -1,6 +1,7 @@
module Ci
class Runner < ActiveRecord::Base
extend Gitlab::Ci::Model
+ include Gitlab::SQL::Pattern
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour
@@ -59,10 +60,7 @@ module Ci
#
# Returns an ActiveRecord::Relation.
def self.search(query)
- t = arel_table
- pattern = "%#{query}%"
-
- where(t[:token].matches(pattern).or(t[:description].matches(pattern)))
+ fuzzy_search(query, [:token, :description])
end
def self.contact_time_deadline
diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb
index 27cd3118f81..5ca4a7086cb 100644
--- a/app/models/concerns/issuable.rb
+++ b/app/models/concerns/issuable.rb
@@ -122,9 +122,7 @@ module Issuable
#
# Returns an ActiveRecord::Relation.
def search(query)
- title = to_fuzzy_arel(:title, query)
-
- where(title)
+ fuzzy_search(query, [:title])
end
# Searches for records with a matching title or description.
@@ -135,10 +133,7 @@ module Issuable
#
# Returns an ActiveRecord::Relation.
def full_search(query)
- title = to_fuzzy_arel(:title, query)
- description = to_fuzzy_arel(:description, query)
-
- where(title&.or(description))
+ fuzzy_search(query, [:title, :description])
end
def sort(method, excluded_labels: [])
diff --git a/app/models/email.rb b/app/models/email.rb
index 2da8b050149..d6516761f0a 100644
--- a/app/models/email.rb
+++ b/app/models/email.rb
@@ -1,5 +1,6 @@
class Email < ActiveRecord::Base
include Sortable
+ include Gitlab::SQL::Pattern
belongs_to :user
diff --git a/app/models/group.rb b/app/models/group.rb
index dc4500360b9..76262acf50c 100644
--- a/app/models/group.rb
+++ b/app/models/group.rb
@@ -50,20 +50,6 @@ class Group < Namespace
Gitlab::Database.postgresql?
end
- # Searches for groups matching the given query.
- #
- # This method uses ILIKE on PostgreSQL and LIKE on MySQL.
- #
- # query - The search query as a String
- #
- # Returns an ActiveRecord::Relation.
- def search(query)
- table = Namespace.arel_table
- pattern = "%#{query}%"
-
- where(table[:name].matches(pattern).or(table[:path].matches(pattern)))
- end
-
def sort(method)
if method == 'storage_size_desc'
# storage_size is a virtual column so we need to
diff --git a/app/models/milestone.rb b/app/models/milestone.rb
index 01458120cda..c06ee8083f0 100644
--- a/app/models/milestone.rb
+++ b/app/models/milestone.rb
@@ -13,6 +13,7 @@ class Milestone < ActiveRecord::Base
include Referable
include StripAttribute
include Milestoneish
+ include Gitlab::SQL::Pattern
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :description
@@ -73,10 +74,7 @@ class Milestone < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search(query)
- t = arel_table
- pattern = "%#{query}%"
-
- where(t[:title].matches(pattern).or(t[:description].matches(pattern)))
+ fuzzy_search(query, [:title, :description])
end
def filter_by_state(milestones, state)
diff --git a/app/models/namespace.rb b/app/models/namespace.rb
index 4d401e7ba18..fa76729a702 100644
--- a/app/models/namespace.rb
+++ b/app/models/namespace.rb
@@ -9,6 +9,7 @@ class Namespace < ActiveRecord::Base
include Routable
include AfterCommitQueue
include Storage::LegacyNamespace
+ include Gitlab::SQL::Pattern
# Prevent users from creating unreasonably deep level of nesting.
# The number 20 was taken based on maximum nesting level of
@@ -86,10 +87,7 @@ class Namespace < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation
def search(query)
- t = arel_table
- pattern = "%#{query}%"
-
- where(t[:name].matches(pattern).or(t[:path].matches(pattern)))
+ fuzzy_search(query, [:name, :path])
end
def clean_path(path)
diff --git a/app/models/note.rb b/app/models/note.rb
index 50c9caf8529..340fe087f82 100644
--- a/app/models/note.rb
+++ b/app/models/note.rb
@@ -14,6 +14,7 @@ class Note < ActiveRecord::Base
include ResolvableNote
include IgnorableColumn
include Editable
+ include Gitlab::SQL::Pattern
module SpecialRole
FIRST_TIME_CONTRIBUTOR = :first_time_contributor
@@ -167,6 +168,10 @@ class Note < ActiveRecord::Base
def has_special_role?(role, note)
note.special_role == role
end
+
+ def search(query)
+ fuzzy_search(query, [:note])
+ end
end
def cross_reference?
diff --git a/app/models/project.rb b/app/models/project.rb
index 85d580fe0fa..5a3f591c2e7 100644
--- a/app/models/project.rb
+++ b/app/models/project.rb
@@ -426,17 +426,11 @@ class Project < ActiveRecord::Base
#
# query - The search query as a String.
def search(query)
- pattern = to_pattern(query)
-
- where(
- arel_table[:path].matches(pattern)
- .or(arel_table[:name].matches(pattern))
- .or(arel_table[:description].matches(pattern))
- )
+ fuzzy_search(query, [:path, :name, :description])
end
def search_by_title(query)
- non_archived.where(arel_table[:name].matches(to_pattern(query)))
+ non_archived.fuzzy_search(query, [:name])
end
def visibility_levels
diff --git a/app/models/snippet.rb b/app/models/snippet.rb
index 2a5f07a15c4..05a16f11b59 100644
--- a/app/models/snippet.rb
+++ b/app/models/snippet.rb
@@ -9,6 +9,7 @@ class Snippet < ActiveRecord::Base
include Mentionable
include Spammable
include Editable
+ include Gitlab::SQL::Pattern
extend Gitlab::CurrentSettings
@@ -135,10 +136,7 @@ class Snippet < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search(query)
- t = arel_table
- pattern = "%#{query}%"
-
- where(t[:title].matches(pattern).or(t[:file_name].matches(pattern)))
+ fuzzy_search(query, [:title, :file_name])
end
# Searches for snippets with matching content.
@@ -149,10 +147,7 @@ class Snippet < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search_code(query)
- table = Snippet.arel_table
- pattern = "%#{query}%"
-
- where(table[:content].matches(pattern))
+ fuzzy_search(query, [:content])
end
end
end
diff --git a/app/models/user.rb b/app/models/user.rb
index cf6b36559a8..14941fd7f98 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -313,9 +313,6 @@ class User < ActiveRecord::Base
#
# Returns an ActiveRecord::Relation.
def search(query)
- table = arel_table
- pattern = User.to_pattern(query)
-
order = <<~SQL
CASE
WHEN users.name = %{query} THEN 0
@@ -325,11 +322,8 @@ class User < ActiveRecord::Base
END
SQL
- where(
- table[:name].matches(pattern)
- .or(table[:email].matches(pattern))
- .or(table[:username].matches(pattern))
- ).reorder(order % { query: ActiveRecord::Base.connection.quote(query) }, :name)
+ fuzzy_search(query, [:name, :email, :username])
+ .reorder(order % { query: ActiveRecord::Base.connection.quote(query) }, :name)
end
# searches user by given pattern
@@ -337,16 +331,16 @@ class User < ActiveRecord::Base
# This method uses ILIKE on PostgreSQL and LIKE on MySQL.
def search_with_secondary_emails(query)
- table = arel_table
email_table = Email.arel_table
- pattern = "%#{query}%"
- matched_by_emails_user_ids = email_table.project(email_table[:user_id]).where(email_table[:email].matches(pattern))
+ matched_by_emails_user_ids = email_table
+ .project(email_table[:user_id])
+ .where(Email.fuzzy_arel_match(:email, query))
where(
- table[:name].matches(pattern)
- .or(table[:email].matches(pattern))
- .or(table[:username].matches(pattern))
- .or(table[:id].in(matched_by_emails_user_ids))
+ fuzzy_arel_match(:name, query)
+ .or(fuzzy_arel_match(:email, query))
+ .or(fuzzy_arel_match(:username, query))
+ .or(arel_table[:id].in(matched_by_emails_user_ids))
)
end
diff --git a/changelogs/unreleased/dm-search-pattern.yml b/changelogs/unreleased/dm-search-pattern.yml
new file mode 100644
index 00000000000..1670d8c4b9a
--- /dev/null
+++ b/changelogs/unreleased/dm-search-pattern.yml
@@ -0,0 +1,5 @@
+---
+title: Use fuzzy search with minimum length of 3 characters where appropriate
+merge_request:
+author:
+type: performance
diff --git a/lib/gitlab/sql/pattern.rb b/lib/gitlab/sql/pattern.rb
index 7c2d1d8f887..5f0c98cb5a4 100644
--- a/lib/gitlab/sql/pattern.rb
+++ b/lib/gitlab/sql/pattern.rb
@@ -4,9 +4,15 @@ module Gitlab
extend ActiveSupport::Concern
MIN_CHARS_FOR_PARTIAL_MATCHING = 3
- REGEX_QUOTED_WORD = /(?<=^| )"[^"]+"(?= |$)/
+ REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/
class_methods do
+ def fuzzy_search(query, columns)
+ matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or)
+
+ where(matches)
+ end
+
def to_pattern(query)
if partial_matching?(query)
"%#{sanitize_sql_like(query)}%"
@@ -19,12 +25,19 @@ module Gitlab
query.length >= MIN_CHARS_FOR_PARTIAL_MATCHING
end
- def to_fuzzy_arel(column, query)
- words = select_fuzzy_words(query)
+ def fuzzy_arel_match(column, query)
+ query = query.squish
+ return nil unless query.present?
- matches = words.map { |word| arel_table[column].matches(to_pattern(word)) }
+ words = select_fuzzy_words(query)
- matches.reduce { |result, match| result.and(match) }
+ if words.any?
+ words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and)
+ else
+ # No words of at least 3 chars, but we can search for an exact
+ # case insensitive match with the query as a whole
+ arel_table[column].matches(sanitize_sql_like(query))
+ end
end
def select_fuzzy_words(query)
@@ -32,7 +45,7 @@ module Gitlab
query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') }
- words = query.split(/\s+/)
+ words = query.split
quoted_words.map! { |quoted_word| quoted_word[1..-2] }
diff --git a/spec/lib/gitlab/sql/pattern_spec.rb b/spec/lib/gitlab/sql/pattern_spec.rb
index 48d56628ed5..ef51e3cc8df 100644
--- a/spec/lib/gitlab/sql/pattern_spec.rb
+++ b/spec/lib/gitlab/sql/pattern_spec.rb
@@ -137,22 +137,22 @@ describe Gitlab::SQL::Pattern do
end
end
- describe '.to_fuzzy_arel' do
- subject(:to_fuzzy_arel) { Issue.to_fuzzy_arel(:title, query) }
+ describe '.fuzzy_arel_match' do
+ subject(:fuzzy_arel_match) { Issue.fuzzy_arel_match(:title, query) }
context 'with a word equal to 3 chars' do
let(:query) { 'foo' }
it 'returns a single ILIKE condition' do
- expect(to_fuzzy_arel.to_sql).to match(/title.*I?LIKE '\%foo\%'/)
+ expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE '\%foo\%'/)
end
end
context 'with a word shorter than 3 chars' do
let(:query) { 'fo' }
- it 'returns nil' do
- expect(to_fuzzy_arel).to be_nil
+ it 'returns a single equality condition' do
+ expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo'/)
end
end
@@ -160,7 +160,23 @@ describe Gitlab::SQL::Pattern do
let(:query) { 'foo baz' }
it 'returns a joining LIKE condition using a AND' do
- expect(to_fuzzy_arel.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/)
+ expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/)
+ end
+ end
+
+ context 'with two words both shorter than 3 chars' do
+ let(:query) { 'fo ba' }
+
+ it 'returns a single ILIKE condition' do
+ expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo ba'/)
+ end
+ end
+
+ context 'with two words, one shorter 3 chars' do
+ let(:query) { 'foo ba' }
+
+ it 'returns a single ILIKE condition using the longer word' do
+ expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%'/)
end
end
@@ -168,7 +184,7 @@ describe Gitlab::SQL::Pattern do
let(:query) { 'foo "really bar" baz' }
it 'returns a joining LIKE condition using a AND' do
- expect(to_fuzzy_arel.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/)
+ expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/)
end
end
end
diff --git a/spec/models/ci/runner_spec.rb b/spec/models/ci/runner_spec.rb
index 584dfe9a5c1..a93e7e233a8 100644
--- a/spec/models/ci/runner_spec.rb
+++ b/spec/models/ci/runner_spec.rb
@@ -473,7 +473,7 @@ describe Ci::Runner do
end
describe '.search' do
- let(:runner) { create(:ci_runner, token: '123abc') }
+ let(:runner) { create(:ci_runner, token: '123abc', description: 'test runner') }
it 'returns runners with a matching token' do
expect(described_class.search(runner.token)).to eq([runner])
diff --git a/spec/models/concerns/issuable_spec.rb b/spec/models/concerns/issuable_spec.rb
index 765b2729918..a53b59c4e08 100644
--- a/spec/models/concerns/issuable_spec.rb
+++ b/spec/models/concerns/issuable_spec.rb
@@ -67,6 +67,7 @@ describe Issuable do
describe ".search" do
let!(:searchable_issue) { create(:issue, title: "Searchable awesome issue") }
+ let!(:searchable_issue2) { create(:issue, title: 'Aw') }
it 'returns issues with a matching title' do
expect(issuable_class.search(searchable_issue.title))
@@ -86,8 +87,8 @@ describe Issuable do
expect(issuable_class.search('searchable issue')).to eq([searchable_issue])
end
- it 'returns all issues with a query shorter than 3 chars' do
- expect(issuable_class.search('zz')).to eq(issuable_class.all)
+ it 'returns issues with a matching title for a query shorter than 3 chars' do
+ expect(issuable_class.search(searchable_issue2.title.downcase)).to eq([searchable_issue2])
end
end
@@ -95,6 +96,7 @@ describe Issuable do
let!(:searchable_issue) do
create(:issue, title: "Searchable awesome issue", description: 'Many cute kittens')
end
+ let!(:searchable_issue2) { create(:issue, title: "Aw", description: "Cu") }
it 'returns issues with a matching title' do
expect(issuable_class.full_search(searchable_issue.title))
@@ -133,8 +135,8 @@ describe Issuable do
expect(issuable_class.full_search('many kittens')).to eq([searchable_issue])
end
- it 'returns all issues with a query shorter than 3 chars' do
- expect(issuable_class.search('zz')).to eq(issuable_class.all)
+ it 'returns issues with a matching description for a query shorter than 3 chars' do
+ expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2])
end
end
diff --git a/spec/models/snippet_spec.rb b/spec/models/snippet_spec.rb
index de3ca300ae3..e09d89d235d 100644
--- a/spec/models/snippet_spec.rb
+++ b/spec/models/snippet_spec.rb
@@ -88,7 +88,7 @@ describe Snippet do
end
describe '.search' do
- let(:snippet) { create(:snippet) }
+ let(:snippet) { create(:snippet, title: 'test snippet') }
it 'returns snippets with a matching title' do
expect(described_class.search(snippet.title)).to eq([snippet])