From 8a72f5c427426a3f90a14b2711ee1ecec4e8ca40 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 11 Sep 2018 17:31:34 +0200 Subject: Added FromUnion to easily select from a UNION This commit adds the module `FromUnion`, which provides the class method `from_union`. This simplifies the process of selecting data from the result of a UNION, and reduces the likelihood of making mistakes. As a result, instead of this: union = Gitlab::SQL::Union.new([foo, bar]) Foo.from("(#{union.to_sql}) #{Foo.table_name}") We can now write this instead: Foo.from_union([foo, bar]) This commit also includes some changes to make this new setup work properly. For example, a bug in Rails 4 (https://github.com/rails/rails/issues/24193) would break the use of `from("sub-query-here").includes(:relation)` in certain cases. There was also a CI query which appeared to repeat a lot of conditions from an outer query on an inner query, which isn't necessary. Finally, we include a RuboCop cop to ensure developers use this new module, instead of using Gitlab::SQL::Union directly. Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/51307 --- app/finders/members_finder.rb | 2 +- app/finders/snippets_finder.rb | 4 +-- app/finders/todos_finder.rb | 13 ++++----- app/finders/union_finder.rb | 10 +++---- app/models/badge.rb | 2 ++ app/models/ci/runner.rb | 17 ++++++++--- app/models/concerns/from_union.rb | 51 +++++++++++++++++++++++++++++++++ app/models/event.rb | 1 + app/models/group.rb | 4 +-- app/models/label.rb | 1 + app/models/merge_request.rb | 15 ++++------ app/models/namespace.rb | 1 + app/models/note.rb | 1 + app/models/project.rb | 14 ++++----- app/models/project_authorization.rb | 8 ++++-- app/models/snippet.rb | 1 + app/models/todo.rb | 1 + app/models/user.rb | 49 +++++++++++++++---------------- app/services/labels/transfer_service.rb | 14 ++++----- 19 files changed, 132 insertions(+), 77 deletions(-) create mode 100644 app/models/concerns/from_union.rb (limited to 'app') diff --git a/app/finders/members_finder.rb b/app/finders/members_finder.rb index 48777838d60..f90a7868102 100644 --- a/app/finders/members_finder.rb +++ b/app/finders/members_finder.rb @@ -18,7 +18,7 @@ class MembersFinder group_members = GroupMembersFinder.new(group).execute(include_descendants: include_descendants) # rubocop: disable CodeReuse/Finder group_members = group_members.non_invite - union = Gitlab::SQL::Union.new([project_members, group_members], remove_duplicates: false) + union = Gitlab::SQL::Union.new([project_members, group_members], remove_duplicates: false) # rubocop: disable Gitlab/Union sql = distinct_on(union) diff --git a/app/finders/snippets_finder.rb b/app/finders/snippets_finder.rb index 715dffb972f..3528e4228b2 100644 --- a/app/finders/snippets_finder.rb +++ b/app/finders/snippets_finder.rb @@ -89,9 +89,7 @@ class SnippetsFinder < UnionFinder # We use a UNION here instead of OR clauses since this results in better # performance. - union = Gitlab::SQL::Union.new([authorized_projects.select('projects.id'), visible_projects.select('projects.id')]) - - Project.from("(#{union.to_sql}) AS #{Project.table_name}") + Project.from_union([authorized_projects, visible_projects]) end # rubocop: enable CodeReuse/ActiveRecord diff --git a/app/finders/todos_finder.rb b/app/finders/todos_finder.rb index 0d9f16fdce7..74baf79e4f2 100644 --- a/app/finders/todos_finder.rb +++ b/app/finders/todos_finder.rb @@ -164,16 +164,13 @@ class TodosFinder # rubocop: disable CodeReuse/ActiveRecord def by_group(items) - if group? - groups = group.self_and_descendants - project_todos = items.where(project_id: Project.where(group: groups).select(:id)) - group_todos = items.where(group_id: groups.select(:id)) + return items unless group? - union = Gitlab::SQL::Union.new([project_todos, group_todos]) - items = Todo.from("(#{union.to_sql}) #{Todo.table_name}") - end + groups = group.self_and_descendants + project_todos = items.where(project_id: Project.where(group: groups).select(:id)) + group_todos = items.where(group_id: groups.select(:id)) - items + Todo.from_union([project_todos, group_todos]) end # rubocop: enable CodeReuse/ActiveRecord diff --git a/app/finders/union_finder.rb b/app/finders/union_finder.rb index 798c3eba0f3..c3b02f7e52f 100644 --- a/app/finders/union_finder.rb +++ b/app/finders/union_finder.rb @@ -1,15 +1,15 @@ # frozen_string_literal: true class UnionFinder - # rubocop: disable CodeReuse/ActiveRecord def find_union(segments, klass) - if segments.length > 1 - union = Gitlab::SQL::Union.new(segments.map { |s| s.select(:id) }) + unless klass < FromUnion + raise TypeError, "#{klass.inspect} must include the FromUnion module" + end - klass.where("#{klass.table_name}.id IN (#{union.to_sql})") + if segments.length > 1 + klass.from_union(segments) else segments.first end end - # rubocop: enable CodeReuse/ActiveRecord end diff --git a/app/models/badge.rb b/app/models/badge.rb index 7e3b6b659e4..f016654206b 100644 --- a/app/models/badge.rb +++ b/app/models/badge.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class Badge < ActiveRecord::Base + include FromUnion + # This structure sets the placeholders that the urls # can have. This hash also sets which action to ask when # the placeholder is found. diff --git a/app/models/ci/runner.rb b/app/models/ci/runner.rb index eabb41c29d7..3e815937f4b 100644 --- a/app/models/ci/runner.rb +++ b/app/models/ci/runner.rb @@ -7,6 +7,7 @@ module Ci include IgnorableColumn include RedisCacheable include ChronicDurationAttribute + include FromUnion RUNNER_QUEUE_EXPIRY_TIME = 60.minutes ONLINE_CONTACT_TIMEOUT = 1.hour @@ -57,18 +58,26 @@ module Ci } scope :owned_or_instance_wide, -> (project_id) do - union = Gitlab::SQL::Union.new( - [belonging_to_project(project_id), belonging_to_parent_group_of_project(project_id), instance_type], + from_union( + [ + belonging_to_project(project_id), + belonging_to_parent_group_of_project(project_id), + instance_type + ], remove_duplicates: false ) - from("(#{union.to_sql}) ci_runners") end scope :assignable_for, ->(project) do # FIXME: That `to_sql` is needed to workaround a weird Rails bug. # Without that, placeholders would miss one and couldn't match. + # + # We use "unscoped" here so that any current Ci::Runner filters don't + # apply to the inner query, which is not necessary. + exclude_runners = unscoped { project.runners.select(:id) }.to_sql + where(locked: false) - .where.not("ci_runners.id IN (#{project.runners.select(:id).to_sql})") + .where.not("ci_runners.id IN (#{exclude_runners})") .project_type end diff --git a/app/models/concerns/from_union.rb b/app/models/concerns/from_union.rb new file mode 100644 index 00000000000..9b8595b1211 --- /dev/null +++ b/app/models/concerns/from_union.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +module FromUnion + extend ActiveSupport::Concern + + class_methods do + # Produces a query that uses a FROM to select data using a UNION. + # + # Using a FROM for a UNION has in the past lead to better query plans. As + # such, we generally recommend this pattern instead of using a WHERE IN. + # + # Example: + # users = User.from_union([User.where(id: 1), User.where(id: 2)]) + # + # This would produce the following SQL query: + # + # SELECT * + # FROM ( + # SELECT * + # FROM users + # WHERE id = 1 + # + # UNION + # + # SELECT * + # FROM users + # WHERE id = 2 + # ) users; + # + # members - An Array of ActiveRecord::Relation objects to use in the UNION. + # + # remove_duplicates - A boolean indicating if duplicate entries should be + # removed. Defaults to true. + # + # alias_as - The alias to use for the sub query. Defaults to the name of the + # table of the current model. + # rubocop: disable Gitlab/Union + def from_union(members, remove_duplicates: true, alias_as: table_name) + union = Gitlab::SQL::Union + .new(members, remove_duplicates: remove_duplicates) + .to_sql + + # This pattern is necessary as a bug in Rails 4 can cause the use of + # `from("string here").includes(:foo)` to break ActiveRecord. This is + # fixed in https://github.com/rails/rails/pull/25374, which is released as + # part of Rails 5. + from([Arel.sql("(#{union}) #{alias_as}")]) + end + # rubocop: enable Gitlab/Union + end +end diff --git a/app/models/event.rb b/app/models/event.rb index 041dac6941b..596155a9525 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -3,6 +3,7 @@ class Event < ActiveRecord::Base include Sortable include IgnorableColumn + include FromUnion default_scope { reorder(nil) } CREATED = 1 diff --git a/app/models/group.rb b/app/models/group.rb index 024e77188b8..62af20d2142 100644 --- a/app/models/group.rb +++ b/app/models/group.rb @@ -304,14 +304,12 @@ class Group < Namespace # 3. They belong to a sub-group or project in such sub-group # 4. They belong to an ancestor group def direct_and_indirect_users - union = Gitlab::SQL::Union.new([ + User.from_union([ User .where(id: direct_and_indirect_members.select(:user_id)) .reorder(nil), project_users_with_descendants ]) - - User.from("(#{union.to_sql}) #{User.table_name}") end # Returns all users that are members of projects diff --git a/app/models/label.rb b/app/models/label.rb index 8dc7ded53ad..9ef57a05b3e 100644 --- a/app/models/label.rb +++ b/app/models/label.rb @@ -7,6 +7,7 @@ class Label < ActiveRecord::Base include Gitlab::SQL::Pattern include OptionallySearch include Sortable + include FromUnion # Represents a "No Label" state used for filtering Issues and Merge # Requests that have no label assigned. diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index e19bf62dcd0..dd5d494997d 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -14,6 +14,7 @@ class MergeRequest < ActiveRecord::Base include Gitlab::Utils::StrongMemoize include LabelEventable include ReactiveCaching + include FromUnion self.reactive_cache_key = ->(model) { [model.project.id, model.iid] } self.reactive_cache_refresh_interval = 10.minutes @@ -237,11 +238,10 @@ class MergeRequest < ActiveRecord::Base def self.in_projects(relation) # unscoping unnecessary conditions that'll be applied # when executing `where("merge_requests.id IN (#{union.to_sql})")` - source = unscoped.where(source_project_id: relation).select(:id) - target = unscoped.where(target_project_id: relation).select(:id) - union = Gitlab::SQL::Union.new([source, target]) + source = unscoped.where(source_project_id: relation) + target = unscoped.where(target_project_id: relation) - where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection + from_union([source, target]) end # This is used after project import, to reset the IDs to the correct @@ -740,11 +740,8 @@ class MergeRequest < ActiveRecord::Base # compared to using OR statements. We're using UNION ALL since the queries # used won't produce any duplicates (e.g. a note for a commit can't also be # a note for an MR). - union = Gitlab::SQL::Union - .new([notes, commit_notes], remove_duplicates: false) - .to_sql - - Note.from("(#{union}) #{Note.table_name}") + Note + .from_union([notes, commit_notes], remove_duplicates: false) .includes(:noteable) end diff --git a/app/models/namespace.rb b/app/models/namespace.rb index 76920c3c039..0289f29211d 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -11,6 +11,7 @@ class Namespace < ActiveRecord::Base include Gitlab::SQL::Pattern include IgnorableColumn include FeatureGate + include FromUnion ignore_column :deleted_at diff --git a/app/models/note.rb b/app/models/note.rb index 4429c1dcb07..bea02d69b65 100644 --- a/app/models/note.rb +++ b/app/models/note.rb @@ -17,6 +17,7 @@ class Note < ActiveRecord::Base include Editable include Gitlab::SQL::Pattern include ThrottledTouch + include FromUnion module SpecialRole FIRST_TIME_CONTRIBUTOR = :first_time_contributor diff --git a/app/models/project.rb b/app/models/project.rb index c37915e111f..9e4c7f7a2d0 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -29,6 +29,7 @@ class Project < ActiveRecord::Base include BatchDestroyDependentAssociations include FeatureGate include OptionallySearch + include FromUnion extend Gitlab::Cache::RequestCache extend Gitlab::ConfigHelper @@ -1493,8 +1494,7 @@ class Project < ActiveRecord::Base end def all_runners - union = Gitlab::SQL::Union.new([runners, group_runners, shared_runners]) - Ci::Runner.from("(#{union.to_sql}) ci_runners") + Ci::Runner.from_union([runners, group_runners, shared_runners]) end def active_runners @@ -2022,12 +2022,10 @@ class Project < ActiveRecord::Base def badges return project_badges unless group - group_badges_rel = GroupBadge.where(group: group.self_and_ancestors) - - union = Gitlab::SQL::Union.new([project_badges.select(:id), - group_badges_rel.select(:id)]) - - Badge.where("id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection + Badge.from_union([ + project_badges, + GroupBadge.where(group: group.self_and_ancestors) + ]) end def merge_requests_allowing_push_to_user(user) diff --git a/app/models/project_authorization.rb b/app/models/project_authorization.rb index 746bb4584c9..2c590008db2 100644 --- a/app/models/project_authorization.rb +++ b/app/models/project_authorization.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class ProjectAuthorization < ActiveRecord::Base + include FromUnion + belongs_to :user belongs_to :project @@ -8,9 +10,9 @@ class ProjectAuthorization < ActiveRecord::Base validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true - def self.select_from_union(union) - select(['project_id', 'MAX(access_level) AS access_level']) - .from("(#{union.to_sql}) #{ProjectAuthorization.table_name}") + def self.select_from_union(relations) + from_union(relations) + .select(['project_id', 'MAX(access_level) AS access_level']) .group(:project_id) end diff --git a/app/models/snippet.rb b/app/models/snippet.rb index 5b394e3fa79..e9533ee7c77 100644 --- a/app/models/snippet.rb +++ b/app/models/snippet.rb @@ -12,6 +12,7 @@ class Snippet < ActiveRecord::Base include Spammable include Editable include Gitlab::SQL::Pattern + include FromUnion cache_markdown_field :title, pipeline: :single_line cache_markdown_field :description diff --git a/app/models/todo.rb b/app/models/todo.rb index 48d92ad04b3..265fb932f7c 100644 --- a/app/models/todo.rb +++ b/app/models/todo.rb @@ -2,6 +2,7 @@ class Todo < ActiveRecord::Base include Sortable + include FromUnion ASSIGNED = 1 MENTIONED = 2 diff --git a/app/models/user.rb b/app/models/user.rb index d68108a8e8e..277218d8949 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -20,6 +20,7 @@ class User < ActiveRecord::Base include BlocksJsonSerialization include WithUploads include OptionallySearch + include FromUnion DEFAULT_NOTIFICATION_LEVEL = :participating @@ -286,11 +287,9 @@ class User < ActiveRecord::Base # user_id - The ID of the user to include. def self.union_with_user(user_id = nil) if user_id.present? - union = Gitlab::SQL::Union.new([all, User.unscoped.where(id: user_id)]) - # We use "unscoped" here so that any inner conditions are not repeated for # the outer query, which would be redundant. - User.unscoped.from("(#{union.to_sql}) #{User.table_name}") + User.unscoped.from_union([all, User.unscoped.where(id: user_id)]) else all end @@ -354,9 +353,8 @@ class User < ActiveRecord::Base emails = joins(:emails).where(emails: { email: email }) emails = emails.confirmed if confirmed - union = Gitlab::SQL::Union.new([users, emails]) - from("(#{union.to_sql}) #{table_name}") + from_union([users, emails]) end def filter(filter_name) @@ -676,10 +674,10 @@ class User < ActiveRecord::Base # Returns the groups a user has access to, either through a membership or a project authorization def authorized_groups - union = Gitlab::SQL::Union - .new([groups.select(:id), authorized_projects.select(:namespace_id)]) - - Group.where("namespaces.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection + Group.from_union([ + groups, + authorized_projects.joins(:namespace).select('namespaces.*') + ]) end # Returns the groups a user is a member of, either directly or through a parent group @@ -744,7 +742,15 @@ class User < ActiveRecord::Base end def owned_projects - @owned_projects ||= Project.from("(#{owned_projects_union.to_sql}) AS projects") + @owned_projects ||= Project.from_union( + [ + Project.where(namespace: namespace), + Project.joins(:project_authorizations) + .where("projects.namespace_id <> ?", namespace.id) + .where(project_authorizations: { user_id: id, access_level: Gitlab::Access::OWNER }) + ], + remove_duplicates: false + ) end # Returns projects which user can admin issues on (for example to move an issue to that project). @@ -1138,17 +1144,17 @@ class User < ActiveRecord::Base def ci_owned_runners @ci_owned_runners ||= begin - project_runner_ids = Ci::RunnerProject + project_runners = Ci::RunnerProject .where(project: authorized_projects(Gitlab::Access::MAINTAINER)) - .select(:runner_id) + .joins(:runner) + .select('ci_runners.*') - group_runner_ids = Ci::RunnerNamespace + group_runners = Ci::RunnerNamespace .where(namespace_id: owned_or_maintainers_groups.select(:id)) - .select(:runner_id) + .joins(:runner) + .select('ci_runners.*') - union = Gitlab::SQL::Union.new([project_runner_ids, group_runner_ids]) - - Ci::Runner.where("ci_runners.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection + Ci::Runner.from_union([project_runners, group_runners]) end end @@ -1380,15 +1386,6 @@ class User < ActiveRecord::Base Gitlab::CurrentSettings.usage_stats_set_by_user_id == self.id end - def owned_projects_union - Gitlab::SQL::Union.new([ - Project.where(namespace: namespace), - Project.joins(:project_authorizations) - .where("projects.namespace_id <> ?", namespace.id) - .where(project_authorizations: { user_id: id, access_level: Gitlab::Access::OWNER }) - ], remove_duplicates: false) - end - # Added according to https://github.com/plataformatec/devise/blob/7df57d5081f9884849ca15e4fde179ef164a575f/README.md#activejob-integration def send_devise_notification(notification, *args) return true unless can?(:receive_notifications) diff --git a/app/services/labels/transfer_service.rb b/app/services/labels/transfer_service.rb index aec0282b31b..52360f775dc 100644 --- a/app/services/labels/transfer_service.rb +++ b/app/services/labels/transfer_service.rb @@ -34,13 +34,13 @@ module Labels # rubocop: disable CodeReuse/ActiveRecord def labels_to_transfer - label_ids = [] - label_ids << group_labels_applied_to_issues.select(:id) - label_ids << group_labels_applied_to_merge_requests.select(:id) - - union = Gitlab::SQL::Union.new(label_ids) - - Label.where("labels.id IN (#{union.to_sql})").reorder(nil).uniq # rubocop:disable GitlabSecurity/SqlInjection + Label + .from_union([ + group_labels_applied_to_issues, + group_labels_applied_to_merge_requests + ]) + .reorder(nil) + .uniq end # rubocop: enable CodeReuse/ActiveRecord -- cgit v1.2.3