diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-07-01 06:08:37 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-07-01 06:08:37 +0300 |
commit | d9a9116e0e78fb7f2690e88878a4b6384d80f763 (patch) | |
tree | bb974645b3b95af9e1248fcd2086c3dcd4a99887 | |
parent | 516b939c44ec77bb773f08df15079c80fb4d10d2 (diff) |
Add latest changes from gitlab-org/gitlab@master
-rw-r--r-- | lib/gitlab/pagination/keyset/order.rb | 4 | ||||
-rw-r--r-- | lib/unnested_in_filters/dsl.rb | 87 | ||||
-rw-r--r-- | lib/unnested_in_filters/rewriter.rb | 185 | ||||
-rw-r--r-- | spec/lib/gitlab/pagination/keyset/order_spec.rb | 24 | ||||
-rw-r--r-- | spec/lib/unnested_in_filters/dsl_spec.rb | 31 | ||||
-rw-r--r-- | spec/lib/unnested_in_filters/rewriter_spec.rb | 142 |
6 files changed, 473 insertions, 0 deletions
diff --git a/lib/gitlab/pagination/keyset/order.rb b/lib/gitlab/pagination/keyset/order.rb index 290e94401b8..eecf74b8d18 100644 --- a/lib/gitlab/pagination/keyset/order.rb +++ b/lib/gitlab/pagination/keyset/order.rb @@ -107,6 +107,10 @@ module Gitlab end end + def attribute_names + column_definitions.map(&:attribute_name) + end + # This methods builds the conditions for the keyset pagination # # Example: diff --git a/lib/unnested_in_filters/dsl.rb b/lib/unnested_in_filters/dsl.rb new file mode 100644 index 00000000000..f5f358c729e --- /dev/null +++ b/lib/unnested_in_filters/dsl.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +# Including the `UnnestedInFilters::Dsl` module to an ActiveRecord +# model extends the interface of the following class instances to be +# able to use the `use_unnested_filters` method; +# +# - Model relation; +# `Model.where(...).use_unnested_filters` +# - All the association proxies +# `project.model_association.use_unnested_filters` +# - All the relation instances of the association +# `project.model_association.where(...).use_unnested_filters +# +# Note: The interface of the model itself won't be extended as we don't +# have a use-case for now(`Model.use_unnested_filters` won't work). +# +# Example usage of the API; +# +# relation = Vulnerabilities::Read.where(state: [1, 4]) +# .use_unnested_filters +# .order(severity: :desc, vulnerability_id: :desc) +# +# relation.to_a # => Will load records by using the optimized query +# +# See `UnnestedInFilters::Rewriter` for the details about the optimizations applied. +# +# rubocop:disable Gitlab/ModuleWithInstanceVariables +module UnnestedInFilters + module Dsl + extend ActiveSupport::Concern + + MODULES_TO_EXTEND = [ + ActiveRecord::Relation, + ActiveRecord::Associations::CollectionProxy, + ActiveRecord::AssociationRelation + ].freeze + + included do + MODULES_TO_EXTEND.each do |mod| + delegate_mod = relation_delegate_class(mod) + delegate_mod.prepend(UnnestedInFilters::Dsl::Relation) + end + end + + module Relation + def use_unnested_filters + spawn.use_unnested_filters! + end + + def use_unnested_filters! + assert_mutability! + @values[:unnested_filters] = true + + self + end + + def use_unnested_filters? + @values.fetch(:unnested_filters, false) + end + + def load(*) + return super if loaded? || !rewrite_query? + + @records = unnested_filter_rewriter.rewrite.to_a + @loaded = true + + self + end + + def exists?(*) + return super unless rewrite_query? + + unnested_filter_rewriter.rewrite.exists? + end + + private + + def rewrite_query? + use_unnested_filters? && unnested_filter_rewriter.rewrite? + end + + def unnested_filter_rewriter + @unnested_filter_rewriter ||= UnnestedInFilters::Rewriter.new(self) + end + end + end +end diff --git a/lib/unnested_in_filters/rewriter.rb b/lib/unnested_in_filters/rewriter.rb new file mode 100644 index 00000000000..c953b673c0e --- /dev/null +++ b/lib/unnested_in_filters/rewriter.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true + +# rubocop:disable CodeReuse/ActiveRecord (This module is generating ActiveRecord relations therefore using AR methods is necessary) +module UnnestedInFilters + class Rewriter + include Gitlab::Utils::StrongMemoize + + class ValueTable + def initialize(model, attribute, values) + @model = model + @attribute = attribute.to_s + @values = values + end + + def to_sql + "unnest(#{serialized_values}::#{sql_type}[]) AS #{table_name}(#{column_name})" + end + + def as_predicate + "#{model.table_name}.#{column_name} = #{table_name}.#{column_name}" + end + + private + + attr_reader :model, :attribute, :values + + delegate :connection, :columns, :attribute_types, to: :model, private: true + delegate :quote, :quote_table_name, :quote_column_name, to: :connection + + def table_name + quote_table_name(attribute.pluralize) + end + + def column_name + quote_column_name(attribute) + end + + def serialized_values + array_type.serialize(values) + .then { |array| quote(array) } + end + + def array_type + ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.new(attribute_types[attribute]) + end + + def sql_type + column.sql_type_metadata.sql_type + end + + def column + columns.find { _1.name == attribute } + end + end + + def initialize(relation) + @relation = relation + end + + # Rewrites the given ActiveRecord::Relation object to + # utilize the DB indices efficiently. + # + # Example usage; + # + # relation = Vulnerabilities::Read.where(state: [1, 4]) + # relation = relation.order(severity: :desc, vulnerability_id: :desc) + # + # rewriter = UnnestedInFilters::Rewriter.new(relation) + # optimized_relation = rewriter.rewrite + # + # In the above example. the `relation` object would produce the following SQL query; + # + # SELECT + # "vulnerability_reads".* + # FROM + # "vulnerability_reads" + # WHERE + # "vulnerability_reads"."state" IN (1, 4) + # ORDER BY + # "vulnerability_reads"."severity" DESC, + # "vulnerability_reads"."vulnerability_id" DESC + # LIMIT 20; + # + # And the `optimized_relation` object would would produce the following query to + # utilize the index on (state, severity, vulnerability_id); + # + # SELECT + # "vulnerability_reads".* + # FROM + # unnest('{1, 4}'::smallint[]) AS "states" ("state"), + # LATERAL ( + # SELECT + # "vulnerability_reads".* + # FROM + # "vulnerability_reads" + # WHERE + # (vulnerability_reads."state" = "states"."state") + # ORDER BY + # "vulnerability_reads"."severity" DESC, + # "vulnerability_reads"."vulnerability_id" DESC + # LIMIT 20) AS vulnerability_reads + # ORDER BY + # "vulnerability_reads"."severity" DESC, + # "vulnerability_reads"."vulnerability_id" DESC + # LIMIT 20 + # + def rewrite + model.from(from) + .limit(limit_value) + .order(order_values) + .includes(relation.includes_values) + .preload(relation.preload_values) + .eager_load(relation.eager_load_values) + end + + def rewrite? + strong_memoize(:rewrite) do + in_filters.present? && has_index_coverage? + end + end + + private + + attr_reader :relation + + delegate :model, :order_values, :limit_value, :where_values_hash, to: :relation, private: true + + def from + [value_tables.map(&:to_sql) + [lateral]].join(', ') + end + + def lateral + "LATERAL (#{join_relation.to_sql}) AS #{model.table_name}" + end + + def join_relation + value_tables.reduce(unscoped_relation) do |memo, tmp_table| + memo.where(tmp_table.as_predicate) + end + end + + def unscoped_relation + relation.unscope(where: in_filters.keys) + end + + def in_filters + @in_filters ||= where_values_hash.select { _2.is_a?(Array) } + end + + def has_index_coverage? + indices.any? do |index| + (filter_attributes - Array(index.columns)).empty? && # all the filter attributes are indexed + index.columns.last(order_attributes.length) == order_attributes && # index can be used in sorting + (index.columns - (filter_attributes + order_attributes)).empty? # there is no other columns in the index + end + end + + def filter_attributes + @filter_attributes ||= where_values_hash.keys + end + + def order_attributes + @order_attributes ||= order_values.flat_map(&method(:extract_column_name)) + end + + def extract_column_name(order_value) + case order_value + when Arel::Nodes::Ordering + order_value.expr.name + when ::Gitlab::Pagination::Keyset::Order + order_value.attribute_names + end + end + + def indices + model.connection.schema_cache.indexes(model.table_name) + end + + def value_tables + @value_tables ||= in_filters.map do |attribute, values| + ValueTable.new(model, attribute, values) + end + end + end +end diff --git a/spec/lib/gitlab/pagination/keyset/order_spec.rb b/spec/lib/gitlab/pagination/keyset/order_spec.rb index abbb3a21cd4..c1fc73603d6 100644 --- a/spec/lib/gitlab/pagination/keyset/order_spec.rb +++ b/spec/lib/gitlab/pagination/keyset/order_spec.rb @@ -680,4 +680,28 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do end end end + + describe '#attribute_names' do + let(:expected_attribute_names) { %w(id name) } + let(:order) do + Gitlab::Pagination::Keyset::Order.build([ + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'id', + order_expression: Project.arel_table['id'].desc, + nullable: :not_nullable, + distinct: true + ), + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'name', + order_expression: Project.arel_table['name'].desc, + nullable: :not_nullable, + distinct: true + ) + ]) + end + + subject { order.attribute_names } + + it { is_expected.to match_array(expected_attribute_names) } + end end diff --git a/spec/lib/unnested_in_filters/dsl_spec.rb b/spec/lib/unnested_in_filters/dsl_spec.rb new file mode 100644 index 00000000000..bce4c88f94c --- /dev/null +++ b/spec/lib/unnested_in_filters/dsl_spec.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe UnnestedInFilters::Dsl do + let(:test_model) do + Class.new(ApplicationRecord) do + include UnnestedInFilters::Dsl + + self.table_name = 'users' + end + end + + describe '#exists?' do + let(:states) { %w(active banned) } + + subject { test_model.where(state: states).use_unnested_filters.exists? } + + context 'when there is no record in the database with given filters' do + it { is_expected.to be_falsey } + end + + context 'when there is a record in the database with given filters' do + before do + create(:user, state: :active) + end + + it { is_expected.to be_truthy } + end + end +end diff --git a/spec/lib/unnested_in_filters/rewriter_spec.rb b/spec/lib/unnested_in_filters/rewriter_spec.rb new file mode 100644 index 00000000000..f4fff393f28 --- /dev/null +++ b/spec/lib/unnested_in_filters/rewriter_spec.rb @@ -0,0 +1,142 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe UnnestedInFilters::Rewriter do + let(:rewriter) { described_class.new(relation) } + + before(:all) do + User.include(UnnestedInFilters::Dsl) + end + + describe '#rewrite?' do + subject(:rewrite?) { rewriter.rewrite? } + + context 'when the given relation does not have an `IN` predicate' do + let(:relation) { User.where(username: 'user') } + + it { is_expected.to be_falsey } + end + + context 'when the given relation has an `IN` predicate' do + context 'when there is no index coverage for the used columns' do + let(:relation) { User.where(username: %w(user_1 user_2), state: :active) } + + it { is_expected.to be_falsey } + end + + context 'when there is an index coverage for the used columns' do + let(:relation) { User.where(state: :active, user_type: [:support_bot, :alert_bot]) } + + it { is_expected.to be_truthy } + + context 'when there is an ordering' do + let(:relation) { User.where(state: %w(active blocked banned)).order(order).limit(2) } + + context 'when the order is an Arel node' do + let(:order) { { user_type: :desc } } + + it { is_expected.to be_truthy } + end + + context 'when the order is a Keyset order' do + let(:order) do + Gitlab::Pagination::Keyset::Order.build([ + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'user_type', + order_expression: User.arel_table['user_type'].desc, + nullable: :not_nullable, + distinct: false + ) + ]) + end + + it { is_expected.to be_truthy } + end + end + end + end + end + + describe '#rewrite' do + let(:recorded_queries) { ActiveRecord::QueryRecorder.new { rewriter.rewrite.load } } + let(:relation) { User.where(state: :active, user_type: %i(support_bot alert_bot)).limit(2) } + + let(:expected_query) do + <<~SQL + SELECT + "users".* + FROM + unnest('{1,2}'::smallint[]) AS "user_types"("user_type"), + LATERAL ( + SELECT + "users".* + FROM + "users" + WHERE + "users"."state" = 'active' AND + (users."user_type" = "user_types"."user_type") + LIMIT 2 + ) AS users + LIMIT 2 + SQL + end + + subject(:issued_query) { recorded_queries.occurrences.each_key.first } + + it 'changes the query' do + expect(issued_query.gsub(/\s/, '')).to start_with(expected_query.gsub(/\s/, '')) + end + + context 'when there is an order' do + let(:relation) { User.where(state: %w(active blocked banned)).order(order).limit(2) } + let(:expected_query) do + <<~SQL + SELECT + "users".* + FROM + unnest('{active,blocked,banned}'::charactervarying[]) AS "states"("state"), + LATERAL ( + SELECT + "users".* + FROM + "users" + WHERE + (users."state" = "states"."state") + ORDER BY + "users"."user_type" DESC + LIMIT 2 + ) AS users + ORDER BY + "users"."user_type" DESC + LIMIT 2 + SQL + end + + context 'when the order is an Arel node' do + let(:order) { { user_type: :desc } } + + it 'changes the query' do + expect(issued_query.gsub(/\s/, '')).to start_with(expected_query.gsub(/\s/, '')) + end + end + + context 'when the order is a Keyset order' do + let(:order) do + Gitlab::Pagination::Keyset::Order.build([ + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'user_type', + order_expression: User.arel_table['user_type'].desc, + nullable: :not_nullable, + distinct: false + ) + ]) + end + + it 'changes the query' do + expect(issued_query.gsub(/\s/, '')).to start_with(expected_query.gsub(/\s/, '')) + end + end + end + end +end |