diff options
author | Douwe Maan <douwe@gitlab.com> | 2018-03-01 20:40:24 +0300 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2018-03-01 20:40:24 +0300 |
commit | 008120f8db90dcf65fdd00343a4d8a22695abed1 (patch) | |
tree | 274e896be3f9b4b33fd922c4860668becbf74a93 /lib | |
parent | ccb080d94aa765d8391f262e25c5ead0764dc2ff (diff) | |
parent | 522f4b2c6a0a1fe1e93d7bc2f9fe5290fccba4f9 (diff) |
Merge branch '41777-include-cycle-time-in-usage-ping' into 'master'
Resolve "Include cycle time in usage ping"
Closes #41777
See merge request gitlab-org/gitlab-ce!16793
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/cycle_analytics/base_query.rb | 7 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/base_stage.rb | 29 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/production_helper.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/test_stage.rb | 6 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/usage_data.rb | 72 | ||||
-rw-r--r-- | lib/gitlab/database/median.rb | 130 | ||||
-rw-r--r-- | lib/gitlab/usage_data.rb | 5 |
7 files changed, 206 insertions, 47 deletions
diff --git a/lib/gitlab/cycle_analytics/base_query.rb b/lib/gitlab/cycle_analytics/base_query.rb index 8b3bc3e440d..86d708be0d6 100644 --- a/lib/gitlab/cycle_analytics/base_query.rb +++ b/lib/gitlab/cycle_analytics/base_query.rb @@ -8,13 +8,14 @@ module Gitlab private def base_query - @base_query ||= stage_query + @base_query ||= stage_query(@project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables end - def stage_query + def stage_query(project_ids) query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id])) .join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id])) - .where(issue_table[:project_id].eq(@project.id)) # rubocop:disable Gitlab/ModuleWithInstanceVariables + .project(issue_table[:project_id].as("project_id")) + .where(issue_table[:project_id].in(project_ids)) .where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables # Load merge_requests diff --git a/lib/gitlab/cycle_analytics/base_stage.rb b/lib/gitlab/cycle_analytics/base_stage.rb index cac31ea8cff..038d5a19bc4 100644 --- a/lib/gitlab/cycle_analytics/base_stage.rb +++ b/lib/gitlab/cycle_analytics/base_stage.rb @@ -21,17 +21,28 @@ module Gitlab end def median - cte_table = Arel::Table.new("cte_table_for_#{name}") + BatchLoader.for(@project.id).batch(key: name) do |project_ids, loader| + cte_table = Arel::Table.new("cte_table_for_#{name}") - # Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time). - # Next, we find the first of the start_time_attrs that isn't `NULL` (call this start_time). - # We compute the (end_time - start_time) interval, and give it an alias based on the current - # cycle analytics stage. - interval_query = Arel::Nodes::As.new( - cte_table, - subtract_datetimes(base_query.dup, start_time_attrs, end_time_attrs, name.to_s)) + # Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time). + # Next, we find the first of the start_time_attrs that isn't `NULL` (call this start_time). + # We compute the (end_time - start_time) interval, and give it an alias based on the current + # cycle analytics stage. + interval_query = Arel::Nodes::As.new(cte_table, + subtract_datetimes(stage_query(project_ids), start_time_attrs, end_time_attrs, name.to_s)) - median_datetime(cte_table, interval_query, name) + if project_ids.one? + loader.call(@project.id, median_datetime(cte_table, interval_query, name)) + else + begin + median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median| + loader.call(project_id, median) + end + rescue NotSupportedError + {} + end + end + end end def name diff --git a/lib/gitlab/cycle_analytics/production_helper.rb b/lib/gitlab/cycle_analytics/production_helper.rb index 7a889b3877f..d0ca62e46e4 100644 --- a/lib/gitlab/cycle_analytics/production_helper.rb +++ b/lib/gitlab/cycle_analytics/production_helper.rb @@ -1,8 +1,8 @@ module Gitlab module CycleAnalytics module ProductionHelper - def stage_query - super + def stage_query(project_ids) + super(project_ids) .where(mr_metrics_table[:first_deployed_to_production_at] .gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables end diff --git a/lib/gitlab/cycle_analytics/test_stage.rb b/lib/gitlab/cycle_analytics/test_stage.rb index 2b5f72bef89..0e9d235ca79 100644 --- a/lib/gitlab/cycle_analytics/test_stage.rb +++ b/lib/gitlab/cycle_analytics/test_stage.rb @@ -25,11 +25,11 @@ module Gitlab _("Total test time for all commits/merges") end - def stage_query + def stage_query(project_ids) if @options[:branch] - super.where(build_table[:ref].eq(@options[:branch])) + super(project_ids).where(build_table[:ref].eq(@options[:branch])) else - super + super(project_ids) end end end diff --git a/lib/gitlab/cycle_analytics/usage_data.rb b/lib/gitlab/cycle_analytics/usage_data.rb new file mode 100644 index 00000000000..5122e3417ca --- /dev/null +++ b/lib/gitlab/cycle_analytics/usage_data.rb @@ -0,0 +1,72 @@ +module Gitlab + module CycleAnalytics + class UsageData + PROJECTS_LIMIT = 10 + + attr_reader :projects, :options + + def initialize + @projects = Project.sorted_by_activity.limit(PROJECTS_LIMIT) + @options = { from: 7.days.ago } + end + + def to_json + total = 0 + + values = + medians_per_stage.each_with_object({}) do |(stage_name, medians), hsh| + calculations = stage_values(medians) + + total += calculations.values.compact.sum + hsh[stage_name] = calculations + end + + values[:total] = total + + { avg_cycle_analytics: values } + end + + private + + def medians_per_stage + projects.each_with_object({}) do |project, hsh| + ::CycleAnalytics.new(project, options).all_medians_per_stage.each do |stage_name, median| + hsh[stage_name] ||= [] + hsh[stage_name] << median + end + end + end + + def stage_values(medians) + medians = medians.map(&:presence).compact + average = calc_average(medians) + + { + average: average, + sd: standard_deviation(medians, average), + missing: projects.length - medians.length + } + end + + def calc_average(values) + return if values.empty? + + (values.sum / values.length).to_i + end + + def standard_deviation(values, average) + Math.sqrt(sample_variance(values, average)).to_i + end + + def sample_variance(values, average) + return 0 if values.length <= 1 + + sum = values.inject(0) do |acc, val| + acc + (val - average)**2 + end + + sum / (values.length - 1) + end + end + end +end diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 059054ac9ff..74fed447289 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -2,18 +2,14 @@ module Gitlab module Database module Median + NotSupportedError = Class.new(StandardError) + def median_datetime(arel_table, query_so_far, column_sym) - median_queries = - if Gitlab::Database.postgresql? - pg_median_datetime_sql(arel_table, query_so_far, column_sym) - elsif Gitlab::Database.mysql? - mysql_median_datetime_sql(arel_table, query_so_far, column_sym) - end - - results = Array.wrap(median_queries).map do |query| - ActiveRecord::Base.connection.execute(query) - end - extract_median(results).presence + extract_median(execute_queries(arel_table, query_so_far, column_sym)).presence + end + + def median_datetimes(arel_table, query_so_far, column_sym, partition_column) + extract_medians(execute_queries(arel_table, query_so_far, column_sym, partition_column)).presence end def extract_median(results) @@ -21,13 +17,21 @@ module Gitlab if Gitlab::Database.postgresql? result = result.first.presence - median = result['median'] if result - median.to_f if median + + result['median']&.to_f if result elsif Gitlab::Database.mysql? result.to_a.flatten.first end end + def extract_medians(results) + median_values = results.compact.first.values + + median_values.each_with_object({}) do |(id, median), hash| + hash[id.to_i] = median&.to_f + end + end + def mysql_median_datetime_sql(arel_table, query_so_far, column_sym) query = arel_table .from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)) @@ -53,7 +57,7 @@ module Gitlab ] end - def pg_median_datetime_sql(arel_table, query_so_far, column_sym) + def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil) # Create a CTE with the column we're operating on, row number (after sorting by the column # we're operating on), and count of the table we're operating on (duplicated across) all rows # of the CTE. For example, if we're looking to find the median of the `projects.star_count` @@ -64,41 +68,107 @@ module Gitlab # 5 | 1 | 3 # 9 | 2 | 3 # 15 | 3 | 3 + # + # If a partition column is used we will do the same operation but for separate partitions, + # when that happens the CTE might look like this: + # + # project_id | star_count | row_id | ct + # ------------+------------+--------+---- + # 1 | 5 | 1 | 2 + # 1 | 9 | 2 | 2 + # 2 | 10 | 1 | 3 + # 2 | 15 | 2 | 3 + # 2 | 20 | 3 | 3 cte_table = Arel::Table.new("ordered_records") + cte = Arel::Nodes::As.new( cte_table, - arel_table - .project( - arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), - Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), - arel_table.project("COUNT(1)").as('ct')). + arel_table.project(*rank_rows(arel_table, column_sym, partition_column)). # Disallow negative values where(arel_table[column_sym].gteq(zero_interval))) # From the CTE, select either the middle row or the middle two rows (this is accomplished # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the # selected rows, and this is the median value. - cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")) - .where( - Arel::Nodes::Between.new( - cte_table[:row_id], - Arel::Nodes::And.new( - [(cte_table[:ct] / Arel.sql('2.0')), - (cte_table[:ct] / Arel.sql('2.0') + 1)] + result = + cte_table + .project(*median_projections(cte_table, column_sym, partition_column)) + .where( + Arel::Nodes::Between.new( + cte_table[:row_id], + Arel::Nodes::And.new( + [(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)] + ) ) ) - ) - .with(query_so_far, cte) - .to_sql + .with(query_so_far, cte) + + result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column + + result.to_sql end private + def median_queries(arel_table, query_so_far, column_sym, partition_column = nil) + if Gitlab::Database.postgresql? + pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) + elsif Gitlab::Database.mysql? + raise NotSupportedError, "partition_column is not supported for MySQL" if partition_column + + mysql_median_datetime_sql(arel_table, query_so_far, column_sym) + end + end + + def execute_queries(arel_table, query_so_far, column_sym, partition_column = nil) + queries = median_queries(arel_table, query_so_far, column_sym, partition_column) + + Array.wrap(queries).map { |query| ActiveRecord::Base.connection.execute(query) } + end + def average(args, as) Arel::Nodes::NamedFunction.new("AVG", args, as) end + def rank_rows(arel_table, column_sym, partition_column) + column_row = arel_table[column_sym].as(column_sym.to_s) + + if partition_column + partition_row = arel_table[partition_column] + row_id = + Arel::Nodes::Over.new( + Arel::Nodes::NamedFunction.new('rank', []), + Arel::Nodes::Window.new.partition(arel_table[partition_column]) + .order(arel_table[column_sym]) + ).as('row_id') + + count = arel_table.from(arel_table.alias) + .project('COUNT(*)') + .where(arel_table[partition_column].eq(arel_table.alias[partition_column])) + .as('ct') + + [partition_row, column_row, row_id, count] + else + row_id = + Arel::Nodes::Over.new( + Arel::Nodes::NamedFunction.new('row_number', []), + Arel::Nodes::Window.new.order(arel_table[column_sym]) + ).as('row_id') + + count = arel_table.project("COUNT(1)").as('ct') + + [column_row, row_id, count] + end + end + + def median_projections(table, column_sym, partition_column) + projections = [] + projections << table[partition_column] if partition_column + projections << average([extract_epoch(table[column_sym])], "median") + projections + end + def extract_epoch(arel_attribute) Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")}) end diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb index 9d13d1d781f..37d3512990e 100644 --- a/lib/gitlab/usage_data.rb +++ b/lib/gitlab/usage_data.rb @@ -9,6 +9,7 @@ module Gitlab license_usage_data.merge(system_usage_data) .merge(features_usage_data) .merge(components_usage_data) + .merge(cycle_analytics_usage_data) end def to_json(force_refresh: false) @@ -71,6 +72,10 @@ module Gitlab } end + def cycle_analytics_usage_data + Gitlab::CycleAnalytics::UsageData.new.to_json + end + def features_usage_data features_usage_data_ce end |