Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2018-03-01 20:40:24 +0300
committerDouwe Maan <douwe@gitlab.com>2018-03-01 20:40:24 +0300
commit008120f8db90dcf65fdd00343a4d8a22695abed1 (patch)
tree274e896be3f9b4b33fd922c4860668becbf74a93 /lib
parentccb080d94aa765d8391f262e25c5ead0764dc2ff (diff)
parent522f4b2c6a0a1fe1e93d7bc2f9fe5290fccba4f9 (diff)
Merge branch '41777-include-cycle-time-in-usage-ping' into 'master'
Resolve "Include cycle time in usage ping" Closes #41777 See merge request gitlab-org/gitlab-ce!16793
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/cycle_analytics/base_query.rb7
-rw-r--r--lib/gitlab/cycle_analytics/base_stage.rb29
-rw-r--r--lib/gitlab/cycle_analytics/production_helper.rb4
-rw-r--r--lib/gitlab/cycle_analytics/test_stage.rb6
-rw-r--r--lib/gitlab/cycle_analytics/usage_data.rb72
-rw-r--r--lib/gitlab/database/median.rb130
-rw-r--r--lib/gitlab/usage_data.rb5
7 files changed, 206 insertions, 47 deletions
diff --git a/lib/gitlab/cycle_analytics/base_query.rb b/lib/gitlab/cycle_analytics/base_query.rb
index 8b3bc3e440d..86d708be0d6 100644
--- a/lib/gitlab/cycle_analytics/base_query.rb
+++ b/lib/gitlab/cycle_analytics/base_query.rb
@@ -8,13 +8,14 @@ module Gitlab
private
def base_query
- @base_query ||= stage_query
+ @base_query ||= stage_query(@project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
- def stage_query
+ def stage_query(project_ids)
query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id]))
.join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id]))
- .where(issue_table[:project_id].eq(@project.id)) # rubocop:disable Gitlab/ModuleWithInstanceVariables
+ .project(issue_table[:project_id].as("project_id"))
+ .where(issue_table[:project_id].in(project_ids))
.where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables
# Load merge_requests
diff --git a/lib/gitlab/cycle_analytics/base_stage.rb b/lib/gitlab/cycle_analytics/base_stage.rb
index cac31ea8cff..038d5a19bc4 100644
--- a/lib/gitlab/cycle_analytics/base_stage.rb
+++ b/lib/gitlab/cycle_analytics/base_stage.rb
@@ -21,17 +21,28 @@ module Gitlab
end
def median
- cte_table = Arel::Table.new("cte_table_for_#{name}")
+ BatchLoader.for(@project.id).batch(key: name) do |project_ids, loader|
+ cte_table = Arel::Table.new("cte_table_for_#{name}")
- # Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time).
- # Next, we find the first of the start_time_attrs that isn't `NULL` (call this start_time).
- # We compute the (end_time - start_time) interval, and give it an alias based on the current
- # cycle analytics stage.
- interval_query = Arel::Nodes::As.new(
- cte_table,
- subtract_datetimes(base_query.dup, start_time_attrs, end_time_attrs, name.to_s))
+ # Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time).
+ # Next, we find the first of the start_time_attrs that isn't `NULL` (call this start_time).
+ # We compute the (end_time - start_time) interval, and give it an alias based on the current
+ # cycle analytics stage.
+ interval_query = Arel::Nodes::As.new(cte_table,
+ subtract_datetimes(stage_query(project_ids), start_time_attrs, end_time_attrs, name.to_s))
- median_datetime(cte_table, interval_query, name)
+ if project_ids.one?
+ loader.call(@project.id, median_datetime(cte_table, interval_query, name))
+ else
+ begin
+ median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median|
+ loader.call(project_id, median)
+ end
+ rescue NotSupportedError
+ {}
+ end
+ end
+ end
end
def name
diff --git a/lib/gitlab/cycle_analytics/production_helper.rb b/lib/gitlab/cycle_analytics/production_helper.rb
index 7a889b3877f..d0ca62e46e4 100644
--- a/lib/gitlab/cycle_analytics/production_helper.rb
+++ b/lib/gitlab/cycle_analytics/production_helper.rb
@@ -1,8 +1,8 @@
module Gitlab
module CycleAnalytics
module ProductionHelper
- def stage_query
- super
+ def stage_query(project_ids)
+ super(project_ids)
.where(mr_metrics_table[:first_deployed_to_production_at]
.gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
diff --git a/lib/gitlab/cycle_analytics/test_stage.rb b/lib/gitlab/cycle_analytics/test_stage.rb
index 2b5f72bef89..0e9d235ca79 100644
--- a/lib/gitlab/cycle_analytics/test_stage.rb
+++ b/lib/gitlab/cycle_analytics/test_stage.rb
@@ -25,11 +25,11 @@ module Gitlab
_("Total test time for all commits/merges")
end
- def stage_query
+ def stage_query(project_ids)
if @options[:branch]
- super.where(build_table[:ref].eq(@options[:branch]))
+ super(project_ids).where(build_table[:ref].eq(@options[:branch]))
else
- super
+ super(project_ids)
end
end
end
diff --git a/lib/gitlab/cycle_analytics/usage_data.rb b/lib/gitlab/cycle_analytics/usage_data.rb
new file mode 100644
index 00000000000..5122e3417ca
--- /dev/null
+++ b/lib/gitlab/cycle_analytics/usage_data.rb
@@ -0,0 +1,72 @@
+module Gitlab
+ module CycleAnalytics
+ class UsageData
+ PROJECTS_LIMIT = 10
+
+ attr_reader :projects, :options
+
+ def initialize
+ @projects = Project.sorted_by_activity.limit(PROJECTS_LIMIT)
+ @options = { from: 7.days.ago }
+ end
+
+ def to_json
+ total = 0
+
+ values =
+ medians_per_stage.each_with_object({}) do |(stage_name, medians), hsh|
+ calculations = stage_values(medians)
+
+ total += calculations.values.compact.sum
+ hsh[stage_name] = calculations
+ end
+
+ values[:total] = total
+
+ { avg_cycle_analytics: values }
+ end
+
+ private
+
+ def medians_per_stage
+ projects.each_with_object({}) do |project, hsh|
+ ::CycleAnalytics.new(project, options).all_medians_per_stage.each do |stage_name, median|
+ hsh[stage_name] ||= []
+ hsh[stage_name] << median
+ end
+ end
+ end
+
+ def stage_values(medians)
+ medians = medians.map(&:presence).compact
+ average = calc_average(medians)
+
+ {
+ average: average,
+ sd: standard_deviation(medians, average),
+ missing: projects.length - medians.length
+ }
+ end
+
+ def calc_average(values)
+ return if values.empty?
+
+ (values.sum / values.length).to_i
+ end
+
+ def standard_deviation(values, average)
+ Math.sqrt(sample_variance(values, average)).to_i
+ end
+
+ def sample_variance(values, average)
+ return 0 if values.length <= 1
+
+ sum = values.inject(0) do |acc, val|
+ acc + (val - average)**2
+ end
+
+ sum / (values.length - 1)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb
index 059054ac9ff..74fed447289 100644
--- a/lib/gitlab/database/median.rb
+++ b/lib/gitlab/database/median.rb
@@ -2,18 +2,14 @@
module Gitlab
module Database
module Median
+ NotSupportedError = Class.new(StandardError)
+
def median_datetime(arel_table, query_so_far, column_sym)
- median_queries =
- if Gitlab::Database.postgresql?
- pg_median_datetime_sql(arel_table, query_so_far, column_sym)
- elsif Gitlab::Database.mysql?
- mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
- end
-
- results = Array.wrap(median_queries).map do |query|
- ActiveRecord::Base.connection.execute(query)
- end
- extract_median(results).presence
+ extract_median(execute_queries(arel_table, query_so_far, column_sym)).presence
+ end
+
+ def median_datetimes(arel_table, query_so_far, column_sym, partition_column)
+ extract_medians(execute_queries(arel_table, query_so_far, column_sym, partition_column)).presence
end
def extract_median(results)
@@ -21,13 +17,21 @@ module Gitlab
if Gitlab::Database.postgresql?
result = result.first.presence
- median = result['median'] if result
- median.to_f if median
+
+ result['median']&.to_f if result
elsif Gitlab::Database.mysql?
result.to_a.flatten.first
end
end
+ def extract_medians(results)
+ median_values = results.compact.first.values
+
+ median_values.each_with_object({}) do |(id, median), hash|
+ hash[id.to_i] = median&.to_f
+ end
+ end
+
def mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
query = arel_table
.from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name))
@@ -53,7 +57,7 @@ module Gitlab
]
end
- def pg_median_datetime_sql(arel_table, query_so_far, column_sym)
+ def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
@@ -64,41 +68,107 @@ module Gitlab
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
+ #
+ # If a partition column is used we will do the same operation but for separate partitions,
+ # when that happens the CTE might look like this:
+ #
+ # project_id | star_count | row_id | ct
+ # ------------+------------+--------+----
+ # 1 | 5 | 1 | 2
+ # 1 | 9 | 2 | 2
+ # 2 | 10 | 1 | 3
+ # 2 | 15 | 2 | 3
+ # 2 | 20 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
+
cte = Arel::Nodes::As.new(
cte_table,
- arel_table
- .project(
- arel_table[column_sym].as(column_sym.to_s),
- Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
- Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
- arel_table.project("COUNT(1)").as('ct')).
+ arel_table.project(*rank_rows(arel_table, column_sym, partition_column)).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
- cte_table.project(average([extract_epoch(cte_table[column_sym])], "median"))
- .where(
- Arel::Nodes::Between.new(
- cte_table[:row_id],
- Arel::Nodes::And.new(
- [(cte_table[:ct] / Arel.sql('2.0')),
- (cte_table[:ct] / Arel.sql('2.0') + 1)]
+ result =
+ cte_table
+ .project(*median_projections(cte_table, column_sym, partition_column))
+ .where(
+ Arel::Nodes::Between.new(
+ cte_table[:row_id],
+ Arel::Nodes::And.new(
+ [(cte_table[:ct] / Arel.sql('2.0')),
+ (cte_table[:ct] / Arel.sql('2.0') + 1)]
+ )
)
)
- )
- .with(query_so_far, cte)
- .to_sql
+ .with(query_so_far, cte)
+
+ result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column
+
+ result.to_sql
end
private
+ def median_queries(arel_table, query_so_far, column_sym, partition_column = nil)
+ if Gitlab::Database.postgresql?
+ pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
+ elsif Gitlab::Database.mysql?
+ raise NotSupportedError, "partition_column is not supported for MySQL" if partition_column
+
+ mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
+ end
+ end
+
+ def execute_queries(arel_table, query_so_far, column_sym, partition_column = nil)
+ queries = median_queries(arel_table, query_so_far, column_sym, partition_column)
+
+ Array.wrap(queries).map { |query| ActiveRecord::Base.connection.execute(query) }
+ end
+
def average(args, as)
Arel::Nodes::NamedFunction.new("AVG", args, as)
end
+ def rank_rows(arel_table, column_sym, partition_column)
+ column_row = arel_table[column_sym].as(column_sym.to_s)
+
+ if partition_column
+ partition_row = arel_table[partition_column]
+ row_id =
+ Arel::Nodes::Over.new(
+ Arel::Nodes::NamedFunction.new('rank', []),
+ Arel::Nodes::Window.new.partition(arel_table[partition_column])
+ .order(arel_table[column_sym])
+ ).as('row_id')
+
+ count = arel_table.from(arel_table.alias)
+ .project('COUNT(*)')
+ .where(arel_table[partition_column].eq(arel_table.alias[partition_column]))
+ .as('ct')
+
+ [partition_row, column_row, row_id, count]
+ else
+ row_id =
+ Arel::Nodes::Over.new(
+ Arel::Nodes::NamedFunction.new('row_number', []),
+ Arel::Nodes::Window.new.order(arel_table[column_sym])
+ ).as('row_id')
+
+ count = arel_table.project("COUNT(1)").as('ct')
+
+ [column_row, row_id, count]
+ end
+ end
+
+ def median_projections(table, column_sym, partition_column)
+ projections = []
+ projections << table[partition_column] if partition_column
+ projections << average([extract_epoch(table[column_sym])], "median")
+ projections
+ end
+
def extract_epoch(arel_attribute)
Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")})
end
diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb
index 9d13d1d781f..37d3512990e 100644
--- a/lib/gitlab/usage_data.rb
+++ b/lib/gitlab/usage_data.rb
@@ -9,6 +9,7 @@ module Gitlab
license_usage_data.merge(system_usage_data)
.merge(features_usage_data)
.merge(components_usage_data)
+ .merge(cycle_analytics_usage_data)
end
def to_json(force_refresh: false)
@@ -71,6 +72,10 @@ module Gitlab
}
end
+ def cycle_analytics_usage_data
+ Gitlab::CycleAnalytics::UsageData.new.to_json
+ end
+
def features_usage_data
features_usage_data_ce
end