Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author GitLab Bot <gitlab-bot@gitlab.com> 2020-08-19 21:10:34 +0300
committer GitLab Bot <gitlab-bot@gitlab.com> 2020-08-19 21:10:34 +0300
commit 2f5731cf536deff075d1011814f271cbb1ed67e2 (patch)
tree f6e6dec098a60039b1413dae64d24c0bf55bf03d /lib
parent 74b5b3ffcb9fe4d9424bc2bf35e43f749f76d023 (diff)
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib')
-rw-r--r-- lib/extracts_ref.rb 75
-rw-r--r-- lib/gitlab/analytics/unique_visits.rb 69
-rw-r--r-- lib/gitlab/metrics/dashboard/validator.rb 14
-rw-r--r-- lib/gitlab/metrics/dashboard/validator/client.rb 2
-rw-r--r-- lib/gitlab/metrics/dashboard/validator/schemas/panel.json 2
-rw-r--r-- lib/gitlab/repository_cache_adapter.rb 16
-rw-r--r-- lib/gitlab/usage_data.rb 8
-rw-r--r-- lib/gitlab/usage_data_counters/hll_redis_counter.rb 149
-rw-r--r-- lib/gitlab/usage_data_counters/known_events.yml 88
9 files changed, 311 insertions, 112 deletions
diff --git a/lib/extracts_ref.rb b/lib/extracts_ref.rb
index adbbe6c0e50..5ef2d888550 100644
--- a/lib/extracts_ref.rb
+++ b/lib/extracts_ref.rb
@@ -40,50 +40,11 @@ module ExtractsRef
# Returns an Array where the first value is the tree-ish and the second is the
# path
def extract_ref(id)
- pair = ['', '']
-
- return pair unless repository_container
-
- if id =~ /^(\h{40})(.+)/
- # If the ref appears to be a SHA, we're done, just split the string
- pair = $~.captures
- elsif id.exclude?('/')
- # If the ID contains no slash, we must have a ref and no path, so
- # we can skip the Redis calls below
- pair = [id, '']
- else
- # Otherwise, attempt to detect the ref using a list of the repository_container's
- # branches and tags
-
- # Append a trailing slash if we only get a ref and no file path
- unless id.ends_with?('/')
- id = [id, '/'].join
- end
-
- first_path_segment, rest = id.split('/', 2)
-
- if use_first_path_segment?(first_path_segment)
- pair = [first_path_segment, rest]
- else
- valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
-
- if valid_refs.empty?
- # No exact ref match, so just try our best
- pair = id.match(%r{([^/]+)(.*)}).captures
- else
- # There is a distinct possibility that multiple refs prefix the ID.
- # Use the longest match to maximize the chance that we have the
- # right ref.
- best_match = valid_refs.max_by(&:length)
- # Partition the string into the ref and the path, ignoring the empty first value
- pair = id.partition(best_match)[1..-1]
- end
- end
- end
+ pair = extract_raw_ref(id)
[
pair[0].strip,
- pair[1].gsub(%r{^/|/$}, '') # Remove leading and trailing slashes from path
+ pair[1].delete_prefix('/').delete_suffix('/')
]
end
@@ -117,6 +78,38 @@ module ExtractsRef
private
+ def extract_raw_ref(id)
+ return ['', ''] unless repository_container
+
+ # If the ref appears to be a SHA, we're done, just split the string
+ return $~.captures if id =~ /^(\h{40})(.+)/
+
+ # No slash means we must have a ref and no path
+ return [id, ''] unless id.include?('/')
+
+ # Otherwise, attempt to detect the ref using a list of the
+ # repository_container's branches and tags
+
+ # Append a trailing slash if we only get a ref and no file path
+ id = [id, '/'].join unless id.ends_with?('/')
+ first_path_segment, rest = id.split('/', 2)
+
+ return [first_path_segment, rest] if use_first_path_segment?(first_path_segment)
+
+ valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
+
+ # No exact ref match, so just try our best
+ return id.match(%r{([^/]+)(.*)}).captures if valid_refs.empty?
+
+ # There is a distinct possibility that multiple refs prefix the ID.
+ # Use the longest match to maximize the chance that we have the
+ # right ref.
+ best_match = valid_refs.max_by(&:length)
+
+ # Partition the string into the ref and the path, ignoring the empty first value
+ id.partition(best_match)[1..-1]
+ end
+
def use_first_path_segment?(ref)
return false unless ::Feature.enabled?(:extracts_path_optimization)
return false unless repository_container
diff --git a/lib/gitlab/analytics/unique_visits.rb b/lib/gitlab/analytics/unique_visits.rb
index 33ea6644fb0..ad746ebbd42 100644
--- a/lib/gitlab/analytics/unique_visits.rb
+++ b/lib/gitlab/analytics/unique_visits.rb
@@ -3,77 +3,36 @@
module Gitlab
module Analytics
class UniqueVisits
- ANALYTICS_IDS = Set[
- 'g_analytics_contribution',
- 'g_analytics_insights',
- 'g_analytics_issues',
- 'g_analytics_productivity',
- 'g_analytics_valuestream',
- 'p_analytics_pipelines',
- 'p_analytics_code_reviews',
- 'p_analytics_valuestream',
- 'p_analytics_insights',
- 'p_analytics_issues',
- 'p_analytics_repo',
- 'i_analytics_cohorts',
- 'i_analytics_dev_ops_score'
- ]
-
- COMPLIANCE_IDS = Set[
- 'g_compliance_dashboard',
- 'g_compliance_audit_events',
- 'i_compliance_credential_inventory',
- 'i_compliance_audit_events'
- ].freeze
-
- KEY_EXPIRY_LENGTH = 12.weeks
-
def track_visit(visitor_id, target_id, time = Time.zone.now)
- target_key = key(target_id, time)
-
- Gitlab::Redis::HLL.add(key: target_key, value: visitor_id, expiry: KEY_EXPIRY_LENGTH)
+ Gitlab::UsageDataCounters::HLLRedisCounter.track_event(visitor_id, target_id, time)
end
# Returns number of unique visitors for given targets in given time frame
#
# @param [String, Array[<String>]] targets ids of targets to count visits on. Special cases: :analytics and :compliance expand to all events of that category
- # @param [ActiveSupport::TimeWithZone] start_week start of time frame
- # @param [Integer] weeks time frame length in weeks
+ # @param [ActiveSupport::TimeWithZone] start_date start of time frame
+ # @param [ActiveSupport::TimeWithZone] end_date end of time frame
# @return [Integer] number of unique visitors
- def unique_visits_for(targets:, start_week: 7.days.ago, weeks: 1)
+ def unique_visits_for(targets:, start_date: 7.days.ago, end_date: start_date + 1.week)
target_ids = if targets == :analytics
- ANALYTICS_IDS
+ self.class.analytics_ids
elsif targets == :compliance
- COMPLIANCE_IDS
+ self.class.compliance_ids
else
Array(targets)
end
- timeframe_start = [start_week, weeks.weeks.ago].min
-
- redis_keys = keys(targets: target_ids, timeframe_start: timeframe_start, weeks: weeks)
-
- Gitlab::Redis::HLL.count(keys: redis_keys)
+ Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: target_ids, start_date: start_date, end_date: end_date)
end
- private
-
- def key(target_id, time)
- target_ids = ANALYTICS_IDS + COMPLIANCE_IDS
-
- raise "Invalid target id #{target_id}" unless target_ids.include?(target_id.to_s)
-
- target_key = target_id.to_s.gsub('analytics', '{analytics}').gsub('compliance', '{compliance}')
-
- year_week = time.strftime('%G-%V')
-
- "#{target_key}-#{year_week}"
- end
+ class << self
+ def analytics_ids
+ Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('analytics')
+ end
- def keys(targets:, timeframe_start:, weeks:)
- (0..(weeks - 1)).map do |week_increment|
- targets.map { |target_id| key(target_id, timeframe_start + week_increment * 7.days) }
- end.flatten
+ def compliance_ids
+ Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('compliance')
+ end
end
end
end
diff --git a/lib/gitlab/metrics/dashboard/validator.rb b/lib/gitlab/metrics/dashboard/validator.rb
index a2450c59886..8edd9c397f9 100644
--- a/lib/gitlab/metrics/dashboard/validator.rb
+++ b/lib/gitlab/metrics/dashboard/validator.rb
@@ -8,18 +8,20 @@ module Gitlab
class << self
def validate(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil)
- errors(content, schema_path, dashboard_path: dashboard_path, project: project).empty?
+ errors = _validate(content, schema_path, dashboard_path: dashboard_path, project: project)
+ errors.empty?
end
def validate!(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil)
- errors = errors(content, schema_path, dashboard_path: dashboard_path, project: project)
+ errors = _validate(content, schema_path, dashboard_path: dashboard_path, project: project)
errors.empty? || raise(errors.first)
end
- def errors(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil)
- Validator::Client
- .new(content, schema_path, dashboard_path: dashboard_path, project: project)
- .execute
+ private
+
+ def _validate(content, schema_path, dashboard_path: nil, project: nil)
+ client = Validator::Client.new(content, schema_path, dashboard_path: dashboard_path, project: project)
+ client.execute
end
end
end
diff --git a/lib/gitlab/metrics/dashboard/validator/client.rb b/lib/gitlab/metrics/dashboard/validator/client.rb
index 588c677ca28..c63415abcfc 100644
--- a/lib/gitlab/metrics/dashboard/validator/client.rb
+++ b/lib/gitlab/metrics/dashboard/validator/client.rb
@@ -46,7 +46,7 @@ module Gitlab
def validate_against_schema
schemer.validate(content).map do |error|
- ::Gitlab::Metrics::Dashboard::Validator::Errors::SchemaValidationError.new(error)
+ Errors::SchemaValidationError.new(error)
end
end
end
diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/panel.json b/lib/gitlab/metrics/dashboard/validator/schemas/panel.json
index 2ae9608036e..011eef53e40 100644
--- a/lib/gitlab/metrics/dashboard/validator/schemas/panel.json
+++ b/lib/gitlab/metrics/dashboard/validator/schemas/panel.json
@@ -4,7 +4,7 @@
"properties": {
"type": {
"type": "string",
- "enum": ["area-chart", "line-chart", "anomaly-chart", "bar", "column", "stacked-column", "single-stat", "heatmap", "gauge"],
+ "enum": ["area-chart", "anomaly-chart", "bar", "column", "stacked-column", "single-stat", "heatmap"],
"default": "area-chart"
},
"title": { "type": "string" },
diff --git a/lib/gitlab/repository_cache_adapter.rb b/lib/gitlab/repository_cache_adapter.rb
index da8025d2265..f6a5c6ed754 100644
--- a/lib/gitlab/repository_cache_adapter.rb
+++ b/lib/gitlab/repository_cache_adapter.rb
@@ -58,11 +58,19 @@ module Gitlab
# wrong answer. We handle that by querying the full list - which fills
# the cache - and using it directly to answer the question.
define_method("#{name}_include?") do |value|
- if strong_memoized?(name) || !redis_set_cache.exist?(name)
- return __send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend
- end
+ ivar = "@#{name}_include"
+ memoized = instance_variable_get(ivar) || {}
+
+ next memoized[value] if memoized.key?(value)
+
+ memoized[value] =
+ if strong_memoized?(name) || !redis_set_cache.exist?(name)
+ __send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend
+ else
+ redis_set_cache.include?(name, value)
+ end
- redis_set_cache.include?(name, value)
+ instance_variable_set(ivar, memoized)[value]
end
end
diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb
index 73a80155dbc..70efe86143e 100644
--- a/lib/gitlab/usage_data.rb
+++ b/lib/gitlab/usage_data.rb
@@ -584,21 +584,21 @@ module Gitlab
end
def analytics_unique_visits_data
- results = ::Gitlab::Analytics::UniqueVisits::ANALYTICS_IDS.each_with_object({}) do |target_id, hash|
+ results = ::Gitlab::Analytics::UniqueVisits.analytics_ids.each_with_object({}) do |target_id, hash|
hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) }
end
results['analytics_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics) }
- results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, weeks: 4) }
+ results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, start_date: 4.weeks.ago.to_date, end_date: Date.current) }
{ analytics_unique_visits: results }
end
def compliance_unique_visits_data
- results = ::Gitlab::Analytics::UniqueVisits::COMPLIANCE_IDS.each_with_object({}) do |target_id, hash|
+ results = ::Gitlab::Analytics::UniqueVisits.compliance_ids.each_with_object({}) do |target_id, hash|
hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) }
end
results['compliance_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance) }
- results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, weeks: 4) }
+ results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, start_date: 4.weeks.ago.to_date, end_date: Date.current) }
{ compliance_unique_visits: results }
end
diff --git a/lib/gitlab/usage_data_counters/hll_redis_counter.rb b/lib/gitlab/usage_data_counters/hll_redis_counter.rb
new file mode 100644
index 00000000000..c9c39225068
--- /dev/null
+++ b/lib/gitlab/usage_data_counters/hll_redis_counter.rb
@@ -0,0 +1,149 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module UsageDataCounters
+ module HLLRedisCounter
+ DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks
+ DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days
+ DEFAULT_REDIS_SLOT = ''.freeze
+
+ UnknownEvent = Class.new(StandardError)
+ UnknownAggregation = Class.new(StandardError)
+
+ KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'.freeze
+ ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
+
+ # Track event on entity_id
+ # Increment a Redis HLL counter for unique event_name and entity_id
+ #
+ # All events should be added to the known_events file lib/gitlab/usage_data_counters/known_events.yml
+ #
+ # Event example:
+ #
+ # - name: g_compliance_dashboard # Unique event name
+ # redis_slot: compliance # Optional slot name; if not defined, the event name is used as the slot. Used for totals
+ # category: compliance # Group events in categories
+ # expiry: 29 # Optional expiration time in days; defaults to 29 days for daily and 6 weeks for weekly aggregation
+ # aggregation: daily # Aggregation level, keys are stored daily or weekly
+ #
+ # Usage:
+ #
+ # * Track event: Gitlab::UsageDataCounters::HLLRedisCounter.track_event(user_id, 'g_compliance_dashboard')
+ # * Get unique counts per user: Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_dashboard', start_date: 28.days.ago, end_date: Date.current)
+ class << self
+ def track_event(entity_id, event_name, time = Time.zone.now)
+ event = event_for(event_name)
+
+ raise UnknownEvent.new("Unknown event #{event_name}") unless event.present?
+
+ Gitlab::Redis::HLL.add(key: redis_key(event, time), value: entity_id, expiry: expiry(event))
+ end
+
+ def unique_events(event_names:, start_date:, end_date:)
+ events = events_for(Array(event_names))
+
+ raise 'Events should be in same slot' unless events_in_same_slot?(events)
+ raise 'Events should be in same category' unless events_in_same_category?(events)
+ raise 'Events should have same aggregation level' unless events_same_aggregation?(events)
+
+ aggregation = events.first[:aggregation]
+
+ keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
+
+ Gitlab::Redis::HLL.count(keys: keys)
+ end
+
+ def events_for_category(category)
+ known_events.select { |event| event[:category] == category }.map { |event| event[:name] }
+ end
+
+ private
+
+ def keys_for_aggregation(aggregation, events:, start_date:, end_date:)
+ if aggregation.to_sym == :daily
+ daily_redis_keys(events: events, start_date: start_date, end_date: end_date)
+ else
+ weekly_redis_keys(events: events, start_date: start_date, end_date: end_date)
+ end
+ end
+
+ def known_events
+ @known_events ||= YAML.load_file(Rails.root.join(KNOWN_EVENTS_PATH)).map(&:with_indifferent_access)
+ end
+
+ def known_events_names
+ known_events.map { |event| event[:name] }
+ end
+
+ def events_in_same_slot?(events)
+ slot = events.first[:redis_slot]
+ events.all? { |event| event[:redis_slot] == slot }
+ end
+
+ def events_in_same_category?(events)
+ category = events.first[:category]
+ events.all? { |event| event[:category] == category }
+ end
+
+ def events_same_aggregation?(events)
+ aggregation = events.first[:aggregation]
+ events.all? { |event| event[:aggregation] == aggregation }
+ end
+
+ def expiry(event)
+ return event[:expiry] if event[:expiry].present?
+
+ event[:aggregation].to_sym == :daily ? DEFAULT_DAILY_KEY_EXPIRY_LENGTH : DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH
+ end
+
+ def event_for(event_name)
+ known_events.find { |event| event[:name] == event_name }
+ end
+
+ def events_for(event_names)
+ known_events.select { |event| event_names.include?(event[:name]) }
+ end
+
+ def redis_slot(event)
+ event[:redis_slot] || DEFAULT_REDIS_SLOT
+ end
+
+ # Compose the key in order to store events daily or weekly
+ def redis_key(event, time)
+ raise UnknownEvent.new("Unknown event #{event[:name]}") unless known_events_names.include?(event[:name].to_s)
+ raise UnknownAggregation.new("Use :daily or :weekly aggregation") unless ALLOWED_AGGREGATIONS.include?(event[:aggregation].to_sym)
+
+ slot = redis_slot(event)
+ key = if slot.present?
+ event[:name].to_s.gsub(slot, "{#{slot}}")
+ else
+ "{#{event[:name]}}"
+ end
+
+ if event[:aggregation].to_sym == :daily
+ year_day = time.strftime('%G-%j')
+ "#{year_day}-#{key}"
+ else
+ year_week = time.strftime('%G-%V')
+ "#{key}-#{year_week}"
+ end
+ end
+
+ def daily_redis_keys(events:, start_date:, end_date:)
+ (start_date.to_date..end_date.to_date).map do |date|
+ events.map { |event| redis_key(event, date) }
+ end.flatten
+ end
+
+ def weekly_redis_keys(events:, start_date:, end_date:)
+ weeks = end_date.to_date.cweek - start_date.to_date.cweek
+ weeks = 1 if weeks == 0
+
+ (0..(weeks - 1)).map do |week_increment|
+ events.map { |event| redis_key(event, start_date + week_increment * 7.days) }
+ end.flatten
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/usage_data_counters/known_events.yml b/lib/gitlab/usage_data_counters/known_events.yml
new file mode 100644
index 00000000000..b7e516fa8b1
--- /dev/null
+++ b/lib/gitlab/usage_data_counters/known_events.yml
@@ -0,0 +1,88 @@
+---
+# Compliance category
+- name: g_compliance_dashboard
+ redis_slot: compliance
+ category: compliance
+ expiry: 84 # expiration time in days, equivalent to 12 weeks
+ aggregation: weekly
+- name: g_compliance_audit_events
+ category: compliance
+ redis_slot: compliance
+ expiry: 84
+ aggregation: weekly
+- name: i_compliance_audit_events
+ category: compliance
+ redis_slot: compliance
+ expiry: 84
+ aggregation: weekly
+- name: i_compliance_credential_inventory
+ category: compliance
+ redis_slot: compliance
+ expiry: 84
+ aggregation: weekly
+# Analytics category
+- name: g_analytics_contribution
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: g_analytics_insights
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: g_analytics_issues
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: g_analytics_productivity
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: g_analytics_valuestream
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: p_analytics_pipelines
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: p_analytics_code_reviews
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: p_analytics_valuestream
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: p_analytics_insights
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: p_analytics_issues
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: p_analytics_repo
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: i_analytics_cohorts
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly
+- name: i_analytics_dev_ops_score
+ category: analytics
+ redis_slot: analytics
+ expiry: 84
+ aggregation: weekly