Welcome to mirror list, hosted at ThFree Co, Russian Federation.

intersection.rb « calculations « sources « aggregates « metrics « usage « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c8c248905f7f6fa39b91c2ae110dc330fbfc86d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Aggregates
        module Sources
          module Calculations
            # Mixin that derives the power (cardinality) of the intersection of several metrics
            # from the powers of their unions, using the inclusion-exclusion principle.
            #
            # The module itself does not define `calculate_metrics_union`; it calls it with the
            # keywords `metric_names:`, `start_date:`, `end_date:` and `recorded_at:` and expects
            # the object it is mixed into to provide it.
            module Intersection
              # Returns the power of the intersection of all metrics in +metric_names+.
              #
              # Inclusion-exclusion principle, for three and four sets:
              #   |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + |B & C|) + |A & B & C|  =>
              #   |A & B & C| = |A + B + C| - (|A| + |B| + |C|) + (|A & B| + |A & C| + |B & C|)
              #
              #   |A + B + C + D| = (|A| + .. + |D|) - (|A & B| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A & B & C & D|  =>
              #   |A & B & C & D| = (|A| + .. + |D|) - (|A & B| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A + B + C + D|
              #
              # metric_names       - Array of metric names whose intersection is calculated.
              # start_date,
              # end_date,
              # recorded_at        - forwarded untouched to `calculate_metrics_union`.
              # subset_powers_cache - Hash of `subset size => { key => power }` used to memoize
              #                       intermediate results across the recursive calls. The default
              #                       builds a separate inner Hash per subset size. (`Hash.new({})`
              #                       would hand out one shared inner Hash for every size, so a
              #                       metric named e.g. "A_&_B" would clash with the cached
              #                       intersection of "A" and "B".)
              def calculate_metrics_intersections(metric_names:, start_date:, end_date:, recorded_at:, subset_powers_cache: Hash.new { |cache, subset_size| cache[subset_size] = {} })
                # every component of the equation except the last one (the union of all metrics):
                # |A & B & C & D| = (|A| + .. + |D|) - (|A & B| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - ...
                subset_powers_data = subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, subset_powers_cache)

                # last component of the equation: |A & B & C & D| = .... - |A + B + C + D|
                power_of_union_of_all_metrics = subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||=
                  calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)

                # The sign with which the searched intersection appears in the inclusion-exclusion
                # equation alternates with the number of metrics, so the signs of the remaining
                # components have to be flipped accordingly once the equation is solved for it.
                # An even number of subset sizes (odd number of metrics) means the union is added
                # and the subset sums are negated; an odd number means the opposite.
                subset_powers_size_even = subset_powers_data.size.even?

                # signed sum of all components except the union of all metrics
                sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)

                # add the last component with the sign matching the parity determined above
                sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
              end

              private

              # Returns an Array with one entry per subset size from 1 up to
              # `metric_names.size - 1`; the entry for a given size is the sum of the intersection
              # powers of every subset of +metric_names+ having that size.
              # Results are memoized in +subset_powers_cache+ under `[subset_size][key]`.
              def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, subset_powers_cache)
                subset_sizes = (1...metric_names.size)

                subset_sizes.map do |subset_size|
                  if subset_size > 1
                    # sum of powers of intersections of each subset of the given size:
                    # |A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
                    metric_names.combination(subset_size).sum do |metrics_subset|
                      subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
                        calculate_metrics_intersections(metric_names: metrics_subset, start_date: start_date, end_date: end_date, recorded_at: recorded_at, subset_powers_cache: subset_powers_cache)
                    end
                  else
                    # sum of powers of each set (metric) alone:
                    # |A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
                    # NOTE: the single metric is passed as-is (not wrapped in an Array).
                    metric_names.sum do |metric|
                      subset_powers_cache[subset_size][metric] ||=
                        calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
                    end
                  end
                end
              end

              # Sums +subset_powers_data+ with alternating signs (entries at even indexes are added,
              # entries at odd indexes are subtracted) and negates the total when
              # +subset_powers_size_even+ is true.
              def sum_subset_powers(subset_powers_data, subset_powers_size_even)
                sum_without_sign = subset_powers_data.each_with_index.sum do |value, index|
                  index.even? ? value : -value
                end

                (subset_powers_size_even ? -1 : 1) * sum_without_sign
              end
            end
          end
        end
      end
    end
  end
end