Welcome to mirror list, hosted at ThFree Co, Russian Federation.

update_build_state_service.rb « ci « services « app - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
blob: 874f4bf459a0e6d3930bd3b5469beb5776c5dfc1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# frozen_string_literal: true

module Ci
  ##
  # Applies a state update described by `params` to `build`.
  #
  # `#execute` returns a `Result` whose `status` is:
  #
  # * 200 - the requested state has been handled,
  # * 202 - the update has been accepted but is not applied yet; `backoff`
  #         then carries a number of seconds,
  # * 400 - the requested state is not one of running / success / failed.
  #
  class UpdateBuildStateService
    include ::Gitlab::Utils::StrongMemoize
    include ::Gitlab::ExclusiveLeaseHelpers

    Result = Struct.new(:status, :backoff, keyword_init: true)
    InvalidTraceError = Class.new(StandardError)

    # Maximum age of a pending state for which we still keep answering with
    # 202 while live trace chunks exist.
    ACCEPT_TIMEOUT = 5.minutes.freeze

    attr_reader :build, :params, :metrics

    # build   - the build to update
    # params  - update payload; this class reads :state, :trace, :checksum,
    #           :output (:checksum, :bytesize), :failure_reason and :exit_code
    # metrics - collector receiving `increment_trace_operation` and
    #           `observe_migration_duration` calls
    def initialize(build, params, metrics = ::Gitlab::Ci::Trace::Metrics.new)
      @build = build
      @params = params
      @metrics = metrics
    end

    # Entry point. Overwrites the trace when one is supplied, then either
    # updates the build right away or, when the accept flow is available,
    # records the pending state and processes it under the build trace lock.
    def execute
      overwrite_trace! if has_trace?

      return update_build_state! unless accept_available?

      ensure_pending_state!

      in_build_trace_lock { process_build_state! }
    end

    private

    # The overwrite metric is always recorded; the trace itself is only
    # replaced when the feature check passes.
    def overwrite_trace!
      metrics.increment_trace_operation(operation: :overwrite)

      return unless Gitlab::Ci::Features.trace_overwrite?

      build.trace.set(params[:trace])
    end

    # Reading an attribute forces the memoized pending state to be resolved.
    def ensure_pending_state!
      pending_state.created_at
    end

    # Decides what to do with the requested state:
    #
    # * no live chunks left        -> validate the trace, update the build
    # * live chunks, state is old  -> record the discard, update the build
    # * live chunks, state is new  -> accept (202)
    def process_build_state!
      unless live_chunks_pending?
        validate_build_trace!

        return update_build_state!
      end

      return accept_build_state! unless pending_state_outdated?

      discard_build_trace!
      update_build_state!
    end

    # Schedules every live chunk to be persisted and answers with a 202
    # carrying a backoff derived from the pending state creation time.
    def accept_build_state!
      build.trace_chunks.live.find_each(&:schedule_to_persist!)

      metrics.increment_trace_operation(operation: :accepted)

      backoff = ::Gitlab::Ci::Runner::Backoff.new(pending_state.created_at)

      Result.new(status: 202, backoff: backoff.to_seconds)
    end

    # Records trace metrics and, when the checksum does not validate,
    # optionally logs an InvalidTraceError with checksum details.
    # Does nothing for builds without trace chunks.
    def validate_build_trace!
      return unless has_chunks?

      unless live_chunks_pending?
        metrics.increment_trace_operation(operation: :finalized)
        metrics.observe_migration_duration(pending_state_seconds)
      end

      checksum = ::Gitlab::Ci::Trace::Checksum.new(build)

      return if checksum.valid?

      metrics.increment_trace_operation(operation: :invalid)
      metrics.increment_trace_operation(operation: :corrupted) if checksum.corrupted?

      return unless log_invalid_chunks?

      ::Gitlab::ErrorTracking.log_exception(InvalidTraceError.new,
        project_path: build.project.full_path,
        build_id: build.id,
        state_crc32: checksum.state_crc32,
        chunks_crc32: checksum.chunks_crc32,
        chunks_count: checksum.chunks_count,
        chunks_corrupted: checksum.corrupted?
      )
    end

    # Applies the requested state to the build. Every recognized state
    # yields a 200, anything else a 400.
    def update_build_state!
      case build_state
      when 'running'
        build.touch if build.needs_touch?
      when 'success'
        build.success!
      when 'failed'
        failure_reason = params[:failure_reason] || :unknown_failure

        build.drop_with_exit_code!(failure_reason, params[:exit_code])
      else
        return Result.new(status: 400)
      end

      Result.new(status: 200)
    end

    # Only records the fact in metrics; nothing else happens here.
    def discard_build_trace!
      metrics.increment_trace_operation(operation: :discarded)
    end

    # The accept flow is used for non-running states that come with a
    # checksum, provided the feature is enabled for the project.
    def accept_available?
      return false if build_running?

      has_checksum? && chunks_migration_enabled?
    end

    def live_chunks_pending?
      build.trace_chunks.live.any?
    end

    def has_chunks?
      build.trace_chunks.any?
    end

    def pending_state_outdated?
      pending_state_duration > ACCEPT_TIMEOUT
    end

    # Time elapsed since the pending state has been created.
    def pending_state_duration
      Time.current - pending_state.created_at
    end

    def pending_state_seconds
      pending_state_duration.seconds
    end

    # Requested state as a string; an empty string when none was given.
    def build_state
      params.dig(:state).to_s
    end

    def has_trace?
      params.dig(:trace).present?
    end

    def has_checksum?
      trace_checksum.present?
    end

    def build_running?
      build_state == 'running'
    end

    # The nested `output.checksum` takes precedence over a top-level one.
    def trace_checksum
      params.dig(:output, :checksum) || params.dig(:checksum)
    end

    def trace_bytesize
      params.dig(:output, :bytesize)
    end

    def pending_state
      strong_memoize(:pending_state) do
        ensure_pending_state
      end
    end

    # Finds or creates the pending state matching this request. When that
    # gives nothing back, a conflict is recorded and the pending state
    # already associated with the build is used instead.
    def ensure_pending_state
      state_record = Ci::BuildPendingState.safe_find_or_create_by(
        build_id: build.id,
        state: params.fetch(:state),
        trace_checksum: trace_checksum,
        trace_bytesize: trace_bytesize,
        failure_reason: params.dig(:failure_reason)
      )

      metrics.increment_trace_operation(operation: :conflict) unless state_record.present?

      state_record || build.pending_state
    end

    ##
    # Runs the given block while holding the build trace lock.
    #
    # The lease is cancelled from a status commit callback, that is as soon
    # as the build status has been written and committed to the database,
    # rather than when the whole status transition is done.
    #
    # A status transition schedules a number of workers afterwards. Holding
    # the lease until all of them are scheduled would open a possibility of
    # race conditions, hence the early release.
    #
    # When the trace is already locked, this is recorded in metrics and the
    # state gets accepted instead of being processed.
    #
    def in_build_trace_lock(&block)
      build.trace.lock do |_, lease| # rubocop:disable CodeReuse/ActiveRecord
        build.run_on_status_commit do
          lease.cancel
        end

        yield
      end
    rescue ::Gitlab::Ci::Trace::LockedError
      metrics.increment_trace_operation(operation: :locked)

      accept_build_state!
    end

    def chunks_migration_enabled?
      ::Gitlab::Ci::Features.accept_trace?(build.project)
    end

    def log_invalid_chunks?
      ::Gitlab::Ci::Features.log_invalid_trace_chunks?(build.project)
    end
  end
end