From 25d8c8d1f0846f563745da99e4e16fba8c268b36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Coutable?= Date: Mon, 15 Oct 2018 18:06:44 +0200 Subject: Improve automated Review Apps cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémy Coutable --- scripts/review_apps/automated_cleanup.rb | 102 +++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 25 deletions(-) (limited to 'scripts') diff --git a/scripts/review_apps/automated_cleanup.rb b/scripts/review_apps/automated_cleanup.rb index a5f0ec372d8..4166070f7cd 100755 --- a/scripts/review_apps/automated_cleanup.rb +++ b/scripts/review_apps/automated_cleanup.rb @@ -5,12 +5,26 @@ require_relative File.expand_path('../../lib/quality/helm_client.rb', __dir__) require_relative File.expand_path('../../lib/quality/kubernetes_client.rb', __dir__) class AutomatedCleanup - attr_reader :project_path, :gitlab_token, :cleaned_up_releases + attr_reader :project_path, :gitlab_token + + DEPLOYMENTS_PER_PAGE = 100 + HELM_RELEASES_BATCH_SIZE = 5 + IGNORED_HELM_ERRORS = [ + 'transport is closing', + 'error upgrading connection' + ].freeze + IGNORED_KUBERNETES_ERRORS = [ + 'NotFound' + ].freeze + + def self.ee? + ENV['CI_PROJECT_NAME'] == 'gitlab-ee' || File.exist?('CHANGELOG-EE.md') + end def initialize(project_path: ENV['CI_PROJECT_PATH'], gitlab_token: ENV['GITLAB_BOT_REVIEW_APPS_CLEANUP_TOKEN']) @project_path = project_path @gitlab_token = gitlab_token - @cleaned_up_releases = [] + ENV['TILLER_NAMESPACE'] ||= review_apps_namespace end def gitlab @@ -25,12 +39,16 @@ class AutomatedCleanup end end + def review_apps_namespace + self.class.ee? ? 'review-apps-ee' : 'review-apps-ce' + end + def helm - @helm ||= Quality::HelmClient.new + @helm ||= Quality::HelmClient.new(namespace: review_apps_namespace) end def kubernetes - @kubernetes ||= Quality::KubernetesClient.new + @kubernetes ||= Quality::KubernetesClient.new(namespace: review_apps_namespace) end def perform_gitlab_environment_cleanup!(days_for_stop:, days_for_delete:) @@ -39,26 +57,27 @@ class AutomatedCleanup checked_environments = [] delete_threshold = threshold_time(days: days_for_delete) stop_threshold = threshold_time(days: days_for_stop) - gitlab.deployments(project_path, per_page: 50).auto_paginate do |deployment| - next unless deployment.environment.name.start_with?('review/') - next if checked_environments.include?(deployment.environment.slug) - puts + gitlab.deployments(project_path, per_page: DEPLOYMENTS_PER_PAGE).auto_paginate do |deployment| + environment = deployment.environment - checked_environments << deployment.environment.slug - deployed_at = Time.parse(deployment.created_at) + next unless environment.name.start_with?('review/') + next if checked_environments.include?(environment.slug) + + last_deploy = deployment.created_at + deployed_at = Time.parse(last_deploy) if deployed_at < delete_threshold - print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'deleting') - gitlab.delete_environment(project_path, deployment.environment.id) - cleaned_up_releases << deployment.environment.slug + delete_environment(environment, deployment) + release = Quality::HelmClient::Release.new(environment.slug, 1, deployed_at.to_s, nil, nil, review_apps_namespace) + delete_helm_release(release) elsif deployed_at < stop_threshold - print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'stopping') - gitlab.stop_environment(project_path, deployment.environment.id) - cleaned_up_releases << deployment.environment.slug + stop_environment(environment, deployment) else - print_release_state(subject: 'Review app', release_name: deployment.environment.slug, release_date: deployment.created_at, action: 'leaving') + print_release_state(subject: 'Review app', release_name: environment.slug, release_date: last_deploy, action: 'leaving') end + + checked_environments << environment.slug end end @@ -66,25 +85,58 @@ class AutomatedCleanup puts "Checking for Helm releases not updated in the last #{days} days..." threshold_day = threshold_time(days: days) - helm.releases(args: ['--deployed', '--failed', '--date', '--reverse', '--max 25']).each do |release| - next if cleaned_up_releases.include?(release.name) - if release.last_update < threshold_day - print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'cleaning') - helm.delete(release_name: release.name) - kubernetes.cleanup(release_name: release.name) + helm_releases.each do |release| + if release.status == 'FAILED' || release.last_update < threshold_day + delete_helm_release(release) else print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'leaving') end end end + private + + def delete_environment(environment, deployment) + print_release_state(subject: 'Review app', release_name: environment.slug, release_date: deployment.created_at, action: 'deleting') + gitlab.delete_environment(project_path, environment.id) + end + + def stop_environment(environment, deployment) + print_release_state(subject: 'Review app', release_name: environment.slug, release_date: deployment.created_at, action: 'stopping') + gitlab.stop_environment(project_path, environment.id) + end + + def helm_releases + args = ['--all', '--date', "--max #{HELM_RELEASES_BATCH_SIZE}"] + + helm.releases(args: args) + end + + def delete_helm_release(release) + print_release_state(subject: 'Release', release_name: release.name, release_status: release.status, release_date: release.last_update, action: 'cleaning') + helm.delete(release_name: release.name) + kubernetes.cleanup(release_name: release.name) + rescue Quality::HelmClient::CommandFailedError => ex + raise ex unless ignore_exception?(ex.message, IGNORED_HELM_ERRORS) + + puts "Ignoring the following Helm error:\n#{ex}\n" + rescue Quality::KubernetesClient::CommandFailedError => ex + raise ex unless ignore_exception?(ex.message, IGNORED_KUBERNETES_ERRORS) + + puts "Ignoring the following Kubernetes error:\n#{ex}\n" + end + def threshold_time(days:) Time.now - days * 24 * 3600 end - def print_release_state(subject:, release_name:, release_date:, action:) - puts "\n#{subject} '#{release_name}' was last deployed on #{release_date}: #{action} it." + def ignore_exception?(exception_message, exceptions_ignored) + exception_message.match?(/(#{exceptions_ignored})/) + end + + def print_release_state(subject:, release_name:, release_date:, action:, release_status: nil) + puts "\n#{subject} '#{release_name}' #{"(#{release_status}) " if release_status}was last deployed on #{release_date}: #{action} it.\n" end end -- cgit v1.2.3