diff options
Diffstat (limited to 'scripts/duo_chat/reporter.rb')
-rwxr-xr-x | scripts/duo_chat/reporter.rb | 204 |
1 files changed, 138 insertions, 66 deletions
diff --git a/scripts/duo_chat/reporter.rb b/scripts/duo_chat/reporter.rb index 686a49164a7..0136c39ccb1 100755 --- a/scripts/duo_chat/reporter.rb +++ b/scripts/duo_chat/reporter.rb @@ -5,7 +5,10 @@ require 'gitlab' require 'json' class Reporter - IDENTIFIABLE_NOTE_TAG = 'gitlab-org/ai-powered/ai-framework:duo-chat-qa-evaluation-' + GITLAB_COM_API_V4_ENDPOINT = "https://gitlab.com/api/v4" + QA_EVALUATION_PROJECT_ID = 52020045 # https://gitlab.com/gitlab-org/ai-powered/ai-framework/qa-evaluation + AGGREGATED_REPORT_ISSUE_IID = 1 # https://gitlab.com/gitlab-org/ai-powered/ai-framework/qa-evaluation/-/issues/1 + IDENTIFIABLE_NOTE_TAG = 'gitlab-org/ai-powered/ai-framework:duo-chat-qa-evaluation' GRADE_TO_EMOJI_MAPPING = { correct: ":white_check_mark:", @@ -14,60 +17,19 @@ class Reporter }.freeze def run - merge_request_iid = ENV['CI_MERGE_REQUEST_IID'] - ci_project_id = ENV['CI_PROJECT_ID'] - - puts "Saving #{artifact_path}" - File.write(artifact_path, report_note) - - # Look for an existing note - report_notes = com_gitlab_client - .merge_request_notes(ci_project_id, merge_request_iid) - .auto_paginate - .select do |note| - note.body.include? note_identifier_tag - end - - note = report_notes.max_by { |note| Time.parse(note.created_at) } - - if note && note.type != 'DiscussionNote' - # The latest note has not led to a discussion. Update it. - com_gitlab_client.edit_merge_request_note(ci_project_id, merge_request_iid, note.id, report_note) - - puts "Updated comment." + if pipeline_running_on_master_branch? + snippet_web_url = upload_data_as_snippet + report_issue_url = create_report_issue + update_aggregation_issue(report_issue_url, snippet_web_url) else - # This is the first note or the latest note has been discussed on the MR. - # Don't update, create new note instead. - com_gitlab_client.create_merge_request_note(ci_project_id, merge_request_iid, report_note) - - puts "Posted comment." + save_report_as_artifact + post_or_update_report_note end end - private - - def report_filename - "#{ENV['DUO_RSPEC']}.md" - end - - def artifact_path - File.join(ENV['CI_PROJECT_DIR'], report_filename) - end - - def note_identifier_tag - "#{IDENTIFIABLE_NOTE_TAG}#{ENV['DUO_RSPEC']}" - end - - def com_gitlab_client - @com_gitlab_client ||= Gitlab.client( - endpoint: "https://gitlab.com/api/v4", - private_token: ENV['PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE'] - ) - end - - def report_note - report = <<~MARKDOWN - <!-- #{note_identifier_tag} --> + def markdown_report + @report ||= <<~MARKDOWN + <!-- #{IDENTIFIABLE_NOTE_TAG} --> ## GitLab Duo Chat QA evaluation @@ -93,7 +55,7 @@ class Reporter - Note: if an evaluation request failed or its response was not parsable, it was ignored. For example, :x: :warning: would count as `INCORRECT`. - - The number of evaluations in which LLMs disagreed: #{summary_numbers[:disagreed]} (#{summary_numbers[:disagreed_ratio]}%) + - The number of evaluations in which LLMs disagreed: #{summary_numbers[:disagreed]} (#{summary_numbers[:disagreed_ratio]}%) ### Evaluations @@ -103,29 +65,137 @@ class Reporter MARKDOWN - if report.length > 1000000 - return <<~MARKDOWN - <!-- #{note_identifier_tag} --> + # Do this to avoid pinging users in notes/issues. + quote_usernames(@report) + end - ## GitLab Duo Chat QA evaluation + private - Report generated for "#{ENV['CI_JOB_NAME']}". This report is generated and refreshed automatically. Do not edit. + def quote_usernames(text) + text.gsub(/(@\w+)/, '`\\1`') + end - **:warning: the evaluation report is too long (> `1000000`) and cannot be posted as a note.** + def pipeline_running_on_master_branch? + ENV['CI_COMMIT_BRANCH'] == ENV['CI_DEFAULT_BRANCH'] + end - Please check out the artifact for the CI job "#{ENV['CI_JOB_NAME']}": + def utc_timestamp + @utc_timestamp ||= Time.now.utc + end - https://gitlab.com/gitlab-org/gitlab/-/jobs/#{ENV['CI_JOB_ID']}/artifacts/file/#{report_filename} + def upload_data_as_snippet + filename = "#{utc_timestamp.to_i}.json" + title = utc_timestamp.to_s + snippet_content = ::JSON.pretty_generate({ + commit: ENV["CI_COMMIT_SHA"], + pipeline_url: ENV["CI_PIPELINE_URL"], + data: report_data + }) + + puts "Creating a snippet #{filename}." + snippet = qa_evaluation_project_client.create_snippet( + QA_EVALUATION_PROJECT_ID, + { + title: title, + files: [{ file_path: filename, content: snippet_content }], + visibility: 'private' + } + ) - MARKDOWN + snippet.web_url + end + + def create_report_issue + puts "Creating a report issue." + issue_title = "Report #{utc_timestamp}" + new_issue = qa_evaluation_project_client.create_issue( + QA_EVALUATION_PROJECT_ID, issue_title, { description: markdown_report } + ) + + new_issue.web_url + end + + def update_aggregation_issue(report_issue_url, snippet_web_url) + puts "Updating the aggregated report issue." + + new_line = ["\n|"] + new_line << "#{utc_timestamp} |" + new_line << "#{summary_numbers[:total]} |" + new_line << "#{summary_numbers[:correct_ratio]}% |" + new_line << "#{summary_numbers[:incorrect_ratio]}% |" + new_line << "#{summary_numbers[:disagreed_ratio]}% |" + new_line << "#{report_issue_url} |" + new_line << "#{snippet_web_url} |" + new_line = new_line.join(' ') + + aggregated_report_issue = qa_evaluation_project_client.issue(QA_EVALUATION_PROJECT_ID, AGGREGATED_REPORT_ISSUE_IID) + updated_description = aggregated_report_issue.description + new_line + qa_evaluation_project_client.edit_issue( + QA_EVALUATION_PROJECT_ID, AGGREGATED_REPORT_ISSUE_IID, { description: updated_description } + ) + end + + def save_report_as_artifact + artifact_path = File.join(base_dir, ENV['QA_EVAL_REPORT_FILENAME']) + + puts "Saving #{artifact_path}" + File.write(artifact_path, markdown_report) + end + + def post_or_update_report_note + note = existing_report_note + if note && note.type != 'DiscussionNote' + # The latest note has not led to a discussion. Update it. + gitlab_project_client.edit_merge_request_note(ci_project_id, merge_request_iid, note.id, markdown_report) + + puts "Updated comment." + else + # This is the first note or the latest note has been discussed on the MR. + # Don't update, create new note instead. + gitlab_project_client.create_merge_request_note(ci_project_id, merge_request_iid, markdown_report) + + puts "Posted comment." end + end + + def existing_report_note + # Look for an existing note using `IDENTIFIABLE_NOTE_TAG` + gitlab_project_client + .merge_request_notes(ci_project_id, merge_request_iid) + .auto_paginate + .select { |note| note.body.include? IDENTIFIABLE_NOTE_TAG } + .max_by { |note| Time.parse(note.created_at) } + end + + def gitlab_project_client + @gitlab_project_client ||= Gitlab.client( + endpoint: GITLAB_COM_API_V4_ENDPOINT, + private_token: ENV['PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE'] + ) + end + + def qa_evaluation_project_client + @qa_evaluation_project_client ||= Gitlab.client( + endpoint: GITLAB_COM_API_V4_ENDPOINT, + private_token: ENV['CHAT_QA_EVALUATION_PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE'] + ) + end + + def base_dir + ENV['CI_PROJECT_DIR'] || "./" + end + + def merge_request_iid + ENV['CI_MERGE_REQUEST_IID'] + end - report + def ci_project_id + ENV['CI_PROJECT_ID'] end def report_data - @report_data ||= Dir[File.join(ENV['CI_PROJECT_DIR'], "tmp/duo_chat/qa*.json")] - .map { |file| JSON.parse(File.read(file)) } + @report_data ||= Dir[File.join(base_dir, "tmp/duo_chat/qa*.json")] + .flat_map { |file| JSON.parse(File.read(file)) } end def eval_content @@ -168,7 +238,9 @@ class Reporter end def summary_numbers - @graded_evaluations ||= report_data.map { |data| data["evaluations"].map { |eval| parse_grade(eval) } } + @graded_evaluations ||= report_data + .map { |data| data["evaluations"].map { |eval| parse_grade(eval) } } + .reject { |grades| !(grades.include? :correct) && !(grades.include? :incorrect) } total = @graded_evaluations.size correct = @graded_evaluations.count { |grades| !(grades.include? :incorrect) } @@ -230,4 +302,4 @@ class Reporter end end -Reporter.new.run +Reporter.new.run if $PROGRAM_NAME == __FILE__ |