benchmarking: Use scripts in bench loop

Now that we have scripts to run benchmarks and profile Gitaly, we can update the `benchmark` role to invoke them. By default we clear the kernel page cache and run the profiling script, but if needed these can be disabled with `./run-benchmarks --extra-vars "profile=false clear_page_cache=false"`. `bench_duration` defaults to a value slightly longer than `profile_duration` to ensure that `ghz` is sending traffic for the full time we're profiling. `ghz_wait_duration` controls how long to wait before the `Run ghz` task is considered to have failed. When writing HTML output, `ghz` may take 30+ seconds to finish, so a sizeable wait period helps prevent spurious failures without adding delays if it exits sooner. Currently we are using JSON output, which does not add this delay.
author: Will Chandler <wchandler@gitlab.com> 2023-01-05 22:37:50 +0300
committer: Will Chandler <wchandler@gitlab.com> 2023-02-02 18:07:02 +0300
commit: 5d869f4cedce452c6bb36f84d20fffa748bf2699 (patch)
tree: 3cccd99d6eb5c9fa8a727689958e4831410a8778 /_support
parent: ee856a0c74eaf327cf9f33ad29f22f6ef2f25aea (diff)
4 files changed, 78 insertions, 0 deletions
diff --git a/_support/benchmarking/.gitignore b/_support/benchmarking/.gitignore
index aa8762020..1380e5950 100644
--- a/_support/benchmarking/.gitignore
+++ b/_support/benchmarking/.gitignore
@@ -2,3 +2,4 @@
 /terraform/*
 !/terraform/main.tf
 /group_vars/all.yml
+/results/*
diff --git a/_support/benchmarking/roles/benchmark/tasks/bench.yml b/_support/benchmarking/roles/benchmark/tasks/bench.yml
index 76603c300..5335de6ff 100644
--- a/_support/benchmarking/roles/benchmark/tasks/bench.yml
+++ b/_support/benchmarking/roles/benchmark/tasks/bench.yml
@@ -20,9 +20,67 @@
     group: git
   delegate_to: "{{ groups['client'][0] }}"
 
+# Flush dirty pages with `sync`, then drop the page cache so every run starts cold
+- name: Clear kernel page cache
+  shell: sync && echo 3 > /proc/sys/vm/drop_caches
+  when: clear_page_cache | bool  # skipped when clear_page_cache is false
+
 # Create new Gitaly process for each run for easy log collection
 - name: Start Gitaly service
   systemd:
     name: gitaly
     state: started
   notify: stop gitaly
+
+- name: Pause for Gitaly to start up  # fixed delay, not a readiness check
+  pause:
+    seconds: 5
+
+# Start the benchmark in the background: `async` together with `poll: 0`
+# makes this task non-blocking. The job may run for up to
+# `ghz_wait_duration` before it is treated as failed.
+- name: Run ghz
+  command:
+    argv:
+      - /usr/local/bin/benchmark-gitaly
+      - "-a{{ hostvars[groups['gitaly'][0]]['internal'] }}"  # `internal` hostvar of the first gitaly host
+      - "-d{{ bench_duration }}"
+      - "-o{{ output_dir }}"
+      - "-p{{ rpc_info.proto }}"
+      - "-s{{ rpc_info.service }}"
+      - "-r{{ rpc_info.rpc }}"
+      - "-g{{ repo_name }}"
+  async: "{{ ghz_wait_duration }}"
+  poll: 0
+  register: ghz_wait  # job handle; its ansible_job_id is polled by "Wait for ghz to exit"
+  delegate_to: "{{ groups['client'][0] }}"  # executed on the first client host
+
+- name: Profile system with perf and libbpf-tools  # blocking; ghz keeps running in the background meanwhile
+  command:
+    argv:
+      - /usr/local/bin/profile-gitaly
+      - "-d{{ profile_duration }}"
+      - "-o{{ output_dir }}"
+      - "-r{{ rpc_info.rpc }}"
+      - "-g{{ repo_name }}"
+  when: profile | bool  # skipped when profile is false
+
+- name: Wait for ghz to exit  # polls the background job started by "Run ghz"
+  async_status:
+    jid: "{{ ghz_wait.ansible_job_id }}"
+  register: ghz_result
+  until: ghz_result.finished
+  retries: 100  # together with delay: 10 this allows up to 100 polls, 10s apart
+  delay: 10
+  delegate_to: "{{ groups['client'][0] }}"  # same host the job was started on
+
+- name: Copy ghz data to Gitaly node  # moves ghz.json from the client into output_dir on this host
+  synchronize:
+    src: "{{ output_dir }}/ghz.json"
+    dest: "{{ output_dir }}/"
+    private_key: /root/.ssh/id_ed25519
+    set_remote_user: false
+  delegate_to: "{{ groups['client'][0] }}"  # because of the delegation, src is read on the client
+
+- name: Capture Gitaly logs  # journal entries for the currently running gitaly PID only
+  shell: journalctl --output=cat _PID=$(pidof -s gitaly) > "{{ output_dir }}/gitaly.log"
diff --git a/_support/benchmarking/roles/benchmark/tasks/main.yml b/_support/benchmarking/roles/benchmark/tasks/main.yml
index 8d72d79f1..7a1d1f105 100644
--- a/_support/benchmarking/roles/benchmark/tasks/main.yml
+++ b/_support/benchmarking/roles/benchmark/tasks/main.yml
@@ -10,3 +10,16 @@
   loop: "{{ rpcs }}"
   loop_control:
     loop_var: rpc_info
+
+- name: Archive results  # bundle the /tmp/<run_name> directory into one tarball
+  archive:
+    path:
+      - "/tmp/{{ run_name }}"
+    dest: "/tmp/{{ run_name }}.tar.gz"
+    owner: git
+
+- name: Fetch results  # copy the tarball from the remote host to the control machine
+  fetch:
+    src: "/tmp/{{ run_name }}.tar.gz"
+    dest: "{{ playbook_dir }}/results/"
+    flat: true  # place the file directly in results/ rather than under a per-host path
diff --git a/_support/benchmarking/roles/benchmark/vars/main.yml b/_support/benchmarking/roles/benchmark/vars/main.yml
new file mode 100644
index 000000000..e23bd7641
--- /dev/null
+++ b/_support/benchmarking/roles/benchmark/vars/main.yml
@@ -0,0 +1,6 @@
+---
+profile: true  # gates the "Profile system with perf and libbpf-tools" task
+clear_page_cache: true  # gates the "Clear kernel page cache" task
+bench_duration: 35  # passed to benchmark-gitaly as -d; kept longer than profile_duration
+profile_duration: 30  # passed to profile-gitaly as -d (presumably seconds - confirm in script)
+ghz_wait_duration: 120  # async limit for "Run ghz" before it is treated as failed
author	Will Chandler <wchandler@gitlab.com>	2023-01-05 22:37:50 +0300
committer	Will Chandler <wchandler@gitlab.com>	2023-02-02 18:07:02 +0300
commit	5d869f4cedce452c6bb36f84d20fffa748bf2699 (patch)
tree	3cccd99d6eb5c9fa8a727689958e4831410a8778 /_support
parent	ee856a0c74eaf327cf9f33ad29f22f6ef2f25aea (diff)