diff options
author | Will Chandler <wchandler@gitlab.com> | 2023-01-05 22:37:50 +0300 |
---|---|---|
committer | Will Chandler <wchandler@gitlab.com> | 2023-02-02 18:07:02 +0300 |
commit | 5d869f4cedce452c6bb36f84d20fffa748bf2699 (patch) | |
tree | 3cccd99d6eb5c9fa8a727689958e4831410a8778 /_support | |
parent | ee856a0c74eaf327cf9f33ad29f22f6ef2f25aea (diff) |
benchmarking: Use scripts in bench loop
Now that we have scripts to run benchmarks and profile Gitaly, we can
update the `benchmark` role to invoke them.
By default we clear the kernel page cache and run the profiling script,
but if needed these can be disabled with `./run-benchmarks --extra-vars
"profile=false clear_page_cache=false"`.
`bench_duration` defaults to a value slightly longer than
`profile_duration` to ensure that `ghz` is sending traffic for the full
time we're profiling.
`ghz_wait_duration` controls how long to wait before the `Run ghz` task
is considered to have failed. When writing HTML output, `ghz` may take
30+ seconds to finish, so a sizeable wait period helps prevent spurious
failures without adding delays if it exits sooner. Currently we are
using JSON output, which does not add this delay.
Diffstat (limited to '_support')
-rw-r--r-- | _support/benchmarking/.gitignore | 1 | ||||
-rw-r--r-- | _support/benchmarking/roles/benchmark/tasks/bench.yml | 58 | ||||
-rw-r--r-- | _support/benchmarking/roles/benchmark/tasks/main.yml | 13 | ||||
-rw-r--r-- | _support/benchmarking/roles/benchmark/vars/main.yml | 6 |
4 files changed, 78 insertions, 0 deletions
diff --git a/_support/benchmarking/.gitignore b/_support/benchmarking/.gitignore index aa8762020..1380e5950 100644 --- a/_support/benchmarking/.gitignore +++ b/_support/benchmarking/.gitignore @@ -2,3 +2,4 @@ /terraform/* !/terraform/main.tf /group_vars/all.yml +/results/* diff --git a/_support/benchmarking/roles/benchmark/tasks/bench.yml b/_support/benchmarking/roles/benchmark/tasks/bench.yml index 76603c300..5335de6ff 100644 --- a/_support/benchmarking/roles/benchmark/tasks/bench.yml +++ b/_support/benchmarking/roles/benchmark/tasks/bench.yml @@ -20,9 +20,67 @@ group: git delegate_to: "{{ groups['client'][0] }}" +# Clear the page cache so all runs start with a cold cache +- name: Clear kernel page cache + shell: sync && echo 3 > /proc/sys/vm/drop_caches + when: clear_page_cache | bool + # Create new Gitaly process for each run for easy log collection - name: Start Gitaly service systemd: name: gitaly state: started notify: stop gitaly + +- name: Pause for Gitaly to start up + pause: + seconds: 5 + +# We run this task as async and poll 0 to make it non-blocking. +# It will be allowed to run up to `ghz_wait_duration` before +# being treated as failed. 
+- name: Run ghz + command: + argv: + - /usr/local/bin/benchmark-gitaly + - "-a{{ hostvars[groups['gitaly'][0]]['internal'] }}" + - "-d{{ bench_duration }}" + - "-o{{ output_dir }}" + - "-p{{ rpc_info.proto }}" + - "-s{{ rpc_info.service }}" + - "-r{{ rpc_info.rpc }}" + - "-g{{ repo_name }}" + async: "{{ ghz_wait_duration }}" + poll: 0 + register: ghz_wait + delegate_to: "{{ groups['client'][0] }}" + +- name: Profile system with perf and libbpf-tools + command: + argv: + - /usr/local/bin/profile-gitaly + - "-d{{ profile_duration }}" + - "-o{{ output_dir }}" + - "-r{{ rpc_info.rpc }}" + - "-g{{ repo_name }}" + when: profile | bool + +- name: Wait for ghz to exit + async_status: + jid: "{{ ghz_wait.ansible_job_id }}" + register: ghz_result + until: ghz_result.finished + retries: 100 + delay: 10 + delegate_to: "{{ groups['client'][0] }}" + +- name: Copy ghz data to Gitaly node + synchronize: + src: "{{ output_dir }}/ghz.json" + dest: "{{ output_dir }}/" + private_key: /root/.ssh/id_ed25519 + set_remote_user: false + delegate_to: "{{ groups['client'][0] }}" + +- name: Capture Gitaly logs + shell: journalctl --output=cat _PID=$(pidof -s gitaly) > "{{ output_dir }}/gitaly.log" diff --git a/_support/benchmarking/roles/benchmark/tasks/main.yml b/_support/benchmarking/roles/benchmark/tasks/main.yml index 8d72d79f1..7a1d1f105 100644 --- a/_support/benchmarking/roles/benchmark/tasks/main.yml +++ b/_support/benchmarking/roles/benchmark/tasks/main.yml @@ -10,3 +10,16 @@ loop: "{{ rpcs }}" loop_control: loop_var: rpc_info + +- name: Archive results + archive: + path: + - "/tmp/{{ run_name }}" + dest: "/tmp/{{ run_name }}.tar.gz" + owner: git + +- name: Fetch results + fetch: + src: "/tmp/{{ run_name }}.tar.gz" + dest: "{{ playbook_dir }}/results/" + flat: true diff --git a/_support/benchmarking/roles/benchmark/vars/main.yml b/_support/benchmarking/roles/benchmark/vars/main.yml new file mode 100644 index 000000000..e23bd7641 --- /dev/null +++ 
b/_support/benchmarking/roles/benchmark/vars/main.yml @@ -0,0 +1,6 @@ +--- +profile: true +clear_page_cache: true +bench_duration: 35 +profile_duration: 30 +ghz_wait_duration: 120 |