Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torvalds/linux.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSong Liu <songliubraving@fb.com>2020-03-09 20:32:15 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2020-03-10 02:04:07 +0300
commit47c09d6a9f6794caface4ad50930460b82d7c670 (patch)
treebc29e8f83ffc3df13df0650e19c884970a6ab406 /tools/bpf/bpftool/skeleton/profiler.bpf.c
parent7b4b73bc8a609fa9655022ed24a7a714bc8c2155 (diff)
bpftool: Introduce "prog profile" command
With fentry/fexit programs, it is possible to profile BPF program with hardware counters. Introduce bpftool "prog profile", which measures key metrics of a BPF program. bpftool prog profile command creates per-cpu perf events. Then it attaches fentry/fexit programs to the target BPF program. The fentry program saves perf event value to a map. The fexit program reads the perf event again, and calculates the difference, which is the instructions/cycles used by the target program. Example input and output: ./bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) This command measures cycles and instructions for BPF program with id 337 for 3 seconds. The program has triggered 4228 times. The rest of the output is similar to perf-stat. In this example, the counters were only counting ~84% of the time because of time multiplexing of perf counters. Note that, this approach measures cycles and instructions in very small increments. So the fentry/fexit programs introduce noticeable errors to the measurement results. The fentry/fexit programs are generated with BPF skeletons. Therefore, we build bpftool twice. The first time _bpftool is built without skeletons. Then, _bpftool is used to generate the skeletons. The second time, bpftool is built with skeletons. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200309173218.2739965-2-songliubraving@fb.com
Diffstat (limited to 'tools/bpf/bpftool/skeleton/profiler.bpf.c')
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c119
1 files changed, 119 insertions, 0 deletions
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
new file mode 100644
index 000000000000..20034c12f7c5
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "profiler.h"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} accum_readings SEC(".maps");
+
+/* sample counts, one per cpu */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+} counts SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+const volatile __u32 num_metric = 1;
+#define MAX_NUM_MATRICS 4
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+ struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
+ u32 key = bpf_get_smp_processor_id();
+ u32 i;
+
+ /* look up before reading, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ u32 flag = i;
+
+ ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
+ if (!ptrs[i])
+ return 0;
+ }
+
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ struct bpf_perf_event_value reading;
+ int err;
+
+ err = bpf_perf_event_read_value(&events, key, &reading,
+ sizeof(reading));
+ if (err)
+ return 0;
+ *(ptrs[i]) = reading;
+ key += num_cpu;
+ }
+
+ return 0;
+}
+
+static inline void
+fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+{
+ struct bpf_perf_event_value *before, diff, *accum;
+
+ before = bpf_map_lookup_elem(&fentry_readings, &id);
+ /* only account samples with a valid fentry_reading */
+ if (before && before->counter) {
+ struct bpf_perf_event_value *accum;
+
+ diff.counter = after->counter - before->counter;
+ diff.enabled = after->enabled - before->enabled;
+ diff.running = after->running - before->running;
+
+ accum = bpf_map_lookup_elem(&accum_readings, &id);
+ if (accum) {
+ accum->counter += diff.counter;
+ accum->enabled += diff.enabled;
+ accum->running += diff.running;
+ }
+ }
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 i, one = 1, zero = 0;
+ int err;
+ u64 *count;
+
+ /* read all events before updating the maps, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
+ readings + i, sizeof(*readings));
+ if (err)
+ return 0;
+ }
+ count = bpf_map_lookup_elem(&counts, &zero);
+ if (count) {
+ *count += 1;
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
+ fexit_update_maps(i, &readings[i]);
+ }
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";