diff options
author | Henrik Gramner <gramner@twoorioles.com> | 2022-05-20 03:28:33 +0300 |
---|---|---|
committer | Henrik Gramner <henrik@gramner.com> | 2022-05-20 14:59:33 +0300 |
commit | 297682250fd77b014d7b0fc63e61782025426a1a (patch) | |
tree | 4c263f4b19d650d85006606656e15ab74bafd748 | |
parent | dd3b1eb88681e1ccac62e69840539c8c65f2105f (diff) |
checkasm: Improve benchmarking of functions that modify their input
Alternate between buffers when benchmarking in order to more
accurately measure throughput instead of latency.
-rw-r--r-- | tests/checkasm/cdef.c | 2 | ||||
-rw-r--r-- | tests/checkasm/checkasm.h | 11 | ||||
-rw-r--r-- | tests/checkasm/itx.c | 4 | ||||
-rw-r--r-- | tests/checkasm/loopfilter.c | 18 | ||||
-rw-r--r-- | tests/checkasm/looprestoration.c | 8 | ||||
-rw-r--r-- | tests/checkasm/mc.c | 6 | ||||
-rw-r--r-- | tests/checkasm/msac.c | 11 |
7 files changed, 35 insertions, 25 deletions
diff --git a/tests/checkasm/cdef.c b/tests/checkasm/cdef.c index 833fa4b..6f9f0cd 100644 --- a/tests/checkasm/cdef.c +++ b/tests/checkasm/cdef.c @@ -104,7 +104,7 @@ static void check_cdef_filter(const cdef_fn fn, const int w, const int h) { */ pri_strength = (edges & 1) << bitdepth_min_8; sec_strength = (edges & 2) << bitdepth_min_8; - bench_new(a_dst, stride, left, top, bot, pri_strength, + bench_new(alternate(c_dst, a_dst), stride, left, top, bot, pri_strength, sec_strength, dir, damping, edges HIGHBD_TAIL_SUFFIX); } } diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index a804c0e..fb5f8ed 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -284,15 +284,19 @@ void checkasm_stack_clobber(uint64_t clobber, ...); #define bench_new(...)\ do {\ if (checkasm_bench_func()) {\ - func_type *tfunc = func_new;\ + func_type *const tfunc = func_new;\ checkasm_set_signal_handler_state(1);\ uint64_t tsum = 0;\ int tcount = 0;\ for (int ti = 0; ti < BENCH_RUNS; ti++) {\ uint64_t t = readtime();\ + int talt = 0; (void)talt;\ tfunc(__VA_ARGS__);\ + talt = 1;\ tfunc(__VA_ARGS__);\ + talt = 0;\ tfunc(__VA_ARGS__);\ + talt = 1;\ tfunc(__VA_ARGS__);\ t = readtime() - t;\ if (t*tcount <= tsum*4 && ti > 0) {\ @@ -303,6 +307,7 @@ void checkasm_stack_clobber(uint64_t clobber, ...); checkasm_set_signal_handler_state(0);\ checkasm_update_bench(tcount, tsum);\ } else {\ + const int talt = 0; (void)talt;\ call_new(__VA_ARGS__);\ }\ } while (0) @@ -310,6 +315,10 @@ void checkasm_stack_clobber(uint64_t clobber, ...); #define bench_new(...) do {} while (0) #endif +/* Alternates between two pointers. Intended to be used within bench_new() + * calls for functions which modifies their input buffer(s) to ensure that + * throughput, and not latency, is measured. */ +#define alternate(a, b) (talt ? 
(b) : (a)) #define ROUND_UP(x,a) (((x)+((a)-1)) & ~((a)-1)) #define PIXEL_RECT(name, w, h) \ diff --git a/tests/checkasm/itx.c b/tests/checkasm/itx.c index 6b6cda0..c7cc411 100644 --- a/tests/checkasm/itx.c +++ b/tests/checkasm/itx.c @@ -294,8 +294,8 @@ static void check_itxfm_add(Dav1dInvTxfmDSPContext *const c, if (memcmp(coeff[0], coeff[1], sizeof(*coeff))) fail(); - bench_new(a_dst, a_dst_stride, coeff[0], eob - HIGHBD_TAIL_SUFFIX); + bench_new(alternate(c_dst, a_dst), a_dst_stride, + alternate(coeff[0], coeff[1]), eob HIGHBD_TAIL_SUFFIX); } } report("add_%dx%d", w, h); diff --git a/tests/checkasm/loopfilter.c b/tests/checkasm/loopfilter.c index 438f080..f4901fb 100644 --- a/tests/checkasm/loopfilter.c +++ b/tests/checkasm/loopfilter.c @@ -174,18 +174,18 @@ static void check_lpf_sb(loopfilter_sb_fn fn, const char *const name, } memcpy(a_dst_mem, c_dst_mem, 128 * sizeof(pixel) * 16); - call_ref(c_dst, stride, - vmask, (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], b4_stride, - &lut, n_blks HIGHBD_TAIL_SUFFIX); - call_new(a_dst, stride, - vmask, (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], b4_stride, - &lut, n_blks HIGHBD_TAIL_SUFFIX); + call_ref(c_dst, stride, vmask, + (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], + b4_stride, &lut, n_blks HIGHBD_TAIL_SUFFIX); + call_new(a_dst, stride, vmask, + (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], + b4_stride, &lut, n_blks HIGHBD_TAIL_SUFFIX); checkasm_check_pixel(c_dst_mem, stride, a_dst_mem, stride, w, h, "dst"); - bench_new(a_dst, stride, - vmask, (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], b4_stride, - &lut, n_blks HIGHBD_TAIL_SUFFIX); + bench_new(alternate(c_dst, a_dst), stride, vmask, + (const uint8_t(*)[4]) &l[dir ? 
32 : 1][lf_idx], + b4_stride, &lut, n_blks HIGHBD_TAIL_SUFFIX); } } report(name); diff --git a/tests/checkasm/looprestoration.c b/tests/checkasm/looprestoration.c index c542901..d84f3c4 100644 --- a/tests/checkasm/looprestoration.c +++ b/tests/checkasm/looprestoration.c @@ -109,8 +109,8 @@ static void check_wiener(Dav1dLoopRestorationDSPContext *const c, const int bpc) break; } } - bench_new(a_dst, 448 * sizeof(pixel), left, h_edge, - 256, 64, &params, 0xf HIGHBD_TAIL_SUFFIX); + bench_new(alternate(c_dst, a_dst), 448 * sizeof(pixel), left, + h_edge, 256, 64, &params, 0xf HIGHBD_TAIL_SUFFIX); } } } @@ -169,8 +169,8 @@ static void check_sgr(Dav1dLoopRestorationDSPContext *const c, const int bpc) { break; } } - bench_new(a_dst, 448 * sizeof(pixel), left, h_edge, - 256, 64, &params, 0xf HIGHBD_TAIL_SUFFIX); + bench_new(alternate(c_dst, a_dst), 448 * sizeof(pixel), left, + h_edge, 256, 64, &params, 0xf HIGHBD_TAIL_SUFFIX); } } } diff --git a/tests/checkasm/mc.c b/tests/checkasm/mc.c index c8f2dc5..6b3b146 100644 --- a/tests/checkasm/mc.c +++ b/tests/checkasm/mc.c @@ -477,7 +477,7 @@ static void check_blend(Dav1dMCDSPContext *const c) { checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride, w, h, "dst"); - bench_new(a_dst, a_dst_stride, tmp, w, h, mask); + bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h, mask); } } report("blend"); @@ -516,7 +516,7 @@ static void check_blend_v(Dav1dMCDSPContext *const c) { checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride, w, h, "dst"); - bench_new(a_dst, a_dst_stride, tmp, w, h); + bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h); } } report("blend_v"); @@ -554,7 +554,7 @@ static void check_blend_h(Dav1dMCDSPContext *const c) { checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride, w, h, "dst"); - bench_new(a_dst, a_dst_stride, tmp, w, h); + bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h); } } report("blend_h"); diff --git a/tests/checkasm/msac.c b/tests/checkasm/msac.c 
index 4e73789..b9c89b4 100644 --- a/tests/checkasm/msac.c +++ b/tests/checkasm/msac.c @@ -125,7 +125,8 @@ static void msac_dump(unsigned c_res, unsigned a_res, } \ } \ if (cdf_update && ns == n - 1) \ - bench_new(&s_a, cdf[1], ns); \ + bench_new(alternate(&s_c, &s_a), \ + alternate(cdf[0], cdf[1]), ns); \ } \ } \ } \ @@ -164,7 +165,7 @@ static void check_decode_bool_adapt(MsacDSPContext *const c, uint8_t *const buf) } } if (cdf_update) - bench_new(&s_a, cdf[1]); + bench_new(alternate(&s_c, &s_a), alternate(cdf[0], cdf[1])); } } } @@ -184,7 +185,7 @@ static void check_decode_bool_equi(MsacDSPContext *const c, uint8_t *const buf) msac_dump(c_res, a_res, &s_c, &s_a, NULL, NULL, 0); } } - bench_new(&s_a); + bench_new(alternate(&s_c, &s_a)); } } @@ -204,7 +205,7 @@ static void check_decode_bool(MsacDSPContext *const c, uint8_t *const buf) { msac_dump(c_res, a_res, &s_c, &s_a, NULL, NULL, 0); } } - bench_new(&s_a, 16384); + bench_new(alternate(&s_c, &s_a), 16384); } } @@ -239,7 +240,7 @@ static void check_decode_hi_tok(MsacDSPContext *const c, uint8_t *const buf) { } } if (cdf_update) - bench_new(&s_a, cdf[1]); + bench_new(alternate(&s_c, &s_a), alternate(cdf[0], cdf[1])); } } report("decode_hi_tok"); |