From 3aac025204602810c5bf33cbad6ac1bf157487cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 13 May 2021 09:33:21 +0300 Subject: arm64: filmgrain16: Guard against out of range pixels in the gather function In 16 bpc, the pixels are 16 bit integers, but valid pixels are only up to 12 bits, and the scaling buffer only contains 4096 elements. The src pixels are normally supposed to be valid pixels, but when processing blocks of 32 pixels at a time, it can operate on uninitialized pixels past the right edge. Before: Cortex A53 A72 A73 Apple M1 fgy_32x32xn_16bpc_neon: 10372.5 8194.4 8612.1 24.2 After: fgy_32x32xn_16bpc_neon: 10837.9 8469.5 8885.1 24.6 --- tests/checkasm/filmgrain.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/tests/checkasm/filmgrain.c b/tests/checkasm/filmgrain.c index eadf2ad..3db2f61 100644 --- a/tests/checkasm/filmgrain.c +++ b/tests/checkasm/filmgrain.c @@ -183,10 +183,6 @@ static void check_fgy_sbrow(const Dav1dFilmGrainDSPContext *const dsp) { generate_scaling(bitdepth_from_max(bitdepth_max), fg_data[0].y_points, fg_data[0].num_y_points, scaling); - for (int y = 0; y < 32; y++) - for (int x = 0; x < 128; x++) - src[y * PXSTRIDE(stride) + x] = rnd() & bitdepth_max; - fg_data[0].clip_to_restricted_range = rnd() & 1; fg_data[0].scaling_shift = (rnd() & 3) + 8; for (fg_data[0].overlap_flag = 0; fg_data[0].overlap_flag <= 1; @@ -204,6 +200,14 @@ static void check_fgy_sbrow(const Dav1dFilmGrainDSPContext *const dsp) { row_num = rnd() & 0x7ff; } + for (int y = 0; y < 32; y++) { + // Src pixels past the right edge can be uninitialized + for (int x = 0; x < 128; x++) + src[y * PXSTRIDE(stride) + x] = rnd(); + for (int x = 0; x < w; x++) + src[y * PXSTRIDE(stride) + x] &= bitdepth_max; + } + CLEAR_PIXEL_RECT(c_dst); CLEAR_PIXEL_RECT(a_dst); call_ref(c_dst, src, stride, fg_data, w, scaling, grain_lut, h, @@ -275,12 +279,6 @@ static void 
check_fguv_sbrow(const Dav1dFilmGrainDSPContext *const dsp) { dsp->generate_grain_uv[layout_idx](grain_lut[1], grain_lut[0], fg_data, uv_pl HIGHBD_TAIL_SUFFIX); - for (int y = 0; y < 32; y++) - for (int x = 0; x < 128; x++) - src[y * PXSTRIDE(stride) + x] = rnd() & bitdepth_max; - for (int y = 0; y < 32; y++) - for (int x = 0; x < 128; x++) - luma_src[y * PXSTRIDE(lstride) + x] = rnd() & bitdepth_max; if (csfl) { fg_data[0].num_y_points = 2 + (rnd() % 13); const int pad = 0xff / fg_data[0].num_y_points; @@ -325,6 +323,18 @@ static void check_fguv_sbrow(const Dav1dFilmGrainDSPContext *const dsp) { row_num = rnd() & 0x7ff; } + for (int y = 0; y < 32; y++) { + // Src pixels past the right edge can be uninitialized + for (int x = 0; x < 128; x++) { + src[y * PXSTRIDE(stride) + x] = rnd(); + luma_src[y * PXSTRIDE(lstride) + x] = rnd(); + } + for (int x = 0; x < w; x++) + src[y * PXSTRIDE(stride) + x] &= bitdepth_max; + for (int x = 0; x < (w << ss_x); x++) + luma_src[y * PXSTRIDE(lstride) + x] &= bitdepth_max; + } + CLEAR_PIXEL_RECT(c_dst); CLEAR_PIXEL_RECT(a_dst); call_ref(c_dst, src, stride, fg_data, w, scaling, grain_lut[1], h, -- cgit v1.2.3