diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2018-11-09 22:13:05 +0300 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2018-11-12 18:31:17 +0300 |
commit | 2f251bd11528a930934b77e2ee0056d5075a35e6 (patch) | |
tree | 90d20aad14e0b3e64f21debebfeba139633420fa | |
parent | 4b0683a615a353757ad75c4eb4ee67e12a0aa8ce (diff) |
Add a max_width/height argument to angular_ipred_fn
This is used in z2 to limit the number of pixels over which the
filter is applied, as per "numPx" in 7.11.2.4 point 4 in the AV1
specification. This only applies to z2, because in z1/3, the edge
filter is (incomprehensibly) lengthened by the opposite side's edge
length, which undoes the limit on the filter length (like a bug
undoing another bug).
I admit the code is getting rather complex, so we may want to
redesign this to make writing SIMD easier.
-rw-r--r-- | src/ipred.h | 2 | ||||
-rw-r--r-- | src/ipred_tmpl.c | 73 | ||||
-rw-r--r-- | src/recon_tmpl.c | 14 | ||||
-rw-r--r-- | tests/checkasm/ipred.c | 9 |
4 files changed, 63 insertions, 35 deletions
diff --git a/src/ipred.h b/src/ipred.h index 05dc3ca..43a7e3a 100644 --- a/src/ipred.h +++ b/src/ipred.h @@ -41,7 +41,7 @@ */ #define decl_angular_ipred_fn(name) \ void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \ - int width, int height, int angle) + int width, int height, int angle, int max_width, int max_height) typedef decl_angular_ipred_fn(*angular_ipred_fn); /* diff --git a/src/ipred_tmpl.c b/src/ipred_tmpl.c index 4b91eb0..8a7adb1 100644 --- a/src/ipred_tmpl.c +++ b/src/ipred_tmpl.c @@ -83,8 +83,7 @@ cfl_pred(pixel *dst, const ptrdiff_t stride, } } -static unsigned dc_gen_top(const pixel *const topleft, const int width) -{ +static unsigned dc_gen_top(const pixel *const topleft, const int width) { unsigned dc = width >> 1; for (int i = 0; i < width; i++) dc += topleft[1 + i]; @@ -93,7 +92,8 @@ static unsigned dc_gen_top(const pixel *const topleft, const int width) static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { splat_dc(dst, stride, width, height, dc_gen_top(topleft, width)); } @@ -106,8 +106,7 @@ static void ipred_cfl_top_c(pixel *dst, const ptrdiff_t stride, cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha); } -static unsigned dc_gen_left(const pixel *const topleft, const int height) -{ +static unsigned dc_gen_left(const pixel *const topleft, const int height) { unsigned dc = height >> 1; for (int i = 0; i < height; i++) dc += topleft[-(1 + i)]; @@ -116,7 +115,8 @@ static unsigned dc_gen_left(const pixel *const topleft, const int height) static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { splat_dc(dst, stride, width, height, dc_gen_left(topleft, 
height)); } @@ -140,8 +140,8 @@ static void ipred_cfl_left_c(pixel *dst, const ptrdiff_t stride, #define BASE_SHIFT 17 #endif -static unsigned -dc_gen(const pixel *const topleft, const int width, const int height) +static unsigned dc_gen(const pixel *const topleft, + const int width, const int height) { unsigned dc = (width + height) >> 1; for (int i = 0; i < width; i++) @@ -160,7 +160,8 @@ dc_gen(const pixel *const topleft, const int width, const int height) static void ipred_dc_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { splat_dc(dst, stride, width, height, dc_gen(topleft, width, height)); } @@ -180,7 +181,8 @@ static void ipred_cfl_c(pixel *dst, const ptrdiff_t stride, static void ipred_dc_128_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { splat_dc(dst, stride, width, height, 1 << (BITDEPTH - 1)); } @@ -195,7 +197,8 @@ static void ipred_cfl_128_c(pixel *dst, const ptrdiff_t stride, static void ipred_v_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { for (int y = 0; y < height; y++) { pixel_copy(dst, topleft + 1, width); @@ -205,7 +208,8 @@ static void ipred_v_c(pixel *dst, const ptrdiff_t stride, static void ipred_h_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { for (int y = 0; y < height; y++) { pixel_set(dst, topleft[-(1 + y)], width); @@ -215,7 +219,8 @@ static void ipred_h_c(pixel *dst, const ptrdiff_t 
stride, static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride, const pixel *const tl_ptr, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { const int topleft = tl_ptr[0]; for (int y = 0; y < height; y++) { @@ -236,7 +241,8 @@ static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride, static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { const uint8_t *const weights_hor = &dav1d_sm_weights[width]; const uint8_t *const weights_ver = &dav1d_sm_weights[height]; @@ -256,7 +262,8 @@ static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride, static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { const uint8_t *const weights_ver = &dav1d_sm_weights[height]; const int bottom = topleft[-height]; @@ -273,7 +280,8 @@ static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride, static void ipred_smooth_h_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, - const int width, const int height, const int a) + const int width, const int height, const int a, + const int max_width, const int max_height) { const uint8_t *const weights_hor = &dav1d_sm_weights[width]; const int right = topleft[width]; @@ -328,7 +336,9 @@ static int get_filter_strength(const unsigned blk_wh, const unsigned d, return strength; } -static void filter_edge(pixel *const out, const int sz, const pixel *const in, +static void filter_edge(pixel *const out, const int sz, + const int lim_from, const int lim_to, + const pixel *const in, const int from, const int to, const unsigned strength) { static const uint8_t kernel[3][5] 
= { @@ -338,12 +348,17 @@ static void filter_edge(pixel *const out, const int sz, const pixel *const in, }; assert(strength > 0); - for (int i = 0; i < sz; i++) { + int i = 0; + for (; i < lim_from; i++) + out[i] = in[iclip(i, from, to - 1)]; + for (; i < imin(lim_to, sz); i++) { int s = 0; for (int j = 0; j < 5; j++) s += in[iclip(i - 2 + j, from, to - 1)] * kernel[strength - 1][j]; out[i] = (s + 8) >> 4; } + for (; i < sz; i++) + out[i] = in[iclip(i, from, to - 1)]; } static int get_upsample(const int blk_wh, const unsigned d, const int type) { @@ -369,7 +384,8 @@ static void upsample_edge(pixel *const out, const int hsz, static void ipred_z1_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in, - const int width, const int height, int angle) + const int width, const int height, int angle, + const int max_width, const int max_height) { const int is_sm = angle >> 9; angle &= 511; @@ -389,7 +405,7 @@ static void ipred_z1_c(pixel *dst, const ptrdiff_t stride, get_filter_strength(width + height, 90 - angle, is_sm); if (filter_strength) { - filter_edge(top_out, width + height, + filter_edge(top_out, width + height, 0, width + height, &topleft_in[1], -1, width + imin(width, height), filter_strength); top = top_out; @@ -421,7 +437,8 @@ static void ipred_z1_c(pixel *dst, const ptrdiff_t stride, static void ipred_z2_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in, - const int width, const int height, int angle) + const int width, const int height, int angle, + const int max_width, const int max_height) { const int is_sm = angle >> 9; angle &= 511; @@ -440,7 +457,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride, get_filter_strength(width + height, angle - 90, is_sm); if (filter_strength) { - filter_edge(&topleft[1], width, &topleft_in[1], -1, width, + filter_edge(&topleft[1], width, 0, max_width, + &topleft_in[1], -1, width, filter_strength); } else { pixel_copy(&topleft[1], &topleft_in[1], width); @@ -453,7 +471,8 @@ static 
void ipred_z2_c(pixel *dst, const ptrdiff_t stride, get_filter_strength(width + height, 180 - angle, is_sm); if (filter_strength) { - filter_edge(&topleft[-height], height, &topleft_in[-height], + filter_edge(&topleft[-height], height, height - max_height, height, + &topleft_in[-height], 0, height + 1, filter_strength); } else { pixel_copy(&topleft[-height], &topleft_in[-height], height); @@ -492,7 +511,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride, static void ipred_z3_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in, - const int width, const int height, int angle) + const int width, const int height, int angle, + const int max_width, const int max_height) { const int is_sm = angle >> 9; angle &= 511; @@ -513,7 +533,7 @@ static void ipred_z3_c(pixel *dst, const ptrdiff_t stride, get_filter_strength(width + height, angle - 180, is_sm); if (filter_strength) { - filter_edge(left_out, width + height, + filter_edge(left_out, width + height, 0, width + height, &topleft_in[-(width + height)], imax(width - height, 0), width + height + 1, filter_strength); @@ -548,7 +568,8 @@ static void ipred_z3_c(pixel *dst, const ptrdiff_t stride, /* Up to 32x32 only */ static void ipred_filter_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in, - const int width, const int height, int filt_idx) + const int width, const int height, int filt_idx, + const int max_width, const int max_height) { filt_idx &= 511; assert(filt_idx < 5); diff --git a/src/recon_tmpl.c b/src/recon_tmpl.c index e4ea731..f26564f 100644 --- a/src/recon_tmpl.c +++ b/src/recon_tmpl.c @@ -766,7 +766,9 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize t_dim->w, t_dim->h, edge); dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge, t_dim->w * 4, t_dim->h * 4, - angle | sm_fl); + angle | sm_fl, + f->cur.p.p.w - 4 * t->bx, + f->cur.p.p.h - 4 * t->by); if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) { hex_dump(edge - t_dim->h * 4, t_dim->h * 4, 
@@ -981,7 +983,11 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize dsp->ipred.intra_pred[m](dst, stride, edge, uv_t_dim->w * 4, uv_t_dim->h * 4, - angle | sm_uv_fl); + angle | sm_uv_fl, + (f->cur.p.p.w + ss_hor - + 4 * (t->bx & ~ss_hor)) >> ss_hor, + (f->cur.p.p.w + ss_ver - + 4 * (t->by & ~ss_ver)) >> ss_ver); if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) { hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4, uv_t_dim->h * 4, 2, "l"); @@ -1136,7 +1142,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize 0, dst, f->cur.p.stride[0], top_sb_edge, m, &angle, bw4, bh4, tl_edge); dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel), - tl_edge, bw4 * 4, bh4 * 4, 0); + tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0); const uint8_t *const ii_mask = b->interintra_type == INTER_INTRA_BLEND ? dav1d_ii_masks[bs][0][b->interintra_mode] : @@ -1273,7 +1279,7 @@ int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize top_sb_edge, m, &angle, cbw4, cbh4, tl_edge); dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel), - tl_edge, cbw4 * 4, cbh4 * 4, 0); + tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0); dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp, cbw4 * 4, cbh4 * 4, ii_mask); } diff --git a/tests/checkasm/ipred.c b/tests/checkasm/ipred.c index f8455ee..f1168c9 100644 --- a/tests/checkasm/ipred.c +++ b/tests/checkasm/ipred.c @@ -68,7 +68,7 @@ static void check_intra_pred(Dav1dIntraPredDSPContext *const c) { pixel *const topleft = topleft_buf + 128; declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft, - int width, int height, int angle); + int width, int height, int angle, int max_width, int max_height); for (int mode = 0; mode < N_IMPL_INTRA_PRED_MODES; mode++) for (int w = 4; w <= (mode == FILTER_PRED ? 
32 : 64); w <<= 1) @@ -89,12 +89,13 @@ static void check_intra_pred(Dav1dIntraPredDSPContext *const c) { for (int i = -h * 2; i <= w * 2; i++) topleft[i] = rand() & ((1 << BITDEPTH) - 1); - call_ref(c_dst, stride, topleft, w, h, a); - call_new(a_dst, stride, topleft, w, h, a); + const int maxw = 1 + (rand() % 128), maxh = 1 + (rand() % 128); + call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh); + call_new(a_dst, stride, topleft, w, h, a, maxw, maxh); if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst))) fail(); - bench_new(a_dst, stride, topleft, w, h, a); + bench_new(a_dst, stride, topleft, w, h, a, 128, 128); } } report("intra_pred"); |