From ee98592ba27aa0c958778eef998ed89a664e252d Mon Sep 17 00:00:00 2001 From: David Conrad Date: Tue, 7 Jun 2022 15:03:36 -0700 Subject: Fix overflow when saturating dequantized coefficients clipped to 0 It's possible to encode a large coefficient that becomes 0 after the clipping in dequant (Abs( dq ) & 0xFFFFFF), e.g. 0x1000000 After that &0xFFFFFF, coeffs are saturated in the range of [-(1 << (bitdepth+7)), 1 << (bitdepth+7)) dav1d implements this saturation via umin(dq - sign, cf_max), then applies the sign afterwards via xor. However, for dq = 0 and sign = 1, this step evaulates to umin(UINT_MAX, cf_max) == cf_max instead of the expected 0. So instead, do unsigned saturate as umin(dq, cf_max + sign), then apply sign via (sign ? -dq : dq) On arm this is the same number of instructions, since cneg exists and is used On x86 this requires an additional instruction, but this isn't a latency-critical path --- src/recon_tmpl.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/recon_tmpl.c b/src/recon_tmpl.c index 0ed4169..5f49910 100644 --- a/src/recon_tmpl.c +++ b/src/recon_tmpl.c @@ -591,7 +591,7 @@ static int decode_coefs(Dav1dTaskContext *const t, const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane]; const uint8_t *const qm_tbl = *txtp < IDTX ? f->qm[tx][plane] : NULL; const int dq_shift = imax(0, t_dim->ctx - 2); - const unsigned cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc)); + const int cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc)); unsigned cul_level, dc_sign_level; if (!dc_tok) { @@ -608,7 +608,7 @@ static int decode_coefs(Dav1dTaskContext *const t, printf("Post-dc_sign[%d][%d][%d]: r=%d\n", chroma, dc_sign_ctx, dc_sign, ts->msac.rng); - unsigned dc_dq = dq_tbl[0]; + int dc_dq = dq_tbl[0]; dc_sign_level = (dc_sign - 1) & (2 << 6); if (qm_tbl) { @@ -628,7 +628,8 @@ static int decode_coefs(Dav1dTaskContext *const t, } cul_level = dc_tok; dc_dq >>= dq_shift; - cf[0] = (coef) (umin(dc_dq - dc_sign, cf_max) ^ -dc_sign); + dc_dq = umin(dc_dq, cf_max + dc_sign); + cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq); if (rc) ac_qm: { const unsigned ac_dq = dq_tbl[1]; @@ -638,6 +639,7 @@ static int decode_coefs(Dav1dTaskContext *const t, printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng); const unsigned rc_tok = cf[rc]; unsigned tok, dq = (ac_dq * qm_tbl[rc] + 16) >> 5; + int dq_sat; if (rc_tok >= (15 << 11)) { tok = read_golomb(&ts->msac) + 15; @@ -654,7 +656,8 @@ static int decode_coefs(Dav1dTaskContext *const t, } cul_level += tok; dq >>= dq_shift; - cf[rc] = (coef) (umin(dq - sign, cf_max) ^ -sign); + dq_sat = umin(dq, cf_max + sign); + cf[rc] = (coef) (sign ? -dq_sat : dq_sat); rc = rc_tok & 0x3ff; } while (rc); @@ -669,13 +672,13 @@ static int decode_coefs(Dav1dTaskContext *const t, dc_tok &= 0xfffff; dc_dq = ((dc_dq * dc_tok) & 0xffffff) >> dq_shift; - dc_dq = umin(dc_dq - dc_sign, cf_max); + dc_dq = umin(dc_dq, cf_max + dc_sign); } else { - dc_dq = ((dc_dq * dc_tok) >> dq_shift) - dc_sign; + dc_dq = ((dc_dq * dc_tok) >> dq_shift); assert(dc_dq <= cf_max); } cul_level = dc_tok; - cf[0] = (coef) (dc_dq ^ -dc_sign); + cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq); if (rc) ac_noqm: { const unsigned ac_dq = dq_tbl[1]; @@ -684,7 +687,8 @@ static int decode_coefs(Dav1dTaskContext *const t, if (dbg) printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng); const unsigned rc_tok = cf[rc]; - unsigned tok, dq; + unsigned tok; + int dq; // residual if (rc_tok >= (15 << 11)) { @@ -698,15 +702,15 @@ static int decode_coefs(Dav1dTaskContext *const t, // dequant, see 7.12.3 dq = ((ac_dq * tok) & 0xffffff) >> dq_shift; - dq = umin(dq - sign, cf_max); + dq = umin(dq, cf_max + sign); } else { // cannot exceed cf_max, so we can avoid the clipping tok = rc_tok >> 11; - dq = ((ac_dq * tok) >> dq_shift) - sign; + dq = ((ac_dq * tok) >> dq_shift); assert(dq <= cf_max); } cul_level += tok; - cf[rc] = (coef) (dq ^ -sign); + cf[rc] = (coef) (sign ? -dq : dq); rc = rc_tok & 0x3ff; // next non-zero rc, zero if eob } while (rc); -- cgit v1.2.3