Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Conrad <david_conrad@apple.com>2022-06-08 01:03:36 +0300
committerDavid Conrad <david_conrad@apple.com>2022-09-15 03:28:22 +0300
commitee98592ba27aa0c958778eef998ed89a664e252d (patch)
treea94c156dbc46ddf8b81364a6c44b5e0ad32b96d8
parent1bdb776c71d615e9a1a4890bbc53f979e225e150 (diff)
Fix overflow when saturating dequantized coefficients clipped to 0
It's possible to encode a large coefficient that becomes 0 after the clipping in dequant (Abs( dq ) & 0xFFFFFF), e.g. 0x1000000 After that &0xFFFFFF, coeffs are saturated in the range of [-(1 << (bitdepth+7)), 1 << (bitdepth+7)) dav1d implements this saturation via umin(dq - sign, cf_max), then applies the sign afterwards via xor. However, for dq = 0 and sign = 1, this step evaulates to umin(UINT_MAX, cf_max) == cf_max instead of the expected 0. So instead, do unsigned saturate as umin(dq, cf_max + sign), then apply sign via (sign ? -dq : dq) On arm this is the same number of instructions, since cneg exists and is used On x86 this requires an additional instruction, but this isn't a latency-critical path
-rw-r--r--src/recon_tmpl.c26
1 files changed, 15 insertions, 11 deletions
diff --git a/src/recon_tmpl.c b/src/recon_tmpl.c
index 0ed4169..5f49910 100644
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -591,7 +591,7 @@ static int decode_coefs(Dav1dTaskContext *const t,
const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
const uint8_t *const qm_tbl = *txtp < IDTX ? f->qm[tx][plane] : NULL;
const int dq_shift = imax(0, t_dim->ctx - 2);
- const unsigned cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc));
+ const int cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc));
unsigned cul_level, dc_sign_level;
if (!dc_tok) {
@@ -608,7 +608,7 @@ static int decode_coefs(Dav1dTaskContext *const t,
printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
chroma, dc_sign_ctx, dc_sign, ts->msac.rng);
- unsigned dc_dq = dq_tbl[0];
+ int dc_dq = dq_tbl[0];
dc_sign_level = (dc_sign - 1) & (2 << 6);
if (qm_tbl) {
@@ -628,7 +628,8 @@ static int decode_coefs(Dav1dTaskContext *const t,
}
cul_level = dc_tok;
dc_dq >>= dq_shift;
- cf[0] = (coef) (umin(dc_dq - dc_sign, cf_max) ^ -dc_sign);
+ dc_dq = umin(dc_dq, cf_max + dc_sign);
+ cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
if (rc) ac_qm: {
const unsigned ac_dq = dq_tbl[1];
@@ -638,6 +639,7 @@ static int decode_coefs(Dav1dTaskContext *const t,
printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
const unsigned rc_tok = cf[rc];
unsigned tok, dq = (ac_dq * qm_tbl[rc] + 16) >> 5;
+ int dq_sat;
if (rc_tok >= (15 << 11)) {
tok = read_golomb(&ts->msac) + 15;
@@ -654,7 +656,8 @@ static int decode_coefs(Dav1dTaskContext *const t,
}
cul_level += tok;
dq >>= dq_shift;
- cf[rc] = (coef) (umin(dq - sign, cf_max) ^ -sign);
+ dq_sat = umin(dq, cf_max + sign);
+ cf[rc] = (coef) (sign ? -dq_sat : dq_sat);
rc = rc_tok & 0x3ff;
} while (rc);
@@ -669,13 +672,13 @@ static int decode_coefs(Dav1dTaskContext *const t,
dc_tok &= 0xfffff;
dc_dq = ((dc_dq * dc_tok) & 0xffffff) >> dq_shift;
- dc_dq = umin(dc_dq - dc_sign, cf_max);
+ dc_dq = umin(dc_dq, cf_max + dc_sign);
} else {
- dc_dq = ((dc_dq * dc_tok) >> dq_shift) - dc_sign;
+ dc_dq = ((dc_dq * dc_tok) >> dq_shift);
assert(dc_dq <= cf_max);
}
cul_level = dc_tok;
- cf[0] = (coef) (dc_dq ^ -dc_sign);
+ cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
if (rc) ac_noqm: {
const unsigned ac_dq = dq_tbl[1];
@@ -684,7 +687,8 @@ static int decode_coefs(Dav1dTaskContext *const t,
if (dbg)
printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
const unsigned rc_tok = cf[rc];
- unsigned tok, dq;
+ unsigned tok;
+ int dq;
// residual
if (rc_tok >= (15 << 11)) {
@@ -698,15 +702,15 @@ static int decode_coefs(Dav1dTaskContext *const t,
// dequant, see 7.12.3
dq = ((ac_dq * tok) & 0xffffff) >> dq_shift;
- dq = umin(dq - sign, cf_max);
+ dq = umin(dq, cf_max + sign);
} else {
// cannot exceed cf_max, so we can avoid the clipping
tok = rc_tok >> 11;
- dq = ((ac_dq * tok) >> dq_shift) - sign;
+ dq = ((ac_dq * tok) >> dq_shift);
assert(dq <= cf_max);
}
cul_level += tok;
- cf[rc] = (coef) (dq ^ -sign);
+ cf[rc] = (coef) (sign ? -dq : dq);
rc = rc_tok & 0x3ff; // next non-zero rc, zero if eob
} while (rc);