Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJanne Grunau <janne-vlc@jannau.net>2018-11-13 01:59:02 +0300
committerJanne Grunau <janne-vlc@jannau.net>2018-11-20 20:36:23 +0300
commit5098b23ab3a98dc2202449dd3bd0c06902c4ff64 (patch)
tree676dabb88aada0059f7cd8116c989754c8923aa2 /src/itx_tmpl.c
parentc627f16f5ae671c4583b6c85006ce648224814e3 (diff)
itx: clip according to spec, fixes #103, #158
This does not adjust the AVX2 asm. The asm clips in many places to the required range (16-bit signed) for performance reason. No mismatch observed with coefs generated by the forward transform in checkasm in 10 thousand runs.
Diffstat (limited to 'src/itx_tmpl.c')
-rw-r--r--src/itx_tmpl.c25
1 files changed, 16 insertions, 9 deletions
diff --git a/src/itx_tmpl.c b/src/itx_tmpl.c
index 0fb37f1..1d35146 100644
--- a/src/itx_tmpl.c
+++ b/src/itx_tmpl.c
@@ -40,7 +40,7 @@
#include "src/itx_1d.c"
typedef void (*itx_1d_fn)(const coef *in, ptrdiff_t in_s,
- coef *out, ptrdiff_t out_s);
+ coef *out, ptrdiff_t out_s, const int range);
static void NOINLINE
inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
@@ -54,6 +54,9 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
// Maximum value for h and w is 64
coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int is_rect2 = w * 2 == h || h * 2 == w;
+ const int row_clip_max = (1 << (BITDEPTH + 8 - 1)) - 1;
+ const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
+ const int col_clip_min = -col_clip_max - 1;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
const int rnd1 = (1 << shift1) >> 1;
@@ -64,18 +67,19 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
if (is_rect2)
in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
}
- first_1d_fn(in_mem, 1, &tmp[i * w], 1);
+ first_1d_fn(in_mem, 1, &tmp[i * w], 1, row_clip_max);
} else {
- first_1d_fn(&coeff[i], sh, &tmp[i * w], 1);
+ first_1d_fn(&coeff[i], sh, &tmp[i * w], 1, row_clip_max);
}
for (j = 0; j < w; j++)
- tmp[i * w + j] = (tmp[i * w + j] + (rnd1)) >> shift1;
+ tmp[i * w + j] = iclip((tmp[i * w + j] + (rnd1)) >> shift1,
+ col_clip_min, col_clip_max);
}
if (h != sh) memset(&tmp[sh * w], 0, w * (h - sh) * sizeof(*tmp));
const int rnd2 = (1 << shift2) >> 1;
for (i = 0; i < w; i++) {
- second_1d_fn(&tmp[i], w, out, 1);
+ second_1d_fn(&tmp[i], w, out, 1, col_clip_max);
for (j = 0; j < h; j++)
dst[i + j * PXSTRIDE(stride)] =
iclip_pixel(dst[i + j * PXSTRIDE(stride)] +
@@ -145,15 +149,18 @@ inv_txfm_fn64(64, 64, 2, 4)
static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob)
{
- int i, j;
+ const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
+ const int col_clip_min = -col_clip_max - 1;
coef tmp[4 * 4], out[4];
- for (i = 0; i < 4; i++)
+ for (int i = 0; i < 4; i++)
inv_wht4_1d(&coeff[i], 4, &tmp[i * 4], 1, 0);
+ for (int k = 0; k < 4 * 4; k++)
+ tmp[k] = iclip(tmp[k], col_clip_min, col_clip_max);
- for (i = 0; i < 4; i++) {
+ for (int i = 0; i < 4; i++) {
inv_wht4_1d(&tmp[i], 4, out, 1, 1);
- for (j = 0; j < 4; j++)
+ for (int j = 0; j < 4; j++)
dst[i + j * PXSTRIDE(stride)] =
iclip_pixel(dst[i + j * PXSTRIDE(stride)] + out[j]);
}