From 8e8fb84dcda63e83671a41235f2d71e726a2e716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 5 Feb 2020 10:17:59 +0200 Subject: arm: Use int16_t for the tmp intermediate buffer For 8bpc and 10bpc, int16_t is enough here, and for 12bpc, other intermediate int16_t buffers also need to be made of size coef anyway. --- src/arm/32/looprestoration.S | 8 ++++---- src/arm/64/looprestoration.S | 8 ++++---- src/arm/looprestoration_init_tmpl.c | 20 ++++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'src/arm') diff --git a/src/arm/32/looprestoration.S b/src/arm/32/looprestoration.S index 73f41d7..066f77a 100644 --- a/src/arm/32/looprestoration.S +++ b/src/arm/32/looprestoration.S @@ -1661,7 +1661,7 @@ endfunc #define FILTER_OUT_STRIDE 384 -// void dav1d_sgr_finish_filter1_neon(coef *tmp, +// void dav1d_sgr_finish_filter1_neon(int16_t *tmp, // const pixel *src, const ptrdiff_t stride, // const int32_t *a, const int16_t *b, // const int w, const int h); @@ -1765,7 +1765,7 @@ function sgr_finish_filter1_neon, export=1 pop {r4-r11,pc} endfunc -// void dav1d_sgr_finish_filter2_neon(coef *tmp, +// void dav1d_sgr_finish_filter2_neon(int16_t *tmp, // const pixel *src, const ptrdiff_t stride, // const int32_t *a, const int16_t *b, // const int w, const int h); @@ -1927,7 +1927,7 @@ endfunc // void dav1d_sgr_weighted1_neon(pixel *dst, const ptrdiff_t dst_stride, // const pixel *src, const ptrdiff_t src_stride, -// const coef *t1, const int w, const int h, +// const int16_t *t1, const int w, const int h, // const int wt); function sgr_weighted1_neon, export=1 push {r4-r9,lr} @@ -2011,7 +2011,7 @@ endfunc // void dav1d_sgr_weighted2_neon(pixel *dst, const ptrdiff_t stride, // const pixel *src, const ptrdiff_t src_stride, -// const coef *t1, const coef *t2, +// const int16_t *t1, const int16_t *t2, // const int w, const int h, // const int16_t wt[2]); function sgr_weighted2_neon, export=1 diff --git a/src/arm/64/looprestoration.S b/src/arm/64/looprestoration.S index 656412b..c6a6ef7 100644 --- a/src/arm/64/looprestoration.S +++ b/src/arm/64/looprestoration.S @@ -1540,7 +1540,7 @@ endfunc #define FILTER_OUT_STRIDE 384 -// void dav1d_sgr_finish_filter1_neon(coef *tmp, +// void dav1d_sgr_finish_filter1_neon(int16_t *tmp, // const pixel *src, const ptrdiff_t stride, // const int32_t *a, const int16_t *b, // const int w, const int h); @@ -1657,7 +1657,7 @@ function sgr_finish_filter1_neon, export=1 ret endfunc -// void dav1d_sgr_finish_filter2_neon(coef *tmp, +// void dav1d_sgr_finish_filter2_neon(int16_t *tmp, // const pixel *src, const ptrdiff_t stride, // const int32_t *a, const int16_t *b, // const int w, const int h); @@ -1809,7 +1809,7 @@ endfunc // void dav1d_sgr_weighted1_neon(pixel *dst, const ptrdiff_t dst_stride, // const pixel *src, const ptrdiff_t src_stride, -// const coef *t1, const int w, const int h, +// const int16_t *t1, const int w, const int h, // const int wt); function sgr_weighted1_neon, export=1 dup v31.8h, w7 @@ -1889,7 +1889,7 @@ endfunc // void dav1d_sgr_weighted2_neon(pixel *dst, const ptrdiff_t stride, // const pixel *src, const ptrdiff_t src_stride, -// const coef *t1, const coef *t2, +// const int16_t *t1, const int16_t *t2, // const int w, const int h, // const int16_t wt[2]); function sgr_weighted2_neon, export=1 diff --git a/src/arm/looprestoration_init_tmpl.c b/src/arm/looprestoration_init_tmpl.c index 6c2e8fd..ec0ea22 100644 --- a/src/arm/looprestoration_init_tmpl.c +++ b/src/arm/looprestoration_init_tmpl.c @@ -117,13 +117,13 @@ void dav1d_sgr_box3_v_neon(int32_t *sumsq, int16_t *sum, const enum LrEdgeFlags edges); void dav1d_sgr_calc_ab1_neon(int32_t *a, int16_t *b, const int w, const int h, const int strength); -void dav1d_sgr_finish_filter1_neon(coef *tmp, +void dav1d_sgr_finish_filter1_neon(int16_t *tmp, const pixel *src, const ptrdiff_t stride, const int32_t *a, const int16_t *b, const int w, const int h); /* filter with a 3x3 box (radius=1) */ -static void dav1d_sgr_filter1_neon(coef *tmp, +static void dav1d_sgr_filter1_neon(int16_t *tmp, const pixel *src, const ptrdiff_t stride, const pixel (*left)[4], const pixel *lpf, const ptrdiff_t lpf_stride, @@ -160,13 +160,13 @@ void dav1d_sgr_box5_v_neon(int32_t *sumsq, int16_t *sum, const enum LrEdgeFlags edges); void dav1d_sgr_calc_ab2_neon(int32_t *a, int16_t *b, const int w, const int h, const int strength); -void dav1d_sgr_finish_filter2_neon(coef *tmp, +void dav1d_sgr_finish_filter2_neon(int16_t *tmp, const pixel *src, const ptrdiff_t stride, const int32_t *a, const int16_t *b, const int w, const int h); /* filter with a 5x5 box (radius=2) */ -static void dav1d_sgr_filter2_neon(coef *tmp, +static void dav1d_sgr_filter2_neon(int16_t *tmp, const pixel *src, const ptrdiff_t stride, const pixel (*left)[4], const pixel *lpf, const ptrdiff_t lpf_stride, @@ -195,11 +195,11 @@ static void dav1d_sgr_filter2_neon(coef *tmp, void dav1d_sgr_weighted1_neon(pixel *dst, const ptrdiff_t dst_stride, const pixel *src, const ptrdiff_t src_stride, - const coef *t1, const int w, const int h, + const int16_t *t1, const int w, const int h, const int wt); void dav1d_sgr_weighted2_neon(pixel *dst, const ptrdiff_t dst_stride, const pixel *src, const ptrdiff_t src_stride, - const coef *t1, const coef *t2, + const int16_t *t1, const int16_t *t2, const int w, const int h, const int16_t wt[2]); @@ -210,7 +210,7 @@ static void sgr_filter_neon(pixel *const dst, const ptrdiff_t dst_stride, const int16_t sgr_wt[7], const enum LrEdgeFlags edges) { if (!dav1d_sgr_params[sgr_idx][0]) { - ALIGN_STK_16(coef, tmp, 64 * 384,); + ALIGN_STK_16(int16_t, tmp, 64 * 384,); dav1d_sgr_filter1_neon(tmp, dst, dst_stride, left, lpf, lpf_stride, w, h, dav1d_sgr_params[sgr_idx][3], edges); if (w >= 8) @@ -228,7 +228,7 @@ static void sgr_filter_neon(pixel *const dst, const ptrdiff_t dst_stride, w & 7, h); } } else if (!dav1d_sgr_params[sgr_idx][1]) { - ALIGN_STK_16(coef, tmp, 64 * 384,); + ALIGN_STK_16(int16_t, tmp, 64 * 384,); dav1d_sgr_filter2_neon(tmp, dst, dst_stride, left, lpf, lpf_stride, w, h, dav1d_sgr_params[sgr_idx][2], edges); if (w >= 8) @@ -245,8 +245,8 @@ static void sgr_filter_neon(pixel *const dst, const ptrdiff_t dst_stride, w & 7, h); } } else { - ALIGN_STK_16(coef, tmp1, 64 * 384,); - ALIGN_STK_16(coef, tmp2, 64 * 384,); + ALIGN_STK_16(int16_t, tmp1, 64 * 384,); + ALIGN_STK_16(int16_t, tmp2, 64 * 384,); dav1d_sgr_filter2_neon(tmp1, dst, dst_stride, left, lpf, lpf_stride, w, h, dav1d_sgr_params[sgr_idx][2], edges); dav1d_sgr_filter1_neon(tmp2, dst, dst_stride, left, lpf, lpf_stride, -- cgit v1.2.3