From 8e8fb84dcda63e83671a41235f2d71e726a2e716 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 5 Feb 2020 10:17:59 +0200
Subject: arm: Use int16_t for the tmp intermediate buffer

For 8bpc and 10bpc, int16_t is large enough for this buffer. For 12bpc,
the other intermediate int16_t buffers would also have to be changed to
elements of size coef anyway, so nothing is lost by using int16_t here.
---
 src/arm/32/looprestoration.S        |  8 ++++----
 src/arm/64/looprestoration.S        |  8 ++++----
 src/arm/looprestoration_init_tmpl.c | 20 ++++++++++----------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'src/arm')

diff --git a/src/arm/32/looprestoration.S b/src/arm/32/looprestoration.S
index 73f41d7..066f77a 100644
--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -1661,7 +1661,7 @@ endfunc
 
 #define FILTER_OUT_STRIDE 384
 
-// void dav1d_sgr_finish_filter1_neon(coef *tmp,
+// void dav1d_sgr_finish_filter1_neon(int16_t *tmp,
 //                                    const pixel *src, const ptrdiff_t stride,
 //                                    const int32_t *a, const int16_t *b,
 //                                    const int w, const int h);
@@ -1765,7 +1765,7 @@ function sgr_finish_filter1_neon, export=1
         pop             {r4-r11,pc}
 endfunc
 
-// void dav1d_sgr_finish_filter2_neon(coef *tmp,
+// void dav1d_sgr_finish_filter2_neon(int16_t *tmp,
 //                                    const pixel *src, const ptrdiff_t stride,
 //                                    const int32_t *a, const int16_t *b,
 //                                    const int w, const int h);
@@ -1927,7 +1927,7 @@ endfunc
 
 // void dav1d_sgr_weighted1_neon(pixel *dst, const ptrdiff_t dst_stride,
 //                               const pixel *src, const ptrdiff_t src_stride,
-//                               const coef *t1, const int w, const int h,
+//                               const int16_t *t1, const int w, const int h,
 //                               const int wt);
 function sgr_weighted1_neon, export=1
         push            {r4-r9,lr}
@@ -2011,7 +2011,7 @@ endfunc
 
 // void dav1d_sgr_weighted2_neon(pixel *dst, const ptrdiff_t stride,
 //                               const pixel *src, const ptrdiff_t src_stride,
-//                               const coef *t1, const coef *t2,
+//                               const int16_t *t1, const int16_t *t2,
 //                               const int w, const int h,
 //                               const int16_t wt[2]);
 function sgr_weighted2_neon, export=1
diff --git a/src/arm/64/looprestoration.S b/src/arm/64/looprestoration.S
index 656412b..c6a6ef7 100644
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -1540,7 +1540,7 @@ endfunc
 
 #define FILTER_OUT_STRIDE 384
 
-// void dav1d_sgr_finish_filter1_neon(coef *tmp,
+// void dav1d_sgr_finish_filter1_neon(int16_t *tmp,
 //                                    const pixel *src, const ptrdiff_t stride,
 //                                    const int32_t *a, const int16_t *b,
 //                                    const int w, const int h);
@@ -1657,7 +1657,7 @@ function sgr_finish_filter1_neon, export=1
         ret
 endfunc
 
-// void dav1d_sgr_finish_filter2_neon(coef *tmp,
+// void dav1d_sgr_finish_filter2_neon(int16_t *tmp,
 //                                    const pixel *src, const ptrdiff_t stride,
 //                                    const int32_t *a, const int16_t *b,
 //                                    const int w, const int h);
@@ -1809,7 +1809,7 @@ endfunc
 
 // void dav1d_sgr_weighted1_neon(pixel *dst, const ptrdiff_t dst_stride,
 //                               const pixel *src, const ptrdiff_t src_stride,
-//                               const coef *t1, const int w, const int h,
+//                               const int16_t *t1, const int w, const int h,
 //                               const int wt);
 function sgr_weighted1_neon, export=1
         dup             v31.8h, w7
@@ -1889,7 +1889,7 @@ endfunc
 
 // void dav1d_sgr_weighted2_neon(pixel *dst, const ptrdiff_t stride,
 //                               const pixel *src, const ptrdiff_t src_stride,
-//                               const coef *t1, const coef *t2,
+//                               const int16_t *t1, const int16_t *t2,
 //                               const int w, const int h,
 //                               const int16_t wt[2]);
 function sgr_weighted2_neon, export=1
diff --git a/src/arm/looprestoration_init_tmpl.c b/src/arm/looprestoration_init_tmpl.c
index 6c2e8fd..ec0ea22 100644
--- a/src/arm/looprestoration_init_tmpl.c
+++ b/src/arm/looprestoration_init_tmpl.c
@@ -117,13 +117,13 @@ void dav1d_sgr_box3_v_neon(int32_t *sumsq, int16_t *sum,
                            const enum LrEdgeFlags edges);
 void dav1d_sgr_calc_ab1_neon(int32_t *a, int16_t *b,
                              const int w, const int h, const int strength);
-void dav1d_sgr_finish_filter1_neon(coef *tmp,
+void dav1d_sgr_finish_filter1_neon(int16_t *tmp,
                                    const pixel *src, const ptrdiff_t stride,
                                    const int32_t *a, const int16_t *b,
                                    const int w, const int h);
 
 /* filter with a 3x3 box (radius=1) */
-static void dav1d_sgr_filter1_neon(coef *tmp,
+static void dav1d_sgr_filter1_neon(int16_t *tmp,
                                    const pixel *src, const ptrdiff_t stride,
                                    const pixel (*left)[4],
                                    const pixel *lpf, const ptrdiff_t lpf_stride,
@@ -160,13 +160,13 @@ void dav1d_sgr_box5_v_neon(int32_t *sumsq, int16_t *sum,
                            const enum LrEdgeFlags edges);
 void dav1d_sgr_calc_ab2_neon(int32_t *a, int16_t *b,
                              const int w, const int h, const int strength);
-void dav1d_sgr_finish_filter2_neon(coef *tmp,
+void dav1d_sgr_finish_filter2_neon(int16_t *tmp,
                                    const pixel *src, const ptrdiff_t stride,
                                    const int32_t *a, const int16_t *b,
                                    const int w, const int h);
 
 /* filter with a 5x5 box (radius=2) */
-static void dav1d_sgr_filter2_neon(coef *tmp,
+static void dav1d_sgr_filter2_neon(int16_t *tmp,
                                    const pixel *src, const ptrdiff_t stride,
                                    const pixel (*left)[4],
                                    const pixel *lpf, const ptrdiff_t lpf_stride,
@@ -195,11 +195,11 @@ static void dav1d_sgr_filter2_neon(coef *tmp,
 
 void dav1d_sgr_weighted1_neon(pixel *dst, const ptrdiff_t dst_stride,
                               const pixel *src, const ptrdiff_t src_stride,
-                              const coef *t1, const int w, const int h,
+                              const int16_t *t1, const int w, const int h,
                               const int wt);
 void dav1d_sgr_weighted2_neon(pixel *dst, const ptrdiff_t dst_stride,
                               const pixel *src, const ptrdiff_t src_stride,
-                              const coef *t1, const coef *t2,
+                              const int16_t *t1, const int16_t *t2,
                               const int w, const int h,
                               const int16_t wt[2]);
 
@@ -210,7 +210,7 @@ static void sgr_filter_neon(pixel *const dst, const ptrdiff_t dst_stride,
                              const int16_t sgr_wt[7], const enum LrEdgeFlags edges)
 {
     if (!dav1d_sgr_params[sgr_idx][0]) {
-        ALIGN_STK_16(coef, tmp, 64 * 384,);
+        ALIGN_STK_16(int16_t, tmp, 64 * 384,);
         dav1d_sgr_filter1_neon(tmp, dst, dst_stride, left, lpf, lpf_stride,
                                w, h, dav1d_sgr_params[sgr_idx][3], edges);
         if (w >= 8)
@@ -228,7 +228,7 @@ static void sgr_filter_neon(pixel *const dst, const ptrdiff_t dst_stride,
                                         w & 7, h);
         }
     } else if (!dav1d_sgr_params[sgr_idx][1]) {
-        ALIGN_STK_16(coef, tmp, 64 * 384,);
+        ALIGN_STK_16(int16_t, tmp, 64 * 384,);
         dav1d_sgr_filter2_neon(tmp, dst, dst_stride, left, lpf, lpf_stride,
                                w, h, dav1d_sgr_params[sgr_idx][2], edges);
         if (w >= 8)
@@ -245,8 +245,8 @@ static void sgr_filter_neon(pixel *const dst, const ptrdiff_t dst_stride,
                                         w & 7, h);
         }
     } else {
-        ALIGN_STK_16(coef, tmp1, 64 * 384,);
-        ALIGN_STK_16(coef, tmp2, 64 * 384,);
+        ALIGN_STK_16(int16_t, tmp1, 64 * 384,);
+        ALIGN_STK_16(int16_t, tmp2, 64 * 384,);
         dav1d_sgr_filter2_neon(tmp1, dst, dst_stride, left, lpf, lpf_stride,
                                w, h, dav1d_sgr_params[sgr_idx][2], edges);
         dav1d_sgr_filter1_neon(tmp2, dst, dst_stride, left, lpf, lpf_stride,
-- 
cgit v1.2.3