Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@amazon.com> 2023-11-03 09:46:38 +0300
committer Jean-Marc Valin <jmvalin@amazon.com> 2023-11-03 09:48:38 +0300
commit 1ada7d4d6f838dc0842fc89159747755c516ce24 (patch)
tree f3c55be79cd56c8c8ea6b19bff30c88d8bfb0367
parent 166a6c8e49fe1335feae6ffc450325f7f5f628c6 (diff)
Vectorizing sgemv for multiples of 4 with SSE
-rw-r--r-- dnn/vec_avx.h 17
1 file changed, 17 insertions, 0 deletions
diff --git a/dnn/vec_avx.h b/dnn/vec_avx.h
index 767d7e19..a1d6cad2 100644
--- a/dnn/vec_avx.h
+++ b/dnn/vec_avx.h
@@ -709,6 +709,23 @@ static inline void sgemv(float *out, const float *weights, int rows, int cols, i
}
_mm256_storeu_ps (&y[0], vy0);
}
+ for (;i<rows-3;i+=4)
+ {
+ float *y;
+ __m128 vy0;
+ y = &out[i];
+ vy0 = _mm_setzero_ps();
+ for (j=0;j<cols;j++)
+ {
+ __m128 vxj;
+ __m128 vw;
+ vxj = _mm_broadcast_ss(&x[j]);
+
+ vw = _mm_loadu_ps(&weights[j*col_stride + i]);
+ vy0 = _mm_fmadd_ps(vw, vxj, vy0);
+ }
+ _mm_storeu_ps (&y[0], vy0);
+ }
for (;i<rows;i++)
{
out[i] = 0;