diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-26 11:36:46 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-26 11:36:46 +0300 |
commit | cc11c078cd8e1baf642ef0f1d2deaa98af596581 (patch) | |
tree | ef0292171b08048eff9581ee58e1fe24d8df0b5d | |
parent | c9af8f80f7976a7694c710f1426d816a67364a56 (diff) |
First step towards DNN optimization for ARMv7 Neon
Still missing some intrinsics
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | dnn/vec_neon.h | 7 |
2 files changed, 8 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
index f99e7c31..1b772446 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -450,6 +450,7 @@ endif
 if HAVE_ARM_NEON_INTR
 ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
                     $(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
+                    $(DNN_SOURCES_NEON:.c=.lo) \
                     $(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
 $(ARM_NEON_INTR_OBJ): CFLAGS += \
  $(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
diff --git a/dnn/vec_neon.h b/dnn/vec_neon.h
index e9959b97..18e4b3a4 100644
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -34,6 +34,13 @@
 #include <arm_neon.h>
 #include "os_support.h"
 
+#if defined(__arm__) && !defined(__aarch64__)
+/* Emulate vcvtnq_s32_f32() for ARMv7 Neon. */
+static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
+  return vrshrq_n_s32(vcvtq_n_s32_f32(x, 8), 8);
+}
+#endif
+
 #ifndef LPCNET_TEST
 static inline float32x4_t exp4_approx(float32x4_t x) {
   int32x4_t i;
|