Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@amazon.com>2023-11-23 03:05:52 +0300
committerJean-Marc Valin <jmvalin@amazon.com>2023-11-25 11:15:51 +0300
commit5c3795b2879108f897d465f36885408d5325b77b (patch)
treef62640891900a80083e7fea65c8a90392e2dcfdf
parent984f35b313d57280e3e1b108ba3418e7e6232e22 (diff)
Adding dotprod instruction to ARM rtcd
Used for DNN matrix multiplies
-rw-r--r--CMakeLists.txt1
-rw-r--r--Makefile.am17
-rw-r--r--celt/arm/arm_celt_map.c31
-rw-r--r--celt/arm/armcpu.c21
-rw-r--r--celt/arm/armcpu.h13
-rw-r--r--celt/cpu_support.h5
-rw-r--r--cmake/OpusSources.cmake2
-rw-r--r--configure.ac57
-rw-r--r--dnn/arm/arm_dnn_map.c54
-rw-r--r--dnn/arm/dnn_arm.h64
-rw-r--r--dnn/arm/nnet_dotprod.c38
-rw-r--r--dnn/arm/nnet_neon.c38
-rw-r--r--dnn/nnet.h4
-rw-r--r--lpcnet_headers.mk3
-rw-r--r--lpcnet_sources.mk4
15 files changed, 338 insertions, 14 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c02a2cc4..073d7de8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -524,6 +524,7 @@ if(NOT OPUS_DISABLE_INTRINSICS)
add_sources_group(opus celt ${celt_sources_arm_neon_intr})
add_sources_group(opus silk ${silk_sources_arm_neon_intr})
+ add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
# silk arm neon depends on main_Fix.h
target_include_directories(opus PRIVATE silk/fixed)
diff --git a/Makefile.am b/Makefile.am
index fabd1a3e..f99e7c31 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -85,6 +85,18 @@ if CPU_ARM
if HAVE_RTCD
CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD)
SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_ARM_RTCD)
+endif
+endif
+
+if ENABLE_DEEP_PLC
+if HAVE_ARM_DOTPROD
+LPCNET_SOURCES += $(DNN_SOURCES_DOTPROD)
+endif
+if HAVE_ARM_NEON_INTR
+LPCNET_SOURCES += $(DNN_SOURCES_NEON)
+endif
endif
if HAVE_ARM_NEON_INTR
@@ -442,3 +454,8 @@ ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
$(ARM_NEON_INTR_OBJ): CFLAGS += \
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
endif
+
+if HAVE_ARM_DOTPROD
+ARM_DOTPROD_OBJ = $(DNN_SOURCES_DOTPROD:.c=.lo)
+$(ARM_DOTPROD_OBJ): CFLAGS += $(ARM_DOTPROD_INTR_CFLAGS)
+endif
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index ca988b66..cbaea495 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c
celt_inner_prod_c, /* ARMv4 */
celt_inner_prod_c, /* EDSP */
celt_inner_prod_c, /* Media */
- celt_inner_prod_neon /* NEON */
+ celt_inner_prod_neon,/* NEON */
+ celt_inner_prod_neon /* DOTPROD */
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
@@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o
dual_inner_prod_c, /* ARMv4 */
dual_inner_prod_c, /* EDSP */
dual_inner_prod_c, /* Media */
- dual_inner_prod_neon /* NEON */
+ dual_inner_prod_neon,/* NEON */
+ dual_inner_prod_neon /* DOTPROD */
};
# endif
@@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
- MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
+ MAY_HAVE_NEON(celt_pitch_xcorr), /* NEON */
+ MAY_HAVE_NEON(celt_pitch_xcorr) /* DOTPROD */
};
# endif
@@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
- celt_pitch_xcorr_float_neon /* Neon */
+ celt_pitch_xcorr_float_neon, /* Neon */
+ celt_pitch_xcorr_float_neon /* DOTPROD */
};
# endif
# endif /* FIXED_POINT */
@@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
+ xcorr_kernel_neon_fixed /* DOTPROD */
};
#endif
@@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
- opus_fft_alloc_arm_neon /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon, /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon /* DOTPROD with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
- opus_fft_free_arm_neon /* Neon with NE10 */
+ opus_fft_free_arm_neon, /* Neon with NE10 */
+ opus_fft_free_arm_neon /* DOTPROD with NE10 */
};
# endif /* CUSTOM_MODES */
@@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
- opus_fft_neon /* Neon with NE10 */
+ opus_fft_neon, /* Neon with NE10 */
+ opus_fft_neon /* DOTPROD with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
- opus_ifft_neon /* Neon with NE10 */
+ opus_ifft_neon, /* Neon with NE10 */
+ opus_ifft_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
- clt_mdct_forward_neon /* Neon with NE10 */
+ clt_mdct_forward_neon, /* Neon with NE10 */
+ clt_mdct_forward_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
- clt_mdct_backward_neon /* Neon with NE10 */
+ clt_mdct_backward_neon, /* Neon with NE10 */
+ clt_mdct_backward_neon /* DOTPROD with NE10 */
};
# endif /* HAVE_ARM_NE10 */
diff --git a/celt/arm/armcpu.c b/celt/arm/armcpu.c
index c7d16e6d..6fbfb904 100644
--- a/celt/arm/armcpu.c
+++ b/celt/arm/armcpu.c
@@ -43,6 +43,7 @@
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
+#define OPUS_CPU_ARM_DOTPROD_FLAG (1<<OPUS_ARCH_ARM_DOTPROD)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@@ -127,6 +128,11 @@ opus_uint32 opus_cpu_capabilities(void)
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON_FLAG;
# endif
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+ p = strstr(buf, " asimddp");
+ if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
}
# endif
@@ -144,6 +150,13 @@ opus_uint32 opus_cpu_capabilities(void)
# endif
}
+#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
+ flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
+#endif
+
fclose(cpuinfo);
}
return flags;
@@ -180,7 +193,13 @@ static int opus_select_arch_impl(void)
}
arch++;
- celt_assert(arch == OPUS_ARCH_ARM_NEON);
+ if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
+ celt_assert(arch == OPUS_ARCH_ARM_NEON);
+ return arch;
+ }
+ arch++;
+
+ celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
return arch;
}
diff --git a/celt/arm/armcpu.h b/celt/arm/armcpu.h
index 820262ff..6d5803d8 100644
--- a/celt/arm/armcpu.h
+++ b/celt/arm/armcpu.h
@@ -46,6 +46,12 @@
# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
# endif
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+# define MAY_HAVE_DOTPROD(name) name ## _dotprod
+# else
+# define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
+# endif
+
# if defined(OPUS_ARM_PRESUME_EDSP)
# define PRESUME_EDSP(name) name ## _edsp
# else
@@ -64,6 +70,12 @@
# define PRESUME_NEON(name) PRESUME_MEDIA(name)
# endif
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+# define PRESUME_DOTPROD(name) name ## _dotprod
+# else
+# define PRESUME_DOTPROD(name) PRESUME_NEON(name)
+# endif
+
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
@@ -71,6 +83,7 @@ int opus_select_arch(void);
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
+#define OPUS_ARCH_ARM_DOTPROD (4)
# endif
diff --git a/celt/cpu_support.h b/celt/cpu_support.h
index fdd9fb64..9f13d8ae 100644
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -35,13 +35,14 @@
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h"
-/* We currently support 4 ARM variants:
+/* We currently support 5 ARM variants:
* arch[0] -> ARMv4
* arch[1] -> ARMv5E
* arch[2] -> ARMv6
* arch[3] -> NEON
+ * arch[4] -> NEON+DOTPROD
*/
-#define OPUS_ARCHMASK 3
+#define OPUS_ARCHMASK 7
#elif defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
diff --git a/cmake/OpusSources.cmake b/cmake/OpusSources.cmake
index a0259373..74e4eaed 100644
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@@ -48,6 +48,8 @@ get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
get_opus_sources(DNN_SOURCES_AVX2 lpcnet_sources.mk dnn_sources_avx2)
+get_opus_sources(DNN_SOURCES_NEON lpcnet_sources.mk dnn_sources_arm_neon)
+get_opus_sources(DNN_SOURCES_DOTPROD lpcnet_sources.mk dnn_sources_arm_dotprod)
get_opus_sources(opus_demo_SOURCES Makefile.am opus_demo_sources)
get_opus_sources(opus_custom_demo_SOURCES Makefile.am opus_custom_demo_sources)
diff --git a/configure.ac b/configure.ac
index aa63f841..ed676012 100644
--- a/configure.ac
+++ b/configure.ac
@@ -334,6 +334,18 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
)
])
AC_SUBST(OPUS_ARM_MAY_HAVE_NEON)
+ AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],[
+ AC_DEFINE(OPUS_ARM_MAY_HAVE_DOTPROD, 1,
+ [Define if compiler supports DOTPROD instructions])
+ AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], [
+ AC_DEFINE(OPUS_ARM_PRESUME_DOTPROD, 1,
+ [Define if binary requires DOTPROD instruction support])
+ asm_optimization="$asm_optimization (DOTPROD)"
+ ],
+ [rtcd_support="$rtcd_support (DOTPROD)"]
+ )
+ ])
+ AC_SUBST(OPUS_ARM_MAY_HAVE_DOTPROD)
dnl Make sure turning on RTCD gets us at least one
dnl instruction set.
AS_IF([test x"$rtcd_support" != x""],
@@ -377,6 +389,7 @@ m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
+m4_define([DEFAULT_ARM_DOTPROD_INTR_CFLAGS], ["-march=armv8.2-a+dotprod"])
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
# this option will break things.
@@ -394,12 +407,14 @@ AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@d
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
+AC_ARG_VAR([ARM_DOTPROD_INTR_CFLAGS], [C compiler flags to compile ARM DOTPROD intrinsics @<:@default=]DEFAULT_ARM_DOTPROD_INTR_CFLAGS[@:>@])
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
+AS_VAR_SET_IF([ARM_DOTPROD_INTR_CFLAGS], [], [AS_VAR_SET([ARM_DOTPROD_INTR_CFLAGS], ["DEFAULT_ARM_DOTPROD_INTR_CFLAGS"])])
AC_DEFUN([OPUS_PATH_NE10],
[
@@ -543,6 +558,46 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
intrinsics_support="$intrinsics_support (NEON [Aarch64])"
])
+ OPUS_CHECK_INTRINSICS(
+ [Aarch64 dotprod],
+ [$ARM_DOTPROD_INTR_CFLAGS],
+ [OPUS_ARM_MAY_HAVE_DOTPROD],
+ [OPUS_ARM_PRESUME_DOTPROD],
+ [[#include <arm_neon.h>
+ ]],
+ [[
+ static int32x4_t acc;
+ static int8x16_t a, b;
+ acc = vdotq_s32(acc, a, b);
+ ]]
+ )
+ AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
+ [
+ OPUS_ARM_DOTPROD_INTR_CFLAGS="$ARM_NEON_DOTPROD_CFLAGS"
+ AC_SUBST([OPUS_ARM_DOTPROD_INTR_CFLAGS])
+ ]
+ )
+
+ AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],
+ [
+ AC_DEFINE([OPUS_ARM_MAY_HAVE_DOTPROD], 1, [Compiler supports Aarch64 DOTPROD Intrinsics])
+ intrinsics_support="$intrinsics_support (DOTPROD)"
+
+ AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"],
+ [
+ AC_DEFINE([OPUS_ARM_PRESUME_DOTPROD], 1, [Define if binary requires Aarch64 dotprod Intrinsics])
+ intrinsics_support="$intrinsics_support (DOTPROD [Aarch64])"
+ ])
+
+ AS_IF([test x"$enable_rtcd" != x"no" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
+ [AS_IF([test x"$rtcd_support" = x"no"],
+ [rtcd_support="ARM (DOTPROD Intrinsics)"],
+ [rtcd_support="$rtcd_support (DOTPROD Intrinsics)"])])
+
+ ]
+ )
+
+
AS_IF([test x"$intrinsics_support" = x""],
[intrinsics_support=no],
[intrinsics_support="ARM$intrinsics_support"])
@@ -760,6 +815,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
])
AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
+AM_CONDITIONAL([HAVE_ARM_DOTPROD],
+ [test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"])
AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
[test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
AM_CONDITIONAL([HAVE_ARM_NE10],
diff --git a/dnn/arm/arm_dnn_map.c b/dnn/arm/arm_dnn_map.c
new file mode 100644
index 00000000..e7ffecaf
--- /dev/null
+++ b/dnn/arm/arm_dnn_map.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arm/armcpu.h"
+#include "nnet.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
+
+void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+ const LinearLayer *linear,
+ float *out,
+ const float *in
+) = {
+ compute_linear_c, /* default */
+ compute_linear_c,
+ compute_linear_c,
+ MAY_HAVE_NEON(compute_linear), /* neon */
+ MAY_HAVE_DOTPROD(compute_linear) /* dotprod */
+};
+
+#endif
+
+
+#endif
diff --git a/dnn/arm/dnn_arm.h b/dnn/arm/dnn_arm.h
new file mode 100644
index 00000000..91ca2b53
--- /dev/null
+++ b/dnn/arm/dnn_arm.h
@@ -0,0 +1,64 @@
+/* Copyright (c) 2011-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DNN_ARM_H
+#define DNN_ARM_H
+
+#include "cpu_support.h"
+#include "opus_types.h"
+
+void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
+void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
+
+
+#if defined(OPUS_ARM_PRESUME_DOTPROD)
+
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
+
+#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+ const LinearLayer *linear,
+ float *out,
+ const float *in
+ );
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) \
+ ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
+
+
+#endif
+
+
+
+#endif /* DNN_ARM_H */
diff --git a/dnn/arm/nnet_dotprod.c b/dnn/arm/nnet_dotprod.c
new file mode 100644
index 00000000..1354ed33
--- /dev/null
+++ b/dnn/arm/nnet_dotprod.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef __ARM_FEATURE_DOTPROD
+#error nnet_dotprod.c is being compiled without DOTPROD enabled
+#endif
+
+#define RTCD_ARCH dotprod
+
+#include "nnet_arch.h"
diff --git a/dnn/arm/nnet_neon.c b/dnn/arm/nnet_neon.c
new file mode 100644
index 00000000..fb636f85
--- /dev/null
+++ b/dnn/arm/nnet_neon.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
+#error nnet_neon.c is being compiled without Neon enabled
+#endif
+
+#define RTCD_ARCH neon
+
+#include "nnet_arch.h"
diff --git a/dnn/nnet.h b/dnn/nnet.h
index 4a42beca..a2eaad82 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -191,6 +191,10 @@ void compute_activation_c(float *output, const float *input, int N, int activati
void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#include "arm/dnn_arm.h"
+#endif
+
#if defined(OPUS_X86_MAY_HAVE_SSE2)
#include "x86/dnn_x86.h"
#endif
diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk
index d3aa1516..da610ca1 100644
--- a/lpcnet_headers.mk
+++ b/lpcnet_headers.mk
@@ -14,7 +14,8 @@ dnn/vec_neon.h \
dnn/pitchdnn.h \
dnn/pitchdnn_data.h \
dnn/x86/dnn_x86.h \
-dnn/nnet_arch.h
+dnn/nnet_arch.h \
+dnn/arm/dnn_arm.h
DRED_HEAD = \
silk/dred_coding.h \
diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk
index ee3d79fd..9b8863ad 100644
--- a/lpcnet_sources.mk
+++ b/lpcnet_sources.mk
@@ -27,3 +27,7 @@ DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
DNN_SOURCES_SSE2 = dnn/x86/nnet_sse2.c
+
+DNN_SOURCES_ARM_RTCD = dnn/arm/arm_dnn_map.c
+DNN_SOURCES_DOTPROD = dnn/arm/nnet_dotprod.c
+DNN_SOURCES_NEON = dnn/arm/nnet_neon.c