diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-23 03:05:52 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-25 11:15:51 +0300 |
commit | 5c3795b2879108f897d465f36885408d5325b77b (patch) | |
tree | f62640891900a80083e7fea65c8a90392e2dcfdf | |
parent | 984f35b313d57280e3e1b108ba3418e7e6232e22 (diff) |
Adding dotprod instruction to ARM rtcd
Used for DNN matrix multiplies
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | Makefile.am | 17 | ||||
-rw-r--r-- | celt/arm/arm_celt_map.c | 31 | ||||
-rw-r--r-- | celt/arm/armcpu.c | 21 | ||||
-rw-r--r-- | celt/arm/armcpu.h | 13 | ||||
-rw-r--r-- | celt/cpu_support.h | 5 | ||||
-rw-r--r-- | cmake/OpusSources.cmake | 2 | ||||
-rw-r--r-- | configure.ac | 57 | ||||
-rw-r--r-- | dnn/arm/arm_dnn_map.c | 54 | ||||
-rw-r--r-- | dnn/arm/dnn_arm.h | 64 | ||||
-rw-r--r-- | dnn/arm/nnet_dotprod.c | 38 | ||||
-rw-r--r-- | dnn/arm/nnet_neon.c | 38 | ||||
-rw-r--r-- | dnn/nnet.h | 4 | ||||
-rw-r--r-- | lpcnet_headers.mk | 3 | ||||
-rw-r--r-- | lpcnet_sources.mk | 4 |
15 files changed, 338 insertions, 14 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index c02a2cc4..073d7de8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -524,6 +524,7 @@ if(NOT OPUS_DISABLE_INTRINSICS) add_sources_group(opus celt ${celt_sources_arm_neon_intr}) add_sources_group(opus silk ${silk_sources_arm_neon_intr}) + add_sources_group(opus lpcnet ${dnn_sources_arm_neon}) # silk arm neon depends on main_Fix.h target_include_directories(opus PRIVATE silk/fixed) diff --git a/Makefile.am b/Makefile.am index fabd1a3e..f99e7c31 100644 --- a/Makefile.am +++ b/Makefile.am @@ -85,6 +85,18 @@ if CPU_ARM if HAVE_RTCD CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD) SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD) +if ENABLE_DEEP_PLC +LPCNET_SOURCES += $(DNN_SOURCES_ARM_RTCD) +endif +endif + +if ENABLE_DEEP_PLC +if HAVE_ARM_DOTPROD +LPCNET_SOURCES += $(DNN_SOURCES_DOTPROD) +endif +if HAVE_ARM_NEON_INTR +LPCNET_SOURCES += $(DNN_SOURCES_NEON) +endif endif if HAVE_ARM_NEON_INTR @@ -442,3 +454,8 @@ ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \ $(ARM_NEON_INTR_OBJ): CFLAGS += \ $(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS) endif + +if HAVE_ARM_DOTPROD +ARM_DOTPROD_OBJ = $(DNN_SOURCES_DOTPROD:.c=.lo) +$(ARM_DOTPROD_OBJ): CFLAGS += $(ARM_DOTPROD_INTR_CFLAGS) +endif diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c index ca988b66..cbaea495 100644 --- a/celt/arm/arm_celt_map.c +++ b/celt/arm/arm_celt_map.c @@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c celt_inner_prod_c, /* ARMv4 */ celt_inner_prod_c, /* EDSP */ celt_inner_prod_c, /* Media */ - celt_inner_prod_neon /* NEON */ + celt_inner_prod_neon,/* NEON */ + celt_inner_prod_neon /* DOTPROD */ }; void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, @@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o dual_inner_prod_c, /* ARMv4 */ dual_inner_prod_c, /* EDSP */ dual_inner_prod_c, /* Media */ - dual_inner_prod_neon /* NEON */ + dual_inner_prod_neon,/* NEON */ + dual_inner_prod_neon /* DOTPROD */ }; # endif @@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, celt_pitch_xcorr_c, /* ARMv4 */ MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */ MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ - MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ + MAY_HAVE_NEON(celt_pitch_xcorr), /* NEON */ + MAY_HAVE_NEON(celt_pitch_xcorr) /* DOTPROD */ }; # endif @@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, celt_pitch_xcorr_c, /* ARMv4 */ celt_pitch_xcorr_c, /* EDSP */ celt_pitch_xcorr_c, /* Media */ - celt_pitch_xcorr_float_neon /* Neon */ + celt_pitch_xcorr_float_neon, /* Neon */ + celt_pitch_xcorr_float_neon /* DOTPROD */ }; # endif # endif /* FIXED_POINT */ @@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( xcorr_kernel_c, /* EDSP */ xcorr_kernel_c, /* Media */ xcorr_kernel_neon_fixed, /* Neon */ + xcorr_kernel_neon_fixed /* DOTPROD */ }; #endif @@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { opus_fft_alloc_arch_c, /* ARMv4 */ opus_fft_alloc_arch_c, /* EDSP */ opus_fft_alloc_arch_c, /* Media */ - opus_fft_alloc_arm_neon /* Neon with NE10 library support */ + opus_fft_alloc_arm_neon, /* Neon with NE10 library support */ + opus_fft_alloc_arm_neon /* DOTPROD with NE10 library support */ }; void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { opus_fft_free_arch_c, /* ARMv4 */ opus_fft_free_arch_c, /* EDSP */ opus_fft_free_arch_c, /* Media */ - opus_fft_free_arm_neon /* Neon with NE10 */ + opus_fft_free_arm_neon, /* Neon with NE10 */ + opus_fft_free_arm_neon /* DOTPROD with NE10 */ }; # endif /* CUSTOM_MODES */ @@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, opus_fft_c, /* ARMv4 */ opus_fft_c, /* EDSP */ opus_fft_c, /* Media */ - opus_fft_neon /* Neon with NE10 */ + opus_fft_neon, /* Neon with NE10 */ + opus_fft_neon /* DOTPROD with NE10 */ }; void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, @@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, opus_ifft_c, /* ARMv4 */ opus_ifft_c, /* EDSP */ opus_ifft_c, /* Media */ - opus_ifft_neon /* Neon with NE10 */ + opus_ifft_neon, /* Neon with NE10 */ + opus_ifft_neon /* DOTPROD with NE10 */ }; void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, @@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, clt_mdct_forward_c, /* ARMv4 */ clt_mdct_forward_c, /* EDSP */ clt_mdct_forward_c, /* Media */ - clt_mdct_forward_neon /* Neon with NE10 */ + clt_mdct_forward_neon, /* Neon with NE10 */ + clt_mdct_forward_neon /* DOTPROD with NE10 */ }; void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, @@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, clt_mdct_backward_c, /* ARMv4 */ clt_mdct_backward_c, /* EDSP */ clt_mdct_backward_c, /* Media */ - clt_mdct_backward_neon /* Neon with NE10 */ + clt_mdct_backward_neon, /* Neon with NE10 */ + clt_mdct_backward_neon /* DOTPROD with NE10 */ }; # endif /* HAVE_ARM_NE10 */ diff --git a/celt/arm/armcpu.c b/celt/arm/armcpu.c index c7d16e6d..6fbfb904 100644 --- a/celt/arm/armcpu.c +++ b/celt/arm/armcpu.c @@ -43,6 +43,7 @@ #define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP) #define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA) #define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON) +#define OPUS_CPU_ARM_DOTPROD_FLAG (1<<OPUS_ARCH_ARM_DOTPROD) #if defined(_MSC_VER) /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ @@ -127,6 +128,11 @@ opus_uint32 opus_cpu_capabilities(void) if(p != NULL && (p[5] == ' ' || p[5] == '\n')) flags |= OPUS_CPU_ARM_NEON_FLAG; # endif +# if defined(OPUS_ARM_MAY_HAVE_DOTPROD) + p = strstr(buf, " asimddp"); + if(p != NULL && (p[8] == ' ' || p[8] == '\n')) + flags |= OPUS_CPU_ARM_DOTPROD_FLAG; +# endif } # endif @@ -144,6 +150,13 @@ opus_uint32 opus_cpu_capabilities(void) # endif } +#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR) + flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG; +# if defined(OPUS_ARM_PRESUME_DOTPROD) + flags |= OPUS_CPU_ARM_DOTPROD_FLAG; +# endif +#endif + fclose(cpuinfo); } return flags; @@ -180,7 +193,13 @@ static int opus_select_arch_impl(void) } arch++; - celt_assert(arch == OPUS_ARCH_ARM_NEON); + if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) { + celt_assert(arch == OPUS_ARCH_ARM_NEON); + return arch; + } + arch++; + + celt_assert(arch == OPUS_ARCH_ARM_DOTPROD); return arch; } diff --git a/celt/arm/armcpu.h b/celt/arm/armcpu.h index 820262ff..6d5803d8 100644 --- a/celt/arm/armcpu.h +++ b/celt/arm/armcpu.h @@ -46,6 +46,12 @@ # define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) # endif +# if defined(OPUS_ARM_MAY_HAVE_DOTPROD) +# define MAY_HAVE_DOTPROD(name) name ## _dotprod +# else +# define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name) +# endif + # if defined(OPUS_ARM_PRESUME_EDSP) # define PRESUME_EDSP(name) name ## _edsp # else @@ -64,6 +70,12 @@ # define PRESUME_NEON(name) PRESUME_MEDIA(name) # endif +# if defined(OPUS_ARM_PRESUME_DOTPROD) +# define PRESUME_DOTPROD(name) name ## _dotprod +# else +# define PRESUME_DOTPROD(name) PRESUME_NEON(name) +# endif + # if defined(OPUS_HAVE_RTCD) int opus_select_arch(void); @@ -71,6 +83,7 @@ int opus_select_arch(void); #define OPUS_ARCH_ARM_EDSP (1) #define OPUS_ARCH_ARM_MEDIA (2) #define OPUS_ARCH_ARM_NEON (3) +#define OPUS_ARCH_ARM_DOTPROD (4) # endif diff --git a/celt/cpu_support.h b/celt/cpu_support.h index fdd9fb64..9f13d8ae 100644 --- a/celt/cpu_support.h +++ b/celt/cpu_support.h @@ -35,13 +35,14 @@ (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) #include "arm/armcpu.h" -/* We currently support 4 ARM variants: +/* We currently support 5 ARM variants: * arch[0] -> ARMv4 * arch[1] -> ARMv5E * arch[2] -> ARMv6 * arch[3] -> NEON + * arch[4] -> NEON+DOTPROD */ -#define OPUS_ARCHMASK 3 +#define OPUS_ARCHMASK 7 #elif defined(OPUS_HAVE_RTCD) && \ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ diff --git a/cmake/OpusSources.cmake b/cmake/OpusSources.cmake index a0259373..74e4eaed 100644 --- a/cmake/OpusSources.cmake +++ b/cmake/OpusSources.cmake @@ -48,6 +48,8 @@ get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd) get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2) get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1) get_opus_sources(DNN_SOURCES_AVX2 lpcnet_sources.mk dnn_sources_avx2) +get_opus_sources(DNN_SOURCES_NEON lpcnet_sources.mk dnn_sources_arm_neon) +get_opus_sources(DNN_SOURCES_DOTPROD lpcnet_sources.mk dnn_sources_arm_dotprod) get_opus_sources(opus_demo_SOURCES Makefile.am opus_demo_sources) get_opus_sources(opus_custom_demo_SOURCES Makefile.am opus_custom_demo_sources) diff --git a/configure.ac b/configure.ac index aa63f841..ed676012 100644 --- a/configure.ac +++ b/configure.ac @@ -334,6 +334,18 @@ AS_IF([test x"${enable_asm}" = x"yes"],[ ) ]) AC_SUBST(OPUS_ARM_MAY_HAVE_NEON) + AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],[ + AC_DEFINE(OPUS_ARM_MAY_HAVE_DOTPROD, 1, + [Define if compiler supports DOTPROD instructions]) + AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], [ + AC_DEFINE(OPUS_ARM_PRESUME_DOTPROD, 1, + [Define if binary requires DOTPROD instruction support]) + asm_optimization="$asm_optimization (DOTPROD)" + ], + [rtcd_support="$rtcd_support (DOTPROD)"] + ) + ]) + AC_SUBST(OPUS_ARM_MAY_HAVE_DOTPROD) dnl Make sure turning on RTCD gets us at least one dnl instruction set. AS_IF([test x"$rtcd_support" != x""], @@ -377,6 +389,7 @@ m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2]) m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1]) m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2]) m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon]) +m4_define([DEFAULT_ARM_DOTPROD_INTR_CFLAGS], ["-march=armv8.2-a+dotprod"]) # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify # -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu), # this option will break things. @@ -394,12 +407,14 @@ AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@d AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@]) AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@]) AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@]) +AC_ARG_VAR([ARM_DOTPROD_INTR_CFLAGS], [C compiler flags to compile ARM DOTPROD intrinsics @<:@default=]DEFAULT_ARM_DOTPROD_INTR_CFLAGS[@:>@]) AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")]) AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")]) AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")]) AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")]) AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])]) +AS_VAR_SET_IF([ARM_DOTPROD_INTR_CFLAGS], [], [AS_VAR_SET([ARM_DOTPROD_INTR_CFLAGS], ["DEFAULT_ARM_DOTPROD_INTR_CFLAGS"])]) AC_DEFUN([OPUS_PATH_NE10], [ @@ -543,6 +558,46 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ intrinsics_support="$intrinsics_support (NEON [Aarch64])" ]) + OPUS_CHECK_INTRINSICS( + [Aarch64 dotprod], + [$ARM_DOTPROD_INTR_CFLAGS], + [OPUS_ARM_MAY_HAVE_DOTPROD], + [OPUS_ARM_PRESUME_DOTPROD], + [[#include <arm_neon.h> + ]], + [[ + static int32x4_t acc; + static int8x16_t a, b; + acc = vdotq_s32(acc, a, b); + ]] + ) + AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"], + [ + OPUS_ARM_DOTPROD_INTR_CFLAGS="$ARM_NEON_DOTPROD_CFLAGS" + AC_SUBST([OPUS_ARM_DOTPROD_INTR_CFLAGS]) + ] + ) + + AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"], + [ + AC_DEFINE([OPUS_ARM_MAY_HAVE_DOTPROD], 1, [Compiler supports Aarch64 DOTPROD Intrinsics]) + intrinsics_support="$intrinsics_support (DOTPROD)" + + AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], + [ + AC_DEFINE([OPUS_ARM_PRESUME_DOTPROD], 1, [Define if binary requires Aarch64 dotprod Intrinsics]) + intrinsics_support="$intrinsics_support (DOTPROD [Aarch64])" + ]) + + AS_IF([test x"$enable_rtcd" != x"no" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"], + [AS_IF([test x"$rtcd_support" = x"no"], + [rtcd_support="ARM (DOTPROD Intrinsics)"], + [rtcd_support="$rtcd_support (DOTPROD Intrinsics)"])]) + + ] + ) + + AS_IF([test x"$intrinsics_support" = x""], [intrinsics_support=no], [intrinsics_support="ARM$intrinsics_support"]) @@ -760,6 +815,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ ]) AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"]) +AM_CONDITIONAL([HAVE_ARM_DOTPROD], + [test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"]) AM_CONDITIONAL([HAVE_ARM_NEON_INTR], [test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"]) AM_CONDITIONAL([HAVE_ARM_NE10], diff --git a/dnn/arm/arm_dnn_map.c b/dnn/arm/arm_dnn_map.c new file mode 100644 index 00000000..e7ffecaf --- /dev/null +++ b/dnn/arm/arm_dnn_map.c @@ -0,0 +1,54 @@ +/* Copyright (c) 2018-2019 Mozilla + 2023 Amazon */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "arm/armcpu.h" +#include "nnet.h" + +#if defined(OPUS_HAVE_RTCD) + +#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD)) + +void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])( + const LinearLayer *linear, + float *out, + const float *in +) = { + compute_linear_c, /* default */ + compute_linear_c, + compute_linear_c, + MAY_HAVE_NEON(compute_linear), /* neon */ + MAY_HAVE_DOTPROD(compute_linear) /* dotprod */ +}; + +#endif + + +#endif diff --git a/dnn/arm/dnn_arm.h b/dnn/arm/dnn_arm.h new file mode 100644 index 00000000..91ca2b53 --- /dev/null +++ b/dnn/arm/dnn_arm.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2011-2019 Mozilla + 2023 Amazon */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef DNN_ARM_H +#define DNN_ARM_H + +#include "cpu_support.h" +#include "opus_types.h" + +void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in); +void compute_linear_neon(const LinearLayer *linear, float *out, const float *in); + + +#if defined(OPUS_ARM_PRESUME_DOTPROD) + +#define OVERRIDE_COMPUTE_LINEAR +#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in)) + +#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD) + +#define OVERRIDE_COMPUTE_LINEAR +#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in)) + +#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) + +extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])( + const LinearLayer *linear, + float *out, + const float *in + ); +#define OVERRIDE_COMPUTE_LINEAR +#define compute_linear(linear, out, in, arch) \ + ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in)) + + +#endif + + + +#endif /* DNN_ARM_H */ diff --git a/dnn/arm/nnet_dotprod.c b/dnn/arm/nnet_dotprod.c new file mode 100644 index 00000000..1354ed33 --- /dev/null +++ b/dnn/arm/nnet_dotprod.c @@ -0,0 +1,38 @@ +/* Copyright (c) 2018-2019 Mozilla + 2023 Amazon */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifndef __ARM_FEATURE_DOTPROD +#error nnet_dotprod.c is being compiled without DOTPROD enabled +#endif + +#define RTCD_ARCH dotprod + +#include "nnet_arch.h" diff --git a/dnn/arm/nnet_neon.c b/dnn/arm/nnet_neon.c new file mode 100644 index 00000000..fb636f85 --- /dev/null +++ b/dnn/arm/nnet_neon.c @@ -0,0 +1,38 @@ +/* Copyright (c) 2018-2019 Mozilla + 2023 Amazon */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if !(defined(__ARM_NEON__) || defined(__ARM_NEON)) +#error nnet_neon.c is being compiled without Neon enabled +#endif + +#define RTCD_ARCH neon + +#include "nnet_arch.h" @@ -191,6 +191,10 @@ void compute_activation_c(float *output, const float *input, int N, int activati void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation); +#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +#include "arm/dnn_arm.h" +#endif + #if defined(OPUS_X86_MAY_HAVE_SSE2) #include "x86/dnn_x86.h" #endif diff --git a/lpcnet_headers.mk b/lpcnet_headers.mk index d3aa1516..da610ca1 100644 --- a/lpcnet_headers.mk +++ b/lpcnet_headers.mk @@ -14,7 +14,8 @@ dnn/vec_neon.h \ dnn/pitchdnn.h \ dnn/pitchdnn_data.h \ dnn/x86/dnn_x86.h \ -dnn/nnet_arch.h +dnn/nnet_arch.h \ +dnn/arm/dnn_arm.h DRED_HEAD = \ silk/dred_coding.h \ diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk index ee3d79fd..9b8863ad 100644 --- a/lpcnet_sources.mk +++ b/lpcnet_sources.mk @@ -27,3 +27,7 @@ DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c DNN_SOURCES_SSE2 = dnn/x86/nnet_sse2.c + +DNN_SOURCES_ARM_RTCD = dnn/arm/arm_dnn_map.c +DNN_SOURCES_DOTPROD = dnn/arm/nnet_dotprod.c +DNN_SOURCES_NEON = dnn/arm/nnet_neon.c |