Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/celt
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@amazon.com>2023-11-23 03:05:52 +0300
committerJean-Marc Valin <jmvalin@amazon.com>2023-11-25 11:15:51 +0300
commit5c3795b2879108f897d465f36885408d5325b77b (patch)
treef62640891900a80083e7fea65c8a90392e2dcfdf /celt
parent984f35b313d57280e3e1b108ba3418e7e6232e22 (diff)
Adding dotprod instruction to ARM rtcd
Used for DNN matrix multiplies
Diffstat (limited to 'celt')
-rw-r--r--celt/arm/arm_celt_map.c31
-rw-r--r--celt/arm/armcpu.c21
-rw-r--r--celt/arm/armcpu.h13
-rw-r--r--celt/cpu_support.h5
4 files changed, 57 insertions, 13 deletions
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index ca988b66..cbaea495 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c
celt_inner_prod_c, /* ARMv4 */
celt_inner_prod_c, /* EDSP */
celt_inner_prod_c, /* Media */
- celt_inner_prod_neon /* NEON */
+ celt_inner_prod_neon,/* NEON */
+ celt_inner_prod_neon /* DOTPROD */
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
@@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o
dual_inner_prod_c, /* ARMv4 */
dual_inner_prod_c, /* EDSP */
dual_inner_prod_c, /* Media */
- dual_inner_prod_neon /* NEON */
+ dual_inner_prod_neon,/* NEON */
+ dual_inner_prod_neon /* DOTPROD */
};
# endif
@@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
- MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
+ MAY_HAVE_NEON(celt_pitch_xcorr), /* NEON */
+ MAY_HAVE_NEON(celt_pitch_xcorr) /* DOTPROD */
};
# endif
@@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
- celt_pitch_xcorr_float_neon /* Neon */
+ celt_pitch_xcorr_float_neon, /* Neon */
+ celt_pitch_xcorr_float_neon /* DOTPROD */
};
# endif
# endif /* FIXED_POINT */
@@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
+ xcorr_kernel_neon_fixed /* DOTPROD */
};
#endif
@@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
- opus_fft_alloc_arm_neon /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon, /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon /* DOTPROD with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
- opus_fft_free_arm_neon /* Neon with NE10 */
+ opus_fft_free_arm_neon, /* Neon with NE10 */
+ opus_fft_free_arm_neon /* DOTPROD with NE10 */
};
# endif /* CUSTOM_MODES */
@@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
- opus_fft_neon /* Neon with NE10 */
+ opus_fft_neon, /* Neon with NE10 */
+ opus_fft_neon /* DOTPROD with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
- opus_ifft_neon /* Neon with NE10 */
+ opus_ifft_neon, /* Neon with NE10 */
+ opus_ifft_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
- clt_mdct_forward_neon /* Neon with NE10 */
+ clt_mdct_forward_neon, /* Neon with NE10 */
+ clt_mdct_forward_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
- clt_mdct_backward_neon /* Neon with NE10 */
+ clt_mdct_backward_neon, /* Neon with NE10 */
+ clt_mdct_backward_neon /* DOTPROD with NE10 */
};
# endif /* HAVE_ARM_NE10 */
diff --git a/celt/arm/armcpu.c b/celt/arm/armcpu.c
index c7d16e6d..6fbfb904 100644
--- a/celt/arm/armcpu.c
+++ b/celt/arm/armcpu.c
@@ -43,6 +43,7 @@
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
+#define OPUS_CPU_ARM_DOTPROD_FLAG (1<<OPUS_ARCH_ARM_DOTPROD)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@@ -127,6 +128,11 @@ opus_uint32 opus_cpu_capabilities(void)
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON_FLAG;
# endif
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+ p = strstr(buf, " asimddp");
+ if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
}
# endif
@@ -144,6 +150,13 @@ opus_uint32 opus_cpu_capabilities(void)
# endif
}
+#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
+ flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
+#endif
+
fclose(cpuinfo);
}
return flags;
@@ -180,7 +193,13 @@ static int opus_select_arch_impl(void)
}
arch++;
- celt_assert(arch == OPUS_ARCH_ARM_NEON);
+ if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
+ celt_assert(arch == OPUS_ARCH_ARM_NEON);
+ return arch;
+ }
+ arch++;
+
+ celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
return arch;
}
diff --git a/celt/arm/armcpu.h b/celt/arm/armcpu.h
index 820262ff..6d5803d8 100644
--- a/celt/arm/armcpu.h
+++ b/celt/arm/armcpu.h
@@ -46,6 +46,12 @@
# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
# endif
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+# define MAY_HAVE_DOTPROD(name) name ## _dotprod
+# else
+# define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
+# endif
+
# if defined(OPUS_ARM_PRESUME_EDSP)
# define PRESUME_EDSP(name) name ## _edsp
# else
@@ -64,6 +70,12 @@
# define PRESUME_NEON(name) PRESUME_MEDIA(name)
# endif
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+# define PRESUME_DOTPROD(name) name ## _dotprod
+# else
+# define PRESUME_DOTPROD(name) PRESUME_NEON(name)
+# endif
+
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
@@ -71,6 +83,7 @@ int opus_select_arch(void);
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
+#define OPUS_ARCH_ARM_DOTPROD (4)
# endif
diff --git a/celt/cpu_support.h b/celt/cpu_support.h
index fdd9fb64..9f13d8ae 100644
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -35,13 +35,14 @@
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h"
-/* We currently support 4 ARM variants:
+/* We currently support 5 ARM variants:
* arch[0] -> ARMv4
* arch[1] -> ARMv5E
* arch[2] -> ARMv6
* arch[3] -> NEON
+ * arch[4] -> NEON+DOTPROD
*/
-#define OPUS_ARCHMASK 3
+#define OPUS_ARCHMASK 7
#elif defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \