diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-28 02:08:20 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-28 02:08:20 +0300 |
commit | c143b72c4c321b5907cbe839079efd3b2455ac9a (patch) | |
tree | a87661cc2e7f31292b8de7655375a67632a2c502 | |
parent | ee1bb69f2d7c086df72a01632b0563c92896e8b1 (diff) |
Enabling DNN optimizations for ARMv7
Adds RTCD tables for compute_activation() and compute_conv2d()
-rw-r--r-- | dnn/arm/arm_dnn_map.c | 34 | ||||
-rw-r--r-- | dnn/arm/dnn_arm.h | 40 |
2 files changed, 74 insertions, 0 deletions
diff --git a/dnn/arm/arm_dnn_map.c b/dnn/arm/arm_dnn_map.c
index e7ffecaf..98a2a312 100644
--- a/dnn/arm/arm_dnn_map.c
+++ b/dnn/arm/arm_dnn_map.c
@@ -50,5 +50,39 @@ void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
 
 #endif
 
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
+
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+         float *output,
+         const float *input,
+         int N,
+         int activation
+) = {
+  compute_activation_c, /* default */
+  compute_activation_c,
+  compute_activation_c,
+  MAY_HAVE_NEON(compute_activation), /* neon */
+  MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
+};
+
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+         const Conv2dLayer *conv,
+         float *out,
+         float *mem,
+         const float *in,
+         int height,
+         int hstride,
+         int activation
+) = {
+  compute_conv2d_c, /* default */
+  compute_conv2d_c,
+  compute_conv2d_c,
+  MAY_HAVE_NEON(compute_conv2d), /* neon */
+  MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
+};
+
+
+#endif
+
 #endif
 
diff --git a/dnn/arm/dnn_arm.h b/dnn/arm/dnn_arm.h
index 91ca2b53..d7ac7452 100644
--- a/dnn/arm/dnn_arm.h
+++ b/dnn/arm/dnn_arm.h
@@ -34,6 +34,11 @@
 void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
 void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
 
+void compute_activation_neon(float *output, const float *input, int N, int activation);
+void compute_activation_dotprod(float *output, const float *input, int N, int activation);
+
+void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
 
 #if defined(OPUS_ARM_PRESUME_DOTPROD)
 
@@ -59,6 +64,41 @@ extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
 
 #endif
 
+#if defined(OPUS_ARM_PRESUME_NEON)
+
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+                    float *output,
+                    const float *input,
+                    int N,
+                    int activation
+                    );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+    ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
+
+
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+                    const Conv2dLayer *conv,
+                    float *out,
+                    float *mem,
+                    const float *in,
+                    int height,
+                    int hstride,
+                    int activation
+                    );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+    ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
+#endif
 
 #endif /* DNN_ARM_H */
 