diff options
-rw-r--r-- | dnn/arm/arm_dnn_map.c | 34 |
-rw-r--r-- | dnn/arm/dnn_arm.h | 40 |
2 files changed, 74 insertions, 0 deletions
Patch summary: adds ARM run-time dispatch tables (DNN_COMPUTE_ACTIVATION_IMPL,
DNN_COMPUTE_CONV2D_IMPL) plus the matching compute_activation()/compute_conv2d()
override macros. When OPUS_ARM_PRESUME_NEON is defined the macros call the
*_neon functions directly; otherwise, with OPUS_HAVE_RTCD, they index the
tables with (arch & OPUS_ARCHMASK).

NOTE(review): this patch text was recovered from a copy in which the whole diff
had been collapsed onto two lines. The '+' line counts match the hunk headers,
but the original indentation and the exact position of blank context lines
could not be recovered; they were reconstructed to satisfy the hunk line
counts. Verify against the target tree before applying.

diff --git a/dnn/arm/arm_dnn_map.c b/dnn/arm/arm_dnn_map.c
index e7ffecaf..98a2a312 100644
--- a/dnn/arm/arm_dnn_map.c
+++ b/dnn/arm/arm_dnn_map.c
@@ -50,5 +50,39 @@ void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
 
 #endif
 
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
+
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+    float *output,
+    const float *input,
+    int N,
+    int activation
+) = {
+  compute_activation_c, /* default */
+  compute_activation_c,
+  compute_activation_c,
+  MAY_HAVE_NEON(compute_activation), /* neon */
+  MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
+};
+
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+    const Conv2dLayer *conv,
+    float *out,
+    float *mem,
+    const float *in,
+    int height,
+    int hstride,
+    int activation
+) = {
+  compute_conv2d_c, /* default */
+  compute_conv2d_c,
+  compute_conv2d_c,
+  MAY_HAVE_NEON(compute_conv2d), /* neon */
+  MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
+};
+
+
+#endif
+
 
 #endif
diff --git a/dnn/arm/dnn_arm.h b/dnn/arm/dnn_arm.h
index 91ca2b53..d7ac7452 100644
--- a/dnn/arm/dnn_arm.h
+++ b/dnn/arm/dnn_arm.h
@@ -34,6 +34,11 @@
 
 void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
 void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_neon(float *output, const float *input, int N, int activation);
+void compute_activation_dotprod(float *output, const float *input, int N, int activation);
+
+void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
 
 #if defined(OPUS_ARM_PRESUME_DOTPROD)
 
@@ -59,6 +64,41 @@ extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
 
 #endif
 
+#if defined(OPUS_ARM_PRESUME_NEON)
+
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+    float *output,
+    const float *input,
+    int N,
+    int activation
+    );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+    ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
+
+
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+    const Conv2dLayer *conv,
+    float *out,
+    float *mem,
+    const float *in,
+    int height,
+    int hstride,
+    int activation
+    );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+    ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
+#endif
 
 
 #endif /* DNN_ARM_H */