diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-28 00:44:11 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-11-28 00:44:11 +0300 |
commit | 7cc30ec6817dec403ec98f1e0da30dfc7473f413 (patch) | |
tree | 2f6c91f15ed0575f79b1860de010b5ea7311a11a | |
parent | d4506af5a9309dda4f798c70ce38dd95632e9b8d (diff) |
Force vectorization for DNN primitives
Avoids having to write intrinsics for simple loops
-rw-r--r-- | dnn/nnet_arch.h | 11 |
1 file changed, 11 insertions, 0 deletions
```diff
diff --git a/dnn/nnet_arch.h b/dnn/nnet_arch.h
index 3c53e619..b61ebf54 100644
--- a/dnn/nnet_arch.h
+++ b/dnn/nnet_arch.h
@@ -38,6 +38,13 @@
 
 #define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
 
+/* Force vectorization on for DNN code because some of the loops rely on
+   compiler vectorization rather than explicitly using intrinsics. */
+#ifdef __GNUC__
+#pragma GCC push_options
+#pragma GCC optimize("tree-vectorize")
+#endif
+
 
 #define MAX_ACTIVATIONS (4096)
 
@@ -216,4 +223,8 @@ void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem,
    }
 }
 
+#ifdef __GNUC__
+#pragma GCC pop_options
+#endif
+
 #endif
```