Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2020-03-08 22:22:24 +0300
committer Kenneth Heafield <github@kheafield.com> 2020-03-08 22:22:24 +0300
commit 261a5fbcf7558fc3c2ac22b33fe0c2930d440fc3 (patch)
tree f912e6a3cec9691e509d3e63eb3672ae6a357d2c
parent 1323f245adff16b351b61310625b743b22e90059 (diff)
Change to INTGEMM_COMPILER_SUPPORTS_AVX512BW and update test
-rw-r--r-- CMakeLists.txt 12
-rw-r--r-- avx512_gemm.h 2
-rw-r--r-- benchmarks/benchmark.cc 6
-rw-r--r-- benchmarks/benchmark_quantizer.cc 2
-rw-r--r-- benchmarks/biasmultiply.cc 2
-rw-r--r-- callbacks.h 2
-rw-r--r-- compile_test_avx512.cc 16
-rw-r--r-- compile_test_avx512bw.cc 18
-rw-r--r-- interleave.h 6
-rw-r--r-- intgemm.h 8
-rw-r--r-- intgemm_config.h.in 2
-rw-r--r-- intrinsics.h 2
-rw-r--r-- kernels.h 2
-rw-r--r-- multiply.h 4
-rw-r--r-- test/add127_test.cc 10
-rw-r--r-- test/kernels/add_bias_test.cc 2
-rw-r--r-- test/kernels/bitwise_not_test.cc 2
-rw-r--r-- test/kernels/downcast_test.cc 6
-rw-r--r-- test/kernels/exp_test.cc 2
-rw-r--r-- test/kernels/floor_test.cc 2
-rw-r--r-- test/kernels/multiply_sat_test.cc 2
-rw-r--r-- test/kernels/multiply_test.cc 2
-rw-r--r-- test/kernels/quantize_test.cc 2
-rw-r--r-- test/kernels/relu_test.cc 2
-rw-r--r-- test/kernels/rescale_test.cc 2
-rw-r--r-- test/kernels/sigmoid_test.cc 2
-rw-r--r-- test/kernels/tanh_test.cc 2
-rw-r--r-- test/kernels/unquantize_test.cc 2
-rw-r--r-- test/kernels/upcast_test.cc 6
-rw-r--r-- test/kernels/write_test.cc 2
-rw-r--r-- test/multiply_test.cc 8
-rw-r--r-- test/prepare_b_quantized_transposed.cc 2
-rw-r--r-- test/prepare_b_transposed.cc 2
-rw-r--r-- test/quantize_test.cc 2
34 files changed, 73 insertions, 73 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f29ccff..bbb9b83 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,15 +16,13 @@ else()
add_compile_options(-Wall -Wextra -pedantic -Werror -Wno-unknown-pragmas)
endif()
-# Check if compiler supports AVX512
-try_compile(INTGEMM_COMPILER_SUPPORTS_AVX512
+# Check if compiler supports AVX512BW
+try_compile(INTGEMM_COMPILER_SUPPORTS_AVX512BW
${CMAKE_CURRENT_BINARY_DIR}/compile_tests
- ${CMAKE_CURRENT_SOURCE_DIR}/compile_test_avx512.cc
- #Hack: pass compiler arguments as definitions because the test code overrides CXX_FLAGS :'(
- COMPILE_DEFINITIONS -mavx512f -mavx512bw -mavx512dq)
+ ${CMAKE_CURRENT_SOURCE_DIR}/compile_test_avx512bw.cc)
-if(NOT INTGEMM_COMPILER_SUPPORTS_AVX512)
- message(WARNING "${Orange}Not building AVX512-based multiplication because your compiler is too old.\nFor details rerun cmake with --debug-trycompile then try to build in compile_tests/CMakeFiles/CMakeTmp.${ColourReset}")
+if(NOT INTGEMM_COMPILER_SUPPORTS_AVX512BW)
+ message(WARNING "${Orange}Not building AVX512BW-based multiplication because your compiler is too old.\nFor details rerun cmake with --debug-trycompile then try to build in compile_tests/CMakeFiles/CMakeTmp.${ColourReset}")
endif()
try_compile(INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
diff --git a/avx512_gemm.h b/avx512_gemm.h
index 267dc6d..cdbfff5 100644
--- a/avx512_gemm.h
+++ b/avx512_gemm.h
@@ -2,7 +2,7 @@
#include "intgemm_config.h"
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
#include "interleave.h"
#include "kernels.h"
diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc
index 26b0ac5..6063d5c 100644
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@@ -196,7 +196,7 @@ int main(int, char ** argv) {
RunAll<AVX2_16bit>(matrices, end, stats.avx2_16bit);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
std::cerr << "AVX512 8bit, 100 samples..." << std::endl;
for (int samples = 0; samples < kSamples; ++samples) {
RandomMatrices *end = (samples < 4) ? matrices_end : full_sample;
@@ -225,7 +225,7 @@ int main(int, char ** argv) {
std::cout << "Multiply\t" << matrices[i].A_rows << '\t' << matrices[i].width << '\t' << matrices[i].B_cols << '\t' << "Samples=" << (kOutlierThreshold * stats.sse2_16bit[i].size()) << '\n';
Print<SSSE3_8bit>(stats.ssse3_8bit, i);
Print<AVX2_8bit>(stats.avx2_8bit, i);
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
Print<AVX512_8bit>(stats.avx512_8bit, i);
#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
@@ -233,7 +233,7 @@ int main(int, char ** argv) {
#endif
Print<SSE2_16bit>(stats.sse2_16bit, i);
Print<AVX2_16bit>(stats.avx2_16bit, i);
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
Print<AVX512_16bit>(stats.avx512_16bit, i);
#endif
}
diff --git a/benchmarks/benchmark_quantizer.cc b/benchmarks/benchmark_quantizer.cc
index b9b0782..eb96499 100644
--- a/benchmarks/benchmark_quantizer.cc
+++ b/benchmarks/benchmark_quantizer.cc
@@ -36,7 +36,7 @@ int main() {
}
QuantizerBench<intgemm::SSSE3_8bit>(in.begin(), out.begin(), count);
QuantizerBench<intgemm::AVX2_8bit>(in.begin(), out.begin(), count);
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
QuantizerBench<intgemm::AVX512_8bit>(in.begin(), out.begin(), count);
#endif
}
diff --git a/benchmarks/biasmultiply.cc b/benchmarks/biasmultiply.cc
index 220422a..515536e 100644
--- a/benchmarks/biasmultiply.cc
+++ b/benchmarks/biasmultiply.cc
@@ -197,7 +197,7 @@ int main(int argc, char ** argv) {
}
std::cout << repeat << " iterations of Shifted AVX2 took: " << newTimeAVX2.count() << " seconds." << std::endl;
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
if (kCPU < CPUType::AVX512BW) return 0;
std::chrono::duration<double> oldAVX512_nobias = testOld_nobias<AVX512_8bit>(1, 64, 8);
for (int i = 0; i<repeat; i++) {
diff --git a/callbacks.h b/callbacks.h
index c8a29df..24f9009 100644
--- a/callbacks.h
+++ b/callbacks.h
@@ -18,7 +18,7 @@
#include "callbacks/implementations.inl"
#undef CALLBACKS_THIS_IS_AVX2
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
#define CALLBACKS_THIS_IS_AVX512BW
#include "callbacks/implementations.inl"
#undef CALLBACKS_THIS_IS_AVX512BW
diff --git a/compile_test_avx512.cc b/compile_test_avx512.cc
deleted file mode 100644
index f56cc12..0000000
--- a/compile_test_avx512.cc
+++ /dev/null
@@ -1,16 +0,0 @@
-// Some compilers don't have AVX512BW support. Test for them.
-#include <immintrin.h>
-
-#include <iostream>
-
-int main() {
- // AVX512F
- __m512i value = _mm512_set1_epi32(1);
- // AVX512BW
- value = _mm512_maddubs_epi16(value, value);
-
- __m256i value2 = _mm256_set1_epi8(1);
- // AVX512DQ
- value = _mm512_inserti32x8(value, value2, 1);
- return *(int*)&value && __builtin_cpu_supports("avx512f");
-}
diff --git a/compile_test_avx512bw.cc b/compile_test_avx512bw.cc
new file mode 100644
index 0000000..8d07551
--- /dev/null
+++ b/compile_test_avx512bw.cc
@@ -0,0 +1,18 @@
+// Some compilers don't have AVX512BW support. Test for them.
+#include <immintrin.h>
+
+#if defined __INTEL_COMPILER
+#define INTGEMM_AVX512BW __attribute__ ((target ("avx512f")))
+#else
+#define INTGEMM_AVX512BW __attribute__ ((target ("avx512bw")))
+#endif
+
+INTGEMM_AVX512BW int Test() {
+ // AVX512BW
+ __m512i value = _mm512_set1_epi32(1);
+ value = _mm512_maddubs_epi16(value, value);
+ return *(int*)&value;
+}
+
+int main() {
+}
diff --git a/interleave.h b/interleave.h
index 9bd4fdd..231be46 100644
--- a/interleave.h
+++ b/interleave.h
@@ -28,7 +28,7 @@ INTGEMM_INTERLEAVE_N(target, type, 64)
INTGEMM_INTERLEAVE(INTGEMM_SSE2, __m128i)
INTGEMM_INTERLEAVE(INTGEMM_AVX2, __m256i)
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
INTGEMM_INTERLEAVE(INTGEMM_AVX512BW, __m512i)
#endif
@@ -44,7 +44,7 @@ target static inline void Swap(Register &a, Register &b) { \
INTGEMM_SWAP(INTGEMM_SSE2, __m128i)
INTGEMM_SWAP(INTGEMM_AVX2, __m256i)
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_SWAP(INTGEMM_AVX512BW, __m512i)
#endif
@@ -97,7 +97,7 @@ target static inline void Transpose16InLane(Register &r0, Register &r1, Register
INTGEMM_TRANSPOSE16(INTGEMM_SSE2, __m128i)
INTGEMM_TRANSPOSE16(INTGEMM_AVX2, __m256i)
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_TRANSPOSE16(INTGEMM_AVX512BW, __m512i)
#endif
diff --git a/intgemm.h b/intgemm.h
index ba86e53..6d4d95f 100644
--- a/intgemm.h
+++ b/intgemm.h
@@ -51,7 +51,7 @@
#include "avx512_gemm.h"
#include "avx512vnni_gemm.h"
-#if defined(__GNUC__) && defined(INTGEMM_COMPILER_SUPPORTS_AVX512)
+#if defined(__GNUC__) && defined(INTGEMM_COMPILER_SUPPORTS_AVX512BW)
#include "cpuid.h"
#endif
@@ -119,7 +119,7 @@ struct Unsupported_8bit {
constexpr static const char *const kName = "8-bit Unsupported";
};
-#ifndef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifndef INTGEMM_COMPILER_SUPPORTS_AVX512BW
// These won't ever be called in this capacity, but it does let the code below compile.
typedef Unsupported_16bit AVX512_16bit;
typedef Unsupported_8bit AVX512_8bit;
@@ -136,7 +136,7 @@ typedef Unsupported_8bit AVX512VNNI_8bit;
#endif
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
// gcc 5.4.0 bizarrely supports avx512bw targets but not __builtin_cpu_supports("avx512bw"). So implement it manually.
inline bool CheckAVX512BW() {
__builtin_cpu_init ();
@@ -185,7 +185,7 @@ template <class T> T ChooseCPU(T
return avx512vnni;
}
#endif
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
if (CheckAVX512BW()) {
return avx512bw;
}
diff --git a/intgemm_config.h.in b/intgemm_config.h.in
index 11be91c..920e9ae 100644
--- a/intgemm_config.h.in
+++ b/intgemm_config.h.in
@@ -1,4 +1,4 @@
#pragma once
-#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX512
+#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX512BW
#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
diff --git a/intrinsics.h b/intrinsics.h
index 5fe3159..d662472 100644
--- a/intrinsics.h
+++ b/intrinsics.h
@@ -381,7 +381,7 @@ INTGEMM_AVX2 static inline __m256i xor_si(__m256i a, __m256i b) {
* AVX512
*
*/
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
INTGEMM_AVX512BW static inline __m512i abs_epi8(__m512i arg) {
return _mm512_abs_epi8(arg);
diff --git a/kernels.h b/kernels.h
index ef63fec..84631b5 100644
--- a/kernels.h
+++ b/kernels.h
@@ -16,7 +16,7 @@
#include "kernels/implementations.inl"
#undef KERNELS_THIS_IS_AVX2
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
#define KERNELS_THIS_IS_AVX512BW
#include "kernels/implementations.inl"
#undef KERNELS_THIS_IS_AVX512BW
diff --git a/multiply.h b/multiply.h
index a9766d3..a313c16 100644
--- a/multiply.h
+++ b/multiply.h
@@ -36,7 +36,7 @@ INTGEMM_AVX2 static inline __m256i PermuteSummer(__m256i pack0123, __m256i pack4
return _mm256_add_epi32(rev, blended);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_AVX512BW static inline __m256i PermuteSummer(__m512i pack0123, __m512i pack4567) {
// Form [0th 128-bit register of pack0123, 0st 128-bit register of pack4567, 2nd 128-bit register of pack0123, 2nd 128-bit register of pack4567]
@@ -104,7 +104,7 @@ target inline Register Pack0123(Register sum0, Register sum1, Register sum2, Reg
INTGEMM_PACK0123(INTGEMM_SSE2, __m128i)
INTGEMM_PACK0123(INTGEMM_AVX2, __m256i)
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_PACK0123(INTGEMM_AVX512BW, __m512i)
#endif
diff --git a/test/add127_test.cc b/test/add127_test.cc
index c271a0c..d959b14 100644
--- a/test/add127_test.cc
+++ b/test/add127_test.cc
@@ -295,7 +295,7 @@ TEST_CASE("PrepareBias AVX2", "[Add127]") {
TEST_CASE("PrepareBias AVX512F", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+ #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestPrepareBias<AVX512_8bit>(256,256);
TestPrepareBias<AVX512_8bit>(2048,256);
TestPrepareBias<AVX512_8bit>(512,512);
@@ -321,7 +321,7 @@ TEST_CASE("PrepareA AVX2", "[Add127]") {
TEST_CASE("PrepareA AVX512F", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+ #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestPrepareA<AVX512_8bit>(64,64);
TestPrepareA<AVX512_8bit>(256,256);
TestPrepareA<AVX512_8bit>(512,512);
@@ -352,7 +352,7 @@ TEST_CASE ("Multiply AVX2 8bit Shift with bias", "[Add127]") {
TestMultiplyBiasNew<AVX2_8bit>(248, 256, 256, 0.48, 0.64, 0.16, 0.15);
TestMultiplyBiasNew<AVX2_8bit>(200, 256, 256, 0.55, 0.74, 0.17, 0.16);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512F 8bit Shift with bias", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
TestMultiplyBiasNew<AVX512_8bit>(1, 64, 8, 0.0001, 0.05, 0.03, 0.001);
@@ -400,7 +400,7 @@ TEST_CASE ("Multiply AVX2 8bit Shift vs nonshift", "[Add127]") {
TestMultiplyShiftNonShift<AVX2_8bit>(248, 256, 256, 0.0001, 0.64, 0.16, 0.0001);
TestMultiplyShiftNonShift<AVX2_8bit>(200, 256, 256, 0.0001, 0.74, 0.17, 0.0001);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512F 8bit Shift vs nonshift", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
TestMultiplyShiftNonShift<AVX512_8bit>(1, 64, 8, 0.0001, 0.05, 0.03, 0.001);
@@ -448,7 +448,7 @@ TEST_CASE ("Multiply AVX2 8bit Shift vs Int", "[Add127]") {
TestMultiplyShiftInt<AVX2_8bit>(248, 256, 256, 0.0001f, 0.64, 0.16, 0.0001f);
TestMultiplyShiftInt<AVX2_8bit>(200, 256, 256, 0.0001f, 0.74, 0.17, 0.0001f);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512F 8bit Shift vs Int", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
TestMultiplyShiftInt<AVX512_8bit>(1, 64, 8, 0.0001f, 0.05, 0.03, 0.0001f);
diff --git a/test/kernels/add_bias_test.cc b/test/kernels/add_bias_test.cc
index 7b10b56..2dd4e3d 100644
--- a/test/kernels/add_bias_test.cc
+++ b/test/kernels/add_bias_test.cc
@@ -48,7 +48,7 @@ KERNEL_TEST_CASE("add_bias/int AVX2") { return kernel_add_bias_test<CPUType::AVX
KERNEL_TEST_CASE("add_bias/float AVX2") { return kernel_add_bias_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("add_bias/double AVX2") { return kernel_add_bias_test<CPUType::AVX2, double>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_add_bias_test<CPUType::AVX512BW, int8_t>();
template INTGEMM_AVX512BW void kernel_add_bias_test<CPUType::AVX512BW, int16_t>();
template INTGEMM_AVX512BW void kernel_add_bias_test<CPUType::AVX512BW, int>();
diff --git a/test/kernels/bitwise_not_test.cc b/test/kernels/bitwise_not_test.cc
index 02b700b..309be7e 100644
--- a/test/kernels/bitwise_not_test.cc
+++ b/test/kernels/bitwise_not_test.cc
@@ -30,7 +30,7 @@ KERNEL_TEST_CASE("bitwise_not SSE2") { return kernel_bitwise_not_test<CPUType::S
template INTGEMM_AVX2 void kernel_bitwise_not_test<CPUType::AVX2>();
KERNEL_TEST_CASE("bitwise_not AVX2") { return kernel_bitwise_not_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_bitwise_not_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("bitwise_not AVX512BW") { return kernel_bitwise_not_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/downcast_test.cc b/test/kernels/downcast_test.cc
index 5ecc084..d3261c7 100644
--- a/test/kernels/downcast_test.cc
+++ b/test/kernels/downcast_test.cc
@@ -32,7 +32,7 @@ KERNEL_TEST_CASE("downcast32to8 SSE2") { return kernel_downcast32to8_test<CPUTyp
template INTGEMM_AVX2 void kernel_downcast32to8_test<CPUType::AVX2>();
KERNEL_TEST_CASE("downcast32to8 AVX2") { return kernel_downcast32to8_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_downcast32to8_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("downcast32to8 AVX512BW") { return kernel_downcast32to8_test<CPUType::AVX512BW>(); }
#endif
@@ -62,7 +62,7 @@ KERNEL_TEST_CASE("downcast32to16 SSE2") { return kernel_downcast32to16_test<CPUT
template INTGEMM_AVX2 void kernel_downcast32to16_test<CPUType::AVX2>();
KERNEL_TEST_CASE("downcast32to16 AVX2") { return kernel_downcast32to16_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_downcast32to16_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("downcast32to16 AVX512BW") { return kernel_downcast32to16_test<CPUType::AVX512BW>(); }
#endif
@@ -92,7 +92,7 @@ KERNEL_TEST_CASE("downcast16to8 SSE2") { return kernel_downcast16to8_test<CPUTyp
template INTGEMM_AVX2 void kernel_downcast16to8_test<CPUType::AVX2>();
KERNEL_TEST_CASE("downcast16to8 AVX2") { return kernel_downcast16to8_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_downcast16to8_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("downcast16to8 AVX512BW") { return kernel_downcast16to8_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/exp_test.cc b/test/kernels/exp_test.cc
index d54f2ca..cf85562 100644
--- a/test/kernels/exp_test.cc
+++ b/test/kernels/exp_test.cc
@@ -27,7 +27,7 @@ void kernel_exp_approx_taylor_test() {
template INTGEMM_AVX2 void kernel_exp_approx_taylor_test<CPUType::AVX2>();
KERNEL_TEST_CASE("exp_approx_taylor AVX2") { return kernel_exp_approx_taylor_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_exp_approx_taylor_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("exp_approx_taylor AVX512BW") { return kernel_exp_approx_taylor_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/floor_test.cc b/test/kernels/floor_test.cc
index 10914a3..365d16d 100644
--- a/test/kernels/floor_test.cc
+++ b/test/kernels/floor_test.cc
@@ -30,7 +30,7 @@ KERNEL_TEST_CASE("floor SSE2") { return kernel_floor_test<CPUType::SSE2>(); }
template INTGEMM_AVX2 void kernel_floor_test<CPUType::AVX2>();
KERNEL_TEST_CASE("floor AVX2") { return kernel_floor_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_floor_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("floor AVX512BW") { return kernel_floor_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/multiply_sat_test.cc b/test/kernels/multiply_sat_test.cc
index 87fc09a..36a9b9f 100644
--- a/test/kernels/multiply_sat_test.cc
+++ b/test/kernels/multiply_sat_test.cc
@@ -41,7 +41,7 @@ template INTGEMM_AVX2 void kernel_multiply_sat_test<CPUType::AVX2, int16_t>();
KERNEL_TEST_CASE("multiply_sat/int8 AVX2") { return kernel_multiply_sat_test<CPUType::AVX2, int8_t>(); }
KERNEL_TEST_CASE("multiply_sat/int16 AVX2") { return kernel_multiply_sat_test<CPUType::AVX2, int16_t>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_multiply_sat_test<CPUType::AVX512BW, int8_t>();
template INTGEMM_AVX512BW void kernel_multiply_sat_test<CPUType::AVX512BW, int16_t>();
KERNEL_TEST_CASE("multiply_sat/int8 AVX512BW") { return kernel_multiply_sat_test<CPUType::AVX512BW, int8_t>(); }
diff --git a/test/kernels/multiply_test.cc b/test/kernels/multiply_test.cc
index 0eea965..30f1640 100644
--- a/test/kernels/multiply_test.cc
+++ b/test/kernels/multiply_test.cc
@@ -48,7 +48,7 @@ KERNEL_TEST_CASE("multiply/int AVX2") { return kernel_multiply_test<CPUType::AVX
KERNEL_TEST_CASE("multiply/float AVX2") { return kernel_multiply_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("multiply/double AVX2") { return kernel_multiply_test<CPUType::AVX2, double>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_multiply_test<CPUType::AVX512BW, int8_t>();
template INTGEMM_AVX512BW void kernel_multiply_test<CPUType::AVX512BW, int16_t>();
template INTGEMM_AVX512BW void kernel_multiply_test<CPUType::AVX512BW, int>();
diff --git a/test/kernels/quantize_test.cc b/test/kernels/quantize_test.cc
index c9eae0a..07bfe21 100644
--- a/test/kernels/quantize_test.cc
+++ b/test/kernels/quantize_test.cc
@@ -31,7 +31,7 @@ KERNEL_TEST_CASE("quantize SSE2") { return kernel_quantize_test<CPUType::SSE2>()
template INTGEMM_AVX2 void kernel_quantize_test<CPUType::AVX2>();
KERNEL_TEST_CASE("quantize AVX2") { return kernel_quantize_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_quantize_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("quantize AVX512BW") { return kernel_quantize_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/relu_test.cc b/test/kernels/relu_test.cc
index 25a212a..c291dea 100644
--- a/test/kernels/relu_test.cc
+++ b/test/kernels/relu_test.cc
@@ -46,7 +46,7 @@ KERNEL_TEST_CASE("relu/int AVX2") { return kernel_relu_test<CPUType::AVX2, int>(
KERNEL_TEST_CASE("relu/float AVX2") { return kernel_relu_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("relu/double AVX2") { return kernel_relu_test<CPUType::AVX2, double>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_relu_test<CPUType::AVX512BW, int8_t>();
template INTGEMM_AVX512BW void kernel_relu_test<CPUType::AVX512BW, int16_t>();
template INTGEMM_AVX512BW void kernel_relu_test<CPUType::AVX512BW, int>();
diff --git a/test/kernels/rescale_test.cc b/test/kernels/rescale_test.cc
index d380c8d..2f79d39 100644
--- a/test/kernels/rescale_test.cc
+++ b/test/kernels/rescale_test.cc
@@ -32,7 +32,7 @@ KERNEL_TEST_CASE("rescale SSE2") { return kernel_rescale_test<CPUType::SSE2>();
template INTGEMM_AVX2 void kernel_rescale_test<CPUType::AVX2>();
KERNEL_TEST_CASE("rescale AVX2") { return kernel_rescale_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_rescale_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("rescale AVX512BW") { return kernel_rescale_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/sigmoid_test.cc b/test/kernels/sigmoid_test.cc
index e4743e2..a8b0b3c 100644
--- a/test/kernels/sigmoid_test.cc
+++ b/test/kernels/sigmoid_test.cc
@@ -34,7 +34,7 @@ void kernel_sigmoid_test() {
template INTGEMM_AVX2 void kernel_sigmoid_test<CPUType::AVX2>();
KERNEL_TEST_CASE("sigmoid AVX2") { return kernel_sigmoid_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_sigmoid_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("sigmoid AVX512BW") { return kernel_sigmoid_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/tanh_test.cc b/test/kernels/tanh_test.cc
index 737ac9b..2d688ea 100644
--- a/test/kernels/tanh_test.cc
+++ b/test/kernels/tanh_test.cc
@@ -27,7 +27,7 @@ void kernel_tanh_test() {
template INTGEMM_AVX2 void kernel_tanh_test<CPUType::AVX2>();
KERNEL_TEST_CASE("tanh AVX2") { return kernel_tanh_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_tanh_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("tanh AVX512BW") { return kernel_tanh_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/unquantize_test.cc b/test/kernels/unquantize_test.cc
index 6f40da6..20c3d6a 100644
--- a/test/kernels/unquantize_test.cc
+++ b/test/kernels/unquantize_test.cc
@@ -31,7 +31,7 @@ KERNEL_TEST_CASE("unquantize SSE2") { return kernel_unquantize_test<CPUType::SSE
template INTGEMM_AVX2 void kernel_unquantize_test<CPUType::AVX2>();
KERNEL_TEST_CASE("unquantize AVX2") { return kernel_unquantize_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_unquantize_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("unquantize AVX512BW") { return kernel_unquantize_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/upcast_test.cc b/test/kernels/upcast_test.cc
index df6a62e..497c734 100644
--- a/test/kernels/upcast_test.cc
+++ b/test/kernels/upcast_test.cc
@@ -33,7 +33,7 @@ KERNEL_TEST_CASE("upcast8to16 SSE2") { return kernel_upcast8to16_test<CPUType::S
template INTGEMM_AVX2 void kernel_upcast8to16_test<CPUType::AVX2>();
KERNEL_TEST_CASE("upcast8to16 AVX2") { return kernel_upcast8to16_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_upcast8to16_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("upcast8to16 AVX512BW") { return kernel_upcast8to16_test<CPUType::AVX512BW>(); }
#endif
@@ -65,7 +65,7 @@ KERNEL_TEST_CASE("upcast16to32 SSE2") { return kernel_upcast16to32_test<CPUType:
template INTGEMM_AVX2 void kernel_upcast16to32_test<CPUType::AVX2>();
KERNEL_TEST_CASE("upcast16to32 AVX2") { return kernel_upcast16to32_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_upcast16to32_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("upcast16to32 AVX512BW") { return kernel_upcast16to32_test<CPUType::AVX512BW>(); }
#endif
@@ -99,7 +99,7 @@ KERNEL_TEST_CASE("upcast8to32 SSE2") { return kernel_upcast8to32_test<CPUType::S
template INTGEMM_AVX2 void kernel_upcast8to32_test<CPUType::AVX2>();
KERNEL_TEST_CASE("upcast8to32 AVX2") { return kernel_upcast8to32_test<CPUType::AVX2>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_upcast8to32_test<CPUType::AVX512BW>();
KERNEL_TEST_CASE("upcast8to32 AVX512BW") { return kernel_upcast8to32_test<CPUType::AVX512BW>(); }
#endif
diff --git a/test/kernels/write_test.cc b/test/kernels/write_test.cc
index aeaafcb..f2e3d35 100644
--- a/test/kernels/write_test.cc
+++ b/test/kernels/write_test.cc
@@ -46,7 +46,7 @@ KERNEL_TEST_CASE("write/int AVX2") { return kernel_write_test<CPUType::AVX2, int
KERNEL_TEST_CASE("write/float AVX2") { return kernel_write_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("write/double AVX2") { return kernel_write_test<CPUType::AVX2, double>(); }
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_write_test<CPUType::AVX512BW, int8_t>();
template INTGEMM_AVX512BW void kernel_write_test<CPUType::AVX512BW, int16_t>();
template INTGEMM_AVX512BW void kernel_write_test<CPUType::AVX512BW, int>();
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index 7316bb0..260dd76 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -82,7 +82,7 @@ template <class Routine> void TestPrepare(Index rows = 32, Index cols = 16) {
TEST_CASE("Prepare AVX512", "[prepare]") {
if (kCPU < CPUType::AVX512BW) return;
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestPrepare<AVX512_8bit>(64, 8);
TestPrepare<AVX512_8bit>(256, 32);
TestPrepare<AVX512_16bit>(64, 8);
@@ -147,7 +147,7 @@ template <class Routine> void TestSelectColumnsB(Index rows = 64, Index cols = 1
TEST_CASE("SelectColumnsB AVX512", "[select]") {
if (kCPU < CPUType::AVX512BW) return;
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestSelectColumnsB<AVX512_8bit>();
TestSelectColumnsB<AVX512_16bit>(256, 256);
#endif
@@ -223,7 +223,7 @@ TEST_CASE("MaxAbsolute AVX2", "[max]") {
TEST_CASE("MaxAbsolute AVX512F", "[max]") {
if (kCPU < CPUType::AVX512BW) return;
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+ #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestMaxAbsolute<avx512f::MaxAbsolute>();
#endif
}
@@ -434,7 +434,7 @@ TEST_CASE ("Multiply AVX2 16bit with bias", "[biased_multiply]") {
TestMultiplyBias<AVX2_16bit>(200, 256, 256, .1, 1, 0.01);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512 8bit", "[multiply]") {
if (kCPU < CPUType::AVX512BW) return;
TestMultiply<AVX512_8bit>(8, 256, 256, 0, 0.25, 0.062);
diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc
index 68c0a29..3a5faaf 100644
--- a/test/prepare_b_quantized_transposed.cc
+++ b/test/prepare_b_quantized_transposed.cc
@@ -80,7 +80,7 @@ TEST_CASE("PrepareBQuantizedTransposed AVX2", "") {
CHECK(TestMany<AVX2_16bit>(32, 128));
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("PrepareBQuantizedTransposed AVX512", "") {
if (kCPU < CPUType::AVX512BW)
return;
diff --git a/test/prepare_b_transposed.cc b/test/prepare_b_transposed.cc
index 219e56a..1a4ed88 100644
--- a/test/prepare_b_transposed.cc
+++ b/test/prepare_b_transposed.cc
@@ -81,7 +81,7 @@ TEST_CASE("PrepareBTransposed AVX2", "") {
CHECK(TestMany<AVX2_16bit>(8, 128, 2.0f));
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("PrepareBTransposed AVX512", "") {
if (kCPU < CPUType::AVX512BW)
return;
diff --git a/test/quantize_test.cc b/test/quantize_test.cc
index 3263812..ee27261 100644
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -86,7 +86,7 @@ TEST_CASE ("Quantize AVX2", "[quantize]") {
TestMany<AVX2_8bit>(1);
TestMany<AVX2_16bit>(16);
}
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Quantize AVX512", "[quantize]") {
if (kCPU < CPUType::AVX512BW) return;
TestMany<AVX512_8bit>(1);