Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2020-11-18 00:10:45 +0300
committerKenneth Heafield <github@kheafield.com>2020-11-18 00:10:45 +0300
commitd85e98ddceb02989551170b993b0f27fc5a109eb (patch)
treecd120c92bf17a279886baa01bc35cb61ab061577
parent8f28282c3bd854922da638024d2659be52e892e9 (diff)
Make AVX2 optional if compiler doesn't support it
Mostly for emscripten
-rw-r--r--CMakeLists.txt11
-rw-r--r--benchmarks/benchmark.cc3
-rw-r--r--benchmarks/benchmark_quantizer.cc2
-rw-r--r--benchmarks/biasmultiply.cc2
-rw-r--r--compile_test_avx2.cc17
-rw-r--r--intgemm/avx2_gemm.h6
-rw-r--r--intgemm/callbacks.h2
-rw-r--r--intgemm/interleave.h7
-rw-r--r--intgemm/intgemm.cc8
-rw-r--r--intgemm/intgemm.h7
-rw-r--r--intgemm/intgemm_config.h.in1
-rw-r--r--intgemm/intrinsics.h3
-rw-r--r--intgemm/kernels.h2
-rw-r--r--intgemm/multiply.h11
-rw-r--r--intgemm/stats.h4
-rw-r--r--test/add127_test.cc21
-rw-r--r--test/kernels/add_bias_test.cc2
-rw-r--r--test/kernels/bitwise_not_test.cc2
-rw-r--r--test/kernels/downcast_test.cc6
-rw-r--r--test/kernels/exp_test.cc2
-rw-r--r--test/kernels/floor_test.cc2
-rw-r--r--test/kernels/multiply_test.cc2
-rw-r--r--test/kernels/quantize_test.cc2
-rw-r--r--test/kernels/relu_test.cc2
-rw-r--r--test/kernels/rescale_test.cc2
-rw-r--r--test/kernels/sigmoid_test.cc2
-rw-r--r--test/kernels/tanh_test.cc2
-rw-r--r--test/kernels/unquantize_test.cc2
-rw-r--r--test/kernels/upcast_test.cc6
-rw-r--r--test/kernels/write_test.cc2
-rw-r--r--test/multiply_test.cc21
-rw-r--r--test/prepare_b_quantized_transposed.cc2
-rw-r--r--test/prepare_b_transposed.cc14
-rw-r--r--test/quantize_test.cc14
34 files changed, 167 insertions, 27 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d1885f5..7c3ccfa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,19 +16,26 @@ else()
add_compile_options(-Wall -Wextra -pedantic -Werror -Wno-unknown-pragmas)
endif()
+# Check if compiler supports AVX2 (this should only catch emscripten)
+try_compile(INTGEMM_COMPILER_SUPPORTS_AVX2
+ ${CMAKE_CURRENT_BINARY_DIR}/compile_tests
+ ${CMAKE_CURRENT_SOURCE_DIR}/compile_test_avx2.cc)
+if(NOT INTGEMM_COMPILER_SUPPORTS_AVX2)
+ message(WARNING "${Orange}Not building AVX2-based multiplication because your compiler is too old.\nFor details rerun cmake with --debug-trycompile then try to build in compile_tests/CMakeFiles/CMakeTmp.${ColourReset}")
+endif()
+
# Check if compiler supports AVX512BW
try_compile(INTGEMM_COMPILER_SUPPORTS_AVX512BW
${CMAKE_CURRENT_BINARY_DIR}/compile_tests
${CMAKE_CURRENT_SOURCE_DIR}/compile_test_avx512bw.cc)
-
if(NOT INTGEMM_COMPILER_SUPPORTS_AVX512BW)
message(WARNING "${Orange}Not building AVX512BW-based multiplication because your compiler is too old.\nFor details rerun cmake with --debug-trycompile then try to build in compile_tests/CMakeFiles/CMakeTmp.${ColourReset}")
endif()
+# Check if the compiler supports AVX512VNNI
try_compile(INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
${CMAKE_CURRENT_BINARY_DIR}/compile_tests
${CMAKE_CURRENT_SOURCE_DIR}/compile_test_avx512vnni.cc)
-#No compiler flags for this test; that's part of the test!
if(NOT INTGEMM_COMPILER_SUPPORTS_AVX512VNNI)
message(WARNING "${Orange}Not building AVX512VNNI-based multiplication because your compiler is too old.\nFor details rerun cmake with --debug-trycompile then try to build in compile_tests/CMakeFiles/CMakeTmp.${ColourReset}")
endif()
diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc
index c6133bf..2dbe483 100644
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@@ -154,6 +154,7 @@ int main(int, char ** argv) {
RunAll<sse2::Kernels16>(matrices, end, stats.sse2_16bit);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
std::cerr << "AVX2 8bit, 100 samples..." << std::endl;
for (int samples = 0; samples < kSamples; ++samples) {
RandomMatrices *end = (samples < 4) ? matrices_end : full_sample;
@@ -165,7 +166,7 @@ int main(int, char ** argv) {
RandomMatrices *end = (samples < 4) ? matrices_end : full_sample;
RunAll<avx2::Kernels16>(matrices, end, stats.avx2_16bit);
}
-
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
std::cerr << "AVX512 8bit, 100 samples..." << std::endl;
for (int samples = 0; samples < kSamples; ++samples) {
diff --git a/benchmarks/benchmark_quantizer.cc b/benchmarks/benchmark_quantizer.cc
index 5f36bd7..86d90dc 100644
--- a/benchmarks/benchmark_quantizer.cc
+++ b/benchmarks/benchmark_quantizer.cc
@@ -64,7 +64,9 @@ int main() {
element = dist(gen);
}
QuantizerBench<intgemm::ssse3::Kernels8>(in.begin(), out.begin(), static_cast<intgemm::Index>(count));
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
QuantizerBench<intgemm::avx2::Kernels8>(in.begin(), out.begin(), static_cast<intgemm::Index>(count));
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
QuantizerBench<intgemm::avx512bw::Kernels8>(in.begin(), out.begin(), static_cast<intgemm::Index>(count));
#endif
diff --git a/benchmarks/biasmultiply.cc b/benchmarks/biasmultiply.cc
index 490bf3b..65deadb 100644
--- a/benchmarks/biasmultiply.cc
+++ b/benchmarks/biasmultiply.cc
@@ -161,6 +161,7 @@ int main(int argc, char ** argv) {
std::cout << repeat << " iterations of Shifted SSSE3 took: " << newTimeSSSE3.count() << " seconds." << std::endl;
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
std::chrono::duration<double> oldAVX2_nobias = testOld_nobias<avx2::Kernels8>(1, 64, 8);
for (int i = 0; i<repeat; i++) {
oldAVX2_nobias += testOld_nobias<avx2::Kernels8>(8, 256, 256);
@@ -196,6 +197,7 @@ int main(int argc, char ** argv) {
}
std::cout << repeat << " iterations of Shifted AVX2 took: " << newTimeAVX2.count() << " seconds." << std::endl;
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
if (kCPU < CPUType::AVX512BW) return 0;
std::chrono::duration<double> oldAVX512_nobias = testOld_nobias<avx512bw::Kernels8>(1, 64, 8);
diff --git a/compile_test_avx2.cc b/compile_test_avx2.cc
new file mode 100644
index 0000000..794983b
--- /dev/null
+++ b/compile_test_avx2.cc
@@ -0,0 +1,17 @@
+// Some compilers don't have AVX2 support. Test for them.
+#include <immintrin.h>
+
+#if defined(_MSC_VER)
+#define INTGEMM_AVX2
+#else
+#define INTGEMM_AVX2 __attribute__ ((target ("avx2")))
+#endif
+
+INTGEMM_AVX2 int Test() {
+ __m256i value = _mm256_set1_epi32(1);
+ value = _mm256_abs_epi8(value);
+ return *(int*)&value;
+}
+
+int main() {
+}
diff --git a/intgemm/avx2_gemm.h b/intgemm/avx2_gemm.h
index 5e81475..6e01679 100644
--- a/intgemm/avx2_gemm.h
+++ b/intgemm/avx2_gemm.h
@@ -1,5 +1,9 @@
#pragma once
+#include "intgemm/intgemm_config.h"
+
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+
#include "interleave.h"
#include "kernels.h"
#include "multiply.h"
@@ -224,3 +228,5 @@ struct Kernels8 {
} // namespace avx2
} // namespace intgemm
+
+#endif
diff --git a/intgemm/callbacks.h b/intgemm/callbacks.h
index 23d3be1..c304466 100644
--- a/intgemm/callbacks.h
+++ b/intgemm/callbacks.h
@@ -14,9 +14,11 @@
#include "callbacks/implementations.inl"
#undef CALLBACKS_THIS_IS_SSE2
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
#define CALLBACKS_THIS_IS_AVX2
#include "callbacks/implementations.inl"
#undef CALLBACKS_THIS_IS_AVX2
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
#define CALLBACKS_THIS_IS_AVX512BW
diff --git a/intgemm/interleave.h b/intgemm/interleave.h
index 1ec686b..95f05ce 100644
--- a/intgemm/interleave.h
+++ b/intgemm/interleave.h
@@ -26,7 +26,10 @@ INTGEMM_INTERLEAVE_N(target, type, 32) \
INTGEMM_INTERLEAVE_N(target, type, 64)
INTGEMM_INTERLEAVE(INTGEMM_SSE2, __m128i)
+
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_INTERLEAVE(INTGEMM_AVX2, __m256i)
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
INTGEMM_INTERLEAVE(INTGEMM_AVX512BW, __m512i)
#endif
@@ -42,7 +45,9 @@ target static inline void Swap(Register &a, Register &b) { \
} \
INTGEMM_SWAP(INTGEMM_SSE2, __m128i)
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_SWAP(INTGEMM_AVX2, __m256i)
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_SWAP(INTGEMM_AVX512BW, __m512i)
@@ -95,7 +100,9 @@ target static inline void Transpose16InLane(Register &r0, Register &r1, Register
} \
INTGEMM_TRANSPOSE16(INTGEMM_SSE2, __m128i)
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_TRANSPOSE16(INTGEMM_AVX2, __m256i)
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_TRANSPOSE16(INTGEMM_AVX512BW, __m512i)
diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc
index f859b9a..d45cf60 100644
--- a/intgemm/intgemm.cc
+++ b/intgemm/intgemm.cc
@@ -43,6 +43,12 @@ const char *const Int8Shift::kName = ChooseCPU(avx512vnni::Kernels8::kName, avx5
const CPUType kCPU = ChooseCPU(CPUType::AVX512VNNI, CPUType::AVX512BW, CPUType::AVX2, CPUType::SSSE3, CPUType::SSE2, CPUType::UNSUPPORTED);
+#if !defined(INTGEMM_COMPILER_SUPPORTS_AVX2)
+namespace avx2{
+using sse2::MaxAbsolute;
+using sse2::VectorMeanStd;
+} // namespace avx2
+#endif
#if !defined(INTGEMM_COMPILER_SUPPORTS_AVX512BW)
namespace avx512bw {
using avx2::MaxAbsolute;
@@ -58,8 +64,10 @@ constexpr const char *const Unsupported_16bit::kName;
constexpr const char *const Unsupported_8bit::kName;
constexpr const char *const sse2::Kernels16::kName;
constexpr const char *const ssse3::Kernels8::kName;
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
constexpr const char *const avx2::Kernels8::kName;
constexpr const char *const avx2::Kernels16::kName;
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
constexpr const char *const avx512bw::Kernels8::kName;
constexpr const char *const avx512bw::Kernels16::kName;
diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h
index 8e2da02..b53387e 100644
--- a/intgemm/intgemm.h
+++ b/intgemm/intgemm.h
@@ -134,6 +134,13 @@ typedef Unsupported_8bit Kernels8;
typedef Unsupported_16bit Kernels16;
} // namespace avx512bw
#endif
+#ifndef INTGEMM_COMPILER_SUPPORTS_AVX2
+namespace avx2 {
+typedef Unsupported_8bit Kernels8;
+typedef Unsupported_16bit Kernels16;
+} // namespace avx2
+#endif
+
/* Returns:
 * avx512vnni if the CPU supports AVX512VNNI
diff --git a/intgemm/intgemm_config.h.in b/intgemm/intgemm_config.h.in
index 920e9ae..a2c8cbd 100644
--- a/intgemm/intgemm_config.h.in
+++ b/intgemm/intgemm_config.h.in
@@ -1,4 +1,5 @@
#pragma once
+#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX2
#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX512BW
#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
diff --git a/intgemm/intrinsics.h b/intgemm/intrinsics.h
index 480f421..31957bc 100644
--- a/intgemm/intrinsics.h
+++ b/intgemm/intrinsics.h
@@ -215,6 +215,8 @@ INTGEMM_SSE2 static inline __m128i xor_si(__m128i a, __m128i b) {
* AVX2
*
*/
+
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_AVX2 static inline __m256i abs_epi8(__m256i arg) {
return _mm256_abs_epi8(arg);
}
@@ -390,6 +392,7 @@ INTGEMM_AVX2 static inline __m256i unpackhi_epi64(__m256i a, __m256i b) {
INTGEMM_AVX2 static inline __m256i xor_si(__m256i a, __m256i b) {
return _mm256_xor_si256(a, b);
}
+#endif
/*
*
diff --git a/intgemm/kernels.h b/intgemm/kernels.h
index ee35966..57036f4 100644
--- a/intgemm/kernels.h
+++ b/intgemm/kernels.h
@@ -12,9 +12,11 @@
#include "kernels/implementations.inl"
#undef KERNELS_THIS_IS_SSE2
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
#define KERNELS_THIS_IS_AVX2
#include "kernels/implementations.inl"
#undef KERNELS_THIS_IS_AVX2
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
#define KERNELS_THIS_IS_AVX512BW
diff --git a/intgemm/multiply.h b/intgemm/multiply.h
index e201e09..84c0655 100644
--- a/intgemm/multiply.h
+++ b/intgemm/multiply.h
@@ -13,6 +13,7 @@ INTGEMM_SSE2 static inline dvector_t<CPUType::SSE2, int> PermuteSummer(__m128i p
return { pack0123, pack4567 };
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_AVX2 static inline __m256i PermuteSummer(__m256i pack0123, __m256i pack4567) {
// This instruction generates 1s 2s 3s 4s 5f 6f 7f 8f
__m256i rev = _mm256_permute2f128_si256(pack0123, pack4567, 0x21);
@@ -20,7 +21,7 @@ INTGEMM_AVX2 static inline __m256i PermuteSummer(__m256i pack0123, __m256i pack4
__m256i blended = _mm256_blend_epi32(pack0123, pack4567, 0xf0);
return _mm256_add_epi32(rev, blended);
}
-
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
@@ -99,7 +100,9 @@ target inline Register Pack0123(Register sum0, Register sum1, Register sum2, Reg
} \
INTGEMM_PACK0123(INTGEMM_SSE2, __m128i)
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_PACK0123(INTGEMM_AVX2, __m256i)
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_PACK0123(INTGEMM_AVX512BW, __m512i)
@@ -111,10 +114,12 @@ INTGEMM_SSE2 static inline void RunCallback(Callback& callback_impl, dvector_t<C
callback_impl(total.second, callbacks::OutputBufferInfo(row_idx, col_idx + 4, rows, cols));
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template <typename Callback>
INTGEMM_AVX2 static inline void RunCallback(Callback& callback_impl, vector_t<CPUType::AVX2, int> total, Index row_idx, Index col_idx, Index rows, Index cols) {
callback_impl(total, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols));
}
+#endif
// 16-bit multiplier for INTGEMM_SSE2, INTGEMM_AVX2, and AVX512.
// C = A * B * unquant_mult
@@ -374,7 +379,7 @@ template <typename Callback> target static void Multiply(const int16_t *A, const
* 256-bit. We had to wait for INTGEMM_AVX2 to get 256-bit versions of vpsignb and
* vpmaddubsw. That's why this code is generic over 128-bit or 256-bit.
*/
-
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_AVX2 inline static void InnerINTGEMM_AVX2(
__m256i a, const __m256i *b,
__m256i &sum0, __m256i &sum1, __m256i &sum2, __m256i &sum3,
@@ -514,7 +519,7 @@ INTGEMM_AVX2 inline static void InnerINTGEMM_AVX2(
sum7 = adds_epi16(sum7, maddubs_epi16(a_positive, sign_epi8(b[7], a)));
#endif
}
-
+#endif
// For INTGEMM_SSSE3 without AVX
INTGEMM_SSSE3 inline static void InnerINTGEMM_SSSE3(
diff --git a/intgemm/stats.h b/intgemm/stats.h
index 6f9eda2..9573c4b 100644
--- a/intgemm/stats.h
+++ b/intgemm/stats.h
@@ -32,12 +32,14 @@ INTGEMM_SSE2 static inline float AddFloat32(__m128 a) {
return *reinterpret_cast<float*>(&a);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
INTGEMM_AVX2 static inline float MaxFloat32(__m256 a) {
return MaxFloat32(max_ps(_mm256_castps256_ps128(a), _mm256_extractf128_ps(a, 1)));
}
INTGEMM_AVX2 static inline float AddFloat32(__m256 a) {
return AddFloat32(add_ps(_mm256_castps256_ps128(a), _mm256_extractf128_ps(a, 1)));
}
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
// Find the maximum float.
@@ -61,9 +63,11 @@ constexpr int32_t kFloatAbsoluteMask = 0x7fffffff;
#include "stats.inl"
#undef INTGEMM_THIS_IS_SSE2
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
#define INTGEMM_THIS_IS_AVX2
#include "stats.inl"
#undef INTGEMM_THIS_IS_AVX2
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
#define INTGEMM_THIS_IS_AVX512DQ
diff --git a/test/add127_test.cc b/test/add127_test.cc
index b7ce49b..723e143 100644
--- a/test/add127_test.cc
+++ b/test/add127_test.cc
@@ -287,21 +287,23 @@ TEST_CASE("PrepareBias SSSE3", "[Add127]") {
TestPrepareBias<ssse3::Kernels8>(512,512);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("PrepareBias AVX2", "[Add127]") {
if (kCPU < CPUType::AVX2) return;
TestPrepareBias<avx2::Kernels8>(256,256);
TestPrepareBias<avx2::Kernels8>(2048,256);
TestPrepareBias<avx2::Kernels8>(512,512);
}
+#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("PrepareBias AVX512F", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestPrepareBias<avx512bw::Kernels8>(256,256);
TestPrepareBias<avx512bw::Kernels8>(2048,256);
TestPrepareBias<avx512bw::Kernels8>(512,512);
- #endif
}
+#endif
//A
TEST_CASE("PrepareA SSSE3", "[Add127]") {
@@ -312,6 +314,7 @@ TEST_CASE("PrepareA SSSE3", "[Add127]") {
TestPrepareA<ssse3::Kernels8>(2048,256);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("PrepareA AVX2", "[Add127]") {
if (kCPU < CPUType::AVX2) return;
TestPrepareA<avx2::Kernels8>(64,64);
@@ -319,16 +322,17 @@ TEST_CASE("PrepareA AVX2", "[Add127]") {
TestPrepareA<avx2::Kernels8>(512,512);
TestPrepareA<avx2::Kernels8>(2048,256);
}
+#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("PrepareA AVX512F", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestPrepareA<avx512bw::Kernels8>(64,64);
TestPrepareA<avx512bw::Kernels8>(256,256);
TestPrepareA<avx512bw::Kernels8>(512,512);
TestPrepareA<avx512bw::Kernels8>(2048,256);
- #endif
}
+#endif
// Multiply
@@ -343,6 +347,7 @@ TEST_CASE ("Multiply SSSE3 8bit Shift with bias", "[Add127]") {
TestMultiplyBiasNew<ssse3::Kernels8>(200, 256, 256, 0.55f, 0.74f, 0.17f, 0.16f);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE ("Multiply AVX2 8bit Shift with bias", "[Add127]") {
if (kCPU < CPUType::AVX2) return;
TestMultiplyBiasNew<avx2::Kernels8>(1, 64, 8, 0.11f, 0.11f, 0.06f, 0.05f);
@@ -353,6 +358,8 @@ TEST_CASE ("Multiply AVX2 8bit Shift with bias", "[Add127]") {
TestMultiplyBiasNew<avx2::Kernels8>(248, 256, 256, 0.48f, 0.64f, 0.16f, 0.15f);
TestMultiplyBiasNew<avx2::Kernels8>(200, 256, 256, 0.55f, 0.74f, 0.17f, 0.16f);
}
+#endif
+
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512F 8bit Shift with bias", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
@@ -391,6 +398,7 @@ TEST_CASE ("Multiply SSSE3 8bit Shift vs nonshift", "[Add127]") {
TestMultiplyShiftNonShift<ssse3::Kernels8>(200, 256, 256, 1, 0.74f, 0.17f, 0.006f);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE ("Multiply AVX2 8bit Shift vs nonshift", "[Add127]") {
if (kCPU < CPUType::AVX2) return;
TestMultiplyShiftNonShift<avx2::Kernels8>(1, 64, 8, 0.00001f, 0.11f, 0.06f, 0.00001f);
@@ -401,6 +409,8 @@ TEST_CASE ("Multiply AVX2 8bit Shift vs nonshift", "[Add127]") {
TestMultiplyShiftNonShift<avx2::Kernels8>(248, 256, 256, 0.0001f, 0.64f, 0.16f, 0.0001f);
TestMultiplyShiftNonShift<avx2::Kernels8>(200, 256, 256, 0.0001f, 0.74f, 0.17f, 0.0001f);
}
+#endif
+
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512F 8bit Shift vs nonshift", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
@@ -439,6 +449,7 @@ TEST_CASE ("Multiply SSSE3 8bit Shift vs Int", "[Add127]") {
TestMultiplyShiftInt<ssse3::Kernels8>(200, 256, 256, 0.0001f, 0.74f, 0.17f, 0.0001f);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE ("Multiply AVX2 8bit Shift vs Int", "[Add127]") {
if (kCPU < CPUType::AVX2) return;
TestMultiplyShiftInt<avx2::Kernels8>(1, 64, 8, 0.0001f, 0.11f, 0.06f, 0.0001f);
@@ -449,6 +460,8 @@ TEST_CASE ("Multiply AVX2 8bit Shift vs Int", "[Add127]") {
TestMultiplyShiftInt<avx2::Kernels8>(248, 256, 256, 0.0001f, 0.64f, 0.16f, 0.0001f);
TestMultiplyShiftInt<avx2::Kernels8>(200, 256, 256, 0.0001f, 0.74f, 0.17f, 0.0001f);
}
+#endif
+
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512F 8bit Shift vs Int", "[Add127]") {
if (kCPU < CPUType::AVX512BW) return;
diff --git a/test/kernels/add_bias_test.cc b/test/kernels/add_bias_test.cc
index 492c669..b9e5fd9 100644
--- a/test/kernels/add_bias_test.cc
+++ b/test/kernels/add_bias_test.cc
@@ -37,6 +37,7 @@ KERNEL_TEST_CASE("add_bias/int SSE2") { return kernel_add_bias_test<CPUType::SSE
KERNEL_TEST_CASE("add_bias/float SSE2") { return kernel_add_bias_test<CPUType::SSE2, float>(); }
KERNEL_TEST_CASE("add_bias/double SSE2") { return kernel_add_bias_test<CPUType::SSE2, double>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_add_bias_test<CPUType::AVX2, int8_t>();
template INTGEMM_AVX2 void kernel_add_bias_test<CPUType::AVX2, int16_t>();
template INTGEMM_AVX2 void kernel_add_bias_test<CPUType::AVX2, int>();
@@ -47,6 +48,7 @@ KERNEL_TEST_CASE("add_bias/int16 AVX2") { return kernel_add_bias_test<CPUType::A
KERNEL_TEST_CASE("add_bias/int AVX2") { return kernel_add_bias_test<CPUType::AVX2, int>(); }
KERNEL_TEST_CASE("add_bias/float AVX2") { return kernel_add_bias_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("add_bias/double AVX2") { return kernel_add_bias_test<CPUType::AVX2, double>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_add_bias_test<CPUType::AVX512BW, int8_t>();
diff --git a/test/kernels/bitwise_not_test.cc b/test/kernels/bitwise_not_test.cc
index e908c43..6c28c95 100644
--- a/test/kernels/bitwise_not_test.cc
+++ b/test/kernels/bitwise_not_test.cc
@@ -28,8 +28,10 @@ void kernel_bitwise_not_test() {
template INTGEMM_SSE2 void kernel_bitwise_not_test<CPUType::SSE2>();
KERNEL_TEST_CASE("bitwise_not SSE2") { return kernel_bitwise_not_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_bitwise_not_test<CPUType::AVX2>();
KERNEL_TEST_CASE("bitwise_not AVX2") { return kernel_bitwise_not_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_bitwise_not_test<CPUType::AVX512BW>();
diff --git a/test/kernels/downcast_test.cc b/test/kernels/downcast_test.cc
index 5f9db66..0f2ccd0 100644
--- a/test/kernels/downcast_test.cc
+++ b/test/kernels/downcast_test.cc
@@ -30,8 +30,10 @@ void kernel_downcast32to8_test() {
template INTGEMM_SSE2 void kernel_downcast32to8_test<CPUType::SSE2>();
KERNEL_TEST_CASE("downcast32to8 SSE2") { return kernel_downcast32to8_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_downcast32to8_test<CPUType::AVX2>();
KERNEL_TEST_CASE("downcast32to8 AVX2") { return kernel_downcast32to8_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_downcast32to8_test<CPUType::AVX512BW>();
@@ -60,8 +62,10 @@ void kernel_downcast32to16_test() {
template INTGEMM_SSE2 void kernel_downcast32to16_test<CPUType::SSE2>();
KERNEL_TEST_CASE("downcast32to16 SSE2") { return kernel_downcast32to16_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_downcast32to16_test<CPUType::AVX2>();
KERNEL_TEST_CASE("downcast32to16 AVX2") { return kernel_downcast32to16_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_downcast32to16_test<CPUType::AVX512BW>();
@@ -90,8 +94,10 @@ void kernel_downcast16to8_test() {
template INTGEMM_SSE2 void kernel_downcast16to8_test<CPUType::SSE2>();
KERNEL_TEST_CASE("downcast16to8 SSE2") { return kernel_downcast16to8_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_downcast16to8_test<CPUType::AVX2>();
KERNEL_TEST_CASE("downcast16to8 AVX2") { return kernel_downcast16to8_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_downcast16to8_test<CPUType::AVX512BW>();
diff --git a/test/kernels/exp_test.cc b/test/kernels/exp_test.cc
index 838e228..9f535f2 100644
--- a/test/kernels/exp_test.cc
+++ b/test/kernels/exp_test.cc
@@ -25,8 +25,10 @@ void kernel_exp_approx_taylor_test() {
CHECK_EPS(output[i], exp(input[i]), 0.001f);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_exp_approx_taylor_test<CPUType::AVX2>();
KERNEL_TEST_CASE("exp_approx_taylor AVX2") { return kernel_exp_approx_taylor_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_exp_approx_taylor_test<CPUType::AVX512BW>();
diff --git a/test/kernels/floor_test.cc b/test/kernels/floor_test.cc
index 2659c3f..9b7a214 100644
--- a/test/kernels/floor_test.cc
+++ b/test/kernels/floor_test.cc
@@ -28,8 +28,10 @@ void kernel_floor_test() {
template INTGEMM_SSE2 void kernel_floor_test<CPUType::SSE2>();
KERNEL_TEST_CASE("floor SSE2") { return kernel_floor_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_floor_test<CPUType::AVX2>();
KERNEL_TEST_CASE("floor AVX2") { return kernel_floor_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_floor_test<CPUType::AVX512BW>();
diff --git a/test/kernels/multiply_test.cc b/test/kernels/multiply_test.cc
index 029e3ac..fc1a51e 100644
--- a/test/kernels/multiply_test.cc
+++ b/test/kernels/multiply_test.cc
@@ -38,6 +38,7 @@ KERNEL_TEST_CASE("multiply/int SSE2") { return kernel_multiply_test<CPUType::SSE
KERNEL_TEST_CASE("multiply/float SSE2") { return kernel_multiply_test<CPUType::SSE2, float>(); }
KERNEL_TEST_CASE("multiply/double SSE2") { return kernel_multiply_test<CPUType::SSE2, double>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_multiply_test<CPUType::AVX2, int8_t>();
template INTGEMM_AVX2 void kernel_multiply_test<CPUType::AVX2, int16_t>();
template INTGEMM_AVX2 void kernel_multiply_test<CPUType::AVX2, int>();
@@ -48,6 +49,7 @@ KERNEL_TEST_CASE("multiply/int16 AVX2") { return kernel_multiply_test<CPUType::A
KERNEL_TEST_CASE("multiply/int AVX2") { return kernel_multiply_test<CPUType::AVX2, int>(); }
KERNEL_TEST_CASE("multiply/float AVX2") { return kernel_multiply_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("multiply/double AVX2") { return kernel_multiply_test<CPUType::AVX2, double>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_multiply_test<CPUType::AVX512BW, int8_t>();
diff --git a/test/kernels/quantize_test.cc b/test/kernels/quantize_test.cc
index ae3c068..93280f7 100644
--- a/test/kernels/quantize_test.cc
+++ b/test/kernels/quantize_test.cc
@@ -28,8 +28,10 @@ void kernel_quantize_test() {
template INTGEMM_SSE2 void kernel_quantize_test<CPUType::SSE2>();
KERNEL_TEST_CASE("quantize SSE2") { return kernel_quantize_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_quantize_test<CPUType::AVX2>();
KERNEL_TEST_CASE("quantize AVX2") { return kernel_quantize_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_quantize_test<CPUType::AVX512BW>();
diff --git a/test/kernels/relu_test.cc b/test/kernels/relu_test.cc
index 6fcef98..8fd30ae 100644
--- a/test/kernels/relu_test.cc
+++ b/test/kernels/relu_test.cc
@@ -36,6 +36,7 @@ KERNEL_TEST_CASE("relu/int SSE2") { return kernel_relu_test<CPUType::SSE2, int>(
KERNEL_TEST_CASE("relu/float SSE2") { return kernel_relu_test<CPUType::SSE2, float>(); }
KERNEL_TEST_CASE("relu/double SSE2") { return kernel_relu_test<CPUType::SSE2, double>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_relu_test<CPUType::AVX2, int8_t>();
template INTGEMM_AVX2 void kernel_relu_test<CPUType::AVX2, int16_t>();
template INTGEMM_AVX2 void kernel_relu_test<CPUType::AVX2, int>();
@@ -46,6 +47,7 @@ KERNEL_TEST_CASE("relu/int16 AVX2") { return kernel_relu_test<CPUType::AVX2, int
KERNEL_TEST_CASE("relu/int AVX2") { return kernel_relu_test<CPUType::AVX2, int>(); }
KERNEL_TEST_CASE("relu/float AVX2") { return kernel_relu_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("relu/double AVX2") { return kernel_relu_test<CPUType::AVX2, double>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_relu_test<CPUType::AVX512BW, int8_t>();
diff --git a/test/kernels/rescale_test.cc b/test/kernels/rescale_test.cc
index 280b513..13937ed 100644
--- a/test/kernels/rescale_test.cc
+++ b/test/kernels/rescale_test.cc
@@ -30,8 +30,10 @@ void kernel_rescale_test() {
template INTGEMM_SSE2 void kernel_rescale_test<CPUType::SSE2>();
KERNEL_TEST_CASE("rescale SSE2") { return kernel_rescale_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_rescale_test<CPUType::AVX2>();
KERNEL_TEST_CASE("rescale AVX2") { return kernel_rescale_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_rescale_test<CPUType::AVX512BW>();
diff --git a/test/kernels/sigmoid_test.cc b/test/kernels/sigmoid_test.cc
index af9dad1..7827593 100644
--- a/test/kernels/sigmoid_test.cc
+++ b/test/kernels/sigmoid_test.cc
@@ -32,8 +32,10 @@ void kernel_sigmoid_test() {
CHECK_EPS(output[i], sigmoid_ref(input[i]), 0.001f);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_sigmoid_test<CPUType::AVX2>();
KERNEL_TEST_CASE("sigmoid AVX2") { return kernel_sigmoid_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_sigmoid_test<CPUType::AVX512BW>();
diff --git a/test/kernels/tanh_test.cc b/test/kernels/tanh_test.cc
index e2c36f5..1d00042 100644
--- a/test/kernels/tanh_test.cc
+++ b/test/kernels/tanh_test.cc
@@ -25,8 +25,10 @@ void kernel_tanh_test() {
CHECK_EPS(output[i], tanh(input[i]), 0.001f);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_tanh_test<CPUType::AVX2>();
KERNEL_TEST_CASE("tanh AVX2") { return kernel_tanh_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_tanh_test<CPUType::AVX512BW>();
diff --git a/test/kernels/unquantize_test.cc b/test/kernels/unquantize_test.cc
index ee4bc80..edfafa5 100644
--- a/test/kernels/unquantize_test.cc
+++ b/test/kernels/unquantize_test.cc
@@ -28,8 +28,10 @@ void kernel_unquantize_test() {
template INTGEMM_SSE2 void kernel_unquantize_test<CPUType::SSE2>();
KERNEL_TEST_CASE("unquantize SSE2") { return kernel_unquantize_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_unquantize_test<CPUType::AVX2>();
KERNEL_TEST_CASE("unquantize AVX2") { return kernel_unquantize_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_unquantize_test<CPUType::AVX512BW>();
diff --git a/test/kernels/upcast_test.cc b/test/kernels/upcast_test.cc
index 92be1bd..0733922 100644
--- a/test/kernels/upcast_test.cc
+++ b/test/kernels/upcast_test.cc
@@ -33,8 +33,10 @@ void kernel_upcast8to16_test() {
template INTGEMM_SSE2 void kernel_upcast8to16_test<CPUType::SSE2>();
KERNEL_TEST_CASE("upcast8to16 SSE2") { return kernel_upcast8to16_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_upcast8to16_test<CPUType::AVX2>();
KERNEL_TEST_CASE("upcast8to16 AVX2") { return kernel_upcast8to16_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_upcast8to16_test<CPUType::AVX512BW>();
@@ -65,8 +67,10 @@ void kernel_upcast16to32_test() {
template INTGEMM_SSE2 void kernel_upcast16to32_test<CPUType::SSE2>();
KERNEL_TEST_CASE("upcast16to32 SSE2") { return kernel_upcast16to32_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_upcast16to32_test<CPUType::AVX2>();
KERNEL_TEST_CASE("upcast16to32 AVX2") { return kernel_upcast16to32_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_upcast16to32_test<CPUType::AVX512BW>();
@@ -100,8 +104,10 @@ void kernel_upcast8to32_test() {
template INTGEMM_SSE2 void kernel_upcast8to32_test<CPUType::SSE2>();
KERNEL_TEST_CASE("upcast8to32 SSE2") { return kernel_upcast8to32_test<CPUType::SSE2>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_upcast8to32_test<CPUType::AVX2>();
KERNEL_TEST_CASE("upcast8to32 AVX2") { return kernel_upcast8to32_test<CPUType::AVX2>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_upcast8to32_test<CPUType::AVX512BW>();
diff --git a/test/kernels/write_test.cc b/test/kernels/write_test.cc
index c263fca..a136a86 100644
--- a/test/kernels/write_test.cc
+++ b/test/kernels/write_test.cc
@@ -36,6 +36,7 @@ KERNEL_TEST_CASE("write/int SSE2") { return kernel_write_test<CPUType::SSE2, int
KERNEL_TEST_CASE("write/float SSE2") { return kernel_write_test<CPUType::SSE2, float>(); }
KERNEL_TEST_CASE("write/double SSE2") { return kernel_write_test<CPUType::SSE2, double>(); }
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template INTGEMM_AVX2 void kernel_write_test<CPUType::AVX2, int8_t>();
template INTGEMM_AVX2 void kernel_write_test<CPUType::AVX2, int16_t>();
template INTGEMM_AVX2 void kernel_write_test<CPUType::AVX2, int>();
@@ -46,6 +47,7 @@ KERNEL_TEST_CASE("write/int16 AVX2") { return kernel_write_test<CPUType::AVX2, i
KERNEL_TEST_CASE("write/int AVX2") { return kernel_write_test<CPUType::AVX2, int>(); }
KERNEL_TEST_CASE("write/float AVX2") { return kernel_write_test<CPUType::AVX2, float>(); }
KERNEL_TEST_CASE("write/double AVX2") { return kernel_write_test<CPUType::AVX2, double>(); }
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
template INTGEMM_AVX512BW void kernel_write_test<CPUType::AVX512BW, int8_t>();
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index 6c16edd..47201c0 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -82,21 +82,23 @@ template <class Routine> void TestPrepare(Index rows = 32, Index cols = 16) {
PrintMatrix(reference.begin(), rows, cols) << "Routine" << '\n' << PrintMatrix(test.begin(), rows, cols));
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("Prepare AVX512", "[prepare]") {
if (kCPU < CPUType::AVX512BW) return;
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestPrepare<avx512bw::Kernels8>(64, 8);
TestPrepare<avx512bw::Kernels8>(256, 32);
TestPrepare<avx512bw::Kernels16>(64, 8);
TestPrepare<avx512bw::Kernels16>(256, 32);
-#endif
}
+#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("Prepare AVX2", "[prepare]") {
if (kCPU < CPUType::AVX2) return;
TestPrepare<avx2::Kernels8>(64, 32);
TestPrepare<avx2::Kernels16>(64, 32);
}
+#endif
TEST_CASE("Prepare SSSE3", "[prepare]") {
if (kCPU < CPUType::SSSE3) return;
@@ -147,19 +149,21 @@ template <class Routine> void TestSelectColumnsB(Index rows = 64, Index cols = 1
PrintMatrix(ref.begin(), rows, kSelectCols) << PrintMatrix(test.begin(), rows, kSelectCols));
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("SelectColumnsB AVX512", "[select]") {
if (kCPU < CPUType::AVX512BW) return;
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestSelectColumnsB<avx512bw::Kernels8>();
TestSelectColumnsB<avx512bw::Kernels16>(256, 256);
-#endif
}
+#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("SelectColumnsB AVX2", "[select]") {
if (kCPU < CPUType::AVX2) return;
TestSelectColumnsB<avx2::Kernels8>(256, 256);
TestSelectColumnsB<avx2::Kernels16>(256, 256);
}
+#endif
TEST_CASE("SelectColumnsB SSSE3", "[select]") {
if (kCPU < CPUType::SSSE3) return;
@@ -218,17 +222,19 @@ TEST_CASE("MaxAbsolute SSE2", "[max]") {
TestMaxAbsolute<sse2::MaxAbsolute>();
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("MaxAbsolute AVX2", "[max]") {
if (kCPU < CPUType::AVX2) return;
TestMaxAbsolute<avx2::MaxAbsolute>();
}
+#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("MaxAbsolute AVX512BW", "[max]") {
if (kCPU < CPUType::AVX512BW) return;
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TestMaxAbsolute<avx512bw::MaxAbsolute>();
- #endif
}
+#endif
// Based on https://arxiv.org/abs/1705.01991
@@ -396,6 +402,8 @@ TEST_CASE ("Multiply SSSE3 8bit with bias", "[biased_multiply]") {
TestMultiplyBias<ssse3::Kernels8>(200, 256, 256, 1.8f, 1.9f, 0.1f, 0.011f);
}
+
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE ("Multiply AVX2 8bit", "[multiply]") {
if (kCPU < CPUType::AVX2) return;
TestMultiply<avx2::Kernels8>(8, 256, 256, .1f, 1, 0.1f);
@@ -435,6 +443,7 @@ TEST_CASE ("Multiply AVX2 16bit with bias", "[biased_multiply]") {
TestMultiplyBias<avx2::Kernels16>(248, 256, 256, .1f, 1, 0.01f);
TestMultiplyBias<avx2::Kernels16>(200, 256, 256, .1f, 1, 0.01f);
}
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Multiply AVX512 8bit", "[multiply]") {
diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc
index 1437e0a..e27992a 100644
--- a/test/prepare_b_quantized_transposed.cc
+++ b/test/prepare_b_quantized_transposed.cc
@@ -72,6 +72,7 @@ TEST_CASE("PrepareBQuantizedTransposed SSSE3", "") {
CHECK(TestMany<ssse3::Kernels8>(32, 128));
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("PrepareBQuantizedTransposed AVX2", "") {
if (kCPU < CPUType::AVX2)
return;
@@ -79,6 +80,7 @@ TEST_CASE("PrepareBQuantizedTransposed AVX2", "") {
CHECK(TestMany<avx2::Kernels8>(32, 128));
CHECK(TestMany<avx2::Kernels16>(32, 128));
}
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("PrepareBQuantizedTransposed AVX512", "") {
diff --git a/test/prepare_b_transposed.cc b/test/prepare_b_transposed.cc
index bc35138..1ba88df 100644
--- a/test/prepare_b_transposed.cc
+++ b/test/prepare_b_transposed.cc
@@ -73,6 +73,7 @@ TEST_CASE("PrepareBTransposed SSSE3", "") {
CHECK(TestMany<ssse3::Kernels8>(4, 128, 2.0f));
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("PrepareBTransposed AVX2", "") {
if (kCPU < CPUType::AVX2)
return;
@@ -80,15 +81,16 @@ TEST_CASE("PrepareBTransposed AVX2", "") {
CHECK(TestMany<avx2::Kernels8>(8, 128, 2.0f));
CHECK(TestMany<avx2::Kernels16>(8, 128, 2.0f));
}
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
- TEST_CASE("PrepareBTransposed AVX512", "") {
- if (kCPU < CPUType::AVX512BW)
- return;
+TEST_CASE("PrepareBTransposed AVX512", "") {
+ if (kCPU < CPUType::AVX512BW)
+ return;
- CHECK(TestMany<avx512bw::Kernels8>(16, 128, 2.0f));
- CHECK(TestMany<avx512bw::Kernels16>(16, 128, 2.0f));
- }
+ CHECK(TestMany<avx512bw::Kernels8>(16, 128, 2.0f));
+ CHECK(TestMany<avx512bw::Kernels16>(16, 128, 2.0f));
+}
#endif
}
diff --git a/test/quantize_test.cc b/test/quantize_test.cc
index 550ec66..e9f7980 100644
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -128,17 +128,19 @@ TEST_CASE ("Quantize SSSE3", "[quantize]") {
TestMany<ssse3::Kernels8>(1);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE ("Quantize AVX2", "[quantize]") {
if (kCPU < CPUType::AVX2) return;
TestMany<avx2::Kernels8>(1);
TestMany<avx2::Kernels16>(16);
}
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
- TEST_CASE ("Quantize AVX512", "[quantize]") {
- if (kCPU < CPUType::AVX512BW) return;
- TestMany<avx512bw::Kernels8>(1);
- TestMany<avx512bw::Kernels16>(16);
- }
+TEST_CASE ("Quantize AVX512", "[quantize]") {
+ if (kCPU < CPUType::AVX512BW) return;
+ TestMany<avx512bw::Kernels8>(1);
+ TestMany<avx512bw::Kernels16>(16);
+}
#endif
TEST_CASE("QuantizeStd SSSE3", "[VectorMeanStd]") {
@@ -157,6 +159,7 @@ TEST_CASE("QuantizeStd SSSE3", "[VectorMeanStd]") {
testVectorMeanStd<sse2::VectorMeanStd>(120832, true);
}
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("QuantizeStd AVX2", "[VectorMeanStd]") {
if (kCPU < CPUType::AVX2) return;
testVectorMeanStd<avx2::VectorMeanStd>(64);
@@ -172,6 +175,7 @@ TEST_CASE("QuantizeStd AVX2", "[VectorMeanStd]") {
testVectorMeanStd<avx2::VectorMeanStd>(120832);
testVectorMeanStd<avx2::VectorMeanStd>(120832, true);
}
+#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("QuantizeStd AVX512BW", "[VectorMeanStd]") {