From 65276ad59ab9cd5b2bc623c2411f481f79aa7c5c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 4 Jan 2021 20:36:22 +0000 Subject: Fix compilation on icc 19.1.0.20191121 --- intgemm/avx512_gemm.h | 2 +- intgemm/avx512vnni_gemm.h | 6 ++--- intgemm/callbacks/implementations.inl | 47 ++++++++++++++++++++--------------- intgemm/multiply.h | 6 ++--- test/multiply_test.cc | 10 ++++++-- 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/intgemm/avx512_gemm.h b/intgemm/avx512_gemm.h index a69b2dc..90f67ee 100644 --- a/intgemm/avx512_gemm.h +++ b/intgemm/avx512_gemm.h @@ -391,7 +391,7 @@ struct Kernels8 { Register pack4567 = Pack0123(sum4, sum5, sum6, sum7); auto total = PermuteSummer(pack0123, pack4567); - callback_impl(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols)); + callback_impl.Run(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols)); } } } diff --git a/intgemm/avx512vnni_gemm.h b/intgemm/avx512vnni_gemm.h index 747bdf9..28e8c14 100644 --- a/intgemm/avx512vnni_gemm.h +++ b/intgemm/avx512vnni_gemm.h @@ -75,7 +75,7 @@ struct Kernels8 : public AVX512BW::Kernels8 { Register pack0123 = Pack0123(sum0, sum1, sum2, sum3); Register pack4567 = Pack0123(sum4, sum5, sum6, sum7); auto total = PermuteSummer(pack0123, pack4567); - callback_impl(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols)); + callback_impl.Run(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols)); } } } @@ -116,7 +116,7 @@ struct Kernels8 : public AVX512BW::Kernels8 { Register pack0123 = Pack0123(sum0, sum1, sum2, sum3); Register pack4567 = Pack0123(sum4, sum5, sum6, sum7); auto total = PermuteSummer(pack0123, pack4567); - callback_impl(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols)); + callback_impl.Run(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols)); } } } @@ -153,7 +153,7 @@ struct Kernels8 : public AVX512BW::Kernels8 { Register pack0123 = Pack0123(sum0, sum1, sum2, sum3); Register pack4567 = Pack0123(sum4, sum5, sum6, sum7); auto total = PermuteSummer(pack0123, pack4567); - callback_impl(total, callbacks::OutputBufferInfo(0, B0_colidx, 1, B_cols)); + callback_impl.Run(total, callbacks::OutputBufferInfo(0, B0_colidx, 1, B_cols)); } } diff --git a/intgemm/callbacks/implementations.inl b/intgemm/callbacks/implementations.inl index 47d2aa4..9a8f9e1 100644 --- a/intgemm/callbacks/implementations.inl +++ b/intgemm/callbacks/implementations.inl @@ -1,13 +1,13 @@ /* This file is included multiple times, once per architecture. */ #if defined(CALLBACKS_THIS_IS_SSE2) #define CPU_NAME SSE2 - #define CPU_ATTR INTGEMM_SSE2 + #define INTGEMM_TARGET INTGEMM_SSE2 #elif defined(CALLBACKS_THIS_IS_AVX2) #define CPU_NAME AVX2 - #define CPU_ATTR INTGEMM_AVX2 + #define INTGEMM_TARGET INTGEMM_AVX2 #elif defined(CALLBACKS_THIS_IS_AVX512BW) #define CPU_NAME AVX512BW - #define CPU_ATTR INTGEMM_AVX512BW + #define INTGEMM_TARGET INTGEMM_AVX512BW #else #error "Only SSE2, AVX2 and AVX512BW are supported" #endif @@ -22,6 +22,13 @@ #define vd vector_t #endif +/* Intel compiler 19.1.0.166 20191121 fails to link constructors with target attributes */ +#ifdef __INTEL_COMPILER +#define INTGEMM_TARGET_CONSTRUCTOR +#else +#define INTGEMM_TARGET_CONSTRUCTOR INTGEMM_TARGET +#endif + namespace intgemm { namespace callbacks { @@ -42,9 +49,9 @@ namespace callbacks { template class CallbackImpl> { public: - CPU_ATTR CallbackImpl(const std::tuple& configs) : callbacks(init_callbacks(configs, make_sequence())) {} + explicit CallbackImpl(const std::tuple& configs) : callbacks(init_callbacks(configs, make_sequence())) {} - CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) { + INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) { run_callbacks(input, info, callbacks, make_sequence()); } @@ -60,11 +67,11 @@ private: #define RUN_CALLBACKS_PIPELINE_IMPL(vtype) \ template \ - CPU_ATTR static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence) { \ + INTGEMM_TARGET static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence) { \ std::get(tuple)(input, info); \ } \ template \ - CPU_ATTR static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence) { \ + INTGEMM_TARGET static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence) { \ auto output = std::get(tuple)(input, info); \ run_callbacks(output, info, tuple, sequence()); \ } @@ -81,8 +88,8 @@ private: */ template <> class CallbackImpl { public: - CPU_ATTR CallbackImpl(const Dummy&) {} - CPU_ATTR void operator()(vi, const OutputBufferInfo&) {} + explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const Dummy&) {} + INTGEMM_TARGET void Run(vi, const OutputBufferInfo&) {} }; /* @@ -91,9 +98,9 @@ public: template class CallbackImpl> { public: - CPU_ATTR CallbackImpl(const Write& config) : config(config) {} + explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const Write& config) : config(config) {} - CPU_ATTR void operator()(vector_t input, const OutputBufferInfo& info) { + INTGEMM_TARGET void Run(vector_t input, const OutputBufferInfo& info) { kernels::write(input, config.output_addr, info.row_idx * info.cols + info.col_idx); } @@ -106,11 +113,11 @@ private: */ template <> class CallbackImpl { public: - CPU_ATTR CallbackImpl(const Unquantize& config) : config(config) { + explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const Unquantize& config) : config(config) { unquant_mult = set1_ps(config.unquant_mult); } - CPU_ATTR vf operator()(vi input, const OutputBufferInfo&) { + INTGEMM_TARGET vf Run(vi input, const OutputBufferInfo&) { return kernels::unquantize(input, unquant_mult); } @@ -124,11 +131,11 @@ private: */ template <> class CallbackImpl { public: - CPU_ATTR CallbackImpl(const UnquantizeAndWrite& config) : config(config) { + explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const UnquantizeAndWrite& config) : config(config) { unquant_mult = set1_ps(config.unquant_mult); } - CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) { + INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) { // Workaround gcc 5 internal compiler error that can't read register members in debug. vf mult_reg; #if !defined(__OPTIMIZE__) && (__GNUC__ == 5) && !defined(__clang__) && !defined(__INTEL_COMPILER) @@ -150,9 +157,9 @@ private: */ template <> class CallbackImpl { public: - CPU_ATTR CallbackImpl(const AddBiasAndWrite& config) : config(config) {} + explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const AddBiasAndWrite& config) : config(config) {} - CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) { + INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) { auto result = kernels::add_bias(input, config.bias_addr, info.col_idx); kernels::write(result, config.output_addr, info.row_idx * info.cols + info.col_idx); } @@ -166,11 +173,11 @@ private: */ template <> class CallbackImpl { public: - CPU_ATTR CallbackImpl(const UnquantizeAndAddBiasAndWrite& config) : config(config) { + explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const UnquantizeAndAddBiasAndWrite& config) : config(config) { unquant_mult = set1_ps(config.unquant_mult); } - CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) { + INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) { // Workaround gcc 5 internal compiler error that can't read register members in debug. vf mult_reg; #if !defined(__OPTIMIZE__) && (__GNUC__ == 5) && !defined(__clang__) && !defined(__INTEL_COMPILER) @@ -191,7 +198,7 @@ private: } #undef CPU_NAME -#undef CPU_ATTR +#undef INTGEMM_TARGET #undef vi #undef vf #undef vd diff --git a/intgemm/multiply.h b/intgemm/multiply.h index 84c0655..8d411f3 100644 --- a/intgemm/multiply.h +++ b/intgemm/multiply.h @@ -110,14 +110,14 @@ INTGEMM_PACK0123(INTGEMM_AVX512BW, __m512i) template INTGEMM_SSE2 static inline void RunCallback(Callback& callback_impl, dvector_t total, Index row_idx, Index col_idx, Index rows, Index cols) { - callback_impl(total.first, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols)); - callback_impl(total.second, callbacks::OutputBufferInfo(row_idx, col_idx + 4, rows, cols)); + callback_impl.Run(total.first, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols)); + callback_impl.Run(total.second, callbacks::OutputBufferInfo(row_idx, col_idx + 4, rows, cols)); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX2 template INTGEMM_AVX2 static inline void RunCallback(Callback& callback_impl, vector_t total, Index row_idx, Index col_idx, Index rows, Index cols) { - callback_impl(total, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols)); + callback_impl.Run(total, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols)); } #endif diff --git a/test/multiply_test.cc b/test/multiply_test.cc index 5395d40..186b0f9 100644 --- a/test/multiply_test.cc +++ b/test/multiply_test.cc @@ -20,7 +20,10 @@ namespace intgemm { -INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") { +#ifndef __INTEL_COMPILER +INTGEMM_SSE2 +#endif +TEST_CASE("Transpose 16", "[transpose]") { if (kCPU < CPUType::SSE2) return; const unsigned N = 8; AlignedVector input(N * N); @@ -38,7 +41,10 @@ INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") { } } -INTGEMM_SSSE3 TEST_CASE("Transpose 8", "[transpose]") { +#ifndef __INTEL_COMPILER +INTGEMM_SSSE3 +#endif +TEST_CASE("Transpose 8", "[transpose]") { if (kCPU < CPUType::SSSE3) return; const unsigned N = 16; AlignedVector input(N * N); -- cgit v1.2.3