Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2021-01-04 23:36:22 +0300
committer: Kenneth Heafield <github@kheafield.com> 2021-01-04 23:36:22 +0300
commit: 65276ad59ab9cd5b2bc623c2411f481f79aa7c5c (patch)
tree: dee1cdb23b97d60f15d55ba52b933987834ef838
parent: 1318506945c0dfc0af8f24be15be31323140c40a (diff)
Fix compilation on icc 19.1.0.20191121
-rw-r--r-- intgemm/avx512_gemm.h 2
-rw-r--r-- intgemm/avx512vnni_gemm.h 6
-rw-r--r-- intgemm/callbacks/implementations.inl 47
-rw-r--r-- intgemm/multiply.h 6
-rw-r--r-- test/multiply_test.cc 10
5 files changed, 42 insertions, 29 deletions
diff --git a/intgemm/avx512_gemm.h b/intgemm/avx512_gemm.h
index a69b2dc..90f67ee 100644
--- a/intgemm/avx512_gemm.h
+++ b/intgemm/avx512_gemm.h
@@ -391,7 +391,7 @@ struct Kernels8 {
Register pack4567 = Pack0123(sum4, sum5, sum6, sum7);
auto total = PermuteSummer(pack0123, pack4567);
- callback_impl(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols));
+ callback_impl.Run(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols));
}
}
}
diff --git a/intgemm/avx512vnni_gemm.h b/intgemm/avx512vnni_gemm.h
index 747bdf9..28e8c14 100644
--- a/intgemm/avx512vnni_gemm.h
+++ b/intgemm/avx512vnni_gemm.h
@@ -75,7 +75,7 @@ struct Kernels8 : public AVX512BW::Kernels8 {
Register pack0123 = Pack0123(sum0, sum1, sum2, sum3);
Register pack4567 = Pack0123(sum4, sum5, sum6, sum7);
auto total = PermuteSummer(pack0123, pack4567);
- callback_impl(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols));
+ callback_impl.Run(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols));
}
}
}
@@ -116,7 +116,7 @@ struct Kernels8 : public AVX512BW::Kernels8 {
Register pack0123 = Pack0123(sum0, sum1, sum2, sum3);
Register pack4567 = Pack0123(sum4, sum5, sum6, sum7);
auto total = PermuteSummer(pack0123, pack4567);
- callback_impl(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols));
+ callback_impl.Run(total, callbacks::OutputBufferInfo(A_rowidx, B0_colidx, A_rows, B_cols));
}
}
}
@@ -153,7 +153,7 @@ struct Kernels8 : public AVX512BW::Kernels8 {
Register pack0123 = Pack0123(sum0, sum1, sum2, sum3);
Register pack4567 = Pack0123(sum4, sum5, sum6, sum7);
auto total = PermuteSummer(pack0123, pack4567);
- callback_impl(total, callbacks::OutputBufferInfo(0, B0_colidx, 1, B_cols));
+ callback_impl.Run(total, callbacks::OutputBufferInfo(0, B0_colidx, 1, B_cols));
}
}
diff --git a/intgemm/callbacks/implementations.inl b/intgemm/callbacks/implementations.inl
index 47d2aa4..9a8f9e1 100644
--- a/intgemm/callbacks/implementations.inl
+++ b/intgemm/callbacks/implementations.inl
@@ -1,13 +1,13 @@
/* This file is included multiple times, once per architecture. */
#if defined(CALLBACKS_THIS_IS_SSE2)
#define CPU_NAME SSE2
- #define CPU_ATTR INTGEMM_SSE2
+ #define INTGEMM_TARGET INTGEMM_SSE2
#elif defined(CALLBACKS_THIS_IS_AVX2)
#define CPU_NAME AVX2
- #define CPU_ATTR INTGEMM_AVX2
+ #define INTGEMM_TARGET INTGEMM_AVX2
#elif defined(CALLBACKS_THIS_IS_AVX512BW)
#define CPU_NAME AVX512BW
- #define CPU_ATTR INTGEMM_AVX512BW
+ #define INTGEMM_TARGET INTGEMM_AVX512BW
#else
#error "Only SSE2, AVX2 and AVX512BW are supported"
#endif
@@ -22,6 +22,13 @@
#define vd vector_t<CPUType::AVX2, double>
#endif
+/* Intel compiler 19.1.0.166 20191121 fails to link constructors with target attributes */
+#ifdef __INTEL_COMPILER
+#define INTGEMM_TARGET_CONSTRUCTOR
+#else
+#define INTGEMM_TARGET_CONSTRUCTOR INTGEMM_TARGET
+#endif
+
namespace intgemm {
namespace callbacks {
@@ -42,9 +49,9 @@ namespace callbacks {
template <typename... Configs>
class CallbackImpl<CPUType::CPU_NAME, std::tuple<Configs...>> {
public:
- CPU_ATTR CallbackImpl(const std::tuple<Configs...>& configs) : callbacks(init_callbacks(configs, make_sequence<sizeof...(Configs)>())) {}
+ explicit CallbackImpl(const std::tuple<Configs...>& configs) : callbacks(init_callbacks(configs, make_sequence<sizeof...(Configs)>())) {}
- CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) {
+ INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) {
run_callbacks(input, info, callbacks, make_sequence<sizeof...(Configs)>());
}
@@ -60,11 +67,11 @@ private:
#define RUN_CALLBACKS_PIPELINE_IMPL(vtype) \
template <unsigned FirstIndex> \
- CPU_ATTR static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence<FirstIndex>) { \
+ INTGEMM_TARGET static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence<FirstIndex>) { \
std::get<FirstIndex>(tuple)(input, info); \
} \
template <unsigned FirstIndex, unsigned SecondIndex, unsigned... RestIndices> \
- CPU_ATTR static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence<FirstIndex, SecondIndex, RestIndices...>) { \
+ INTGEMM_TARGET static inline void run_callbacks(vtype input, const OutputBufferInfo& info, CallbacksTupleType& tuple, sequence<FirstIndex, SecondIndex, RestIndices...>) { \
auto output = std::get<FirstIndex>(tuple)(input, info); \
run_callbacks(output, info, tuple, sequence<SecondIndex, RestIndices...>()); \
}
@@ -81,8 +88,8 @@ private:
*/
template <> class CallbackImpl<CPUType::CPU_NAME, Dummy> {
public:
- CPU_ATTR CallbackImpl(const Dummy&) {}
- CPU_ATTR void operator()(vi, const OutputBufferInfo&) {}
+ explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const Dummy&) {}
+ INTGEMM_TARGET void Run(vi, const OutputBufferInfo&) {}
};
/*
@@ -91,9 +98,9 @@ public:
template <typename Type>
class CallbackImpl<CPUType::CPU_NAME, Write<Type>> {
public:
- CPU_ATTR CallbackImpl(const Write<Type>& config) : config(config) {}
+ explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const Write<Type>& config) : config(config) {}
- CPU_ATTR void operator()(vector_t<CPUType::CPU_NAME, Type> input, const OutputBufferInfo& info) {
+ INTGEMM_TARGET void Run(vector_t<CPUType::CPU_NAME, Type> input, const OutputBufferInfo& info) {
kernels::write(input, config.output_addr, info.row_idx * info.cols + info.col_idx);
}
@@ -106,11 +113,11 @@ private:
*/
template <> class CallbackImpl<CPUType::CPU_NAME, Unquantize> {
public:
- CPU_ATTR CallbackImpl(const Unquantize& config) : config(config) {
+ explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const Unquantize& config) : config(config) {
unquant_mult = set1_ps<vf>(config.unquant_mult);
}
- CPU_ATTR vf operator()(vi input, const OutputBufferInfo&) {
+ INTGEMM_TARGET vf Run(vi input, const OutputBufferInfo&) {
return kernels::unquantize(input, unquant_mult);
}
@@ -124,11 +131,11 @@ private:
*/
template <> class CallbackImpl<CPUType::CPU_NAME, UnquantizeAndWrite> {
public:
- CPU_ATTR CallbackImpl(const UnquantizeAndWrite& config) : config(config) {
+ explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const UnquantizeAndWrite& config) : config(config) {
unquant_mult = set1_ps<vf>(config.unquant_mult);
}
- CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) {
+ INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) {
// Workaround gcc 5 internal compiler error that can't read register members in debug.
vf mult_reg;
#if !defined(__OPTIMIZE__) && (__GNUC__ == 5) && !defined(__clang__) && !defined(__INTEL_COMPILER)
@@ -150,9 +157,9 @@ private:
*/
template <> class CallbackImpl<CPUType::CPU_NAME, AddBiasAndWrite> {
public:
- CPU_ATTR CallbackImpl(const AddBiasAndWrite& config) : config(config) {}
+ explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const AddBiasAndWrite& config) : config(config) {}
- CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) {
+ INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) {
auto result = kernels::add_bias(input, config.bias_addr, info.col_idx);
kernels::write(result, config.output_addr, info.row_idx * info.cols + info.col_idx);
}
@@ -166,11 +173,11 @@ private:
*/
template <> class CallbackImpl<CPUType::CPU_NAME, UnquantizeAndAddBiasAndWrite> {
public:
- CPU_ATTR CallbackImpl(const UnquantizeAndAddBiasAndWrite& config) : config(config) {
+ explicit INTGEMM_TARGET_CONSTRUCTOR CallbackImpl(const UnquantizeAndAddBiasAndWrite& config) : config(config) {
unquant_mult = set1_ps<vf>(config.unquant_mult);
}
- CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) {
+ INTGEMM_TARGET void Run(vi input, const OutputBufferInfo& info) {
// Workaround gcc 5 internal compiler error that can't read register members in debug.
vf mult_reg;
#if !defined(__OPTIMIZE__) && (__GNUC__ == 5) && !defined(__clang__) && !defined(__INTEL_COMPILER)
@@ -191,7 +198,7 @@ private:
}
#undef CPU_NAME
-#undef CPU_ATTR
+#undef INTGEMM_TARGET
#undef vi
#undef vf
#undef vd
diff --git a/intgemm/multiply.h b/intgemm/multiply.h
index 84c0655..8d411f3 100644
--- a/intgemm/multiply.h
+++ b/intgemm/multiply.h
@@ -110,14 +110,14 @@ INTGEMM_PACK0123(INTGEMM_AVX512BW, __m512i)
template <typename Callback>
INTGEMM_SSE2 static inline void RunCallback(Callback& callback_impl, dvector_t<CPUType::SSE2, int> total, Index row_idx, Index col_idx, Index rows, Index cols) {
- callback_impl(total.first, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols));
- callback_impl(total.second, callbacks::OutputBufferInfo(row_idx, col_idx + 4, rows, cols));
+ callback_impl.Run(total.first, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols));
+ callback_impl.Run(total.second, callbacks::OutputBufferInfo(row_idx, col_idx + 4, rows, cols));
}
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
template <typename Callback>
INTGEMM_AVX2 static inline void RunCallback(Callback& callback_impl, vector_t<CPUType::AVX2, int> total, Index row_idx, Index col_idx, Index rows, Index cols) {
- callback_impl(total, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols));
+ callback_impl.Run(total, callbacks::OutputBufferInfo(row_idx, col_idx, rows, cols));
}
#endif
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index 5395d40..186b0f9 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -20,7 +20,10 @@
namespace intgemm {
-INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") {
+#ifndef __INTEL_COMPILER
+INTGEMM_SSE2
+#endif
+TEST_CASE("Transpose 16", "[transpose]") {
if (kCPU < CPUType::SSE2) return;
const unsigned N = 8;
AlignedVector<int16_t> input(N * N);
@@ -38,7 +41,10 @@ INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") {
}
}
-INTGEMM_SSSE3 TEST_CASE("Transpose 8", "[transpose]") {
+#ifndef __INTEL_COMPILER
+INTGEMM_SSSE3
+#endif
+TEST_CASE("Transpose 8", "[transpose]") {
if (kCPU < CPUType::SSSE3) return;
const unsigned N = 16;
AlignedVector<int8_t> input(N * N);