diff options
author | Kenneth Heafield <kheafiel@amazon.com> | 2020-09-16 00:39:10 +0300 |
---|---|---|
committer | Kenneth Heafield <kheafiel@amazon.com> | 2020-09-16 00:39:10 +0300 |
commit | 4172dcc209e6793dd920dec9cf9c9fc81605bd9d (patch) | |
tree | 5c21bf21e2c14f3c93338abc3cf4b070c49994f8 | |
parent | cf2ef83dde1743171ab7baba810ae33e50b56577 (diff) |
Work around gcc 5.5 bug with asm copies
-rw-r--r-- | intgemm/callbacks/implementations.inl | 18 | ||||
-rw-r--r-- | test/kernels/upcast_test.cc | 4 |
2 files changed, 20 insertions, 2 deletions
diff --git a/intgemm/callbacks/implementations.inl b/intgemm/callbacks/implementations.inl index d2b7d95..47d2aa4 100644 --- a/intgemm/callbacks/implementations.inl +++ b/intgemm/callbacks/implementations.inl @@ -129,7 +129,14 @@ public: } CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) { - auto result = kernels::unquantize(input, unquant_mult); + // Workaround gcc 5 internal compiler error that can't read register members in debug. + vf mult_reg; +#if !defined(__OPTIMIZE__) && (__GNUC__ == 5) && !defined(__clang__) && !defined(__INTEL_COMPILER) + asm ("vmovdqa %1, %0" : "=x" (mult_reg) : "m" (unquant_mult)); +#else + mult_reg = unquant_mult; +#endif + auto result = kernels::unquantize(input, mult_reg); kernels::write(result, config.output_addr, info.row_idx * info.cols + info.col_idx); } @@ -164,7 +171,14 @@ public: } CPU_ATTR void operator()(vi input, const OutputBufferInfo& info) { - auto result = kernels::unquantize(input, unquant_mult); + // Workaround gcc 5 internal compiler error that can't read register members in debug. + vf mult_reg; +#if !defined(__OPTIMIZE__) && (__GNUC__ == 5) && !defined(__clang__) && !defined(__INTEL_COMPILER) + asm ("vmovdqa %1, %0" : "=x" (mult_reg) : "m" (unquant_mult)); +#else + mult_reg = unquant_mult; +#endif + auto result = kernels::unquantize(input, mult_reg); result = kernels::add_bias(result, config.bias_addr, info.col_idx); kernels::write(result, config.output_addr, info.row_idx * info.cols + info.col_idx); } diff --git a/test/kernels/upcast_test.cc b/test/kernels/upcast_test.cc index 55171eb..92be1bd 100644 --- a/test/kernels/upcast_test.cc +++ b/test/kernels/upcast_test.cc @@ -1,3 +1,5 @@ +// This test triggers an internal compiler error in gcc 5. 
+#if defined(__OPTIMIZE__) || defined(__clang__) || defined(__INTEL_COMPILER) || !defined(__GNUC__) || (__GNUC__ != 5) #include "../test.h" #include "../../intgemm/aligned.h" #include "../../intgemm/kernels.h" @@ -71,6 +73,7 @@ template INTGEMM_AVX512BW void kernel_upcast16to32_test<CPUType::AVX512BW>(); KERNEL_TEST_CASE("upcast16to32 AVX512BW") { return kernel_upcast16to32_test<CPUType::AVX512BW>(); } #endif + template <CPUType CPUType_> void kernel_upcast8to32_test() { if (kCPU < CPUType_) @@ -106,3 +109,4 @@ KERNEL_TEST_CASE("upcast8to32 AVX512BW") { return kernel_upcast8to32_test<CPUTyp #endif } +#endif |