diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-04-19 20:46:16 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-04-19 20:46:16 +0300 |
commit | be4e5b24fb5e4143420033f663c3910729551f77 (patch) | |
tree | 41a4819f8341f78bb93ad33f45c7b902a8fe7249 | |
parent | 76eb8e3bfb2d3538d3b23233aedf0ae709bef31b (diff) |
Don't catch clang with the gcc hack, move VNNI to a function
-rw-r--r-- | tile/dot.inl | 75 |
1 files changed, 39 insertions, 36 deletions
diff --git a/tile/dot.inl b/tile/dot.inl index 886efef..d9f8046 100644 --- a/tile/dot.inl +++ b/tile/dot.inl @@ -18,6 +18,32 @@ #define INTGEMM_TARGET INTGEMM_SSE2 #endif +namespace intgemm { +namespace INTGEMM_ARCH { + +/* When Register is used as a template argument, gcc warns + * warning: ignoring attributes on template argument ‘Register’ {aka ‘__vector(8) long long int’} [-Wignored-attributes] + * So here is a class that doesn't warn. + */ +class RegisterRowMajorAccess { + public: + typedef Register Content; + + RegisterRowMajorAccess(Content *data, Index cols) + : data_(data), cols_(cols) {} + + RegisterRowMajorAccess Add(Index row, Index col) const { + return RegisterRowMajorAccess(data_ + row * cols_ + col, cols_); + } + + const Content &Front() const { return *data_; } + Content &Front() { return *data_; } + + private: + Content *data_; + Index cols_; +}; + /* gcc _mm512_dpbusds_epi32 is slow because it inserts spurious vmovdqa64 instructions. * Simple test program: * #include <immintrin.h> @@ -53,33 +79,18 @@ * I use: * asm ("vpdpbusds %2, %1, %0" : "+x"(c) : "x"(a), "mx"(b)); * and that works better in the test program. + * + * clang 9.0.1 deals with this fine. */ - -namespace intgemm { -namespace INTGEMM_ARCH { - -/* When Register is used as a template argument, gcc warns - * warning: ignoring attributes on template argument ‘Register’ {aka ‘__vector(8) long long int’} [-Wignored-attributes] - * So here is a class that doesn't warn. - */ -class RegisterRowMajorAccess { - public: - typedef Register Content; - - RegisterRowMajorAccess(Content *data, Index cols) - : data_(data), cols_(cols) {} - - RegisterRowMajorAccess Add(Index row, Index col) const { - return RegisterRowMajorAccess(data_ + row * cols_ + col, cols_); - } - - const Content &Front() const { return *data_; } - Content &Front() { return *data_; } - - private: - Content *data_; - Index cols_; -}; +#ifdef INTGEMM_THIS_IS_AVX512VNNI +INTGEMM_TARGET static inline void VNNI8(Register &c, Register a, Register b) { +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) + asm ("vpdpbusds %2, %1, %0" : "+x"(c) : "x"(a), "mx"(b)); +#else + c = _mm512_dpbusds_epi32(c, a, b); +#endif +} +#endif // 8-bit integer multiplication unsigned A * signed B. #if !defined(INTGEMM_THIS_IS_SSE2) // No int8 on SSE2. @@ -88,11 +99,7 @@ struct Shifted8 { const Register &a = reinterpret_cast<const Register&>(access.AFront()); const Register &b = reinterpret_cast<const Register&>(access.BFront()); #ifdef INTGEMM_THIS_IS_AVX512VNNI -#ifdef __GNUC__ - asm ("vpdpbusds %2, %1, %0" : "+x"(access.CFront()) : "x"(a), "mx"(b)); -#else - access.CFront() = _mm512_dpbusds_epi32(access.CFront(), a, b); -#endif + VNNI8(access.CFront(), a, b); #else const Register ones = set1_epi16<Register>(1); Register mult = maddubs_epi16(a, b); @@ -130,11 +137,7 @@ struct Signed8 { // c += |a| * b_signed #if defined(INTGEMM_THIS_IS_AVX512VNNI) -#ifdef __GNUC__ - asm ("vpdpbusds %2, %1, %0" : "+x"(access.CFront()) : "x"(a_positive), "mx"(b_signed)); -#else - access.CFront() = _mm512_dpbusds_epi32(access.CFront(), a_positive, b_signed); -#endif + VNNI8(access.CFront(), a_positive, b_signed); #else Register mult = maddubs_epi16(a_positive, b_signed); access.CFront() = adds_epi16(access.CFront(), mult); |