Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2020-04-19 20:46:16 +0300
committerKenneth Heafield <github@kheafield.com>2020-04-19 20:46:16 +0300
commitbe4e5b24fb5e4143420033f663c3910729551f77 (patch)
tree41a4819f8341f78bb93ad33f45c7b902a8fe7249
parent76eb8e3bfb2d3538d3b23233aedf0ae709bef31b (diff)
Don't catch clang with the gcc hack, move VNNI to a function
-rw-r--r--tile/dot.inl75
1 files changed, 39 insertions, 36 deletions
diff --git a/tile/dot.inl b/tile/dot.inl
index 886efef..d9f8046 100644
--- a/tile/dot.inl
+++ b/tile/dot.inl
@@ -18,6 +18,32 @@
#define INTGEMM_TARGET INTGEMM_SSE2
#endif
+namespace intgemm {
+namespace INTGEMM_ARCH {
+
+/* When Register is used as a template argument, gcc warns
+ * warning: ignoring attributes on template argument ‘Register’ {aka ‘__vector(8) long long int’} [-Wignored-attributes]
+ * So here is a class that doesn't warn.
+ */
+class RegisterRowMajorAccess {
+ public:
+ typedef Register Content;
+
+ RegisterRowMajorAccess(Content *data, Index cols)
+ : data_(data), cols_(cols) {}
+
+ RegisterRowMajorAccess Add(Index row, Index col) const {
+ return RegisterRowMajorAccess(data_ + row * cols_ + col, cols_);
+ }
+
+ const Content &Front() const { return *data_; }
+ Content &Front() { return *data_; }
+
+ private:
+ Content *data_;
+ Index cols_;
+};
+
/* gcc _mm512_dpbusds_epi32 is slow because it inserts spurious vmovdqa64 instructions.
* Simple test program:
* #include <immintrin.h>
@@ -53,33 +79,18 @@
* I use:
* asm ("vpdpbusds %2, %1, %0" : "+x"(c) : "x"(a), "mx"(b));
* and that works better in the test program.
+ *
+ * clang 9.0.1 deals with this fine.
*/
-
-namespace intgemm {
-namespace INTGEMM_ARCH {
-
-/* When Register is used as a template argument, gcc warns
- * warning: ignoring attributes on template argument ‘Register’ {aka ‘__vector(8) long long int’} [-Wignored-attributes]
- * So here is a class that doesn't warn.
- */
-class RegisterRowMajorAccess {
- public:
- typedef Register Content;
-
- RegisterRowMajorAccess(Content *data, Index cols)
- : data_(data), cols_(cols) {}
-
- RegisterRowMajorAccess Add(Index row, Index col) const {
- return RegisterRowMajorAccess(data_ + row * cols_ + col, cols_);
- }
-
- const Content &Front() const { return *data_; }
- Content &Front() { return *data_; }
-
- private:
- Content *data_;
- Index cols_;
-};
+#ifdef INTGEMM_THIS_IS_AVX512VNNI
+INTGEMM_TARGET static inline void VNNI8(Register &c, Register a, Register b) {
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+ asm ("vpdpbusds %2, %1, %0" : "+x"(c) : "x"(a), "mx"(b));
+#else
+ c = _mm512_dpbusds_epi32(c, a, b);
+#endif
+}
+#endif
// 8-bit integer multiplication unsigned A * signed B.
#if !defined(INTGEMM_THIS_IS_SSE2) // No int8 on SSE2.
@@ -88,11 +99,7 @@ struct Shifted8 {
const Register &a = reinterpret_cast<const Register&>(access.AFront());
const Register &b = reinterpret_cast<const Register&>(access.BFront());
#ifdef INTGEMM_THIS_IS_AVX512VNNI
-#ifdef __GNUC__
- asm ("vpdpbusds %2, %1, %0" : "+x"(access.CFront()) : "x"(a), "mx"(b));
-#else
- access.CFront() = _mm512_dpbusds_epi32(access.CFront(), a, b);
-#endif
+ VNNI8(access.CFront(), a, b);
#else
const Register ones = set1_epi16<Register>(1);
Register mult = maddubs_epi16(a, b);
@@ -130,11 +137,7 @@ struct Signed8 {
// c += |a| * b_signed
#if defined(INTGEMM_THIS_IS_AVX512VNNI)
-#ifdef __GNUC__
- asm ("vpdpbusds %2, %1, %0" : "+x"(access.CFront()) : "x"(a_positive), "mx"(b_signed));
-#else
- access.CFront() = _mm512_dpbusds_epi32(access.CFront(), a_positive, b_signed);
-#endif
+ VNNI8(access.CFront(), a_positive, b_signed);
#else
Register mult = maddubs_epi16(a_positive, b_signed);
access.CFront() = adds_epi16(access.CFront(), mult);