Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Nikolay Bogoychev <nheart@gmail.com> 2021-06-24 00:49:54 +0300
committer GitHub <noreply@github.com> 2021-06-24 00:49:54 +0300
commit e4b82c15a368f21903657a2d3fb3259cd0f502c8 (patch)
tree 831a3e08ab24efa58ba608f62f0525e24ae2458b /intgemm/types.h
parent 8abde25b13c3ab210c0dec8e23f4944e3953812d (diff)
parent 6228d016ecc63470d2dbb76bd4ab7b0abe097993 (diff)
Merge branch 'kpu:master' into master
Diffstat (limited to 'intgemm/types.h')
-rw-r--r-- intgemm/types.h 69
1 file changed, 39 insertions, 30 deletions
diff --git a/intgemm/types.h b/intgemm/types.h
index da0429f..81b38af 100644
--- a/intgemm/types.h
+++ b/intgemm/types.h
@@ -1,10 +1,26 @@
#pragma once
#include <exception>
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
#include <immintrin.h>
+#endif
+#include <emmintrin.h>
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
/* MSVC does not appear to have target attributes but is also fine with just
* using intrinsics anywhere.
+ *
+ * The Intel compiler has a bug whereby constructors with target attributes do
+ * not link. Like this program doesn't compile with icpc:
+ * class Foo {
+ * public:
+ * __attribute__ ((target ("avx2"))) Foo() {}
+ * };
+ * int main() { Foo a; }
+ *
+ * It appears to be erroneously activating function multiversioning when only
+ * one version of a constructor with target attributes is defined. Normal
+ * methods with one target attribute work fine. The Intel compiler also allows
+ * intrinsics without any target attributes so we just leave them blank.
*/
#define INTGEMM_SSE2
#define INTGEMM_SSSE3
@@ -14,23 +30,14 @@
#define INTGEMM_AVX512DQ
#define INTGEMM_AVX512VNNI
#else
- /* gcc, clang, and Intel compiler */
+ /* gcc and clang take lists of all the flavors */
#define INTGEMM_SSE2 __attribute__ ((target ("sse2")))
#define INTGEMM_SSSE3 __attribute__ ((target ("ssse3")))
#define INTGEMM_AVX2 __attribute__ ((target ("avx2")))
- #if defined(__INTEL_COMPILER)
- /* Intel compiler might not have AVX512 flavors but lets you use them anyway */
- #define INTGEMM_AVX512F __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512DQ __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512VNNI __attribute__ ((target ("avx512f")))
- #else
- /* gcc and clang take lists of all the flavors */
- #define INTGEMM_AVX512F __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
- #define INTGEMM_AVX512DQ __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
- #define INTGEMM_AVX512VNNI __attribute__ ((target ("avx512f,avx512bw,avx512dq,avx512vnni")))
- #endif
+ #define INTGEMM_AVX512F __attribute__ ((target ("avx512f")))
+ #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
+ #define INTGEMM_AVX512DQ __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
+ #define INTGEMM_AVX512VNNI __attribute__ ((target ("avx512f,avx512bw,avx512dq,avx512vnni")))
#endif
namespace intgemm {
@@ -51,11 +58,11 @@ typedef unsigned int Index;
// If you want to detect the CPU and dispatch yourself, here's what to use:
enum class CPUType {
UNSUPPORTED = 0,
- SSE2,
- SSSE3,
- AVX2,
- AVX512BW,
- AVX512VNNI
+ SSE2 = 1,
+ SSSE3 = 2,
+ AVX2 = 3,
+ AVX512BW = 4,
+ AVX512VNNI = 5
};
// Running CPU type. This is defined in intgemm.cc (as the dispatcher).
@@ -67,28 +74,30 @@ struct MeanStd {
};
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
-namespace avx512vnni {
+namespace AVX512VNNI {
typedef __m512i Register;
typedef __m512 FRegister;
-} // namespace avx512vnni
+} // namespace AVX512VNNI
#endif
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
-namespace avx512bw {
+namespace AVX512BW {
typedef __m512i Register;
typedef __m512 FRegister;
-} // namespace avx512bw
+} // namespace AVX512BW
#endif
-namespace avx2 {
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+namespace AVX2 {
typedef __m256i Register;
typedef __m256 FRegister;
-} // namespace avx2
-namespace ssse3 {
+} // namespace AVX2
+#endif
+namespace SSSE3 {
typedef __m128i Register;
typedef __m128 FRegister;
-} // namespace ssse3
-namespace sse2 {
+} // namespace SSSE3
+namespace SSE2 {
typedef __m128i Register;
typedef __m128 FRegister;
-} // namespace sse2
+} // namespace SSE2
} // namespace intgemm