diff options
author | Nikolay Bogoychev <nheart@gmail.com> | 2021-06-24 00:49:54 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-24 00:49:54 +0300 |
commit | e4b82c15a368f21903657a2d3fb3259cd0f502c8 (patch) | |
tree | 831a3e08ab24efa58ba608f62f0525e24ae2458b /intgemm/types.h | |
parent | 8abde25b13c3ab210c0dec8e23f4944e3953812d (diff) | |
parent | 6228d016ecc63470d2dbb76bd4ab7b0abe097993 (diff) |
Merge branch 'kpu:master' into master
Diffstat (limited to 'intgemm/types.h')
-rw-r--r-- | intgemm/types.h | 69 |
1 files changed, 39 insertions, 30 deletions
diff --git a/intgemm/types.h b/intgemm/types.h
index da0429f..81b38af 100644
--- a/intgemm/types.h
+++ b/intgemm/types.h
@@ -1,10 +1,26 @@
 #pragma once
 #include <exception>
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
 #include <immintrin.h>
+#endif
+#include <emmintrin.h>
 
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
 /* MSVC does not appear to have target attributes but is also fine with just
  * using intrinsics anywhere.
+ *
+ * The Intel compiler has a bug whereby constructors with target attributes do
+ * not link. Like this program doesn't compile with icpc:
+ * class Foo {
+ * public:
+ * __attribute__ ((target ("avx2"))) Foo() {}
+ * };
+ * int main() { Foo a; }
+ *
+ * It appears to be erroneously activating function multiversioning when only
+ * one version of a constructor with target attributes is defined. Normal
+ * methods with one target attribute work fine. The Intel compiler also allows
+ * intrinsics without any target attributes so we just leave them blank.
  */
 #define INTGEMM_SSE2
 #define INTGEMM_SSSE3
@@ -14,23 +30,14 @@
 #define INTGEMM_AVX512DQ
 #define INTGEMM_AVX512VNNI
 #else
- /* gcc, clang, and Intel compiler */
+ /* gcc and clang take lists of all the flavors */
 #define INTGEMM_SSE2 __attribute__ ((target ("sse2")))
 #define INTGEMM_SSSE3 __attribute__ ((target ("ssse3")))
 #define INTGEMM_AVX2 __attribute__ ((target ("avx2")))
- #if defined(__INTEL_COMPILER)
- /* Intel compiler might not have AVX512 flavors but lets you use them anyway */
- #define INTGEMM_AVX512F __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512DQ __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512VNNI __attribute__ ((target ("avx512f")))
- #else
- /* gcc and clang take lists of all the flavors */
- #define INTGEMM_AVX512F __attribute__ ((target ("avx512f")))
- #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
- #define INTGEMM_AVX512DQ __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
- #define INTGEMM_AVX512VNNI __attribute__ ((target ("avx512f,avx512bw,avx512dq,avx512vnni")))
- #endif
+ #define INTGEMM_AVX512F __attribute__ ((target ("avx512f")))
+ #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
+ #define INTGEMM_AVX512DQ __attribute__ ((target ("avx512f,avx512bw,avx512dq")))
+ #define INTGEMM_AVX512VNNI __attribute__ ((target ("avx512f,avx512bw,avx512dq,avx512vnni")))
 #endif
 
 namespace intgemm {
@@ -51,11 +58,11 @@ typedef unsigned int Index;
 // If you want to detect the CPU and dispatch yourself, here's what to use:
 enum class CPUType {
 UNSUPPORTED = 0,
- SSE2,
- SSSE3,
- AVX2,
- AVX512BW,
- AVX512VNNI
+ SSE2 = 1,
+ SSSE3 = 2,
+ AVX2 = 3,
+ AVX512BW = 4,
+ AVX512VNNI = 5
 };
 
 // Running CPU type. This is defined in intgemm.cc (as the dispatcher).
@@ -67,28 +74,30 @@ struct MeanStd {
 };
 
 #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
-namespace avx512vnni {
+namespace AVX512VNNI {
 typedef __m512i Register;
 typedef __m512 FRegister;
-} // namespace avx512vnni
+} // namespace AVX512VNNI
 #endif
 #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
-namespace avx512bw {
+namespace AVX512BW {
 typedef __m512i Register;
 typedef __m512 FRegister;
-} // namespace avx512bw
+} // namespace AVX512BW
 #endif
-namespace avx2 {
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+namespace AVX2 {
 typedef __m256i Register;
 typedef __m256 FRegister;
-} // namespace avx2
-namespace ssse3 {
+} // namespace AVX2
+#endif
+namespace SSSE3 {
 typedef __m128i Register;
 typedef __m128 FRegister;
-} // namespace ssse3
-namespace sse2 {
+} // namespace SSSE3
+namespace SSE2 {
 typedef __m128i Register;
 typedef __m128 FRegister;
-} // namespace sse2
+} // namespace SSE2
 
 } // namespace intgemm |