diff options
author | Nikolay Bogoychev <nheart@gmail.com> | 2022-02-15 01:05:40 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-15 01:05:40 +0300 |
commit | a05a2e51ab524bcee954a39ee72005193f3adf7c (patch) | |
tree | faf4a2d6208b60899e3db1b832d8fbd3f3ce7a31 | |
parent | e4b82c15a368f21903657a2d3fb3259cd0f502c8 (diff) | |
parent | fc3a614351ce6e667197307d97f45db5265c96af (diff) |
Merge branch 'kpu:master' into master
-rw-r--r-- | .github/workflows/ubuntu-no-cpuid-environment.yml | 25 | ||||
-rw-r--r-- | .github/workflows/ubuntu-noexceptions.yml | 25 | ||||
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | compile_test/avx2.cc | 10 | ||||
-rw-r--r-- | compile_test/avx512bw.cc | 13 | ||||
-rw-r--r-- | compile_test/avx512vnni.cc | 14 | ||||
-rw-r--r-- | intgemm/aligned.h | 38 | ||||
-rw-r--r-- | intgemm/intgemm.cc | 46 | ||||
-rw-r--r-- | intgemm/intgemm.h | 46 | ||||
-rw-r--r-- | intgemm/types.h | 21 |
10 files changed, 191 insertions, 52 deletions
diff --git a/.github/workflows/ubuntu-no-cpuid-environment.yml b/.github/workflows/ubuntu-no-cpuid-environment.yml new file mode 100644 index 0000000..dc1862f --- /dev/null +++ b/.github/workflows/ubuntu-no-cpuid-environment.yml @@ -0,0 +1,25 @@ +name: Ubuntu No CPUID Environment Variable + +on: + push: + branches: [master, static] + pull_request: + branches: [master, static] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: cmake + run: | + cmake -E make_directory build + cd build + cmake -DINTGEMM_CPUID_ENVIRONMENT=OFF .. + - name: Compile + working-directory: build + run: cmake --build . -j2 + - name: Test + working-directory: build + run: ctest -j2 diff --git a/.github/workflows/ubuntu-noexceptions.yml b/.github/workflows/ubuntu-noexceptions.yml new file mode 100644 index 0000000..371df05 --- /dev/null +++ b/.github/workflows/ubuntu-noexceptions.yml @@ -0,0 +1,25 @@ +name: Ubuntu no exceptions + +on: + push: + branches: [master, static] + pull_request: + branches: [master, static] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: cmake + run: | + cmake -E make_directory build + cd build + cmake -DCMAKE_CXX_FLAGS=-fno-exceptions .. + - name: Compile + working-directory: build + run: cmake --build . -j2 + - name: Test + working-directory: build + run: ctest -j2 diff --git a/CMakeLists.txt b/CMakeLists.txt index af27542..c9f78fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,11 @@ if (WORMHOLE) target_compile_definitions(intgemm PUBLIC INTGEMM_WORMHOLE) endif() +option(INTGEMM_CPUID_ENVIRONMENT "Allow INTGEMM_CPUID environment variable to downgrade CPU model, which is mainly for testing." ON) +if (INTGEMM_CPUID_ENVIRONMENT) + target_compile_definitions(intgemm PRIVATE INTGEMM_CPUID_ENVIRONMENT) +endif() + if(INTGEMM_DONT_BUILD_TESTS) return() endif() diff --git a/compile_test/avx2.cc b/compile_test/avx2.cc index 8460fc0..9ed534e 100644 --- a/compile_test/avx2.cc +++ b/compile_test/avx2.cc @@ -1,7 +1,15 @@ // Some compilers don't have AVX2 support. Test for them. #include <immintrin.h> -#if defined(_MSC_VER) +// clang-cl bug doesn't include these headers when pretending to be MSVC +// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72 +#if defined(_MSC_VER) && defined(__clang__) +#include <avxintrin.h> +#include <avx2intrin.h> +#include <smmintrin.h> +#endif + +#if defined(_MSC_VER) && !defined(__clang__) #define INTGEMM_AVX2 #else #define INTGEMM_AVX2 __attribute__ ((target ("avx2"))) diff --git a/compile_test/avx512bw.cc b/compile_test/avx512bw.cc index 2cd4c6a..2361f75 100644 --- a/compile_test/avx512bw.cc +++ b/compile_test/avx512bw.cc @@ -1,7 +1,18 @@ // Some compilers don't have AVX512BW support. Test for them. #include <immintrin.h> -#if defined(_MSC_VER) +// clang-cl bug doesn't include these headers when pretending to be MSVC +// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72 +#if defined(_MSC_VER) && defined(__clang__) +#include <avxintrin.h> +#include <avx2intrin.h> +#include <smmintrin.h> +#include <avx512fintrin.h> +#include <avx512dqintrin.h> +#include <avx512bwintrin.h> +#endif + +#if defined(_MSC_VER) && !defined(__clang__) #define INTGEMM_AVX512BW #elif defined(__INTEL_COMPILER) #define INTGEMM_AVX512BW __attribute__ ((target ("avx512f"))) diff --git a/compile_test/avx512vnni.cc b/compile_test/avx512vnni.cc index 1485cde..59035e4 100644 --- a/compile_test/avx512vnni.cc +++ b/compile_test/avx512vnni.cc @@ -1,6 +1,18 @@ #include <immintrin.h> -#if defined(_MSC_VER) +// clang-cl bug doesn't include these headers when pretending to be MSVC +// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72 +#if defined(_MSC_VER) && defined(__clang__) +#include <avxintrin.h> +#include <avx2intrin.h> +#include <smmintrin.h> +#include <avx512fintrin.h> +#include <avx512dqintrin.h> +#include <avx512bwintrin.h> +#include <avx512vnniintrin.h> +#endif + +#if defined(_MSC_VER) && !defined(__clang__) #elif defined(__INTEL_COMPILER) __attribute__ ((target ("avx512f"))) #else diff --git a/intgemm/aligned.h b/intgemm/aligned.h index 6fda369..6b55ff2 100644 --- a/intgemm/aligned.h +++ b/intgemm/aligned.h @@ -2,9 +2,15 @@ #include <cstdlib> #include <new> #ifdef _MSC_VER +// Ensure _HAS_EXCEPTIONS is defined +#include <vcruntime.h> #include <malloc.h> #endif +#if !((defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS)) +#include <cstdlib> +#endif + // Aligned simple vector. namespace intgemm { @@ -17,10 +23,20 @@ template <class T> class AlignedVector { : size_(size) { #ifdef _MSC_VER mem_ = static_cast<T*>(_aligned_malloc(size * sizeof(T), alignment)); - if (!mem_) throw std::bad_alloc(); -#else + if (!mem_) { +# if (defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS) + throw std::bad_alloc(); +# else + std::abort(); +# endif + } +#else if (posix_memalign(reinterpret_cast<void **>(&mem_), alignment, size * sizeof(T))) { +# if (defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS) throw std::bad_alloc(); +# else + std::abort(); +# endif } #endif } @@ -31,6 +47,8 @@ template <class T> class AlignedVector { } AlignedVector &operator=(AlignedVector &&from) { + if (this == &from) return *this; + release(); mem_ = from.mem_; size_ = from.size_; from.mem_ = nullptr; @@ -41,13 +59,7 @@ template <class T> class AlignedVector { AlignedVector(const AlignedVector&) = delete; AlignedVector& operator=(const AlignedVector&) = delete; - ~AlignedVector() { -#ifdef _MSC_VER - _aligned_free(mem_); -#else - std::free(mem_); -#endif - } + ~AlignedVector() { release(); } std::size_t size() const { return size_; } @@ -65,6 +77,14 @@ template <class T> class AlignedVector { private: T *mem_; std::size_t size_; + + void release() { +#ifdef _MSC_VER + _aligned_free(mem_); +#else + std::free(mem_); +#endif + } }; } // namespace intgemm diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc index 9b38e08..d6c26b9 100644 --- a/intgemm/intgemm.cc +++ b/intgemm/intgemm.cc @@ -1,10 +1,20 @@ +#if defined(WASM) +// No header for CPUID since it's hard-coded. +#elif defined(__INTEL_COMPILER) +#include <immintrin.h> +#elif defined(_MSC_VER) +#include <intrin.h> +#else +// Assume GCC and clang style. +#include <cpuid.h> +#endif + #include "intgemm.h" #include "stats.h" +#include <stdio.h> #include <stdlib.h> -#include <iostream> - namespace intgemm { namespace { @@ -77,40 +87,58 @@ CPUType RealCPUID() { #endif } +#ifdef INTGEMM_CPUID_ENVIRONMENT CPUType EnvironmentCPUID() { -#if defined(_MSC_VER) +# if defined(_MSC_VER) char env_override[11]; size_t len = 0; if (getenv_s(&len, env_override, sizeof(env_override), "INTGEMM_CPUID")) return CPUType::AVX512VNNI; if (!len) return CPUType::AVX512VNNI; -#else +# else const char *env_override = getenv("INTGEMM_CPUID"); if (!env_override) return CPUType::AVX512VNNI; /* This will be capped to actual ID */ -#endif +# endif if (!strcmp(env_override, "AVX512VNNI")) return CPUType::AVX512VNNI; if (!strcmp(env_override, "AVX512BW")) return CPUType::AVX512BW; if (!strcmp(env_override, "AVX2")) return CPUType::AVX2; if (!strcmp(env_override, "SSSE3")) return CPUType::SSSE3; if (!strcmp(env_override, "SSE2")) return CPUType::SSE2; - std::cerr << "Unrecognized INTGEMM_CPUID " << env_override << std::endl; + fprintf(stderr, "Ignoring unrecognized INTGEMM_CPUID %s\n", env_override); return CPUType::AVX512VNNI; } +#endif } // namespace CPUType GetCPUID() { - static const CPUType kLocalCPU = std::min(RealCPUID(), EnvironmentCPUID()); + static const CPUType kLocalCPU = +#ifdef INTGEMM_CPUID_ENVIRONMENT + std::min(RealCPUID(), EnvironmentCPUID()); +#else + RealCPUID(); +#endif return kLocalCPU; } const CPUType kCPU = GetCPUID(); -float Unsupported_MaxAbsolute(const float * /*begin*/, const float * /*end*/) { +void UnsupportedCPUError() { +#if (defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS) throw UnsupportedCPU(); +#else + fprintf(stderr, "intgemm does not support this CPU.\n"); + abort(); +#endif +} + +float Unsupported_MaxAbsolute(const float * /*begin*/, const float * /*end*/) { + UnsupportedCPUError(); + return 0.0f; } MeanStd Unsupported_VectorMeanStd(const float * /*begin*/, const float * /*end*/, bool /*absolute*/) { - throw UnsupportedCPU(); + UnsupportedCPUError(); + return MeanStd(); } void (*Int16::Quantize)(const float *input, int16_t *output, float quant_mult, Index size) = ChooseCPU(AVX512BW::Kernels16::Quantize, AVX512BW::Kernels16::Quantize, AVX2::Kernels16::Quantize, SSE2::Kernels16::Quantize, SSE2::Kernels16::Quantize, Unsupported_16bit::Quantize); diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h index 977210d..2528fdb 100644 --- a/intgemm/intgemm.h +++ b/intgemm/intgemm.h @@ -41,7 +41,6 @@ #include <cstdint> -#include "intgemm/intgemm_config.h" #include "types.h" #include "sse2_gemm.h" #include "ssse3_gemm.h" @@ -49,77 +48,68 @@ #include "avx512_gemm.h" #include "avx512vnni_gemm.h" -#if defined(WASM) -// No header for CPUID since it's hard-coded. -#elif defined(__INTEL_COMPILER) -#include <immintrin.h> -#elif defined(_MSC_VER) -#include <intrin.h> -#else -// Assume GCC and clang style. -#include <cpuid.h> -#endif - /* Dispatch to functions based on runtime CPUID. This adds one call-by-variable to each call. */ namespace intgemm { +void UnsupportedCPUError(); + struct Unsupported_16bit { static void Quantize(const float *, int16_t *, float, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareB(const float *, int16_t *, float, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareBQuantizedTransposed(const int16_t *, int16_t *, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareBTransposed(const float *, int16_t *, float, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void SelectColumnsB(const int16_t *, int16_t *, Index, const Index *, const Index *) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } template <typename Callback> static void Multiply(const int16_t *, const int16_t *, Index, Index, Index, Callback) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } constexpr static const char *const kName = "16-bit Unsupported"; }; struct Unsupported_8bit { static void Quantize(const float *, int8_t *, float, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void QuantizeU(const float *, uint8_t *, float, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareA(const float *, int8_t *, float, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareBQuantizedTransposed(const int8_t *, int8_t *, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareBTransposed(const float *, int8_t *, float, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void PrepareB(const float *, int8_t *, float, Index, Index) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } template<class Callback> static void PrepareBias(const int8_t *, Index, Index, Callback) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } static void SelectColumnsB(const int8_t *, int8_t *, Index, const Index *, const Index *) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } template <typename Callback> static void Multiply(const int8_t *, const int8_t *, Index, Index, Index, Callback) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } template<class Callback> static void Multiply8Shift(const uint8_t *, const int8_t *, Index, Index, Index, Callback) { - throw UnsupportedCPU(); + UnsupportedCPUError(); } constexpr static const char *const kName = "8-bit Unsupported"; diff --git a/intgemm/types.h b/intgemm/types.h index 81b38af..44fb4e2 100644 --- a/intgemm/types.h +++ b/intgemm/types.h @@ -1,13 +1,28 @@ #pragma once +#include "intgemm/intgemm_config.h" + #include <exception> #ifdef INTGEMM_COMPILER_SUPPORTS_AVX2 #include <immintrin.h> #endif #include <emmintrin.h> -#if defined(_MSC_VER) || defined(__INTEL_COMPILER) -/* MSVC does not appear to have target attributes but is also fine with just - * using intrinsics anywhere. +// clang-cl bug doesn't include these headers when pretending to be MSVC +// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72 +#if defined(_MSC_VER) && defined(__clang__) +#include <avxintrin.h> +#include <avx2intrin.h> +#include <smmintrin.h> +#include <avx512fintrin.h> +#include <avx512dqintrin.h> +#include <avx512bwintrin.h> +#include <avx512vnniintrin.h> +#endif + +#if (defined(_MSC_VER) && !defined(__clang__)) || defined(__INTEL_COMPILER) +/* Real MSVC does not appear to have target attributes but is also fine with + * just using intrinsics anywhere. clang-cl pretending to be MSVC requires + * target attributes, so it's excluded from the above. * * The Intel compiler has a bug whereby constructors with target attributes do * not link. Like this program doesn't compile with icpc: |