Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikolay Bogoychev <nheart@gmail.com>2022-02-15 01:05:40 +0300
committerGitHub <noreply@github.com>2022-02-15 01:05:40 +0300
commita05a2e51ab524bcee954a39ee72005193f3adf7c (patch)
treefaf4a2d6208b60899e3db1b832d8fbd3f3ce7a31
parente4b82c15a368f21903657a2d3fb3259cd0f502c8 (diff)
parentfc3a614351ce6e667197307d97f45db5265c96af (diff)
Merge branch 'kpu:master' into master
-rw-r--r--.github/workflows/ubuntu-no-cpuid-environment.yml25
-rw-r--r--.github/workflows/ubuntu-noexceptions.yml25
-rw-r--r--CMakeLists.txt5
-rw-r--r--compile_test/avx2.cc10
-rw-r--r--compile_test/avx512bw.cc13
-rw-r--r--compile_test/avx512vnni.cc14
-rw-r--r--intgemm/aligned.h38
-rw-r--r--intgemm/intgemm.cc46
-rw-r--r--intgemm/intgemm.h46
-rw-r--r--intgemm/types.h21
10 files changed, 191 insertions, 52 deletions
diff --git a/.github/workflows/ubuntu-no-cpuid-environment.yml b/.github/workflows/ubuntu-no-cpuid-environment.yml
new file mode 100644
index 0000000..dc1862f
--- /dev/null
+++ b/.github/workflows/ubuntu-no-cpuid-environment.yml
@@ -0,0 +1,25 @@
+name: Ubuntu No CPUID Environment Variable
+
+on:
+ push:
+ branches: [master, static]
+ pull_request:
+ branches: [master, static]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: cmake
+ run: |
+ cmake -E make_directory build
+ cd build
+ cmake -DINTGEMM_CPUID_ENVIRONMENT=OFF ..
+ - name: Compile
+ working-directory: build
+ run: cmake --build . -j2
+ - name: Test
+ working-directory: build
+ run: ctest -j2
diff --git a/.github/workflows/ubuntu-noexceptions.yml b/.github/workflows/ubuntu-noexceptions.yml
new file mode 100644
index 0000000..371df05
--- /dev/null
+++ b/.github/workflows/ubuntu-noexceptions.yml
@@ -0,0 +1,25 @@
+name: Ubuntu no exceptions
+
+on:
+ push:
+ branches: [master, static]
+ pull_request:
+ branches: [master, static]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: cmake
+ run: |
+ cmake -E make_directory build
+ cd build
+ cmake -DCMAKE_CXX_FLAGS=-fno-exceptions ..
+ - name: Compile
+ working-directory: build
+ run: cmake --build . -j2
+ - name: Test
+ working-directory: build
+ run: ctest -j2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index af27542..c9f78fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -84,6 +84,11 @@ if (WORMHOLE)
target_compile_definitions(intgemm PUBLIC INTGEMM_WORMHOLE)
endif()
+option(INTGEMM_CPUID_ENVIRONMENT "Allow INTGEMM_CPUID environment variable to downgrade CPU model, which is mainly for testing." ON)
+if (INTGEMM_CPUID_ENVIRONMENT)
+ target_compile_definitions(intgemm PRIVATE INTGEMM_CPUID_ENVIRONMENT)
+endif()
+
if(INTGEMM_DONT_BUILD_TESTS)
return()
endif()
diff --git a/compile_test/avx2.cc b/compile_test/avx2.cc
index 8460fc0..9ed534e 100644
--- a/compile_test/avx2.cc
+++ b/compile_test/avx2.cc
@@ -1,7 +1,15 @@
// Some compilers don't have AVX2 support. Test for them.
#include <immintrin.h>
-#if defined(_MSC_VER)
+// clang-cl bug doesn't include these headers when pretending to be MSVC
+// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72
+#if defined(_MSC_VER) && defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <smmintrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
#define INTGEMM_AVX2
#else
#define INTGEMM_AVX2 __attribute__ ((target ("avx2")))
diff --git a/compile_test/avx512bw.cc b/compile_test/avx512bw.cc
index 2cd4c6a..2361f75 100644
--- a/compile_test/avx512bw.cc
+++ b/compile_test/avx512bw.cc
@@ -1,7 +1,18 @@
// Some compilers don't have AVX512BW support. Test for them.
#include <immintrin.h>
-#if defined(_MSC_VER)
+// clang-cl bug doesn't include these headers when pretending to be MSVC
+// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72
+#if defined(_MSC_VER) && defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <smmintrin.h>
+#include <avx512fintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512bwintrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
#define INTGEMM_AVX512BW
#elif defined(__INTEL_COMPILER)
#define INTGEMM_AVX512BW __attribute__ ((target ("avx512f")))
diff --git a/compile_test/avx512vnni.cc b/compile_test/avx512vnni.cc
index 1485cde..59035e4 100644
--- a/compile_test/avx512vnni.cc
+++ b/compile_test/avx512vnni.cc
@@ -1,6 +1,18 @@
#include <immintrin.h>
-#if defined(_MSC_VER)
+// clang-cl bug doesn't include these headers when pretending to be MSVC
+// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72
+#if defined(_MSC_VER) && defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <smmintrin.h>
+#include <avx512fintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512vnniintrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
#elif defined(__INTEL_COMPILER)
__attribute__ ((target ("avx512f")))
#else
diff --git a/intgemm/aligned.h b/intgemm/aligned.h
index 6fda369..6b55ff2 100644
--- a/intgemm/aligned.h
+++ b/intgemm/aligned.h
@@ -2,9 +2,15 @@
#include <cstdlib>
#include <new>
#ifdef _MSC_VER
+// Ensure _HAS_EXCEPTIONS is defined
+#include <vcruntime.h>
#include <malloc.h>
#endif
+#if !((defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS))
+#include <cstdlib>
+#endif
+
// Aligned simple vector.
namespace intgemm {
@@ -17,10 +23,20 @@ template <class T> class AlignedVector {
: size_(size) {
#ifdef _MSC_VER
mem_ = static_cast<T*>(_aligned_malloc(size * sizeof(T), alignment));
- if (!mem_) throw std::bad_alloc();
-#else
+ if (!mem_) {
+# if (defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS)
+ throw std::bad_alloc();
+# else
+ std::abort();
+# endif
+ }
+#else
if (posix_memalign(reinterpret_cast<void **>(&mem_), alignment, size * sizeof(T))) {
+# if (defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS)
throw std::bad_alloc();
+# else
+ std::abort();
+# endif
}
#endif
}
@@ -31,6 +47,8 @@ template <class T> class AlignedVector {
}
AlignedVector &operator=(AlignedVector &&from) {
+ if (this == &from) return *this;
+ release();
mem_ = from.mem_;
size_ = from.size_;
from.mem_ = nullptr;
@@ -41,13 +59,7 @@ template <class T> class AlignedVector {
AlignedVector(const AlignedVector&) = delete;
AlignedVector& operator=(const AlignedVector&) = delete;
- ~AlignedVector() {
-#ifdef _MSC_VER
- _aligned_free(mem_);
-#else
- std::free(mem_);
-#endif
- }
+ ~AlignedVector() { release(); }
std::size_t size() const { return size_; }
@@ -65,6 +77,14 @@ template <class T> class AlignedVector {
private:
T *mem_;
std::size_t size_;
+
+ void release() {
+#ifdef _MSC_VER
+ _aligned_free(mem_);
+#else
+ std::free(mem_);
+#endif
+ }
};
} // namespace intgemm
diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc
index 9b38e08..d6c26b9 100644
--- a/intgemm/intgemm.cc
+++ b/intgemm/intgemm.cc
@@ -1,10 +1,20 @@
+#if defined(WASM)
+// No header for CPUID since it's hard-coded.
+#elif defined(__INTEL_COMPILER)
+#include <immintrin.h>
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#else
+// Assume GCC and clang style.
+#include <cpuid.h>
+#endif
+
#include "intgemm.h"
#include "stats.h"
+#include <stdio.h>
#include <stdlib.h>
-#include <iostream>
-
namespace intgemm {
namespace {
@@ -77,40 +87,58 @@ CPUType RealCPUID() {
#endif
}
+#ifdef INTGEMM_CPUID_ENVIRONMENT
CPUType EnvironmentCPUID() {
-#if defined(_MSC_VER)
+# if defined(_MSC_VER)
char env_override[11];
size_t len = 0;
if (getenv_s(&len, env_override, sizeof(env_override), "INTGEMM_CPUID")) return CPUType::AVX512VNNI;
if (!len) return CPUType::AVX512VNNI;
-#else
+# else
const char *env_override = getenv("INTGEMM_CPUID");
if (!env_override) return CPUType::AVX512VNNI; /* This will be capped to actual ID */
-#endif
+# endif
if (!strcmp(env_override, "AVX512VNNI")) return CPUType::AVX512VNNI;
if (!strcmp(env_override, "AVX512BW")) return CPUType::AVX512BW;
if (!strcmp(env_override, "AVX2")) return CPUType::AVX2;
if (!strcmp(env_override, "SSSE3")) return CPUType::SSSE3;
if (!strcmp(env_override, "SSE2")) return CPUType::SSE2;
- std::cerr << "Unrecognized INTGEMM_CPUID " << env_override << std::endl;
+ fprintf(stderr, "Ignoring unrecognized INTGEMM_CPUID %s\n", env_override);
return CPUType::AVX512VNNI;
}
+#endif
} // namespace
CPUType GetCPUID() {
- static const CPUType kLocalCPU = std::min(RealCPUID(), EnvironmentCPUID());
+ static const CPUType kLocalCPU =
+#ifdef INTGEMM_CPUID_ENVIRONMENT
+ std::min(RealCPUID(), EnvironmentCPUID());
+#else
+ RealCPUID();
+#endif
return kLocalCPU;
}
const CPUType kCPU = GetCPUID();
-float Unsupported_MaxAbsolute(const float * /*begin*/, const float * /*end*/) {
+void UnsupportedCPUError() {
+#if (defined(_MSC_VER) && !defined(__clang__)) ? (_HAS_EXCEPTIONS) : (__EXCEPTIONS)
throw UnsupportedCPU();
+#else
+ fprintf(stderr, "intgemm does not support this CPU.\n");
+ abort();
+#endif
+}
+
+float Unsupported_MaxAbsolute(const float * /*begin*/, const float * /*end*/) {
+ UnsupportedCPUError();
+ return 0.0f;
}
MeanStd Unsupported_VectorMeanStd(const float * /*begin*/, const float * /*end*/, bool /*absolute*/) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
+ return MeanStd();
}
void (*Int16::Quantize)(const float *input, int16_t *output, float quant_mult, Index size) = ChooseCPU(AVX512BW::Kernels16::Quantize, AVX512BW::Kernels16::Quantize, AVX2::Kernels16::Quantize, SSE2::Kernels16::Quantize, SSE2::Kernels16::Quantize, Unsupported_16bit::Quantize);
diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h
index 977210d..2528fdb 100644
--- a/intgemm/intgemm.h
+++ b/intgemm/intgemm.h
@@ -41,7 +41,6 @@
#include <cstdint>
-#include "intgemm/intgemm_config.h"
#include "types.h"
#include "sse2_gemm.h"
#include "ssse3_gemm.h"
@@ -49,77 +48,68 @@
#include "avx512_gemm.h"
#include "avx512vnni_gemm.h"
-#if defined(WASM)
-// No header for CPUID since it's hard-coded.
-#elif defined(__INTEL_COMPILER)
-#include <immintrin.h>
-#elif defined(_MSC_VER)
-#include <intrin.h>
-#else
-// Assume GCC and clang style.
-#include <cpuid.h>
-#endif
-
/* Dispatch to functions based on runtime CPUID. This adds one call-by-variable to each call. */
namespace intgemm {
+void UnsupportedCPUError();
+
struct Unsupported_16bit {
static void Quantize(const float *, int16_t *, float, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareB(const float *, int16_t *, float, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareBQuantizedTransposed(const int16_t *, int16_t *, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareBTransposed(const float *, int16_t *, float, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void SelectColumnsB(const int16_t *, int16_t *, Index, const Index *, const Index *) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
template <typename Callback>
static void Multiply(const int16_t *, const int16_t *, Index, Index, Index, Callback) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
constexpr static const char *const kName = "16-bit Unsupported";
};
struct Unsupported_8bit {
static void Quantize(const float *, int8_t *, float, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void QuantizeU(const float *, uint8_t *, float, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareA(const float *, int8_t *, float, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareBQuantizedTransposed(const int8_t *, int8_t *, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareBTransposed(const float *, int8_t *, float, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void PrepareB(const float *, int8_t *, float, Index, Index) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
template<class Callback>
static void PrepareBias(const int8_t *, Index, Index, Callback) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
static void SelectColumnsB(const int8_t *, int8_t *, Index, const Index *, const Index *) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
template <typename Callback>
static void Multiply(const int8_t *, const int8_t *, Index, Index, Index, Callback) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
template<class Callback>
static void Multiply8Shift(const uint8_t *, const int8_t *, Index, Index, Index, Callback) {
- throw UnsupportedCPU();
+ UnsupportedCPUError();
}
constexpr static const char *const kName = "8-bit Unsupported";
diff --git a/intgemm/types.h b/intgemm/types.h
index 81b38af..44fb4e2 100644
--- a/intgemm/types.h
+++ b/intgemm/types.h
@@ -1,13 +1,28 @@
#pragma once
+#include "intgemm/intgemm_config.h"
+
#include <exception>
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
#include <immintrin.h>
#endif
#include <emmintrin.h>
-#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
-/* MSVC does not appear to have target attributes but is also fine with just
- * using intrinsics anywhere.
+// clang-cl bug doesn't include these headers when pretending to be MSVC
+// https://github.com/llvm/llvm-project/blob/e9a294449575a1e1a0daca470f64914695dc9adc/clang/lib/Headers/immintrin.h#L69-L72
+#if defined(_MSC_VER) && defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <smmintrin.h>
+#include <avx512fintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512vnniintrin.h>
+#endif
+
+#if (defined(_MSC_VER) && !defined(__clang__)) || defined(__INTEL_COMPILER)
+/* Real MSVC does not appear to have target attributes but is also fine with
+ * just using intrinsics anywhere. clang-cl pretending to be MSVC requires
+ * target attributes, so it's excluded from the above.
*
* The Intel compiler has a bug whereby constructors with target attributes do
* not link. Like this program doesn't compile with icpc: