Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nikolay Bogoychev <nheart@gmail.com> 2020-01-22 19:29:56 +0300
committer: Nick Someone <xapajiamnu@dhcp-91-025.inf.ed.ac.uk> 2020-01-22 19:30:19 +0300
commit: 339f8f6b96ab2efe162c5bc3a72326393413e6ca (patch)
tree: f34e1ea3c7dfff7b281240c4d718b431e88e6504
parent: 0f368b979ed0c96918034afa301bb03cd57d6f3d (diff)
Fix CPU detection and aligned alloc on mac
Committer: Nikolay Bogoychev <nheart@gmail.com>
-rw-r--r-- aligned.h 4
-rw-r--r-- intgemm.h 2
2 files changed, 5 insertions, 1 deletions
diff --git a/aligned.h b/aligned.h
index 8a7d6cc..c07c23e 100644
--- a/aligned.h
+++ b/aligned.h
@@ -1,5 +1,6 @@
#pragma once
#include <cstdlib>
+#include <algorithm>
// 64-byte aligned simple vector.
@@ -8,7 +9,8 @@ namespace intgemm {
template <class T> class AlignedVector {
public:
explicit AlignedVector(std::size_t size)
- : mem_(static_cast<T*>(aligned_alloc(64, size * sizeof(T)))), size_(size) {}
+ : mem_(static_cast<T*>(aligned_alloc(64, std::max((size_t)64, size * sizeof(T))))), size_(size) {
+ }
AlignedVector(const AlignedVector&) = delete;
AlignedVector& operator=(const AlignedVector&) = delete;
diff --git a/intgemm.h b/intgemm.h
index 8940085..5e5f556 100644
--- a/intgemm.h
+++ b/intgemm.h
@@ -124,6 +124,7 @@ typedef Unsupported_8bit AVX512VNNI_8bit;
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
// gcc 5.4.0 bizarrely supports avx512bw targets but not __builtin_cpu_supports("avx512bw"). So implement it manually.
inline bool CheckAVX512BW() {
+ __builtin_cpu_init ();
#ifdef __INTEL_COMPILER
return _may_i_use_cpu_feature(_FEATURE_AVX512BW)
#elif __GNUC__
@@ -153,6 +154,7 @@ inline bool CheckAVX512BW() {
* unsupported otherwise
*/
template <class T> T ChooseCPU(T avx512vnni, T avx512bw, T avx2, T ssse3, T sse2, T unsupported) {
+ __builtin_cpu_init ();
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
if (
#ifdef __INTEL_COMPILER