Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kenneth Heafield <kpu@users.noreply.github.com> 2021-03-04 14:39:04 +0300
committer: GitHub <noreply@github.com> 2021-03-04 14:39:04 +0300
commit: 1a176394bb0c2d243c42fe574e063924a92aa120 (patch)
tree: d0d01e308773d1b13b4475f98cc2c05c3d10bb42
parent: bd6117a99d0ce25928e66b61de4eae680c6bc885 (diff)
parent: cc71e5c2a69755009667330af1f60a4ed15b5b63 (diff)
Merge pull request #85 from kpu/wormhole
Enable SIMD shuffle pattern
-rw-r--r-- CMakeLists.txt 5
-rw-r--r-- intgemm/intrinsics.h 13
2 files changed, 18 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9089827..911e087 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6 +74,11 @@ if (USE_OPENMP)
target_link_libraries(intgemm PUBLIC OpenMP::OpenMP_CXX)
endif()
+option(WORMHOLE "Use WASM wormhole https://bugzilla.mozilla.org/show_bug.cgi?id=1672160" OFF)
+if (WORMHOLE)
+ target_compile_definitions(intgemm PUBLIC INTGEMM_WORMHOLE)
+endif()
+
if(INTGEMM_DONT_BUILD_TESTS)
return()
endif()
diff --git a/intgemm/intrinsics.h b/intgemm/intrinsics.h
index 67b36fc..9f370cd 100644
--- a/intgemm/intrinsics.h
+++ b/intgemm/intrinsics.h
@@ -9,6 +9,9 @@
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
#include <immintrin.h>
#endif
+#ifdef INTGEMM_WORMHOLE
+#include <wasm_simd128.h>
+#endif
#include <cstdint>
@@ -92,10 +95,20 @@ template <> INTGEMM_SSE2 inline __m128 loadu_ps(const float* mem_addr) {
return _mm_loadu_ps(mem_addr);
}
INTGEMM_SSE2 static inline __m128i madd_epi16(__m128i first, __m128i second) {
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
+#ifdef INTGEMM_WORMHOLE
+ return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 2 /* PMADDWD */);
+#else
return _mm_madd_epi16(first, second);
+#endif
}
INTGEMM_SSSE3 static inline __m128i maddubs_epi16(__m128i first, __m128i second) {
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
+#ifdef INTGEMM_WORMHOLE
+ return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 1 /* PMADDUBSW */);
+#else
return _mm_maddubs_epi16(first, second);
+#endif
}
/*
* Missing max_epi8 for SSE2