diff options
author | Kenneth Heafield <kpu@users.noreply.github.com> | 2021-03-04 14:39:04 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-04 14:39:04 +0300 |
commit | 1a176394bb0c2d243c42fe574e063924a92aa120 (patch) | |
tree | d0d01e308773d1b13b4475f98cc2c05c3d10bb42 | |
parent | bd6117a99d0ce25928e66b61de4eae680c6bc885 (diff) | |
parent | cc71e5c2a69755009667330af1f60a4ed15b5b63 (diff) |
Merge pull request #85 from kpu/wormhole
Enable SIMD shuffle pattern
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | intgemm/intrinsics.h | 13 |
2 files changed, 18 insertions, 0 deletions
```diff
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9089827..911e087 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6 +74,11 @@ if (USE_OPENMP)
   target_link_libraries(intgemm PUBLIC OpenMP::OpenMP_CXX)
 endif()
 
+option(WORMHOLE "Use WASM wormhole https://bugzilla.mozilla.org/show_bug.cgi?id=1672160" OFF)
+if (WORMHOLE)
+  target_compile_definitions(intgemm PUBLIC INTGEMM_WORMHOLE)
+endif()
+
 if(INTGEMM_DONT_BUILD_TESTS)
   return()
 endif()
diff --git a/intgemm/intrinsics.h b/intgemm/intrinsics.h
index 67b36fc..9f370cd 100644
--- a/intgemm/intrinsics.h
+++ b/intgemm/intrinsics.h
@@ -9,6 +9,9 @@
 #ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
 #include <immintrin.h>
 #endif
+#ifdef INTGEMM_WORMHOLE
+#include <wasm_simd128.h>
+#endif
 
 #include <cstdint>
 
@@ -92,10 +95,20 @@ template <> INTGEMM_SSE2 inline __m128 loadu_ps(const float* mem_addr) {
   return _mm_loadu_ps(mem_addr);
 }
 INTGEMM_SSE2 static inline __m128i madd_epi16(__m128i first, __m128i second) {
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
+#ifdef INTGEMM_WORMHOLE
+  return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 2 /* PMADDWD */);
+#else
   return _mm_madd_epi16(first, second);
+#endif
 }
 INTGEMM_SSSE3 static inline __m128i maddubs_epi16(__m128i first, __m128i second) {
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
+#ifdef INTGEMM_WORMHOLE
+  return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 1 /* PMADDUBSW */);
+#else
   return _mm_maddubs_epi16(first, second);
+#endif
 }
 /*
  * Missing max_epi8 for SSE2
```