Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2021-01-08 13:27:54 +0300
committer Kenneth Heafield <github@kheafield.com> 2021-01-08 13:27:54 +0300
commit a2a2c6a7497db9ec9208032250251f4960cf5f1f (patch)
tree b5144d8ae4658c528982b1f41065acc70950d7e3
parent 2647d6c129ccb1cef486628685bb80a85158459a (diff)
Wormhole support https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
For testing only
-rw-r--r-- CMakeLists.txt 5
-rw-r--r-- intgemm/intrinsics.h 10
2 files changed, 15 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9089827..911e087 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6 +74,11 @@ if (USE_OPENMP)
target_link_libraries(intgemm PUBLIC OpenMP::OpenMP_CXX)
endif()
+option(WORMHOLE "Use WASM wormhole https://bugzilla.mozilla.org/show_bug.cgi?id=1672160" OFF)
+if (WORMHOLE)
+ target_compile_definitions(intgemm PUBLIC INTGEMM_WORMHOLE)
+endif()
+
if(INTGEMM_DONT_BUILD_TESTS)
return()
endif()
diff --git a/intgemm/intrinsics.h b/intgemm/intrinsics.h
index 67b36fc..528e367 100644
--- a/intgemm/intrinsics.h
+++ b/intgemm/intrinsics.h
@@ -92,10 +92,20 @@ template <> INTGEMM_SSE2 inline __m128 loadu_ps(const float* mem_addr) {
return _mm_loadu_ps(mem_addr);
}
INTGEMM_SSE2 static inline __m128i madd_epi16(__m128i first, __m128i second) {
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
+#ifdef INTGEMM_WORMHOLE
+ return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 2 /* PMADDWD */);
+#else
return _mm_madd_epi16(first, second);
+#endif
}
INTGEMM_SSSE3 static inline __m128i maddubs_epi16(__m128i first, __m128i second) {
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160
+#ifdef INTGEMM_WORMHOLE
+ return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 1 /* PMADDUBSW */);
+#else
return _mm_maddubs_epi16(first, second);
+#endif
}
/*
* Missing max_epi8 for SSE2