Genericize the PrepareB code

author: Kenneth Heafield <github@kheafield.com> 2018-06-17 21:54:41 +0300
committer: Kenneth Heafield <github@kheafield.com> 2018-06-17 21:54:41 +0300
commit: 8c29dd6d9e5e0821a92a3e2133f1b10ae4007ae2 (patch)
tree: 195ea5f013f653442e7b3a32b50e0d1b4f40d616 /avx512_gemm.cc
parent: bc1ab9d4a360cd2078d067cca9fdb6fedf78158e (diff)
1 files changed, 1 insertions, 3 deletions
diff --git a/avx512_gemm.cc b/avx512_gemm.cc
index 89c300c..ea2b85d 100644
--- a/avx512_gemm.cc
+++ b/avx512_gemm.cc
@@ -20,10 +20,8 @@ namespace {
 
 // Load from memory, multiply, and convert to int32_t.
 inline __m512i QuantizerGrab(const float *input, const __m512 quant_mult_reg) {
-  // Load 16 floats
-  __m512 val = _mm512_load_ps(input);
   // Multiply each by the quantization factor.
-  val = _mm512_mul_ps(val, quant_mult_reg);
+  __m512 val = _mm512_mul_ps(*reinterpret_cast<const __m512*>(input), quant_mult_reg);
   // Cast to 32-bit int
   return _mm512_cvtps_epi32(val);
 }
author	Kenneth Heafield <github@kheafield.com>	2018-06-17 21:54:41 +0300
committer	Kenneth Heafield <github@kheafield.com>	2018-06-17 21:54:41 +0300
commit	8c29dd6d9e5e0821a92a3e2133f1b10ae4007ae2 (patch)
tree	195ea5f013f653442e7b3a32b50e0d1b4f40d616 /avx512_gemm.cc
parent	bc1ab9d4a360cd2078d067cca9fdb6fedf78158e (diff)