diff options
-rw-r--r-- | cops.h | 16 |
1 files changed, 8 insertions, 8 deletions
@@ -16,19 +16,19 @@ private: SSE2 void InitRegisterSSE(float unquant_mult); AVX2 void InitRegisterAVX2(float unquant_mult); - float * C_; - __m128 unquant_mult_128; // Registers - __m256 unquant_mult_256; + float *C_; + __m128 unquant_mult_128_; // Registers + __m256 unquant_mult_256_; }; SSE2 void JustUnquantizeC::InitRegisterSSE(float unquant_mult) { assert(reinterpret_cast<uintptr_t>(C_) % sizeof(__m128) == 0); - unquant_mult_128 = _mm_set1_ps(unquant_mult); + unquant_mult_128_ = _mm_set1_ps(unquant_mult); } AVX2 void JustUnquantizeC::InitRegisterAVX2(float unquant_mult) { assert(reinterpret_cast<uintptr_t>(C_) % sizeof(__m256) == 0); - unquant_mult_256 = _mm256_set1_ps(unquant_mult); + unquant_mult_256_ = _mm256_set1_ps(unquant_mult); } JustUnquantizeC::JustUnquantizeC(float *C, float unquant_mult) : C_(C) { @@ -42,10 +42,10 @@ JustUnquantizeC::JustUnquantizeC(float *C, float unquant_mult) : C_(C) { SSE2 inline void JustUnquantizeC::operator()(Index rowIDX, Index cols, Index colIDX, MultiplyResult128 result){ - *reinterpret_cast<__m128*>(C_ + rowIDX*cols + colIDX) = mul_ps(cvtepi32_ps(result.pack0123), unquant_mult_128); - *reinterpret_cast<__m128*>(C_ + rowIDX*cols + colIDX + 4) = mul_ps(cvtepi32_ps(result.pack4567), unquant_mult_128); + *reinterpret_cast<__m128*>(C_ + rowIDX*cols + colIDX) = mul_ps(cvtepi32_ps(result.pack0123), unquant_mult_128_); + *reinterpret_cast<__m128*>(C_ + rowIDX*cols + colIDX + 4) = mul_ps(cvtepi32_ps(result.pack4567), unquant_mult_128_); } AVX2 inline void JustUnquantizeC::operator()(Index rowIDX, Index cols, Index colIDX, __m256i result) { - *reinterpret_cast<__m256*>(C_ + rowIDX*cols + colIDX) = mul_ps(cvtepi32_ps(result), unquant_mult_256); + *reinterpret_cast<__m256*>(C_ + rowIDX*cols + colIDX) = mul_ps(cvtepi32_ps(result), unquant_mult_256_); } } //Namespace |