Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2020-03-05 01:26:20 +0300
committer Kenneth Heafield <github@kheafield.com> 2020-03-05 01:26:20 +0300
commit 61bcbae423eab96156f646a92107ca5300b8ae27 (patch)
tree bcd9813b544edc3887c36816c4f9c4e8fed56861
parent 2f5a75d4d05fa092bba066fd14a3fb7f778405ef (diff)
parent c14bf3f5f3bd7136e2f703f52fc1c7f94b1f0681 (diff)
Merge branch 'master' of github.com:kpu/intgemm
-rw-r--r-- avx512_gemm.h 14
1 files changed, 6 insertions, 8 deletions
diff --git a/avx512_gemm.h b/avx512_gemm.h
index b3499af..267dc6d 100644
--- a/avx512_gemm.h
+++ b/avx512_gemm.h
@@ -263,18 +263,16 @@ struct AVX512_8bit {
INTGEMM_AVX512BW static void QuantizeU(const float *input, uint8_t *output, float quant_mult, Index size) {
assert(size % 16 == 0);
assert(reinterpret_cast<uintptr_t>(input) % 64 == 0);
- const __m512i neg127 = _mm512_set1_epi32(-127);
- const __m128i pos127 = _mm_set1_epi8(127);
+ const __m512i pos127 = _mm512_set1_epi32(127);
+ const __m512i zero = _mm512_setzero_si512();
const __m512 quant_mult_reg = _mm512_set1_ps(quant_mult);
const float *end = input + size;
for (; input < end; input += 16, output += 16) {
__m512i asint = avx512f::QuantizerGrab(input, quant_mult_reg);
- asint = _mm512_max_epi32(asint, neg127);
-
- //First convert to 8 bit then add and finally store,
- //because _mm512_mask_cvtsepi32_storeu_epi8 saturates to signed
- __m128i as8bit = _mm512_cvtsepi32_epi8(asint);
- *reinterpret_cast<__m128i*>(output) = _mm_add_epi8(as8bit, pos127);
+ asint = _mm512_min_epi32(asint, pos127);
+ asint = _mm512_add_epi32(asint, pos127);
+ asint = _mm512_max_epi32(asint, zero);
+ _mm512_mask_cvtusepi32_storeu_epi8(output, 0xffff, asint);
}
}