diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-03-03 23:31:11 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-03-03 23:31:58 +0300 |
commit | c14bf3f5f3bd7136e2f703f52fc1c7f94b1f0681 (patch) | |
tree | b5cba298ccb718416c5b0e4ccc4cad83e7cd0fcb | |
parent | 8974d778363f197ff0ea40cf5dfe518afbeaa302 (diff) |
Cap unsigned quantized value for no good reason?
-rw-r--r-- | avx512_gemm.h | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/avx512_gemm.h b/avx512_gemm.h
index b8c4de1..267dc6d 100644
--- a/avx512_gemm.h
+++ b/avx512_gemm.h
@@ -263,14 +263,15 @@ struct AVX512_8bit {
   INTGEMM_AVX512BW static void QuantizeU(const float *input, uint8_t *output, float quant_mult, Index size) {
     assert(size % 16 == 0);
     assert(reinterpret_cast<uintptr_t>(input) % 64 == 0);
-    const __m512i neg127 = _mm512_set1_epi32(-127);
     const __m512i pos127 = _mm512_set1_epi32(127);
+    const __m512i zero = _mm512_setzero_si512();
     const __m512 quant_mult_reg = _mm512_set1_ps(quant_mult);
     const float *end = input + size;
     for (; input < end; input += 16, output += 16) {
       __m512i asint = avx512f::QuantizerGrab(input, quant_mult_reg);
-      asint = _mm512_max_epi32(asint, neg127);
+      asint = _mm512_min_epi32(asint, pos127);
       asint = _mm512_add_epi32(asint, pos127);
+      asint = _mm512_max_epi32(asint, zero);
       _mm512_mask_cvtusepi32_storeu_epi8(output, 0xffff, asint);
     }
   }