diff options
author | Nikolay Bogoychev <nheart@gmail.com> | 2020-04-20 16:38:37 +0300 |
---|---|---|
committer | Nikolay Bogoychev <nheart@gmail.com> | 2020-04-20 16:38:37 +0300 |
commit | d8124d2c43ee0e702c0b7f2af9f5aec431f97c8c (patch) | |
tree | 647cbeea9a64b4f50d6873819506763814ca2369 | |
parent | e2404d9a05148379fbc3e7cc9904b3157e7d73f9 (diff) |
Rename and move the if outside the hot loop
-rw-r--r-- | avx2_gemm.h | 2 | ||||
-rw-r--r-- | avx512_gemm.h | 2 | ||||
-rw-r--r-- | intgemm.cc | 2 | ||||
-rw-r--r-- | intgemm.h | 4 | ||||
-rw-r--r-- | multiply.h | 26 | ||||
-rw-r--r-- | sse2_gemm.h | 2 | ||||
-rw-r--r-- | test/quantize_test.cc | 84 |
7 files changed, 64 insertions, 58 deletions
diff --git a/avx2_gemm.h b/avx2_gemm.h index c1c4616..41f705b 100644 --- a/avx2_gemm.h +++ b/avx2_gemm.h @@ -192,7 +192,7 @@ class QuantizeTile8 { // Technically only requires AVX INTGEMM_MAXABSOLUTE(__m256, INTGEMM_AVX2) -INTGEMM_GETQUANTIZERSTD(__m256, INTGEMM_AVX2) +INTGEMM_EUCLIDEANNORM(__m256, INTGEMM_AVX2) } // namespace diff --git a/avx512_gemm.h b/avx512_gemm.h index c6a473e..7488ff2 100644 --- a/avx512_gemm.h +++ b/avx512_gemm.h @@ -159,7 +159,7 @@ class QuantizeTile8 { /* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */ INTGEMM_MAXABSOLUTE(__m512, INTGEMM_AVX512BW) -INTGEMM_GETQUANTIZERSTD(__m512, INTGEMM_AVX512BW) +INTGEMM_EUCLIDEANNORM(__m512, INTGEMM_AVX512BW) } // namespace @@ -40,7 +40,7 @@ const CPUType kCPU = ChooseCPU(CPUType::AVX512VNNI, CPUType::AVX512BW, CPUType:: float (*MaxAbsolute)(const float *begin, const float *end) = ChooseCPU(avx512f::MaxAbsolute, avx512f::MaxAbsolute, avx2::MaxAbsolute, sse2::MaxAbsolute, sse2::MaxAbsolute, Unsupported_MaxAbsolute); -MeanStd (*QuantizerStd)(const float *begin, const float *end, bool absolute) = ChooseCPU(avx512f::QuantizerStd, avx512f::QuantizerStd, avx2::QuantizerStd, sse2::QuantizerStd, sse2::QuantizerStd, sse2::QuantizerStd); +MeanStd (*EuclideanNorm)(const float *begin, const float *end, bool absolute) = ChooseCPU(avx512f::EuclideanNorm, avx512f::EuclideanNorm, avx2::EuclideanNorm, sse2::EuclideanNorm, sse2::EuclideanNorm, sse2::EuclideanNorm); constexpr const char *const Unsupported_16bit::kName; constexpr const char *const Unsupported_8bit::kName; @@ -420,10 +420,10 @@ extern const CPUType kCPU; extern float (*MaxAbsolute)(const float *begin, const float *end); // Get a Quantization value that is equant to the mean of the data +N standard deviations. Use 2 by default -extern MeanStd (*QuantizerStd)(const float *begin, const float *end, bool); +extern MeanStd (*EuclideanNorm)(const float *begin, const float *end, bool); static inline MeanStd GetQuantizerStd(const float * begin, const float * end, bool absolute=false) { - return QuantizerStd(begin, end, absolute); + return EuclideanNorm(begin, end, absolute); } @@ -665,9 +665,9 @@ target static inline float MaxAbsolute(const float *begin_float, const float *en return ret; \ } \ -#define INTGEMM_GETQUANTIZERSTD(Register, target) \ -target static inline MeanStd QuantizerStd(const float *begin_float, const float *end_float, bool absolute) { \ - /* Finds a quantizer value that is a certain number of standard deviations of the mean */ \ +#define INTGEMM_EUCLIDEANNORM(Register, target) \ +target static inline MeanStd EuclideanNorm(const float *begin_float, const float *end_float, bool absolute) { \ + /* Computes the euclidean norm and returns the mean and the standard deviation. Optionally it can be the mean and standard deviation in absolute terms. */ \ assert(end_float > begin_float); \ assert((end_float - begin_float) % (sizeof(Register) / sizeof(float)) == 0); \ size_t num_items = end_float - begin_float; \ @@ -675,14 +675,20 @@ target static inline MeanStd QuantizerStd(const float *begin_float, const float const Register *end = reinterpret_cast<const Register*>(end_float); \ Register squares = set1_ps<Register>(0); \ Register sums = set1_ps<Register>(0); \ - const Register mask = set1_ps<Register>(-0.f); \ - for (; begin != end; begin++) { \ - Register vec = *begin; \ - if (absolute) { \ - vec = andnot_ps(mask, vec); \ + if (absolute) { \ + const Register mask = set1_ps<Register>(-0.f); \ + for (; begin != end; begin++) { \ + Register vec = *begin; \ + vec = andnot_ps(mask, vec); \ + squares = add_ps(squares, mul_ps(vec, vec)); \ + sums = add_ps(sums, vec); \ + } \ + } else { \ + for (; begin != end; begin++) { \ + Register vec = *begin; \ + squares = add_ps(squares, mul_ps(vec, vec)); \ + sums = add_ps(sums, vec); \ } \ - squares = add_ps(squares, mul_ps(vec, vec)); \ - sums = add_ps(sums, vec); \ } \ float squares_sum = horizontalSum(squares); \ float normal_sums = horizontalSum(sums); \ diff --git a/sse2_gemm.h b/sse2_gemm.h index 91221d9..84b2f27 100644 --- a/sse2_gemm.h +++ b/sse2_gemm.h @@ -53,7 +53,7 @@ class QuantizeTile16 { INTGEMM_MAXABSOLUTE(__m128, INTGEMM_SSE2) -INTGEMM_GETQUANTIZERSTD(__m128, INTGEMM_SSE2) +INTGEMM_EUCLIDEANNORM(__m128, INTGEMM_SSE2) } //namespace // This should be pure INTGEMM_SSE2 (and below). diff --git a/test/quantize_test.cc b/test/quantize_test.cc index d868b51..9e99274 100644 --- a/test/quantize_test.cc +++ b/test/quantize_test.cc @@ -30,7 +30,7 @@ void QuantizeRef(const float *input, int8_t *output, float quant_mult, std::size } } -MeanStd QuantizerStddRef(AlignedVector<float>& vals, int num_items, bool absolute) { +MeanStd EuclideanNorm(AlignedVector<float>& vals, int num_items, bool absolute) { float normal_sums = 0; float squares_sum = 0; if (absolute) { @@ -47,7 +47,7 @@ MeanStd QuantizerStddRef(AlignedVector<float>& vals, int num_items, bool absolut } template <MeanStd (*Backend) (const float *, const float *, bool)> -void testQuantizerStd(int num_items, bool absolute=false) { +void testEuclideanNorm(int num_items, bool absolute=false) { std::mt19937 gen; std::uniform_real_distribution<float> dist(-1.0f, 1.0f); AlignedVector<float> inputVec(num_items); @@ -56,7 +56,7 @@ void testQuantizerStd(int num_items, bool absolute=false) { it = dist(gen); } - MeanStd reference = QuantizerStddRef(inputVec, num_items, absolute); + MeanStd reference = EuclideanNorm(inputVec, num_items, absolute); MeanStd fast = Backend(inputVec.begin(), inputVec.end(), absolute); float meanDifference = fabs(reference.mean - fast.mean); @@ -132,53 +132,53 @@ TEST_CASE ("Quantize AVX2", "[quantize]") { } #endif -TEST_CASE("QuantizeStd SSSE3", "[quantizerSTD]") { +TEST_CASE("QuantizeStd SSSE3", "[EuclideanNorm]") { if (kCPU < CPUType::SSSE3) return; - testQuantizerStd<sse2::QuantizerStd>(64); - testQuantizerStd<sse2::QuantizerStd>(64, true); - testQuantizerStd<sse2::QuantizerStd>(256); - testQuantizerStd<sse2::QuantizerStd>(256, true); - testQuantizerStd<sse2::QuantizerStd>(2048); - testQuantizerStd<sse2::QuantizerStd>(2048, true); - testQuantizerStd<sse2::QuantizerStd>(65536); - testQuantizerStd<sse2::QuantizerStd>(65536, true); - testQuantizerStd<sse2::QuantizerStd>(81920); - testQuantizerStd<sse2::QuantizerStd>(81920, true); - testQuantizerStd<sse2::QuantizerStd>(120832); - testQuantizerStd<sse2::QuantizerStd>(120832, true); + testEuclideanNorm<sse2::EuclideanNorm>(64); + testEuclideanNorm<sse2::EuclideanNorm>(64, true); + testEuclideanNorm<sse2::EuclideanNorm>(256); + testEuclideanNorm<sse2::EuclideanNorm>(256, true); + testEuclideanNorm<sse2::EuclideanNorm>(2048); + testEuclideanNorm<sse2::EuclideanNorm>(2048, true); + testEuclideanNorm<sse2::EuclideanNorm>(65536); + testEuclideanNorm<sse2::EuclideanNorm>(65536, true); + testEuclideanNorm<sse2::EuclideanNorm>(81920); + testEuclideanNorm<sse2::EuclideanNorm>(81920, true); + testEuclideanNorm<sse2::EuclideanNorm>(120832); + testEuclideanNorm<sse2::EuclideanNorm>(120832, true); } -TEST_CASE("QuantizeStd AVX2", "[quantizerSTD]") { +TEST_CASE("QuantizeStd AVX2", "[EuclideanNorm]") { if (kCPU < CPUType::AVX2) return; - testQuantizerStd<avx2::QuantizerStd>(64); - testQuantizerStd<avx2::QuantizerStd>(64, true); - testQuantizerStd<avx2::QuantizerStd>(256); - testQuantizerStd<avx2::QuantizerStd>(256, true); - testQuantizerStd<avx2::QuantizerStd>(2048); - testQuantizerStd<avx2::QuantizerStd>(2048, true); - testQuantizerStd<avx2::QuantizerStd>(65536); - testQuantizerStd<avx2::QuantizerStd>(65536, true); - testQuantizerStd<avx2::QuantizerStd>(81920); - testQuantizerStd<avx2::QuantizerStd>(81920, true); - testQuantizerStd<avx2::QuantizerStd>(120832); - testQuantizerStd<avx2::QuantizerStd>(120832, true); + testEuclideanNorm<avx2::EuclideanNorm>(64); + testEuclideanNorm<avx2::EuclideanNorm>(64, true); + testEuclideanNorm<avx2::EuclideanNorm>(256); + testEuclideanNorm<avx2::EuclideanNorm>(256, true); + testEuclideanNorm<avx2::EuclideanNorm>(2048); + testEuclideanNorm<avx2::EuclideanNorm>(2048, true); + testEuclideanNorm<avx2::EuclideanNorm>(65536); + testEuclideanNorm<avx2::EuclideanNorm>(65536, true); + testEuclideanNorm<avx2::EuclideanNorm>(81920); + testEuclideanNorm<avx2::EuclideanNorm>(81920, true); + testEuclideanNorm<avx2::EuclideanNorm>(120832); + testEuclideanNorm<avx2::EuclideanNorm>(120832, true); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW -TEST_CASE("QuantizeStd AVX512", "[quantizerSTD]") { +TEST_CASE("QuantizeStd AVX512", "[EuclideanNorm]") { if (kCPU < CPUType::AVX512BW) return; - testQuantizerStd<avx512f::QuantizerStd>(64); - testQuantizerStd<avx512f::QuantizerStd>(64, true); - testQuantizerStd<avx512f::QuantizerStd>(256); - testQuantizerStd<avx512f::QuantizerStd>(256, true); - testQuantizerStd<avx512f::QuantizerStd>(2048); - testQuantizerStd<avx512f::QuantizerStd>(2048, true); - testQuantizerStd<avx512f::QuantizerStd>(65536); - testQuantizerStd<avx512f::QuantizerStd>(65536, true); - testQuantizerStd<avx512f::QuantizerStd>(81920); - testQuantizerStd<avx512f::QuantizerStd>(81920, true); - testQuantizerStd<avx512f::QuantizerStd>(120832); - testQuantizerStd<avx512f::QuantizerStd>(120832, true); + testEuclideanNorm<avx512f::EuclideanNorm>(64); + testEuclideanNorm<avx512f::EuclideanNorm>(64, true); + testEuclideanNorm<avx512f::EuclideanNorm>(256); + testEuclideanNorm<avx512f::EuclideanNorm>(256, true); + testEuclideanNorm<avx512f::EuclideanNorm>(2048); + testEuclideanNorm<avx512f::EuclideanNorm>(2048, true); + testEuclideanNorm<avx512f::EuclideanNorm>(65536); + testEuclideanNorm<avx512f::EuclideanNorm>(65536, true); + testEuclideanNorm<avx512f::EuclideanNorm>(81920); + testEuclideanNorm<avx512f::EuclideanNorm>(81920, true); + testEuclideanNorm<avx512f::EuclideanNorm>(120832); + testEuclideanNorm<avx512f::EuclideanNorm>(120832, true); } #endif |