diff options
author | Mateusz Chudyk <mateuszchudyk@gmail.com> | 2020-02-05 21:47:57 +0300 |
---|---|---|
committer | Mateusz Chudyk <mateuszchudyk@gmail.com> | 2020-02-05 21:48:08 +0300 |
commit | 12becc0f51e085f9f16177ce4f01d7e6fc136188 (patch) | |
tree | b4034677477391df85e57a77aec41b0ce74745cb | |
parent | d9c1cc53363b2ab03a87ac094b3358d5fce7c61d (diff) |
Update PrepareB[Quantized]Transposed tests
-rw-r--r-- | interleave.h | 4 | ||||
-rw-r--r-- | test/prepare_b_quantized_transposed.cc | 34 | ||||
-rw-r--r-- | test/prepare_b_transposed.cc | 35 |
3 files changed, 37 insertions, 36 deletions
diff --git a/interleave.h b/interleave.h index 30e0dea..41ac8b7 100644 --- a/interleave.h +++ b/interleave.h @@ -238,6 +238,8 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f * Prepare B matrix. * B matrix has to be transposed and quantized. * Cols has to be a multiple of sizeof(Register) / sizeof(Integer). + * + * cols and rows describe size of transposed B. */ #define INTGEMM_PREPARE_B_QUANTIZED_TRANSPOSED(target, cpu_type, Integer) \ target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index cols, Index rows) { \ @@ -261,6 +263,8 @@ target static inline void PrepareBQuantizedTransposed(const Integer* input, Inte * Prepare B matrix. * B matrix has to be transposed. * Cols has to be a multiple of sizeof(Register) / sizeof(float). + * + * cols and rows describe size of transposed B. */ #define INTGEMM_PREPARE_B_TRANSPOSED(target, Quantizer, integer) \ target static inline void PrepareBTransposed(const float* input, integer* output, float quant_mult, Index cols, Index rows) { \ diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc index b7b2257..a096c21 100644 --- a/test/prepare_b_quantized_transposed.cc +++ b/test/prepare_b_quantized_transposed.cc @@ -13,27 +13,27 @@ namespace intgemm { namespace { template <typename Backend> -void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index rows, Index cols) { +void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index B_transposed_cols, Index B_transposed_rows) { using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>; constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer); auto output_it = output; - for (Index r = 0; r < rows; r += 8) - for (Index c = 0; c < cols; c += vec_len) + for (Index r = 0; r < B_transposed_rows; r += 8) + for (Index c = 0; c < B_transposed_cols; c += vec_len) for (Index ri = 0; ri < 8; ++ri) for (Index ci = 0; ci < vec_len; ++ci) - *output_it++ = input[(r + ri) * cols + c + ci]; + *output_it++ = input[(r + ri) * B_transposed_cols + c + ci]; } template <typename Backend> -bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Index cols) { +bool Test(const AlignedVector<typename Backend::Integer>& input, Index B_rows, Index B_cols) { bool success = true; AlignedVector<typename Backend::Integer> output(input.size()); - Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows); + Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), B_rows, B_cols); AlignedVector<typename Backend::Integer> reference(input.size()); - PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), rows, cols); + PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), B_rows, B_cols); for (std::size_t i = 0; i < output.size(); ++i) { if (output[i] != reference[i]) { @@ -46,10 +46,8 @@ bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Ind } template <typename Backend> -bool TestMany() { - const static Index rows = 128; - const static Index cols = 128; - AlignedVector<typename Backend::Integer> input(rows * cols); +bool TestMany(Index B_rows, Index B_cols) { + AlignedVector<typename Backend::Integer> input(B_rows * B_cols); std::generate(input.begin(), input.end(), []() { static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer); @@ -57,29 +55,29 @@ bool TestMany() { return (value++) % divider; }); - return Test<Backend>(input, rows, cols); + return Test<Backend>(input, B_rows, B_cols); } TEST_CASE("PrepareBQuantizedTransposed SSE2", "") { if (kCPU < CPUType::SSE2) return; - CHECK(TestMany<SSE2_16bit>()); + CHECK(TestMany<SSE2_16bit>(32, 128)); } TEST_CASE("PrepareBQuantizedTransposed SSSE3", "") { if (kCPU < CPUType::SSSE3) return; - CHECK(TestMany<SSSE3_8bit>()); + CHECK(TestMany<SSSE3_8bit>(32, 128)); } TEST_CASE("PrepareBQuantizedTransposed AVX2", "") { if (kCPU < CPUType::AVX2) return; - CHECK(TestMany<AVX2_8bit>()); - CHECK(TestMany<AVX2_16bit>()); + CHECK(TestMany<AVX2_8bit>(32, 128)); + CHECK(TestMany<AVX2_16bit>(32, 128)); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 @@ -87,8 +85,8 @@ TEST_CASE("PrepareBQuantizedTransposed AVX2", "") { if (kCPU < CPUType::AVX512BW) return; - CHECK(TestMany<AVX512_8bit>()); - CHECK(TestMany<AVX512_16bit>()); + CHECK(TestMany<AVX512_8bit>(32, 128)); + CHECK(TestMany<AVX512_16bit>(32, 128)); } #endif diff --git a/test/prepare_b_transposed.cc b/test/prepare_b_transposed.cc index 45730a7..219e56a 100644 --- a/test/prepare_b_transposed.cc +++ b/test/prepare_b_transposed.cc @@ -13,28 +13,28 @@ namespace intgemm { namespace { template <typename Backend> -void PrepareBTransposedRef(const float* input, typename Backend::Integer* output, float quant_mult, Index rows, Index cols) { +void PrepareBTransposedRef(const float* input, typename Backend::Integer* output, float quant_mult, Index B_transposed_cols, Index B_transposed_rows) { using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>; constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer); - for (Index i = 0; i < rows * cols / 8; i += vec_len) + for (Index i = 0; i < B_transposed_rows * B_transposed_cols / 8; i += vec_len) for (Index j = 0; j < 8; ++j) for (Index k = 0; k < vec_len; ++k) { - Index col = (i + k) % cols; - Index row = 8 * ((i + k) / cols) + j; - *output++ = input[row * cols + col] * quant_mult; + Index col = (i + k) % B_transposed_cols; + Index row = 8 * ((i + k) / B_transposed_cols) + j; + *output++ = input[row * B_transposed_cols + col] * quant_mult; } } template <typename Backend> -bool Test(const AlignedVector<float>& input, Index rows, Index cols, float quant_mult) { +bool Test(const AlignedVector<float>& input, Index B_rows, Index B_cols, float quant_mult) { bool success = true; AlignedVector<typename Backend::Integer> output(input.size()); - Backend::PrepareBTransposed(input.begin(), output.begin(), quant_mult, cols, rows); + Backend::PrepareBTransposed(input.begin(), output.begin(), quant_mult, B_rows, B_cols); AlignedVector<typename Backend::Integer> reference(input.size()); - PrepareBTransposedRef<Backend>(input.begin(), reference.begin(), quant_mult, rows, cols); + PrepareBTransposedRef<Backend>(input.begin(), reference.begin(), quant_mult, B_rows, B_cols); for (std::size_t i = 0; i < output.size(); ++i) { if (output[i] != reference[i]) { @@ -47,9 +47,8 @@ bool Test(const AlignedVector<float>& input, Index rows, Index cols, float quant } template <typename Backend> -bool TestMany(Index rows, Index cols) { - AlignedVector<float> input(rows * cols); - const float quant_mult = 2.f; +bool TestMany(Index B_rows, Index B_cols, float quant_mult) { + AlignedVector<float> input(B_rows * B_cols); std::generate(input.begin(), input.end(), []() { static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer); @@ -57,29 +56,29 @@ bool TestMany(Index rows, Index cols) { return (value++) % divider; }); - return Test<Backend>(input, rows, cols, quant_mult); + return Test<Backend>(input, B_rows, B_cols, quant_mult); } TEST_CASE("PrepareBTransposed SSE2", "") { if (kCPU < CPUType::SSE2) return; - CHECK(TestMany<SSE2_16bit>(128, 4)); + CHECK(TestMany<SSE2_16bit>(4, 128, 2.0f)); } TEST_CASE("PrepareBTransposed SSSE3", "") { if (kCPU < CPUType::SSSE3) return; - CHECK(TestMany<SSSE3_8bit>(128, 4)); + CHECK(TestMany<SSSE3_8bit>(4, 128, 2.0f)); } TEST_CASE("PrepareBTransposed AVX2", "") { if (kCPU < CPUType::AVX2) return; - CHECK(TestMany<AVX2_8bit>(128, 8)); - CHECK(TestMany<AVX2_16bit>(128, 8)); + CHECK(TestMany<AVX2_8bit>(8, 128, 2.0f)); + CHECK(TestMany<AVX2_16bit>(8, 128, 2.0f)); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 @@ -87,8 +86,8 @@ TEST_CASE("PrepareBTransposed AVX2", "") { if (kCPU < CPUType::AVX512BW) return; - CHECK(TestMany<AVX512_8bit>(128, 16)); - CHECK(TestMany<AVX512_16bit>(128, 16)); + CHECK(TestMany<AVX512_8bit>(16, 128, 2.0f)); + CHECK(TestMany<AVX512_16bit>(16, 128, 2.0f)); } #endif |