diff options
author    | Mateusz Chudyk <mateuszchudyk@gmail.com>      | 2020-02-06 19:24:46 +0300
committer | GitHub <noreply@github.com>                   | 2020-02-06 19:24:46 +0300
commit    | 54c84ff67061ccf07480bdb384f8b64211b083bd (patch)
tree      | 1f9f1646a9760da89d5f9b606dedbd61172bcbed /test
parent    | faa096b372df5c3bf8e060effb6437fdf26598cc (diff)
parent    | 12becc0f51e085f9f16177ce4f01d7e6fc136188 (diff)
Merge pull request #56 from kpu/prepare-b-transposed
Add PrepareBTransposed
Diffstat (limited to 'test')
-rw-r--r-- | test/prepare_b_quantized_transposed.cc | 34
-rw-r--r-- | test/prepare_b_transposed.cc           | 95
2 files changed, 111 insertions, 18 deletions
diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc index b7b2257..a096c21 100644 --- a/test/prepare_b_quantized_transposed.cc +++ b/test/prepare_b_quantized_transposed.cc @@ -13,27 +13,27 @@ namespace intgemm { namespace { template <typename Backend> -void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index rows, Index cols) { +void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index B_transposed_cols, Index B_transposed_rows) { using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>; constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer); auto output_it = output; - for (Index r = 0; r < rows; r += 8) - for (Index c = 0; c < cols; c += vec_len) + for (Index r = 0; r < B_transposed_rows; r += 8) + for (Index c = 0; c < B_transposed_cols; c += vec_len) for (Index ri = 0; ri < 8; ++ri) for (Index ci = 0; ci < vec_len; ++ci) - *output_it++ = input[(r + ri) * cols + c + ci]; + *output_it++ = input[(r + ri) * B_transposed_cols + c + ci]; } template <typename Backend> -bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Index cols) { +bool Test(const AlignedVector<typename Backend::Integer>& input, Index B_rows, Index B_cols) { bool success = true; AlignedVector<typename Backend::Integer> output(input.size()); - Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows); + Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), B_rows, B_cols); AlignedVector<typename Backend::Integer> reference(input.size()); - PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), rows, cols); + PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), B_rows, B_cols); for (std::size_t i = 0; i < output.size(); ++i) { if (output[i] != reference[i]) { @@ -46,10 +46,8 @@ bool Test(const 
AlignedVector<typename Backend::Integer>& input, Index rows, Ind } template <typename Backend> -bool TestMany() { - const static Index rows = 128; - const static Index cols = 128; - AlignedVector<typename Backend::Integer> input(rows * cols); +bool TestMany(Index B_rows, Index B_cols) { + AlignedVector<typename Backend::Integer> input(B_rows * B_cols); std::generate(input.begin(), input.end(), []() { static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer); @@ -57,29 +55,29 @@ bool TestMany() { return (value++) % divider; }); - return Test<Backend>(input, rows, cols); + return Test<Backend>(input, B_rows, B_cols); } TEST_CASE("PrepareBQuantizedTransposed SSE2", "") { if (kCPU < CPUType::SSE2) return; - CHECK(TestMany<SSE2_16bit>()); + CHECK(TestMany<SSE2_16bit>(32, 128)); } TEST_CASE("PrepareBQuantizedTransposed SSSE3", "") { if (kCPU < CPUType::SSSE3) return; - CHECK(TestMany<SSSE3_8bit>()); + CHECK(TestMany<SSSE3_8bit>(32, 128)); } TEST_CASE("PrepareBQuantizedTransposed AVX2", "") { if (kCPU < CPUType::AVX2) return; - CHECK(TestMany<AVX2_8bit>()); - CHECK(TestMany<AVX2_16bit>()); + CHECK(TestMany<AVX2_8bit>(32, 128)); + CHECK(TestMany<AVX2_16bit>(32, 128)); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 @@ -87,8 +85,8 @@ TEST_CASE("PrepareBQuantizedTransposed AVX2", "") { if (kCPU < CPUType::AVX512BW) return; - CHECK(TestMany<AVX512_8bit>()); - CHECK(TestMany<AVX512_16bit>()); + CHECK(TestMany<AVX512_8bit>(32, 128)); + CHECK(TestMany<AVX512_16bit>(32, 128)); } #endif diff --git a/test/prepare_b_transposed.cc b/test/prepare_b_transposed.cc new file mode 100644 index 0000000..219e56a --- /dev/null +++ b/test/prepare_b_transposed.cc @@ -0,0 +1,95 @@ +#include "test.h" +#include "../aligned.h" +#include "../avx2_gemm.h" +#include "../avx512_gemm.h" +#include "../sse2_gemm.h" +#include "../ssse3_gemm.h" + +#include <cstring> +#include <iostream> +#include <math.h> + +namespace intgemm { 
+namespace { + +template <typename Backend> +void PrepareBTransposedRef(const float* input, typename Backend::Integer* output, float quant_mult, Index B_transposed_cols, Index B_transposed_rows) { + using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>; + constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer); + + for (Index i = 0; i < B_transposed_rows * B_transposed_cols / 8; i += vec_len) + for (Index j = 0; j < 8; ++j) + for (Index k = 0; k < vec_len; ++k) { + Index col = (i + k) % B_transposed_cols; + Index row = 8 * ((i + k) / B_transposed_cols) + j; + *output++ = input[row * B_transposed_cols + col] * quant_mult; + } +} + +template <typename Backend> +bool Test(const AlignedVector<float>& input, Index B_rows, Index B_cols, float quant_mult) { + bool success = true; + + AlignedVector<typename Backend::Integer> output(input.size()); + Backend::PrepareBTransposed(input.begin(), output.begin(), quant_mult, B_rows, B_cols); + + AlignedVector<typename Backend::Integer> reference(input.size()); + PrepareBTransposedRef<Backend>(input.begin(), reference.begin(), quant_mult, B_rows, B_cols); + + for (std::size_t i = 0; i < output.size(); ++i) { + if (output[i] != reference[i]) { + UNSCOPED_INFO("Error at " << i << ", output = " << int(output[i]) << ", reference = " << int(reference[i])); + success = false; + break; + } + } + return success; +} + +template <typename Backend> +bool TestMany(Index B_rows, Index B_cols, float quant_mult) { + AlignedVector<float> input(B_rows * B_cols); + + std::generate(input.begin(), input.end(), []() { + static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer); + static int value = 0; + return (value++) % divider; + }); + + return Test<Backend>(input, B_rows, B_cols, quant_mult); +} + +TEST_CASE("PrepareBTransposed SSE2", "") { + if (kCPU < CPUType::SSE2) + return; + + CHECK(TestMany<SSE2_16bit>(4, 128, 2.0f)); +} + 
+TEST_CASE("PrepareBTransposed SSSE3", "") { + if (kCPU < CPUType::SSSE3) + return; + + CHECK(TestMany<SSSE3_8bit>(4, 128, 2.0f)); +} + +TEST_CASE("PrepareBTransposed AVX2", "") { + if (kCPU < CPUType::AVX2) + return; + + CHECK(TestMany<AVX2_8bit>(8, 128, 2.0f)); + CHECK(TestMany<AVX2_16bit>(8, 128, 2.0f)); +} + +#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 + TEST_CASE("PrepareBTransposed AVX512", "") { + if (kCPU < CPUType::AVX512BW) + return; + + CHECK(TestMany<AVX512_8bit>(16, 128, 2.0f)); + CHECK(TestMany<AVX512_16bit>(16, 128, 2.0f)); + } +#endif + +} +} |