diff options
author    | Mateusz Chudyk <mateuszchudyk@gmail.com>      | 2020-02-06 19:24:46 +0300
committer | GitHub <noreply@github.com>                   | 2020-02-06 19:24:46 +0300
commit    | 54c84ff67061ccf07480bdb384f8b64211b083bd (patch)
tree      | 1f9f1646a9760da89d5f9b606dedbd61172bcbed /test
parent    | faa096b372df5c3bf8e060effb6437fdf26598cc (diff)
parent    | 12becc0f51e085f9f16177ce4f01d7e6fc136188 (diff)
Merge pull request #56 from kpu/prepare-b-transposed
Add PrepareBTransposed
Diffstat (limited to 'test')
-rw-r--r-- | test/prepare_b_quantized_transposed.cc | 34
-rw-r--r-- | test/prepare_b_transposed.cc           | 95
2 files changed, 111 insertions, 18 deletions
diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc index b7b2257..a096c21 100644 --- a/test/prepare_b_quantized_transposed.cc +++ b/test/prepare_b_quantized_transposed.cc @@ -13,27 +13,27 @@ namespace intgemm { namespace { template <typename Backend> -void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index rows, Index cols) { +void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index B_transposed_cols, Index B_transposed_rows) { using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>; constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer); auto output_it = output; - for (Index r = 0; r < rows; r += 8) - for (Index c = 0; c < cols; c += vec_len) + for (Index r = 0; r < B_transposed_rows; r += 8) + for (Index c = 0; c < B_transposed_cols; c += vec_len) for (Index ri = 0; ri < 8; ++ri) for (Index ci = 0; ci < vec_len; ++ci) - *output_it++ = input[(r + ri) * cols + c + ci]; + *output_it++ = input[(r + ri) * B_transposed_cols + c + ci]; } template <typename Backend> -bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Index cols) { +bool Test(const AlignedVector<typename Backend::Integer>& input, Index B_rows, Index B_cols) { bool success = true; AlignedVector<typename Backend::Integer> output(input.size()); - Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows); + Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), B_rows, B_cols); AlignedVector<typename Backend::Integer> reference(input.size()); - PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), rows, cols); + PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), B_rows, B_cols); for (std::size_t i = 0; i < output.size(); ++i) { if (output[i] != reference[i]) { @@ -46,10 +46,8 @@ bool Test(const 
AlignedVector<typename Backend::Integer>& input, Index rows, Ind } template <typename Backend> -bool TestMany() { - const static Index rows = 128; - const static Index cols = 128; - AlignedVector<typename Backend::Integer> input(rows * cols); +bool TestMany(Index B_rows, Index B_cols) { + AlignedVector<typename Backend::Integer> input(B_rows * B_cols); std::generate(input.begin(), input.end(), []() { static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer); @@ -57,29 +55,29 @@ bool TestMany() { return (value++) % divider; }); - return Test<Backend>(input, rows, cols); + return Test<Backend>(input, B_rows, B_cols); } TEST_CASE("PrepareBQuantizedTransposed SSE2", "") { if (kCPU < CPUType::SSE2) return; - CHECK(TestMany<SSE2_16bit>()); + CHECK(TestMany<SSE2_16bit>(32, 128)); } TEST_CASE("PrepareBQuantizedTransposed SSSE3", "") { if (kCPU < CPUType::SSSE3) return; - CHECK(TestMany<SSSE3_8bit>()); + CHECK(TestMany<SSSE3_8bit>(32, 128)); } TEST_CASE("PrepareBQuantizedTransposed AVX2", "") { if (kCPU < CPUType::AVX2) return; - CHECK(TestMany<AVX2_8bit>()); - CHECK(TestMany<AVX2_16bit>()); + CHECK(TestMany<AVX2_8bit>(32, 128)); + CHECK(TestMany<AVX2_16bit>(32, 128)); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 @@ -87,8 +85,8 @@ TEST_CASE("PrepareBQuantizedTransposed AVX2", "") { if (kCPU < CPUType::AVX512BW) return; - CHECK(TestMany<AVX512_8bit>()); - CHECK(TestMany<AVX512_16bit>()); + CHECK(TestMany<AVX512_8bit>(32, 128)); + CHECK(TestMany<AVX512_16bit>(32, 128)); } #endif diff --git a/test/prepare_b_transposed.cc b/test/prepare_b_transposed.cc new file mode 100644 index 0000000..219e56a --- /dev/null +++ b/test/prepare_b_transposed.cc @@ -0,0 +1,95 @@ +#include "test.h" +#include "../aligned.h" +#include "../avx2_gemm.h" +#include "../avx512_gemm.h" +#include "../sse2_gemm.h" +#include "../ssse3_gemm.h" + +#include <cstring> +#include <iostream> +#include <math.h> + +namespace intgemm { 
+namespace { + +template <typename Backend> +void PrepareBTransposedRef(const float* input, typename Backend::Integer* output, float quant_mult, Index B_transposed_cols, Index B_transposed_rows) { + using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>; + constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer); + + for (Index i = 0; i < B_transposed_rows * B_transposed_cols / 8; i += vec_len) + for (Index j = 0; j < 8; ++j) + for (Index k = 0; k < vec_len; ++k) { + Index col = (i + k) % B_transposed_cols; + Index row = 8 * ((i + k) / B_transposed_cols) + j; + *output++ = input[row * B_transposed_cols + col] * quant_mult; + } +} + +template <typename Backend> +bool Test(const AlignedVector<float>& input, Index B_rows, Index B_cols, float quant_mult) { + bool success = true; + + AlignedVector<typename Backend::Integer> output(input.size()); + Backend::PrepareBTransposed(input.begin(), output.begin(), quant_mult, B_rows, B_cols); + + AlignedVector<typename Backend::Integer> reference(input.size()); + PrepareBTransposedRef<Backend>(input.begin(), reference.begin(), quant_mult, B_rows, B_cols); + + for (std::size_t i = 0; i < output.size(); ++i) { + if (output[i] != reference[i]) { + UNSCOPED_INFO("Error at " << i << ", output = " << int(output[i]) << ", reference = " << int(reference[i])); + success = false; + break; + } + } + return success; +} + +template <typename Backend> +bool TestMany(Index B_rows, Index B_cols, float quant_mult) { + AlignedVector<float> input(B_rows * B_cols); + + std::generate(input.begin(), input.end(), []() { + static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer); + static int value = 0; + return (value++) % divider; + }); + + return Test<Backend>(input, B_rows, B_cols, quant_mult); +} + +TEST_CASE("PrepareBTransposed SSE2", "") { + if (kCPU < CPUType::SSE2) + return; + + CHECK(TestMany<SSE2_16bit>(4, 128, 2.0f)); +} + 
+TEST_CASE("PrepareBTransposed SSSE3", "") { + if (kCPU < CPUType::SSSE3) + return; + + CHECK(TestMany<SSSE3_8bit>(4, 128, 2.0f)); +} + +TEST_CASE("PrepareBTransposed AVX2", "") { + if (kCPU < CPUType::AVX2) + return; + + CHECK(TestMany<AVX2_8bit>(8, 128, 2.0f)); + CHECK(TestMany<AVX2_16bit>(8, 128, 2.0f)); +} + +#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 + TEST_CASE("PrepareBTransposed AVX512", "") { + if (kCPU < CPUType::AVX512BW) + return; + + CHECK(TestMany<AVX512_8bit>(16, 128, 2.0f)); + CHECK(TestMany<AVX512_16bit>(16, 128, 2.0f)); + } +#endif + +} +} |