Welcome to mirror list, hosted at ThFree Co, Russian Federation.

prepare_b_quantized_transposed.cc « test - github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: e27992a43fbcc76bcb02a8a9b32beebd1cba0bdc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include "test.h"
#include "../intgemm/aligned.h"
#include "../intgemm/avx2_gemm.h"
#include "../intgemm/avx512_gemm.h"
#include "../intgemm/sse2_gemm.h"
#include "../intgemm/ssse3_gemm.h"

#include <cmath>
#include <cstring>
#include <iostream>

namespace intgemm {
namespace {

template <typename Backend>
void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index B_transposed_cols, Index B_transposed_rows) {
  using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>;
  constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer);

  auto output_it = output;
  for (Index r = 0; r < B_transposed_rows; r += 8)
    for (Index c = 0; c < B_transposed_cols; c += vec_len)
      for (Index ri = 0; ri < 8; ++ri)
        for (Index ci = 0; ci < vec_len; ++ci)
          *output_it++ = input[(r + ri) * B_transposed_cols + c + ci];
}

template <typename Backend>
bool Test(const AlignedVector<typename Backend::Integer>& input, Index B_rows, Index B_cols) {
  bool success = true;

  AlignedVector<typename Backend::Integer> output(input.size());
  Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), B_rows, B_cols);

  AlignedVector<typename Backend::Integer> reference(input.size());
  PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), B_rows, B_cols);

  for (std::size_t i = 0; i < output.size(); ++i) {
    if (output[i] != reference[i]) {
      UNSCOPED_INFO("Error at " << i << ", output = " << int(output[i]) << ", reference = " << int(reference[i]));
      success = false;
      break;
    }
  }
  return success;
}

// Builds a B_rows * B_cols input buffer and checks it with Test<Backend>.
//
// The buffer is filled with the repeating sequence 0, 1, ..., lanes - 1, where
// `lanes` is the number of Backend::Integer elements in one vector register of
// the backend.
//
// Returns the result of Test<Backend>(input, B_rows, B_cols).
template <typename Backend>
bool TestMany(Index B_rows, Index B_cols) {
  using Integer = typename Backend::Integer;
  constexpr int kLanes = sizeof(intgemm::vector_t<Backend::kUses, Integer>) / sizeof(Integer);

  AlignedVector<Integer> input(B_rows * B_cols);

  // The counter is a plain local (not a function-local static), so every call
  // starts the pattern at 0 regardless of earlier calls, and it is wrapped on
  // each step so it can never overflow.
  int value = 0;
  for (auto it = input.begin(); it != input.end(); ++it) {
    *it = static_cast<Integer>(value);
    value = (value + 1) % kLanes;
  }

  return Test<Backend>(input, B_rows, B_cols);
}

// Checks sse2::Kernels16 with a 32 x 128 input; does nothing when kCPU ranks
// below CPUType::SSE2.
TEST_CASE("PrepareBQuantizedTransposed SSE2", "") {
  const bool cpu_supported = !(kCPU < CPUType::SSE2);
  if (cpu_supported) {
    CHECK(TestMany<sse2::Kernels16>(32, 128));
  }
}

// Checks ssse3::Kernels8 with a 32 x 128 input; does nothing when kCPU ranks
// below CPUType::SSSE3.
TEST_CASE("PrepareBQuantizedTransposed SSSE3", "") {
  const bool cpu_supported = !(kCPU < CPUType::SSSE3);
  if (cpu_supported) {
    CHECK(TestMany<ssse3::Kernels8>(32, 128));
  }
}

#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
// Checks both avx2 kernel sets (8-bit, then 16-bit) with a 32 x 128 input;
// does nothing when kCPU ranks below CPUType::AVX2. Only compiled when
// INTGEMM_COMPILER_SUPPORTS_AVX2 is defined.
TEST_CASE("PrepareBQuantizedTransposed AVX2", "") {
  const bool cpu_supported = !(kCPU < CPUType::AVX2);
  if (cpu_supported) {
    CHECK(TestMany<avx2::Kernels8>(32, 128));
    CHECK(TestMany<avx2::Kernels16>(32, 128));
  }
}
#endif

#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
// Checks both avx512bw kernel sets (8-bit, then 16-bit) with a 64 x 128 input;
// does nothing when kCPU ranks below CPUType::AVX512BW. Only compiled when
// INTGEMM_COMPILER_SUPPORTS_AVX512BW is defined.
TEST_CASE("PrepareBQuantizedTransposed AVX512", "") {
  const bool cpu_supported = !(kCPU < CPUType::AVX512BW);
  if (cpu_supported) {
    CHECK(TestMany<avx512bw::Kernels8>(64, 128));
    CHECK(TestMany<avx512bw::Kernels16>(64, 128));
  }
}
#endif

}
}