github.com/marian-nmt/intgemm/intgemm.git
author     Mateusz Chudyk <mateuszchudyk@gmail.com>   2020-02-05 21:47:57 +0300
committer  Mateusz Chudyk <mateuszchudyk@gmail.com>   2020-02-05 21:48:08 +0300
commit     12becc0f51e085f9f16177ce4f01d7e6fc136188 (patch)
tree       b4034677477391df85e57a77aec41b0ce74745cb
parent     d9c1cc53363b2ab03a87ac094b3358d5fce7c61d (diff)
Update PrepareB[Quantized]Transposed tests
-rw-r--r--  interleave.h                            |  4
-rw-r--r--  test/prepare_b_quantized_transposed.cc  | 34
-rw-r--r--  test/prepare_b_transposed.cc            | 35
3 files changed, 37 insertions(+), 36 deletions(-)
diff --git a/interleave.h b/interleave.h
index 30e0dea..41ac8b7 100644
--- a/interleave.h
+++ b/interleave.h
@@ -238,6 +238,8 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f
* Prepare B matrix.
* B matrix has to be transposed and quantized.
* Cols has to be a multiple of sizeof(Register) / sizeof(Integer).
+ *
+ * cols and rows describe the size of the transposed B.
*/
#define INTGEMM_PREPARE_B_QUANTIZED_TRANSPOSED(target, cpu_type, Integer) \
target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index cols, Index rows) { \
@@ -261,6 +263,8 @@ target static inline void PrepareBQuantizedTransposed(const Integer* input, Inte
* Prepare B matrix.
* B matrix has to be transposed.
* Cols has to be a multiple of sizeof(Register) / sizeof(float).
+ *
+ * cols and rows describe the size of the transposed B.
*/
#define INTGEMM_PREPARE_B_TRANSPOSED(target, Quantizer, integer) \
target static inline void PrepareBTransposed(const float* input, integer* output, float quant_mult, Index cols, Index rows) { \
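The two new comment lines pin down the convention the rest of this commit relies on: cols and rows name the dimensions of B after transposition, so a B matrix of shape B_rows x B_cols is passed as cols = B_rows, rows = B_cols. A minimal sketch of a conforming call, mirroring the test setup below (the header names are assumptions, not part of this patch, and the host CPU is assumed to support SSE2):

#include "aligned.h"    // AlignedVector (assumed header name)
#include "sse2_gemm.h"  // SSE2_16bit (assumed header name)

using namespace intgemm;

int main() {
  // B is logically 32x128; the buffer holds it transposed: 128 rows by
  // 32 cols, row-major. For SSE2 with int16, cols must be a multiple of
  // sizeof(Register) / sizeof(Integer) = 16 / 2 = 8, which 32 satisfies.
  const Index cols = 32;   // columns of transposed B (= rows of B)
  const Index rows = 128;  // rows of transposed B (= cols of B)
  AlignedVector<int16_t> input(rows * cols);
  AlignedVector<int16_t> output(rows * cols);
  for (Index i = 0; i < rows * cols; ++i)
    input[i] = static_cast<int16_t>(i % 8);  // stand-in quantized weights
  // Rearranges memory layout only; the values are already quantized.
  SSE2_16bit::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows);
  return 0;
}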
diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc
index b7b2257..a096c21 100644
--- a/test/prepare_b_quantized_transposed.cc
+++ b/test/prepare_b_quantized_transposed.cc
@@ -13,27 +13,27 @@ namespace intgemm {
namespace {
template <typename Backend>
-void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index rows, Index cols) {
+void PrepareBQuantizedTransposedRef(const typename Backend::Integer* input, typename Backend::Integer* output, Index B_transposed_cols, Index B_transposed_rows) {
using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>;
constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer);
auto output_it = output;
- for (Index r = 0; r < rows; r += 8)
- for (Index c = 0; c < cols; c += vec_len)
+ for (Index r = 0; r < B_transposed_rows; r += 8)
+ for (Index c = 0; c < B_transposed_cols; c += vec_len)
for (Index ri = 0; ri < 8; ++ri)
for (Index ci = 0; ci < vec_len; ++ci)
- *output_it++ = input[(r + ri) * cols + c + ci];
+ *output_it++ = input[(r + ri) * B_transposed_cols + c + ci];
}
template <typename Backend>
-bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Index cols) {
+bool Test(const AlignedVector<typename Backend::Integer>& input, Index B_rows, Index B_cols) {
bool success = true;
AlignedVector<typename Backend::Integer> output(input.size());
- Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows);
+ Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), B_rows, B_cols);
AlignedVector<typename Backend::Integer> reference(input.size());
- PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), rows, cols);
+ PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), B_rows, B_cols);
for (std::size_t i = 0; i < output.size(); ++i) {
if (output[i] != reference[i]) {
@@ -46,10 +46,8 @@ bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Ind
}
template <typename Backend>
-bool TestMany() {
- const static Index rows = 128;
- const static Index cols = 128;
- AlignedVector<typename Backend::Integer> input(rows * cols);
+bool TestMany(Index B_rows, Index B_cols) {
+ AlignedVector<typename Backend::Integer> input(B_rows * B_cols);
std::generate(input.begin(), input.end(), []() {
static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer);
@@ -57,29 +55,29 @@ bool TestMany() {
return (value++) % divider;
});
- return Test<Backend>(input, rows, cols);
+ return Test<Backend>(input, B_rows, B_cols);
}
TEST_CASE("PrepareBQuantizedTransposed SSE2", "") {
if (kCPU < CPUType::SSE2)
return;
- CHECK(TestMany<SSE2_16bit>());
+ CHECK(TestMany<SSE2_16bit>(32, 128));
}
TEST_CASE("PrepareBQuantizedTransposed SSSE3", "") {
if (kCPU < CPUType::SSSE3)
return;
- CHECK(TestMany<SSSE3_8bit>());
+ CHECK(TestMany<SSSE3_8bit>(32, 128));
}
TEST_CASE("PrepareBQuantizedTransposed AVX2", "") {
if (kCPU < CPUType::AVX2)
return;
- CHECK(TestMany<AVX2_8bit>());
- CHECK(TestMany<AVX2_16bit>());
+ CHECK(TestMany<AVX2_8bit>(32, 128));
+ CHECK(TestMany<AVX2_16bit>(32, 128));
}
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
@@ -87,8 +85,8 @@ TEST_CASE("PrepareBQuantizedTransposed AVX2", "") {
if (kCPU < CPUType::AVX512BW)
return;
- CHECK(TestMany<AVX512_8bit>());
- CHECK(TestMany<AVX512_16bit>());
+ CHECK(TestMany<AVX512_8bit>(32, 128));
+ CHECK(TestMany<AVX512_16bit>(32, 128));
}
#endif
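The switch from a fixed square 128x128 input to rectangular shapes (32x128 here) is deliberate: a square matrix cannot distinguish a correct call from one with rows and cols swapped, while a rectangular one can expose the mix-up. The reference implementation above also fixes the expected layout: transposed B is read in bands of 8 rows, each band split into tiles vec_len columns wide, and every tile emitted row-major. The same blocking, extracted into a self-contained sketch over plain ints (the function name is illustrative; in the library, vec_len is sizeof(Register) / sizeof(Integer)):

#include <cstddef>
#include <vector>

// Copy a (rows x cols) row-major matrix in the tile order used by
// PrepareBQuantizedTransposedRef: 8-row bands, tiles vec_len columns
// wide, each tile written out row-major. Assumes rows is a multiple
// of 8 and cols a multiple of vec_len.
std::vector<int> InterleaveTiles(const std::vector<int>& input,
                                 std::size_t rows, std::size_t cols,
                                 std::size_t vec_len) {
  std::vector<int> output;
  output.reserve(rows * cols);
  for (std::size_t r = 0; r < rows; r += 8)
    for (std::size_t c = 0; c < cols; c += vec_len)
      for (std::size_t ri = 0; ri < 8; ++ri)
        for (std::size_t ci = 0; ci < vec_len; ++ci)
          output.push_back(input[(r + ri) * cols + c + ci]);
  return output;
}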
diff --git a/test/prepare_b_transposed.cc b/test/prepare_b_transposed.cc
index 45730a7..219e56a 100644
--- a/test/prepare_b_transposed.cc
+++ b/test/prepare_b_transposed.cc
@@ -13,28 +13,28 @@ namespace intgemm {
namespace {
template <typename Backend>
-void PrepareBTransposedRef(const float* input, typename Backend::Integer* output, float quant_mult, Index rows, Index cols) {
+void PrepareBTransposedRef(const float* input, typename Backend::Integer* output, float quant_mult, Index B_transposed_cols, Index B_transposed_rows) {
using vec_t = intgemm::vector_t<Backend::kUses, typename Backend::Integer>;
constexpr Index vec_len = sizeof(vec_t) / sizeof(typename Backend::Integer);
- for (Index i = 0; i < rows * cols / 8; i += vec_len)
+ for (Index i = 0; i < B_transposed_rows * B_transposed_cols / 8; i += vec_len)
for (Index j = 0; j < 8; ++j)
for (Index k = 0; k < vec_len; ++k) {
- Index col = (i + k) % cols;
- Index row = 8 * ((i + k) / cols) + j;
- *output++ = input[row * cols + col] * quant_mult;
+ Index col = (i + k) % B_transposed_cols;
+ Index row = 8 * ((i + k) / B_transposed_cols) + j;
+ *output++ = input[row * B_transposed_cols + col] * quant_mult;
}
}
template <typename Backend>
-bool Test(const AlignedVector<float>& input, Index rows, Index cols, float quant_mult) {
+bool Test(const AlignedVector<float>& input, Index B_rows, Index B_cols, float quant_mult) {
bool success = true;
AlignedVector<typename Backend::Integer> output(input.size());
- Backend::PrepareBTransposed(input.begin(), output.begin(), quant_mult, cols, rows);
+ Backend::PrepareBTransposed(input.begin(), output.begin(), quant_mult, B_rows, B_cols);
AlignedVector<typename Backend::Integer> reference(input.size());
- PrepareBTransposedRef<Backend>(input.begin(), reference.begin(), quant_mult, rows, cols);
+ PrepareBTransposedRef<Backend>(input.begin(), reference.begin(), quant_mult, B_rows, B_cols);
for (std::size_t i = 0; i < output.size(); ++i) {
if (output[i] != reference[i]) {
@@ -47,9 +47,8 @@ bool Test(const AlignedVector<float>& input, Index rows, Index cols, float quant
}
template <typename Backend>
-bool TestMany(Index rows, Index cols) {
- AlignedVector<float> input(rows * cols);
- const float quant_mult = 2.f;
+bool TestMany(Index B_rows, Index B_cols, float quant_mult) {
+ AlignedVector<float> input(B_rows * B_cols);
std::generate(input.begin(), input.end(), []() {
static constexpr int divider = sizeof(intgemm::vector_t<Backend::kUses, typename Backend::Integer>) / sizeof(typename Backend::Integer);
@@ -57,29 +56,29 @@ bool TestMany(Index rows, Index cols) {
return (value++) % divider;
});
- return Test<Backend>(input, rows, cols, quant_mult);
+ return Test<Backend>(input, B_rows, B_cols, quant_mult);
}
TEST_CASE("PrepareBTransposed SSE2", "") {
if (kCPU < CPUType::SSE2)
return;
- CHECK(TestMany<SSE2_16bit>(128, 4));
+ CHECK(TestMany<SSE2_16bit>(4, 128, 2.0f));
}
TEST_CASE("PrepareBTransposed SSSE3", "") {
if (kCPU < CPUType::SSSE3)
return;
- CHECK(TestMany<SSSE3_8bit>(128, 4));
+ CHECK(TestMany<SSSE3_8bit>(4, 128, 2.0f));
}
TEST_CASE("PrepareBTransposed AVX2", "") {
if (kCPU < CPUType::AVX2)
return;
- CHECK(TestMany<AVX2_8bit>(128, 8));
- CHECK(TestMany<AVX2_16bit>(128, 8));
+ CHECK(TestMany<AVX2_8bit>(8, 128, 2.0f));
+ CHECK(TestMany<AVX2_16bit>(8, 128, 2.0f));
}
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
@@ -87,8 +86,8 @@ TEST_CASE("PrepareBTransposed AVX2", "") {
if (kCPU < CPUType::AVX512BW)
return;
- CHECK(TestMany<AVX512_8bit>(128, 16));
- CHECK(TestMany<AVX512_16bit>(128, 16));
+ CHECK(TestMany<AVX512_8bit>(16, 128, 2.0f));
+ CHECK(TestMany<AVX512_16bit>(16, 128, 2.0f));
}
#endif
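PrepareBTransposedRef combines the layout change with quantization: each float is scaled by quant_mult and truncated to the target integer type as it is written out in tile order. A standalone sketch of that loop (the name QuantizeAndInterleave and the fixed int16_t output are illustrative; the index arithmetic is copied from the reference above):

#include <cstddef>
#include <cstdint>
#include <vector>

// Quantize a row-major float matrix (rows x cols = dimensions of
// transposed B) while writing it in tile order. The float->int
// conversion truncates, exactly as the implicit conversion in
// PrepareBTransposedRef does.
std::vector<int16_t> QuantizeAndInterleave(const std::vector<float>& input,
                                           std::size_t rows, std::size_t cols,
                                           std::size_t vec_len,
                                           float quant_mult) {
  std::vector<int16_t> output;
  output.reserve(rows * cols);
  for (std::size_t i = 0; i < rows * cols / 8; i += vec_len)
    for (std::size_t j = 0; j < 8; ++j)
      for (std::size_t k = 0; k < vec_len; ++k) {
        std::size_t col = (i + k) % cols;
        std::size_t row = 8 * ((i + k) / cols) + j;
        output.push_back(static_cast<int16_t>(input[row * cols + col] * quant_mult));
      }
  return output;
}

Note that the argument swap in the test cases, e.g. from TestMany(128, 4) to TestMany(4, 128, 2.0f), follows the renaming rather than changing behavior: callers now pass B's own shape, Test forwards it as the transposed dimensions, and the same values reach PrepareBTransposed either way. Hoisting quant_mult out of TestMany into each CHECK also lets future tests vary the multiplier per backend.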