github.com/marian-nmt/intgemm/intgemm.git
path: root/test
author     Mateusz Chudyk <mateuszchudyk@gmail.com>   2020-02-06 21:32:34 +0300
committer  Mateusz Chudyk <mateuszchudyk@gmail.com>   2020-02-06 22:39:22 +0300
commit     a72b13b72d04f0863decd46c5b9cdca24d962de3 (patch)
tree       4065cac99a524c1d846f9602b5e25f4a832fddeb /test
parent     ebace9a4089535493afbaa0ec6ec7d8c366b66b3 (diff)
Straighten functions producing test reference values
Diffstat (limited to 'test')
-rw-r--r--  test/add127_test.cc   | 33
-rw-r--r--  test/multiply_test.cc | 48
-rw-r--r--  test/test.cc          | 56
-rw-r--r--  test/test.h           | 89
4 files changed, 122 insertions, 104 deletions
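
In short: the commit drops the ad-hoc SlowRefFloat / SlowRefInt / SlowRearrange / SlowTranspose helpers and replaces them with templated reference implementations in a new `references` namespace in test/test.h. The integer and float reference multiplies no longer hard-code an unquantize-and-add-bias epilogue; instead they take a lambda that receives the raw accumulator together with a callbacks::OutputBufferInfo describing the output position, and each test supplies its own epilogue at the call site. The following sketch is a standalone illustration of that callback pattern; it mirrors the loops added to test/test.h but uses minimal stand-in types (Index, OutputInfo) of my own rather than the real intgemm headers.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for intgemm::Index and callbacks::OutputBufferInfo,
// used here only to keep the sketch self-contained.
using Index = unsigned int;
struct OutputInfo { Index row_idx, col_idx, rows, cols; };

// Callback-style integer reference multiply, shaped like references::Multiply:
// the lambda converts the raw int32 accumulator into the final float value.
template <typename TypeA, typename TypeB, typename Callback>
void ReferenceMultiply(const TypeA* A, const TypeB* B, float* C,
                       Index A_rows, Index width, Index B_cols, Callback callback) {
  for (Index r = 0; r < A_rows; ++r)
    for (Index c = 0; c < B_cols; ++c) {
      int32_t sum = 0;
      for (Index k = 0; k < width; ++k)
        sum += int32_t(A[r * width + k]) * int32_t(B[k * B_cols + c]);
      C[r * B_cols + c] = callback(sum, OutputInfo{r, c, A_rows, B_cols});
    }
}

int main() {
  const Index rows = 2, width = 3, cols = 2;
  std::vector<int8_t> A = {1, 2, 3, 4, 5, 6};   // 2x3, row-major
  std::vector<int8_t> B = {1, 0, 0, 1, 1, 1};   // 3x2, row-major
  std::vector<float> bias = {0.5f, -0.5f};
  std::vector<float> C(rows * cols);
  const float unquant_mult = 0.25f;

  // The same kind of epilogue the tests below pass as a lambda:
  // unquantize the accumulator and add a per-column bias.
  ReferenceMultiply(A.data(), B.data(), C.data(), rows, width, cols,
                    [&](int32_t sum, const OutputInfo& info) {
                      return sum * unquant_mult + bias[info.col_idx];
                    });

  for (float v : C) std::printf("%g ", v);  // expected: 1.5 0.75 3 2.25
  std::printf("\n");
  return 0;
}

Compared with the old SlowRefInt(..., bias) overloads, the bias handling and any alternative epilogue now live in one place at the call site, which is what the add127 and multiply tests in this diff take advantage of.
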
diff --git a/test/add127_test.cc b/test/add127_test.cc
index d1b850d..ae5c08a 100644
--- a/test/add127_test.cc
+++ b/test/add127_test.cc
@@ -81,7 +81,9 @@ template <class Routine> void TestPrepareBias(Index rows, Index cols) {
//Routine::Multiply(A_prep2.begin(), B_prep.begin(), A_rows, rows, cols, callbacks::UnquantizeAndAddBiasAndWrite(unquant_mult_forprep, goldBias.begin(), goldBias.begin()));
//CompareBiases(goldBias.begin(), inputBias.begin(), cols);
AlignedVector<float> slowint_C(cols);
- SlowRefInt(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult_forprep, A_rows, rows, cols, goldBias.begin());
+ references::Multiply(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), A_rows, rows, cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ return sum * unquant_mult_forprep + goldBias[info.col_idx];
+ });
CompareBiases(slowint_C.begin(), inputBias.begin(), cols);
}
@@ -127,10 +129,14 @@ template <class Routine> void TestMultiplyBiasNew(Index A_rows, Index width, Ind
// Taking the original A_preparation which means A would be int8_t
AlignedVector<int8_t> A_prep2(A.size());
Routine::PrepareA(A.begin(), A_prep2.begin(), quant_mult, A_rows, width);
- SlowRefInt(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, bias.begin());
+ references::Multiply(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ return sum * unquant_mult + bias[info.col_idx];
+ });
AlignedVector<float> float_C(test_C.size());
- SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+ references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+ return sum + bias[info.col_idx];
+ });
/*ACTUAL MULTIPLICATION
*
@@ -185,7 +191,10 @@ template <class Routine> void TestMultiplyShiftNonShift(Index A_rows, Index widt
Routine::Multiply(A_prep_old.begin(), B_prep.begin(), A_rows, width, B_cols, callbacks::UnquantizeAndAddBiasAndWrite(unquant_mult, bias.begin(), slowint_C.begin()));
AlignedVector<float> float_C(test_C.size());
- SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+ references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+ return sum + bias[info.col_idx];
+ });
+
/*
* Multiply8 shift multiplication
*/
@@ -238,10 +247,14 @@ template <class Routine> void TestMultiplyShiftInt(Index A_rows, Index width, In
Routine::Quantize(B.begin(), B_quant.begin(), quant_mult, B.size());
AlignedVector<float> slowint_C(test_C.size());
// Taking the original A_preparation which means A would be int8_t
- //SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, bias.begin());
+ // references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ // return sum * unquant_mult + bias[info.col_idx];
+ // });
AlignedVector<float> float_C(test_C.size());
- SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+ references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+ return sum + bias[info.col_idx];
+ });
/*
* Multiply8 shift multiplication
*/
@@ -252,7 +265,9 @@ template <class Routine> void TestMultiplyShiftInt(Index A_rows, Index width, In
}
AlignedVector<float> ShiftedBias(B_cols);
float unquant_mult_forprep = (-1)*(alpha)*(alpha)/(127.0f); //Minus one to invert add_ps later on
- SlowRefInt(A_prep2.begin(), B_quant.begin(), ShiftedBias.begin(), unquant_mult_forprep, 1, width, B_cols, bias.begin());
+ references::Multiply(A_prep2.begin(), B_quant.begin(), ShiftedBias.begin(), 1, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ return sum * unquant_mult_forprep + bias[info.col_idx];
+ });
//Now prepare Fast integer Bias
@@ -261,7 +276,9 @@ template <class Routine> void TestMultiplyShiftInt(Index A_rows, Index width, In
// Reference INT VERSION HERE with ADD127
// Taking the original A_preparation which means A would be int8_t
- SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, ShiftedBias.begin());
+ references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ return sum * unquant_mult + ShiftedBias[info.col_idx];
+ });
Compare(float_C.begin(), slowint_C.begin(), test_C.begin(), test_C.size(), info.str(),
int_tolerance, float_tolerance, MSE_float_tolerance, MSE_int_tolerance);
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index c972489..725fbca 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -18,32 +18,6 @@
namespace intgemm {
-// Rearrange a tile of simd x unroll entries.
-template <class V> void SlowRearrangeTile(const V *from, V *to, int simd, int unroll, Index cols) {
- for (int i = 0; i < unroll; ++i) {
- for (int j = 0; j < simd; ++j) {
- to[simd * i + j] = from[cols * j + i];
- }
- }
-}
-
-template <class V> void SlowRearrange(const V *from, V *to, int simd, int unroll, Index rows, Index cols) {
- for (Index c = 0; c < cols; c += unroll) {
- for (Index r = 0; r < rows; r += simd) {
- SlowRearrangeTile(from + cols * r + c, to, simd, unroll, cols);
- to += unroll * simd;
- }
- }
-}
-
-template <class V> void SlowTranspose(const V *from, V *to, Index rows, Index cols) {
- for (Index r = 0; r < rows; ++r) {
- for (Index c = 0; c < cols; ++c) {
- to[rows * c + r] = from[cols * r + c];
- }
- }
-}
-
INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") {
if (kCPU < CPUType::SSE2) return;
const unsigned N = 8;
@@ -51,7 +25,7 @@ INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") {
std::iota(input.begin(), input.end(), 0);
AlignedVector<int16_t> ref(N * N);
- SlowTranspose(input.begin(), ref.begin(), N, N);
+ references::Transpose(input.begin(), ref.begin(), N, N);
// Overwrite input.
__m128i *t = input.as<__m128i>();
@@ -69,7 +43,7 @@ INTGEMM_SSSE3 TEST_CASE("Transpose 8", "[transpose]") {
std::iota(input.begin(), input.end(), 0);
AlignedVector<int8_t> ref(input.size());
- SlowTranspose(input.begin(), ref.begin(), N, N);
+ references::Transpose(input.begin(), ref.begin(), N, N);
// Overwrite input.
__m128i *t = input.as<__m128i>();
@@ -111,7 +85,7 @@ template <class Routine> void TestPrepare(Index rows = 32, Index cols = 16) {
Routine::Quantize(input.begin(), quantized.begin(), 1, input.size());
AlignedVector<Integer> reference(input.size());
// Note this won't work for Int8/Int16 generic routines because tile sizes vary.
- SlowRearrange<Integer>(quantized.begin(), reference.begin(), Routine::kBTileRow, Routine::kBTileCol, rows, cols);
+ references::Rearragement(quantized.begin(), reference.begin(), Routine::kBTileRow, Routine::kBTileCol, rows, cols);
CHECK_MESSAGE(memcmp(reference.begin(), test.begin(), test.size() * sizeof(Integer)) == 0, Routine::kName << " Mismatch:\n" <<
"Quantized Input" << '\n' << PrintMatrix(quantized.begin(), rows, cols) << "Reference" << '\n' <<
PrintMatrix(reference.begin(), rows, cols) << "Routine" << '\n' << PrintMatrix(test.begin(), rows, cols));
@@ -323,10 +297,14 @@ template <class Routine> void TestMultiply(Index A_rows, Index width, Index B_co
Routine::Quantize(B.begin(), B_quant.begin(), quant_mult, B.size());
AlignedVector<float> slowint_C(test_C.size());
// Assuming A is just quantization here.
- SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols);
+ references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ return sum * unquant_mult;
+ });
AlignedVector<float> float_C(test_C.size());
- SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols);
+ references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+ return sum;
+ });
Compare(float_C.begin(), slowint_C.begin(), test_C.begin(), test_C.size(), info.str(),
int_tolerance, float_tolerance, MSE_float_tolerance, MSE_int_tolerance);
@@ -372,10 +350,14 @@ template <class Routine> void TestMultiplyBias(Index A_rows, Index width, Index
Routine::Quantize(B.begin(), B_quant.begin(), quant_mult, B.size());
AlignedVector<float> slowint_C(test_C.size());
// Assuming A is just quantization here.
- SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, bias.begin());
+ references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+ return sum * unquant_mult + bias[info.col_idx];
+ });
AlignedVector<float> float_C(test_C.size());
- SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+ references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+ return sum + bias[info.col_idx];
+ });
Compare(float_C.begin(), slowint_C.begin(), test_C.begin(), test_C.size(), info.str(),
int_tolerance, float_tolerance, MSE_float_tolerance, MSE_int_tolerance);
diff --git a/test/test.cc b/test/test.cc
index 2986d82..62137a1 100644
--- a/test/test.cc
+++ b/test/test.cc
@@ -7,60 +7,8 @@ int main(int argc, char ** argv) {
namespace intgemm {
-void SlowRefFloat(const float *A, const float *B, float *C, Index A_rows, Index width, Index B_cols, const float *bias) {
- for (Index r = 0; r < A_rows; ++r) {
- for (Index c = 0; c < B_cols; ++c) {
- float sum = 0.0f;
- for (Index k = 0; k < width; ++k) {
- sum += A[r * width + k] * B[k * B_cols + c];
- }
- if (bias) {
- C[r * B_cols + c] = sum + bias[c];
- } else {
- C[r * B_cols + c] = sum;
- }
- }
- }
-}
-
-// Compute A*B slowly from integers.
-template <class Integer> void SlowRefInt(const Integer *A, const Integer *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias) {
- for (Index r = 0; r < A_rows; ++r) {
- for (Index c = 0; c < B_cols; ++c) {
- int32_t sum = 0;
- for (Index k = 0; k < width; ++k) {
- sum += static_cast<int16_t>(A[r * width + k]) * static_cast<int16_t>(B[k * B_cols + c]);
- }
- if (bias) {
- C[r * B_cols + c] = sum * unquant_mult + bias[c];
- } else {
- C[r * B_cols + c] = sum * unquant_mult;
- }
- }
- }
-}
-void SlowRefInt(const uint8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias) {
- for (Index r = 0; r < A_rows; ++r) {
- for (Index c = 0; c < B_cols; ++c) {
- int32_t sum = 0;
- for (Index k = 0; k < width; ++k) {
- sum += static_cast<int16_t>(A[r * width + k]) * static_cast<int16_t>(B[k * B_cols + c]);
- }
- if (bias) {
- C[r * B_cols + c] = sum * unquant_mult + bias[c];
- } else {
- C[r * B_cols + c] = sum * unquant_mult;
- }
- }
- }
-}
-
-template void SlowRefInt<int8_t>(const int8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias);
-template void SlowRefInt<int16_t>(const int16_t *A, const int16_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias);
-template void SlowRefInt<int32_t>(const int32_t *A, const int32_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias);
-
void Compare(const float *float_ref, const float *int_ref, const float *int_test, std::size_t size, std::string test_info,
- float int_tolerance, float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance) {
+ float int_tolerance, float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance) {
float int_sum = 0.0, float_sum = 0.0;
for (std::size_t i = 0; i < size; ++i) {
float int_diff = int_ref[i] - int_test[i];
@@ -74,4 +22,4 @@ void Compare(const float *float_ref, const float *int_ref, const float *int_test
CHECK_MESSAGE(fabs(sqrt(int_sum / size)) <= MSE_int_tolerance, test_info << "Int MSE = " << sqrt(int_sum / size));
}
-} //namespace intgemm
+} // namespace intgemm
diff --git a/test/test.h b/test/test.h
index 291ff45..7c294f8 100644
--- a/test/test.h
+++ b/test/test.h
@@ -1,11 +1,15 @@
#pragma once
+#include "intgemm_config.h"
+
#include "../3rd_party/catch.hpp"
-#include <sstream>
#include "../intgemm.h"
#include "../aligned.h"
-#include "intgemm_config.h"
+#include <math.h>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
#define CHECK_MESSAGE(cond, msg) do { INFO(msg); CHECK(cond); } while(0)
#define CHECK_FALSE_MESSAGE(cond, msg) do { INFO(msg); CHECK_FALSE(cond); } while(0)
@@ -21,13 +25,80 @@
#define KERNEL_TEST_CASE(name) TEST_CASE("Kernel: " name, "[kernel_test]")
namespace intgemm {
-void SlowRefFloat(const float *A, const float *B, float *C, Index A_rows, Index width, Index B_cols, const float *bias=nullptr);
-// Compute A*B slowly from integers.
-template <class Integer> void SlowRefInt(const Integer *A, const Integer *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias=nullptr);
-void SlowRefInt(const uint8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias=nullptr);
+void Compare(const float *float_ref, const float *int_ref, const float *int_test,
+ std::size_t size, std::string test_info, float int_tolerance,
+ float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance);
+
+/*
+ * References
+ */
+namespace references {
+
+// Quantize
+template <typename Type>
+void Quantize(const float* input, Type* output, float quant_mult, Index size) {
+ for (Index i = 0; i < size; ++i) {
+ float value = roundf(input[i] * quant_mult);
+ value = std::max<float>(std::numeric_limits<Type>::min(), value);
+ value = std::min<float>(std::numeric_limits<Type>::max(), value);
+ output[i] = value;
+ }
+}
+
+// Multiply A(float) x B(float)
+template <typename LambdaCallback>
+void MultiplyFF(const float* A, const float* B, float* C, Index A_rows, Index width, Index B_cols, LambdaCallback callback) {
+ for (Index r = 0; r < A_rows; ++r) {
+ for (Index c = 0; c < B_cols; ++c) {
+ float sum = 0.0f;
+ for (Index k = 0; k < width; ++k) {
+ sum += A[r * width + k] * B[k * B_cols + c];
+ }
+ C[r * B_cols + c] = callback(sum, {r, c, A_rows, B_cols});
+ }
+ }
+}
+
+// Multiply A(int) x B(int)
+template <typename TypeA, typename TypeB, typename LambdaCallback,
+ typename std::enable_if<std::is_integral<TypeA>::value>::type* = nullptr,
+ typename std::enable_if<std::is_integral<TypeB>::value>::type* = nullptr>
+void Multiply(const TypeA* A, const TypeB* B, float* C, Index A_rows, Index width, Index B_cols, LambdaCallback callback) {
+ for (Index r = 0; r < A_rows; ++r) {
+ for (Index c = 0; c < B_cols; ++c) {
+ int32_t sum = 0;
+ for (Index k = 0; k < width; ++k) {
+ sum += int32_t(A[r * width + k]) * int32_t(B[k * B_cols + c]);
+ }
+ C[r * B_cols + c] = callback(sum, {r, c, A_rows, B_cols});
+ }
+ }
+}
+
+// Matrix rearragement
+template <typename Type>
+void Rearragement(const Type* input, Type* output, int simd, int unroll, Index rows, Index cols) {
+ for (Index c = 0; c < cols; c += unroll) {
+ for (Index r = 0; r < rows; r += simd) {
+ for (Index i = 0; i < unroll; ++i)
+ for (Index j = 0; j < simd; ++j)
+ output[simd * i + j] = input[cols * r + c + cols * j + i];
+
+ output += unroll * simd;
+ }
+ }
+}
-void Compare(const float *float_ref, const float *int_ref, const float *int_test, std::size_t size, std::string test_info,
- float int_tolerance, float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance);
+// Transpose
+template <typename Type>
+void Transpose(const Type* input, Type* output, Index rows, Index cols) {
+ for (Index r = 0; r < rows; ++r) {
+ for (Index c = 0; c < cols; ++c) {
+ output[rows * c + r] = input[cols * r + c];
+ }
+ }
+}
-} //namespace intgemm
+} // namespace references
+} // namespace intgemm
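
As an aside on the rearrangement reference: references::Rearragement folds the old SlowRearrangeTile / SlowRearrange pair into a single loop nest that walks the matrix in unroll-wide column strips and simd-tall row strips, writing each simd x unroll tile out contiguously. The standalone sketch below applies the same index arithmetic to a 4x4 matrix with simd = 2 and unroll = 2, values chosen only for illustration; the real tests pass Routine::kBTileRow and Routine::kBTileCol.

#include <cstdio>
#include <vector>

using Index = unsigned int;  // stand-in for intgemm::Index

// Same index arithmetic as references::Rearragement in test/test.h:
// within each simd x unroll tile, column i of the tile is filled with
// simd consecutive input rows, so the tile is emitted column-interleaved.
template <typename Type>
void Rearrange(const Type* input, Type* output, int simd, int unroll,
               Index rows, Index cols) {
  for (Index c = 0; c < cols; c += unroll)
    for (Index r = 0; r < rows; r += simd) {
      for (int i = 0; i < unroll; ++i)
        for (int j = 0; j < simd; ++j)
          output[simd * i + j] = input[cols * (r + j) + (c + i)];
      output += unroll * simd;
    }
}

int main() {
  const Index rows = 4, cols = 4;
  std::vector<int> in(rows * cols), out(rows * cols);
  for (Index i = 0; i < rows * cols; ++i) in[i] = int(i);  // row-major 0..15

  Rearrange(in.data(), out.data(), 2, 2, rows, cols);

  // Tiles come out as: 0 4 1 5 | 8 12 9 13 | 2 6 3 7 | 10 14 11 15
  for (Index i = 0; i < rows * cols; ++i) std::printf("%d ", out[i]);
  std::printf("\n");
  return 0;
}
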