From a72b13b72d04f0863decd46c5b9cdca24d962de3 Mon Sep 17 00:00:00 2001
From: Mateusz Chudyk
Date: Thu, 6 Feb 2020 18:32:34 +0000
Subject: Straighten functions producing test references values

---
 test/add127_test.cc   | 33 ++++++++++++++-----
 test/multiply_test.cc | 48 +++++++++------------------
 test/test.cc          | 56 ++------------------------------
 test/test.h           | 89 +++++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 122 insertions(+), 104 deletions(-)

diff --git a/test/add127_test.cc b/test/add127_test.cc
index d1b850d..ae5c08a 100644
--- a/test/add127_test.cc
+++ b/test/add127_test.cc
@@ -81,7 +81,9 @@ template <class Routine> void TestPrepareBias(Index rows, Index cols) {
   //Routine::Multiply(A_prep2.begin(), B_prep.begin(), A_rows, rows, cols, callbacks::UnquantizeAndAddBiasAndWrite(unquant_mult_forprep, goldBias.begin(), goldBias.begin()));
   //CompareBiases(goldBias.begin(), inputBias.begin(), cols);
   AlignedVector<float> slowint_C(cols);
-  SlowRefInt(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult_forprep, A_rows, rows, cols, goldBias.begin());
+  references::Multiply(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), A_rows, rows, cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+    return sum * unquant_mult_forprep + goldBias[info.col_idx];
+  });
   CompareBiases(slowint_C.begin(), inputBias.begin(), cols);
 }
 
@@ -127,10 +129,14 @@ template <class Routine> void TestMultiplyBiasNew(Index A_rows, Index width, Ind
   // Taking the original A_preparation which means A would be int8_t
   AlignedVector<int8_t> A_prep2(A.size());
   Routine::PrepareA(A.begin(), A_prep2.begin(), quant_mult, A_rows, width);
-  SlowRefInt(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, bias.begin());
+  references::Multiply(A_prep2.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+    return sum * unquant_mult + bias[info.col_idx];
+  });
 
   AlignedVector<float> float_C(test_C.size());
-  SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+  references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+    return sum + bias[info.col_idx];
+  });
 
   /*ACTUAL MULTIPLICATION
   *
@@ -185,7 +191,10 @@ template <class Routine> void TestMultiplyShiftNonShift(Index A_rows, Index widt
   Routine::Multiply(A_prep_old.begin(), B_prep.begin(), A_rows, width, B_cols, callbacks::UnquantizeAndAddBiasAndWrite(unquant_mult, bias.begin(), slowint_C.begin()));
 
   AlignedVector<float> float_C(test_C.size());
-  SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+  references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+    return sum + bias[info.col_idx];
+  });
+
   /*
    * Multiply8 shift multiplication
    */
@@ -238,10 +247,14 @@ template <class Routine> void TestMultiplyShiftInt(Index A_rows, Index width, In
   Routine::Quantize(B.begin(), B_quant.begin(), quant_mult, B.size());
   AlignedVector<float> slowint_C(test_C.size());
   // Taking the original A_preparation which means A would be int8_t
-  //SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, bias.begin());
+  // references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+  //   return sum * unquant_mult + bias[info.col_idx];
+  // });
 
   AlignedVector<float> float_C(test_C.size());
-  SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+  references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+    return sum + bias[info.col_idx];
+  });
   /*
    * Multiply8 shift multiplication
    */
@@ -252,7 +265,9 @@ template <class Routine> void TestMultiplyShiftInt(Index A_rows, Index width, In
   }
   AlignedVector<float> ShiftedBias(B_cols);
   float unquant_mult_forprep = (-1)*(alpha)*(alpha)/(127.0f); //Minus one to invert add_ps later on
-  SlowRefInt(A_prep2.begin(), B_quant.begin(), ShiftedBias.begin(), unquant_mult_forprep, 1, width, B_cols, bias.begin());
+  references::Multiply(A_prep2.begin(), B_quant.begin(), ShiftedBias.begin(), 1, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+    return sum * unquant_mult_forprep + bias[info.col_idx];
+  });
 
 
   //Now prepare Fast integer Bias
@@ -261,7 +276,9 @@ template <class Routine> void TestMultiplyShiftInt(Index A_rows, Index width, In
 
   // Reference INT VERSION HERE with ADD127
   // Taking the original A_preparation which means A would be int8_t
-  SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, ShiftedBias.begin());
+  references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+    return sum * unquant_mult + ShiftedBias[info.col_idx];
+  });
 
   Compare(float_C.begin(), slowint_C.begin(), test_C.begin(), test_C.size(), info.str(), int_tolerance, float_tolerance, MSE_float_tolerance, MSE_int_tolerance);
 
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index c972489..725fbca 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -18,32 +18,6 @@
 
 namespace intgemm {
 
-// Rearrange a tile of simd x unroll entries.
-template <class V> void SlowRearrangeTile(const V *from, V *to, int simd, int unroll, Index cols) {
-  for (int i = 0; i < unroll; ++i) {
-    for (int j = 0; j < simd; ++j) {
-      to[simd * i + j] = from[cols * j + i];
-    }
-  }
-}
-
-template <class V> void SlowRearrange(const V *from, V *to, int simd, int unroll, Index rows, Index cols) {
-  for (Index c = 0; c < cols; c += unroll) {
-    for (Index r = 0; r < rows; r += simd) {
-      SlowRearrangeTile(from + cols * r + c, to, simd, unroll, cols);
-      to += unroll * simd;
-    }
-  }
-}
-
-template <class V> void SlowTranspose(const V *from, V *to, Index rows, Index cols) {
-  for (Index r = 0; r < rows; ++r) {
-    for (Index c = 0; c < cols; ++c) {
-      to[rows * c + r] = from[cols * r + c];
-    }
-  }
-}
-
 INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") {
   if (kCPU < CPUType::SSE2) return;
   const unsigned N = 8;
@@ -51,7 +25,7 @@ INTGEMM_SSE2 TEST_CASE("Transpose 16", "[transpose]") {
   std::iota(input.begin(), input.end(), 0);
 
   AlignedVector<int16_t> ref(N * N);
-  SlowTranspose(input.begin(), ref.begin(), N, N);
+  references::Transpose(input.begin(), ref.begin(), N, N);
 
   // Overwrite input.
   __m128i *t = input.as<__m128i>();
@@ -69,7 +43,7 @@ INTGEMM_SSSE3 TEST_CASE("Transpose 8", "[transpose]") {
   std::iota(input.begin(), input.end(), 0);
 
   AlignedVector<int8_t> ref(input.size());
-  SlowTranspose(input.begin(), ref.begin(), N, N);
+  references::Transpose(input.begin(), ref.begin(), N, N);
 
   // Overwrite input.
   __m128i *t = input.as<__m128i>();
@@ -111,7 +85,7 @@ template <class Routine> void TestPrepare(Index rows = 32, Index cols = 16) {
   Routine::Quantize(input.begin(), quantized.begin(), 1, input.size());
   AlignedVector<Integer> reference(input.size());
   // Note this won't work for Int8/Int16 generic routines because tile sizes vary.
-  SlowRearrange(quantized.begin(), reference.begin(), Routine::kBTileRow, Routine::kBTileCol, rows, cols);
+  references::Rearragement(quantized.begin(), reference.begin(), Routine::kBTileRow, Routine::kBTileCol, rows, cols);
   CHECK_MESSAGE(memcmp(reference.begin(), test.begin(), test.size() * sizeof(Integer)) == 0, Routine::kName << " Mismatch:\n" <<
                 "Quantized Input" << '\n' << PrintMatrix(quantized.begin(), rows, cols) << "Reference" << '\n' <<
                 PrintMatrix(reference.begin(), rows, cols) << "Routine" << '\n' << PrintMatrix(test.begin(), rows, cols));
@@ -323,10 +297,14 @@ template <class Routine> void TestMultiply(Index A_rows, Index width, Index B_co
   Routine::Quantize(B.begin(), B_quant.begin(), quant_mult, B.size());
 
   AlignedVector<float> slowint_C(test_C.size());
   // Assuming A is just quantization here.
-  SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols);
+  references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+    return sum * unquant_mult;
+  });
 
   AlignedVector<float> float_C(test_C.size());
-  SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols);
+  references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+    return sum;
+  });
 
   Compare(float_C.begin(), slowint_C.begin(), test_C.begin(), test_C.size(), info.str(), int_tolerance, float_tolerance, MSE_float_tolerance, MSE_int_tolerance);
@@ -372,10 +350,14 @@ template <class Routine> void TestMultiplyBias(Index A_rows, Index width, Index
   Routine::Quantize(B.begin(), B_quant.begin(), quant_mult, B.size());
 
   AlignedVector<float> slowint_C(test_C.size());
   // Assuming A is just quantization here.
-  SlowRefInt(A_prep.begin(), B_quant.begin(), slowint_C.begin(), unquant_mult, A_rows, width, B_cols, bias.begin());
+  references::Multiply(A_prep.begin(), B_quant.begin(), slowint_C.begin(), A_rows, width, B_cols, [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
+    return sum * unquant_mult + bias[info.col_idx];
+  });
 
   AlignedVector<float> float_C(test_C.size());
-  SlowRefFloat(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, bias.begin());
+  references::MultiplyFF(A.begin(), B.begin(), float_C.begin(), A_rows, width, B_cols, [&](float sum, const callbacks::OutputBufferInfo& info) {
+    return sum + bias[info.col_idx];
+  });
 
   Compare(float_C.begin(), slowint_C.begin(), test_C.begin(), test_C.size(), info.str(), int_tolerance, float_tolerance, MSE_float_tolerance, MSE_int_tolerance);
 
diff --git a/test/test.cc b/test/test.cc
index 2986d82..62137a1 100644
--- a/test/test.cc
+++ b/test/test.cc
@@ -7,60 +7,8 @@ int main(int argc, char ** argv) {
 
 namespace intgemm {
 
-void SlowRefFloat(const float *A, const float *B, float *C, Index A_rows, Index width, Index B_cols, const float *bias) {
-  for (Index r = 0; r < A_rows; ++r) {
-    for (Index c = 0; c < B_cols; ++c) {
-      float sum = 0.0f;
-      for (Index k = 0; k < width; ++k) {
-        sum += A[r * width + k] * B[k * B_cols + c];
-      }
-      if (bias) {
-        C[r * B_cols + c] = sum + bias[c];
-      } else {
-        C[r * B_cols + c] = sum;
-      }
-    }
-  }
-}
-
-// Compute A*B slowly from integers.
-template <class Integer> void SlowRefInt(const Integer *A, const Integer *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias) {
-  for (Index r = 0; r < A_rows; ++r) {
-    for (Index c = 0; c < B_cols; ++c) {
-      int32_t sum = 0;
-      for (Index k = 0; k < width; ++k) {
-        sum += static_cast<int32_t>(A[r * width + k]) * static_cast<int32_t>(B[k * B_cols + c]);
-      }
-      if (bias) {
-        C[r * B_cols + c] = sum * unquant_mult + bias[c];
-      } else {
-        C[r * B_cols + c] = sum * unquant_mult;
-      }
-    }
-  }
-}
-
-void SlowRefInt(const uint8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias) {
-  for (Index r = 0; r < A_rows; ++r) {
-    for (Index c = 0; c < B_cols; ++c) {
-      int32_t sum = 0;
-      for (Index k = 0; k < width; ++k) {
-        sum += static_cast<int32_t>(A[r * width + k]) * static_cast<int32_t>(B[k * B_cols + c]);
-      }
-      if (bias) {
-        C[r * B_cols + c] = sum * unquant_mult + bias[c];
-      } else {
-        C[r * B_cols + c] = sum * unquant_mult;
-      }
-    }
-  }
-}
-
-template void SlowRefInt<int8_t>(const int8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias);
-template void SlowRefInt<int16_t>(const int16_t *A, const int16_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias);
-template void SlowRefInt<int32_t>(const int32_t *A, const int32_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias);
-
 void Compare(const float *float_ref, const float *int_ref, const float *int_test, std::size_t size, std::string test_info,
-  float int_tolerance, float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance) {
+             float int_tolerance, float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance) {
   float int_sum = 0.0, float_sum = 0.0;
   for (std::size_t i = 0; i < size; ++i) {
     float int_diff = int_ref[i] - int_test[i];
@@ -74,4 +22,4 @@ void Compare(const float *float_ref, const float *int_ref, const float *int_test
   CHECK_MESSAGE(fabs(sqrt(int_sum / size)) <= MSE_int_tolerance, test_info << "Int MSE = " << sqrt(int_sum / size));
 }
 
-} //namespace intgemm
+} // namespace intgemm
diff --git a/test/test.h b/test/test.h
index 291ff45..7c294f8 100644
--- a/test/test.h
+++ b/test/test.h
@@ -1,11 +1,15 @@
 #pragma once
 
+#include "intgemm_config.h"
+
 #include "../3rd_party/catch.hpp"
-#include
 #include "../intgemm.h"
 #include "../aligned.h"
-#include "intgemm_config.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <type_traits>
 
 #define CHECK_MESSAGE(cond, msg) do { INFO(msg); CHECK(cond); } while(0)
 #define CHECK_FALSE_MESSAGE(cond, msg) do { INFO(msg); CHECK_FALSE(cond); } while(0)
@@ -21,13 +25,80 @@
 #define KERNEL_TEST_CASE(name) TEST_CASE("Kernel: " name, "[kernel_test]")
 
 namespace intgemm {
-void SlowRefFloat(const float *A, const float *B, float *C, Index A_rows, Index width, Index B_cols, const float *bias=nullptr);
 
-// Compute A*B slowly from integers.
-template <class Integer> void SlowRefInt(const Integer *A, const Integer *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias=nullptr);
-void SlowRefInt(const uint8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols, const float *bias=nullptr);
+void Compare(const float *float_ref, const float *int_ref, const float *int_test,
+             std::size_t size, std::string test_info, float int_tolerance,
+             float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance);
+
+/*
+ * References
+ */
+namespace references {
+
+// Quantize
+template <typename Type>
+void Quantize(const float* input, Type* output, float quant_mult, Index size) {
+  for (Index i = 0; i < size; ++i) {
+    float value = roundf(input[i] * quant_mult);
+    value = std::max<float>(std::numeric_limits<Type>::min(), value);
+    value = std::min<float>(std::numeric_limits<Type>::max(), value);
+    output[i] = value;
+  }
+}
+
+// Multiply A(float) x B(float)
+template <typename LambdaCallback>
+void MultiplyFF(const float* A, const float* B, float* C, Index A_rows, Index width, Index B_cols, LambdaCallback callback) {
+  for (Index r = 0; r < A_rows; ++r) {
+    for (Index c = 0; c < B_cols; ++c) {
+      float sum = 0.0f;
+      for (Index k = 0; k < width; ++k) {
+        sum += A[r * width + k] * B[k * B_cols + c];
+      }
+      C[r * B_cols + c] = callback(sum, {r, c, A_rows, B_cols});
+    }
+  }
+}
+
+// Multiply A(int) x B(int)
+template <typename TypeA, typename TypeB, typename LambdaCallback,
+          typename std::enable_if<std::is_integral<TypeA>::value>::type* = nullptr,
+          typename std::enable_if<std::is_integral<TypeB>::value>::type* = nullptr>
+void Multiply(const TypeA* A, const TypeB* B, float* C, Index A_rows, Index width, Index B_cols, LambdaCallback callback) {
+  for (Index r = 0; r < A_rows; ++r) {
+    for (Index c = 0; c < B_cols; ++c) {
+      int32_t sum = 0;
+      for (Index k = 0; k < width; ++k) {
+        sum += int32_t(A[r * width + k]) * int32_t(B[k * B_cols + c]);
+      }
+      C[r * B_cols + c] = callback(sum, {r, c, A_rows, B_cols});
+    }
+  }
+}
+
+// Matrix rearragement
+template <typename Type>
+void Rearragement(const Type* input, Type* output, int simd, int unroll, Index rows, Index cols) {
+  for (Index c = 0; c < cols; c += unroll) {
+    for (Index r = 0; r < rows; r += simd) {
+      for (Index i = 0; i < unroll; ++i)
+        for (Index j = 0; j < simd; ++j)
+          output[simd * i + j] = input[cols * r + c + cols * j + i];
+
+      output += unroll * simd;
+    }
+  }
+}
-void Compare(const float *float_ref, const float *int_ref, const float *int_test, std::size_t size, std::string test_info,
-  float int_tolerance, float float_tolerance, float MSE_float_tolerance, float MSE_int_tolerance);
+
+// Transpose
+template <typename Type>
+void Transpose(const Type* input, Type* output, Index rows, Index cols) {
+  for (Index r = 0; r < rows; ++r) {
+    for (Index c = 0; c < cols; ++c) {
+      output[rows * c + r] = input[cols * r + c];
+    }
+  }
+}
 
-} //namespace intgemm
+} // namespace references
+} // namespace intgemm
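
For context on the shape of the new API: the old SlowRefInt/SlowRefFloat helpers hard-coded the unquantization multiplier and optional bias pointer in their signatures, while references::Multiply and references::MultiplyFF take a callback that receives the raw accumulator together with the output coordinates and returns the value to store. A minimal usage sketch, not part of the commit above: ExampleBiasReference is a hypothetical name, and it assumes the test/test.h added by this patch plus intgemm's Index and callbacks::OutputBufferInfo types are available.

#include "test.h"  // the header added by this patch (assumed to be on the include path)

namespace intgemm {

// Reference GEMM with unquantize + per-column bias expressed through the
// callback-based API: the lambda sees each int32_t dot product and the output
// coordinates, and returns the float value written to the output buffer.
void ExampleBiasReference(const int8_t *A_quant, const int8_t *B_quant, const float *bias,
                          float *out, Index A_rows, Index width, Index B_cols, float unquant_mult) {
  references::Multiply(A_quant, B_quant, out, A_rows, width, B_cols,
      [&](int32_t sum, const callbacks::OutputBufferInfo& info) {
        return sum * unquant_mult + bias[info.col_idx];
      });
}

} // namespace intgemm

The same pattern covers the bias-free and shifted-bias cases in the tests above: only the lambda changes, not the reference multiply itself.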