diff options
Diffstat (limited to 'test/I8DepthwiseTest.cc')
-rw-r--r-- | test/I8DepthwiseTest.cc | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/test/I8DepthwiseTest.cc b/test/I8DepthwiseTest.cc new file mode 100644 index 0000000..cfde880 --- /dev/null +++ b/test/I8DepthwiseTest.cc @@ -0,0 +1,448 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +#include "I8DepthwiseTest.h" + +#include <cmath> +#include <cstdio> + +#include <gtest/gtest.h> + +#include "bench/AlignedVec.h" +#include "src/FbgemmI8Depthwise.h" +#include "src/RefImplementations.h" +#include "TestUtils.h" +#include "bench/BenchUtils.h" + +using namespace std; + +namespace fbgemm2 +{ + +// From Xray OCR +static vector<vector<int>> shapes = { + // N, K, H_in, W_in, stride + { 1, 272, 47, 125, 1, }, +// { 1, 272, 64, 125, 1, }, +// { 1, 272, 66, 125, 1, }, +// { 1, 272, 67, 100, 1, }, +// { 1, 272, 75, 75, 1, }, + { 1, 272, 75, 76, 1, }, +// { 1, 272, 75, 100, 1, }, +// { 1, 272, 94, 75, 1, }, +// { 1, 272, 109, 75, 1, }, + { 1, 544, 24, 63, 1, }, +// { 1, 544, 33, 63, 1, }, +// { 1, 544, 34, 50, 1, }, +// { 1, 544, 36, 63, 1, }, +// { 1, 544, 38, 38, 1, }, +// { 1, 544, 38, 40, 1, }, + { 1, 544, 47, 38, 1, }, + { 1, 1088, 7, 7, 1, }, + { 51, 1088, 7, 7, 1, }, +// { 100, 1088, 7, 7, 1, }, + + { 1, 248, 93, 250, 2, }, +// { 1, 248, 128, 250, 2, }, +// { 1, 248, 133, 200, 2, }, +// { 1, 248, 150, 150, 2, }, + { 1, 248, 150, 151, 2, }, +// { 1, 248, 150, 158, 2, }, +// { 1, 248, 188, 150, 2, }, +// { 1, 248, 225, 150, 2, }, + { 1, 272, 47, 125, 2, }, +// { 1, 272, 64, 125, 2, }, +// { 1, 272, 66, 125, 2, }, +// { 1, 272, 67, 100, 2, }, +// { 1, 272, 75, 75, 2, }, +// { 1, 272, 75, 76, 2, }, + { 1, 272, 94, 75, 2, }, + { 1, 544, 14, 14, 2, }, + { 51, 544, 14, 14, 2, }, +// { 100, 544, 14, 14, 2, }, + + { 1, 8, 4, 4, 1, }, +}; + +TEST(FBGemmDepthWiseTest, Test3x3) { + for (auto shape : shapes) { + int N = shape[0]; + int K = shape[1]; + int H = shape[2]; + int W = shape[3]; + int stride_h = shape[4]; + int stride_w = stride_h; + constexpr int R = 3, S = 3; + constexpr int PAD_T = 1, PAD_B = 1, PAD_L = 1, PAD_R = 1; + int H_OUT = (H + PAD_T + PAD_B - R) / stride_h + 1; + int W_OUT = (W + PAD_L + PAD_R - S) / stride_w + 1; + + aligned_vector<uint8_t> A(N * H * W * K); + aligned_vector<int8_t> B(K * R * S); + aligned_vector<int32_t> C_ref(N * H_OUT * W_OUT * K), C(C_ref.size()); + + randFill(A, 0, 86); + int32_t A_zero_point = 43; + + randFill(B, -16, 16); + int32_t B_zero_point = 5; + + depthwise_3x3_pad_1_ref( + N, + H, + W, + K, + stride_h, + stride_w, + A_zero_point, + A.data(), + B.data(), + C_ref.data()); + + int32_t minimum = *min_element(C_ref.begin(), C_ref.end()); + int32_t maximum = *max_element(C_ref.begin(), C_ref.end()); + + float C_multiplier = 255. / (maximum - minimum); + + aligned_vector<int32_t> col_offsets(K); + aligned_vector<int32_t> bias(K); + randFill(col_offsets, -100, 100); + randFill(bias, -40, 40); + int32_t C_zero_point = 5; + + aligned_vector<uint8_t> C_uint8_ref(C_ref.size()), C_uint8(C_ref.size()); + depthwise_3x3_pad_1_ref( + N, + H, + W, + K, + stride_h, + stride_w, + A_zero_point, + A.data(), + B_zero_point, + B.data(), + C_multiplier, + C_zero_point, + C_uint8_ref.data(), + col_offsets.data(), + bias.data()); + + Packed3x3ConvMatrix Bp(K, B.data()); + + depthwise_3x3_pad_1( + N, H, W, K, stride_h, stride_w, A_zero_point, A.data(), Bp, C.data()); + + // correctness check + for (int n = 0; n < N; ++n) { + for (int h = 0; h < H_OUT; ++h) { + for (int w = 0; w < W_OUT; ++w) { + for (int k = 0; k < K; ++k) { + int32_t expected = C_ref[((n * H_OUT + h) * W_OUT + w) * K + k]; + int32_t actual = C[((n * H_OUT + h) * W_OUT + w) * K + k]; + EXPECT_EQ(expected, actual) << + "Depthwise 3x3 results differ at (" << n << ", " << + h << ", " << w << ", " << k << ")."; + } + } + } + } + + depthwise_3x3_pad_1( + N, + H, + W, + K, + stride_h, + stride_w, + A_zero_point, + A.data(), + B_zero_point, + Bp, + C_multiplier, + C_zero_point, + C_uint8.data(), + col_offsets.data(), + bias.data(), + 0, + 1); + + // correctness check + for (int n = 0; n < N; ++n) { + for (int h = 0; h < H_OUT; ++h) { + for (int w = 0; w < W_OUT; ++w) { + for (int k = 0; k < K; ++k) { + int32_t expected = + C_uint8_ref[((n * H_OUT + h) * W_OUT + w) * K + k]; + int32_t actual = C_uint8[((n * H_OUT + h) * W_OUT + w) * K + k]; + EXPECT_EQ(expected, actual) << + "Depthwise 3x3 results differ at (" << n << ", " << + h << ", " << w << ", " << k << ")."; + } + } + } + } + } // for each shape +} // Test3x3 + +TEST(FBGemmDepthWiseTest, Test3x3x3) { + for (auto shape : shapes_3d) { + int N = shape[0]; + int K = shape[1]; + int T = shape[2]; + int H = shape[3]; + int W = shape[4]; + int stride_t = shape[5]; + int stride_h = stride_t; + int stride_w = stride_t; + constexpr int K_T = 3, K_H = 3, K_W = 3; + constexpr int PAD_P = 1, PAD_N = 1, PAD_T = 1, PAD_B = 1, PAD_L = 1, + PAD_R = 1; + int T_OUT = (T + PAD_P + PAD_N - K_T) / stride_t + 1; + int H_OUT = (H + PAD_T + PAD_B - K_H) / stride_h + 1; + int W_OUT = (W + PAD_L + PAD_R - K_W) / stride_w + 1; + + aligned_vector<uint8_t> A(N * T * H * W * K); + aligned_vector<int8_t> B(K * K_T * K_H * K_W); + aligned_vector<int32_t> C_ref(N * T_OUT * H_OUT * W_OUT * K), + C(C_ref.size()); + + randFill(A, 0, 86); + int32_t A_zero_point = 43; + + randFill(B, -16, 16); + int32_t B_zero_point = 5; + + depthwise_3x3x3_pad_1_ref( + N, + T, + H, + W, + K, + stride_t, + stride_h, + stride_w, + A_zero_point, + A.data(), + B.data(), + C_ref.data()); + + int32_t minimum = *min_element(C_ref.begin(), C_ref.end()); + int32_t maximum = *max_element(C_ref.begin(), C_ref.end()); + + float C_multiplier = 255. / (maximum - minimum); + + aligned_vector<int32_t> col_offsets(K); + aligned_vector<int32_t> bias(K); + randFill(col_offsets, -100, 100); + randFill(bias, -40, 40); + int32_t C_zero_point = 5; + + aligned_vector<uint8_t> C_uint8_ref(C_ref.size()), C_uint8(C_ref.size()); + depthwise_3x3x3_pad_1_ref( + N, + T, + H, + W, + K, + stride_t, + stride_h, + stride_w, + A_zero_point, + A.data(), + B_zero_point, + B.data(), + C_multiplier, + C_zero_point, + C_uint8_ref.data(), + col_offsets.data(), + bias.data()); + + Packed3x3x3ConvMatrix Bp(K, B.data()); + + depthwise_3x3x3_pad_1( + N, + T, + H, + W, + K, + stride_t, + stride_h, + stride_w, + A_zero_point, + A.data(), + Bp, + C.data()); + + // correctness check + for (int n = 0; n < N; ++n) { + for (int t = 0; t < T_OUT; ++t) { + for (int h = 0; h < H_OUT; ++h) { + for (int w = 0; w < W_OUT; ++w) { + for (int k = 0; k < K; ++k) { + int32_t expected = + C_ref[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k]; + int32_t actual = + C[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k]; + ASSERT_EQ(expected, actual) + << "Depthwise 3x3 results differ at (" << n << ", " << t + << ", " << h << ", " << w << ", " << k << ") " + << shape[0] << " " << shape[1] << " " << shape[2] << " " + << shape[3] << " " << shape[4] << " " << shape[5]; + } + } // w + } // h + } // t + } // n + + depthwise_3x3x3_pad_1( + N, T, H, W, K, stride_t, stride_h, stride_w, A_zero_point, A.data(), + B_zero_point, Bp, C_multiplier, C_zero_point, + C_uint8.data(), col_offsets.data(), bias.data(), + false /* fuse_relu */, 0, 1); + + // correctness check + for (int n = 0; n < N; ++n) { + for (int t = 0; t < T_OUT; ++t) { + for (int h = 0; h < H_OUT; ++h) { + for (int w = 0; w < W_OUT; ++w) { + for (int k = 0; k < K; ++k) { + int32_t expected = C_uint8_ref + [(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k]; + int32_t actual = + C_uint8[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k]; + EXPECT_EQ(expected, actual) + << "Depthwise 3x3 results differ at (" << n << ", " << t + << ", " << h << ", " << w << ", " << k << ")."; + } + } // w + } // h + } // t + } // n + } // for each shape +} // Test3x3x3 + +TEST(FBGemmDepthWiseTest, Test3x3PerChannelQuantization) { + for (auto shape : shapes) { + int N = shape[0]; + int K = shape[1]; + int H = shape[2]; + int W = shape[3]; + int stride_h = shape[4]; + int stride_w = stride_h; + constexpr int R = 3, S = 3; + constexpr int PAD_T = 1, PAD_B = 1, PAD_L = 1, PAD_R = 1; + int H_OUT = (H + PAD_T + PAD_B - R) / stride_h + 1; + int W_OUT = (W + PAD_L + PAD_R - S) / stride_w + 1; + + aligned_vector<uint8_t> A(N * H * W * K); + aligned_vector<int8_t> B(K * R * S); + int32_t C_num_rows = N * H_OUT * W_OUT; + aligned_vector<int32_t> C_ref(C_num_rows * K), C(C_ref.size()); + + randFill(A, 0, 86); + int32_t A_zero_point = 43; + + // Each row of G has a different range to really test per-channel + // quantization. + vector<int32_t> B_zero_point(K); + for (auto k = 0; k < K; ++k) { + aligned_vector<int8_t> Bk(R * S); + randFill(Bk, -16 + k, 16 + k); + copy(Bk.begin(), Bk.end(), B.begin() + k * R * S); + + B_zero_point[k] = 5 + k; + } + + depthwise_3x3_pad_1_ref( + N, + H, + W, + K, + stride_h, + stride_w, + A_zero_point, + A.data(), + B.data(), + C_ref.data()); + + aligned_vector<int32_t> C_ref_transpose(C_ref); + transpose_matrix(C_ref.data(), C_num_rows, K); + vector<float> C_multiplier(K); + for (auto k = 0; k < K; ++k) { + auto C_ref_k_begin = C_ref_transpose.begin() + k * C_num_rows; + auto C_ref_k_end = C_ref_k_begin + C_num_rows; + int32_t minimum = *min_element(C_ref_k_begin, C_ref_k_end); + int32_t maximum = *max_element(C_ref_k_begin, C_ref_k_end); + C_multiplier[k] = 255. / (maximum - minimum); + cerr << "k " << k << " minimum " << minimum << " maximum " << maximum + << " multiplier " << C_multiplier[k] << endl; + } + int32_t C_zero_point = 5; + + aligned_vector<int32_t> col_offsets(K); + aligned_vector<int32_t> bias(K); + randFill(col_offsets, -100, 100); + randFill(bias, -40, 40); + + aligned_vector<uint8_t> C_uint8_ref(C_ref.size()), C_uint8(C_ref.size()); + depthwise_3x3_per_channel_quantization_pad_1_ref( + N, + H, + W, + K, + stride_h, + stride_w, + A_zero_point, + A.data(), + B_zero_point.data(), + B.data(), + C_multiplier.data(), + C_zero_point, + C_uint8_ref.data(), + col_offsets.data(), + bias.data()); + + Packed3x3ConvMatrix Bp(K, B.data()); + + depthwise_3x3_per_channel_quantization_pad_1( + N, + H, + W, + K, + stride_h, + stride_w, + A_zero_point, + A.data(), + B_zero_point.data(), + Bp, + C_multiplier.data(), + C_zero_point, + C_uint8.data(), + col_offsets.data(), + bias.data(), + 0, + 1); + + // correctness check + for (int n = 0; n < N; ++n) { + for (int h = 0; h < H_OUT; ++h) { + for (int w = 0; w < W_OUT; ++w) { + for (int k = 0; k < K; ++k) { + int32_t expected = + C_uint8_ref[((n * H_OUT + h) * W_OUT + w) * K + k]; + int32_t actual = C_uint8[((n * H_OUT + h) * W_OUT + w) * K + k]; + EXPECT_EQ(expected, actual) << + "Depthwise 3x3 results differ at (" << n << ", " << + h << ", " << w << ", " << k << ")."; + } + } + } + } + } // for each shape +} // Test3x3PerChannelQuantization + +} // namespace fbgemm2 |