// test/FP16Test.cc

/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#include <random>

#ifdef _OPENMP
#include <omp.h>
#endif

#include <gtest/gtest.h>

#include "TestUtils.h"
#include "bench/BenchUtils.h"
#include "fbgemm/FbgemmFP16.h"
#include "src/RefImplementations.h"

#ifdef USE_IACA
#include "iacaMarks.h"
#endif

using namespace std;
using namespace fbgemm;

namespace {
// Value-parameterized fixture for the FP16 GEMM test below.
//
// The test parameter is a pair of transpose flags: `first` is the transpose
// setting for matrix A and `second` is the transpose setting for matrix B.
class FBGemmFP16Test
    : public testing::TestWithParam<pair<matrix_op_t, matrix_op_t>> {};
} // namespace

// Registers FBGemmFP16Test with the (A, B) transpose combinations to run.
// Only the combinations with a non-transposed A are enabled; the two
// transposed-A combinations are kept below but commented out.
INSTANTIATE_TEST_CASE_P(
    InstantiationName,
    FBGemmFP16Test,
    ::testing::Values(
        make_pair(matrix_op_t::NoTranspose, matrix_op_t::NoTranspose),
        make_pair(matrix_op_t::NoTranspose, matrix_op_t::Transpose)
        // Disabled combinations:
        //   make_pair(matrix_op_t::Transpose, matrix_op_t::NoTranspose),
        //   make_pair(matrix_op_t::Transpose, matrix_op_t::Transpose)
        ));

// Compares cblas_gemm_compute (with B packed as PackedGemmMatrixFP16) against
// matmul_fp_ref on randomly sized problems, using the transpose combination
// supplied as the test parameter.
TEST_P(FBGemmFP16Test, Test) {
  // Build the list of (m, n, k) problem sizes:
  // m is drawn from [1, 256], n and k from [1, 1024].
  vector<vector<int>> shapes;
  random_device r;
  default_random_engine generator(r());
  uniform_int_distribution<int> dm(1, 256);
  uniform_int_distribution<int> dnk(1, 1024);
  for (int i = 0; i < 10; i++) {
    const int m = dm(generator);
    const int n = dnk(generator);
    const int k = dnk(generator);
    shapes.push_back({m, n, k});
    if (m > 10) {
      // Also cover the same problem with m rounded down to a multiple of 10.
      shapes.push_back({(m / 10) * 10, n, k});
    }
  }

  const float alpha = 1.f;
  const float beta = 0.f;

  // first -> transpose flag for A, second -> transpose flag for B.
  const pair<matrix_op_t, matrix_op_t>& param = GetParam();
  const matrix_op_t atrans = param.first;
  const matrix_op_t btrans = param.second;

  // Bind by const reference: each shape is a heap-backed vector and does not
  // need to be copied per iteration.
  for (const auto& s : shapes) {
    const int m = s[0];
    const int n = s[1];
    const int k = s[2];

    // Log the problem being run so a failure can be tied to its shape.
    cerr << "m = " << m << " n = " << n << " k = " << k;
    if (atrans == matrix_op_t::Transpose) {
      cerr << " A_transposed";
    }
    if (btrans == matrix_op_t::Transpose) {
      cerr << " B_transposed";
    }
    cerr << endl;

    // Initialize with small integer values, then convert to float.
    // NOTE(review): presumably chosen so the fp16 path and the fp32 reference
    // produce bit-identical results, which the exact EXPECT_EQ below relies
    // on - confirm.
    aligned_vector<int> Aint(m * k);
    aligned_vector<int> Bint(k * n);
    randFill(Aint, 0, 4);
    randFill(Bint, 0, 4);
    aligned_vector<float> A(Aint.begin(), Aint.end());
    aligned_vector<float> B(Bint.begin(), Bint.end());

    // C starts as NaN. NaN never compares equal to anything, so an output
    // element that still holds NaN after the GEMM fails the check below.
    aligned_vector<float> C(m * n, NAN);

    // The reference computation works on its own copies of the operands.
    aligned_vector<float> A_ref(A), B_ref(B), C_ref(C);

    // NOTE(review): this appears to convert a transposed operand into the
    // non-transposed layout expected by matmul_fp_ref - confirm against
    // transpose_matrix's definition.
    if (atrans == matrix_op_t::Transpose) {
      transpose_matrix(A_ref.data(), k, m);
    }
    if (btrans == matrix_op_t::Transpose) {
      transpose_matrix(B_ref.data(), n, k);
    }

    // Gold via reference sgemm.
    // NOTE(review): the three arguments after (m, n, k) look like the leading
    // dimensions lda = k, ldb = n, ldc = n - confirm against
    // RefImplementations.h.
    matmul_fp_ref(m, n, k, k, n, n, A_ref.data(), B_ref.data(), C_ref.data());

    // fbgemm fp16: B is packed once, from the original (untransposed-copy)
    // data together with its transpose flag.
    PackedGemmMatrixFP16 Bp(btrans, k, n, alpha, B.data());

#ifdef _OPENMP
#pragma omp parallel
#endif
    {
      // Each thread passes its own id and the thread count to the kernel.
      const int num_threads = fbgemm_get_num_threads();
      const int tid = fbgemm_get_thread_num();

      cblas_gemm_compute(
          atrans, m, A.data(), Bp, beta, C.data(), tid, num_threads);
    }

    // Correctness check: every element of the m x n result must match the
    // reference exactly.
    for (int i = 0; i < m; ++i) {
      for (int j = 0; j < n; ++j) {
        const float expected = C_ref[i * n + j];
        const float actual = C[i * n + j];
        EXPECT_EQ(expected, actual)
            << "GEMM results differ at (" << i << ", " << j << "). ref "
            << expected << " FBGemm " << actual;
      }
    }
  }
}