1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
/*
* Copyright (c) Facebook, Inc. and its affiliates.
* All rights reserved.
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <random>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <gtest/gtest.h>
#include "TestUtils.h"
#include "bench/BenchUtils.h"
#include "fbgemm/FbgemmFP16.h"
#include "src/RefImplementations.h"
#ifdef USE_IACA
#include "iacaMarks.h"
#endif
using namespace std;
using namespace fbgemm;
namespace {
// The template parameter is transpose of A and B
class FBGemmFP16Test
: public testing::TestWithParam<pair<matrix_op_t, matrix_op_t>> {};
}; // namespace
// Instantiates FBGemmFP16Test once per (A transpose, B transpose) pair listed
// below. Only the combinations with a non-transposed A are active; the two
// transposed-A combinations are kept here commented out.
INSTANTIATE_TEST_CASE_P(
    InstantiationName,
    FBGemmFP16Test,
    ::testing::Values(
        std::make_pair(matrix_op_t::NoTranspose, matrix_op_t::NoTranspose),
        std::make_pair(matrix_op_t::NoTranspose, matrix_op_t::Transpose)
        // Disabled:
        // std::make_pair(matrix_op_t::Transpose, matrix_op_t::NoTranspose),
        // std::make_pair(matrix_op_t::Transpose, matrix_op_t::Transpose)
        ));
// Compares the FP16 packed-B GEMM (PackedGemmMatrixFP16 + cblas_gemm_compute)
// against the reference matmul_fp_ref on randomly chosen shapes, using the
// (A transpose, B transpose) combination supplied as the test parameter.
TEST_P(FBGemmFP16Test, Test) {
  // Ten random (m, n, k) shapes with m in [1, 256] and n, k in [1, 1024].
  // Whenever m > 10, the same n and k are additionally exercised with m
  // rounded down to a multiple of 10.
  vector<vector<int>> shapes;
  shapes.reserve(20); // at most two entries per iteration
  random_device r;
  default_random_engine generator(r());
  uniform_int_distribution<int> dm(1, 256);
  uniform_int_distribution<int> dnk(1, 1024);
  for (int i = 0; i < 10; i++) {
    const int m = dm(generator);
    const int n = dnk(generator);
    const int k = dnk(generator);
    shapes.push_back({m, n, k});
    if (m > 10) {
      shapes.push_back({(m / 10) * 10, n, k});
    }
  }

  const float alpha = 1.f;
  const float beta = 0.f;
  const pair<matrix_op_t, matrix_op_t>& param = GetParam();
  const matrix_op_t atrans = param.first;
  const matrix_op_t btrans = param.second;

  // Bind by reference: copying each shape vector per iteration is unnecessary.
  for (const auto& s : shapes) {
    const int m = s[0];
    const int n = s[1];
    const int k = s[2];

    // Log the shape under test so a failure can be tied to its dimensions.
    cerr << "m = " << m << " n = " << n << " k = " << k;
    if (atrans == matrix_op_t::Transpose) {
      cerr << " A_transposed";
    }
    if (btrans == matrix_op_t::Transpose) {
      cerr << " B_transposed";
    }
    cerr << endl;

    // Fill the inputs with small integer values and convert them to float.
    // NOTE(review): presumably this keeps the FP16 path and the float
    // reference bit-identical, which the EXPECT_EQ below relies on — confirm.
    aligned_vector<int> Aint(m * k);
    aligned_vector<int> Bint(k * n);
    randFill(Aint, 0, 4);
    randFill(Bint, 0, 4);
    aligned_vector<float> A(Aint.begin(), Aint.end());
    aligned_vector<float> B(Bint.begin(), Bint.end());
    // C starts out as NaN; since NaN never compares equal, an output element
    // left at its initial value cannot pass the check below.
    aligned_vector<float> C(m * n, NAN);

    // The reference works on copies, with A_ref/B_ref transposed in place
    // whenever the corresponding operand is flagged as transposed.
    aligned_vector<float> A_ref(A), B_ref(B), C_ref(C);
    if (atrans == matrix_op_t::Transpose) {
      transpose_matrix(A_ref.data(), k, m);
    }
    if (btrans == matrix_op_t::Transpose) {
      transpose_matrix(B_ref.data(), n, k);
    }

    // Gold via reference sgemm
    matmul_fp_ref(m, n, k, k, n, n, A_ref.data(), B_ref.data(), C_ref.data());

    // fbgemm fp16: pack B once, then let every thread of the (optional)
    // OpenMP parallel region call the compute routine with its own thread id.
    PackedGemmMatrixFP16 Bp(btrans, k, n, alpha, B.data());
#ifdef _OPENMP
#pragma omp parallel
#endif
    {
      const int num_threads = fbgemm_get_num_threads();
      const int tid = fbgemm_get_thread_num();
      cblas_gemm_compute(
          atrans, m, A.data(), Bp, beta, C.data(), tid, num_threads);
    }

    // correctness check: element-wise exact comparison of the m x n outputs
    for (int i = 0; i < m; ++i) {
      for (int j = 0; j < n; ++j) {
        const float expected = C_ref[i * n + j];
        const float actual = C[i * n + j];
        EXPECT_EQ(expected, actual)
            << "GEMM results differ at (" << i << ", " << j << "). ref "
            << expected << " FBGemm " << actual;
      }
    }
  }
}
|