Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dskhudia <dskhudia@fb.com> 2018-11-04 19:22:37 +0300
committer dskhudia <dskhudia@fb.com> 2018-11-04 19:22:37 +0300
commit 690dbc29d9b0cb373fa0303b7c30c20b527e9605 (patch)
tree 56d9b3ebc1a7b5ff394e5dc9e08db9e44285e6f4 /src/FbgemmFP16.cc
parent 505eb847185c9255526813dd39edadcd4e61d8e0 (diff)
Syncing with internal version. Fixes for Mac/clang build. Other minor fixes
Diffstat (limited to 'src/FbgemmFP16.cc')
-rw-r--r-- src/FbgemmFP16.cc 10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/FbgemmFP16.cc b/src/FbgemmFP16.cc
index 7bbfa54..eff173f 100644
--- a/src/FbgemmFP16.cc
+++ b/src/FbgemmFP16.cc
@@ -7,6 +7,8 @@
#include "fbgemm/FbgemmFP16.h"
#include <cpuinfo.h>
+#include <array>
+#include <utility>
#include "FbgemmFP16UKernels.h"
@@ -44,7 +46,7 @@ struct KernelInfo {
// autotuned kernel splits for various cases m = 1:mb_max
// may need re-autotuning for new uarch
- static constexpr array<array<pair<int, int>, 2>, 121 > partition = {
+ static constexpr array<array<array<int, 2>, 2>, 121 > partition = {
{
{{ { 0, 0 }, { 0, 0 } } },
{{ { 1, 1 }, { 0, 0 } } },
@@ -171,7 +173,7 @@ struct KernelInfo {
};
};
constexpr array<KernelInfo::knl_ptr, 15> KernelInfo::kernel;
-constexpr array<array<pair<int, int>, 2>, 121 > KernelInfo::partition;
+constexpr array<array<array<int, 2>, 2>, 121 > KernelInfo::partition;
// autotuned kernel splits for various cases m = 1:mb_max
void
@@ -220,8 +222,8 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
auto m1 = 0;
for (auto c = 0; c < 2; c++) {
- auto kernel_nrows = KernelInfo::partition[mb][c].first;
- auto nkernel_nrows = KernelInfo::partition[mb][c].second;
+ auto kernel_nrows = KernelInfo::partition[mb][c][0];
+ auto nkernel_nrows = KernelInfo::partition[mb][c][1];
auto m_start = m1, m_end = m1 + kernel_nrows * nkernel_nrows;
for (auto m2 = m_start; m2 < m_end; m2 += kernel_nrows) {