github.com/marian-nmt/FBGEMM.git
-rw-r--r--  include/fbgemm/Fbgemm.h                      12
-rw-r--r--  include/fbgemm/FbgemmFP16.h                   2
-rw-r--r--  include/fbgemm/Types.h                        6
-rw-r--r--  src/FbgemmI8Depthwise2DAvx2-inl.h            34
-rw-r--r--  src/FbgemmI8Depthwise3x3Avx2.cc               2
-rw-r--r--  src/FbgemmI8DepthwiseAvx2-inl.h              14
-rw-r--r--  src/FbgemmI8DepthwisePerChannelQuantAvx2.cc   2
-rw-r--r--  src/PackBMatrix.cc                           48
-rw-r--r--  src/QuantUtilsAvx2.cc                         3
-rw-r--r--  src/TransposeUtilsAvx2.h                      3
10 files changed, 94 insertions, 32 deletions
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index c5f9563..8f9f6cd 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -413,6 +413,18 @@ class FBGEMM_API PackBMatrix final
const BlockingFactors* params = nullptr);
/**
+ * This constructor accepts a pre-packed matrix as input
+ * and skips the actual packing procedure.
+ */
+ PackBMatrix(matrix_op_t trans,
+ std::int32_t nRow,
+ std::int32_t nCol,
+ inpType* prepackedmat,
+ std::int32_t ld,
+ int groups = 1,
+ const BlockingFactors* params = nullptr);
+
+ /**
* Weight matrices are usually constant so worth pre-packing.
*/
bool isPrePacked() const {
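For orientation, a minimal usage sketch of the new constructor (not part of the diff; the sizes, names, and the round-trip through getBuf() are illustrative assumptions, not code from this commit):

#include "fbgemm/Fbgemm.h"
#include <cstdint>
#include <vector>

void prepackedConstructorSketch() {
  int K = 64, N = 32;                    // illustrative sizes
  std::vector<std::int8_t> B(K * N, 1);  // unpacked weights, row-major
  const std::int8_t* Bsrc = B.data();    // const pointer selects the packing overload

  // Existing path: pack B from its unpacked form.
  fbgemm::PackBMatrix<std::int8_t, std::int32_t> packedB(
      fbgemm::matrix_op_t::NoTranspose, K, N, Bsrc, /*ld=*/N);

  // New path added by this diff: wrap a buffer that is already in packed
  // layout (here borrowed from packedB; in practice it would typically be
  // weights packed offline and reloaded) without repeating the packing step.
  fbgemm::PackBMatrix<std::int8_t, std::int32_t> fromPrepacked(
      fbgemm::matrix_op_t::NoTranspose, K, N, packedB.getBuf(), /*ld=*/N);
}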
diff --git a/include/fbgemm/FbgemmFP16.h b/include/fbgemm/FbgemmFP16.h
index ed56e31..7d1b031 100644
--- a/include/fbgemm/FbgemmFP16.h
+++ b/include/fbgemm/FbgemmFP16.h
@@ -223,7 +223,7 @@ class PackedGemmMatrixFP16 {
}
int matSize() const {
- return size_;
+ return (int)size_;
}
int numRows() const {
return nrow_;
diff --git a/include/fbgemm/Types.h b/include/fbgemm/Types.h
index 370a29a..fda5262 100644
--- a/include/fbgemm/Types.h
+++ b/include/fbgemm/Types.h
@@ -36,11 +36,11 @@ static inline float16 cpu_float2half_rn(float f) {
// Get rid of +Inf/-Inf, +0/-0.
if (u > 0x477fefff) {
- ret = sign | 0x7c00U;
+ ret = (fbgemm::float16)(sign | 0x7c00U);
return ret;
}
if (u < 0x33000001) {
- ret = (sign | 0x0000);
+ ret = (fbgemm::float16)(sign | 0x0000);
return ret;
}
@@ -70,7 +70,7 @@ static inline float16 cpu_float2half_rn(float f) {
}
}
- ret = (sign | (exponent << 10) | mantissa);
+ ret = (fbgemm::float16)(sign | (exponent << 10) | mantissa);
return ret;
}
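The casts added above only silence narrowing warnings when assigning into float16; the numeric result is unchanged. A small round-trip sketch, assuming the cpu_half2float helper defined alongside cpu_float2half_rn in Types.h:

#include "fbgemm/Types.h"
#include <cstdio>

int main() {
  float x = 1.5f;
  fbgemm::float16 h = fbgemm::cpu_float2half_rn(x); // round to nearest even
  float y = fbgemm::cpu_half2float(h);              // widen back to float
  std::printf("%f -> 0x%04x -> %f\n", x, (unsigned)h, y);
  return 0;
}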
diff --git a/src/FbgemmI8Depthwise2DAvx2-inl.h b/src/FbgemmI8Depthwise2DAvx2-inl.h
index 7488a3c..a07dcd7 100644
--- a/src/FbgemmI8Depthwise2DAvx2-inl.h
+++ b/src/FbgemmI8Depthwise2DAvx2-inl.h
@@ -318,7 +318,7 @@ static ALWAYS_INLINE void inner_prod_2d_packed_(
std::int32_t* C,
int remainder,
std::int32_t* row_offsets) {
- if (S == 3) {
+ //if constexpr (S == 3) {
inner_prod_3x3_packed_<SUM_A, REMAINDER, PER_CHANNEL_QUANTIZATION>(
H,
W,
@@ -332,22 +332,22 @@ static ALWAYS_INLINE void inner_prod_2d_packed_(
C,
remainder,
row_offsets);
- } else {
- assert(S == 5);
- inner_prod_5x5_packed_<SUM_A, REMAINDER, PER_CHANNEL_QUANTIZATION>(
- H,
- W,
- K,
- h_in,
- w_in,
- A,
- A_zero_point,
- Bp,
- B_zero_point,
- C,
- remainder,
- row_offsets);
- }
+ //} else {
+ // assert(S == 5);
+ // inner_prod_5x5_packed_<SUM_A, REMAINDER, PER_CHANNEL_QUANTIZATION>(
+ // H,
+ // W,
+ // K,
+ // h_in,
+ // w_in,
+ // A,
+ // A_zero_point,
+ // Bp,
+ // B_zero_point,
+ // C,
+ // remainder,
+ // row_offsets);
+ //}
}
template <
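The replacement comment hints at C++17 if constexpr: the branch on the template parameter S could be resolved at compile time rather than commented out. A standalone illustration of that pattern (not FBGEMM code):

// Compile-time dispatch on a template parameter; only the selected branch
// is instantiated, the other is discarded.
template <int S>
constexpr int kernelRows() {
  if constexpr (S == 3) {
    return 3; // 3x3 path
  } else {
    static_assert(S == 5, "only 3x3 and 5x5 kernels are handled");
    return 5; // 5x5 path
  }
}
static_assert(kernelRows<3>() == 3);
static_assert(kernelRows<5>() == 5);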
diff --git a/src/FbgemmI8Depthwise3x3Avx2.cc b/src/FbgemmI8Depthwise3x3Avx2.cc
index b2ad049..523af73 100644
--- a/src/FbgemmI8Depthwise3x3Avx2.cc
+++ b/src/FbgemmI8Depthwise3x3Avx2.cc
@@ -4,7 +4,7 @@
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
-#define FBGEMM_EXPORTS
+//#define FBGEMM_EXPORTS
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"
#include <stdexcept> // for logic_error
diff --git a/src/FbgemmI8DepthwiseAvx2-inl.h b/src/FbgemmI8DepthwiseAvx2-inl.h
index 18dda3b..7128916 100644
--- a/src/FbgemmI8DepthwiseAvx2-inl.h
+++ b/src/FbgemmI8DepthwiseAvx2-inl.h
@@ -230,7 +230,7 @@ static ALWAYS_INLINE void inner_prod_packed_(
__m256i a_sum_temp[2] = {0, 0};
int k = 0;
- if (K >= 4) {
+ //if (K >= 4) {
madd_epi16x4_packed<SUM_A>(
a_v[0],
a_v[1],
@@ -261,12 +261,12 @@ static ALWAYS_INLINE void inner_prod_packed_(
c[2] = _mm256_add_epi32(c[2], c_temp[2]);
c[3] = _mm256_add_epi32(c[3], c_temp[3]);
}
- } else {
- c[0] = _mm256_setzero_si256();
- c[1] = _mm256_setzero_si256();
- c[2] = _mm256_setzero_si256();
- c[3] = _mm256_setzero_si256();
- }
+ //} else {
+ // c[0] = _mm256_setzero_si256();
+ // c[1] = _mm256_setzero_si256();
+ // c[2] = _mm256_setzero_si256();
+ // c[3] = _mm256_setzero_si256();
+ //}
if (K - k == 3) {
madd_epi16x3_packed<SUM_A>(
diff --git a/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc b/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc
index ec26287..883558b 100644
--- a/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc
+++ b/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc
@@ -4,7 +4,7 @@
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
-#define FBGEMM_EXPORTS
+//#define FBGEMM_EXPORTS
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"
#include <stdexcept> // for logic_error
diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc
index c271c4c..dc8ce5c 100644
--- a/src/PackBMatrix.cc
+++ b/src/PackBMatrix.cc
@@ -239,6 +239,54 @@ PackBMatrix<T, accT>::PackBMatrix(
}
template <typename T, typename accT>
+PackBMatrix<T, accT>::PackBMatrix(matrix_op_t trans,
+ int32_t nRow,
+ int32_t nCol,
+ inpType* prepackedmat,
+ int32_t ld,
+ int groups,
+ const BlockingFactors* params)
+ : PackMatrix<PackBMatrix<T, accT>, T, accT>(nRow, nCol, prepackedmat, groups, params),
+ trans_(trans),
+ smat_(nullptr),
+ ld_(ld) {
+ if(!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
+ if(params) {
+ if(fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
+ BaseType::brow_ = params->KCB;
+ BaseType::bcol_ = params->NCB;
+ row_interleave_ = params->ROW_INTERLEAVE;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
+ } else {
+ if(fbgemmHasAvx512Support()) {
+ BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
+ BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx512>::NCB;
+ row_interleave_ = PackingTraits<T, accT, inst_set_t::avx512>::ROW_INTERLEAVE;
+ } else if(fbgemmHasAvx2Support()) {
+ BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx2>::KCB;
+ BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx2>::NCB;
+ row_interleave_ = PackingTraits<T, accT, inst_set_t::avx2>::ROW_INTERLEAVE;
+ } else {
+ // Error
+ assert(0 && "unknown architecure");
+ }
+ }
+ if(BaseType::numRows() % groups != 0) {
+ throw std::runtime_error("groups = " + std::to_string(groups)
+ + " does not divide numRows = " + std::to_string(BaseType::numRows()));
+ }
+
+ // blocking for one group
+ block_type_t block{0, BaseType::numRows() / BaseType::numGroups(), 0, BaseType::numCols()};
+ BaseType::packedBlock(block);
+}
+
+template <typename T, typename accT>
void PackBMatrix<T, accT>::pack_unpack_(
const block_type_t& block,
T* unpack_buf,
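If the pre-packed buffer was produced with non-default blocking factors, the same factors have to be supplied again so the packed layout is interpreted consistently. A hedged sketch of that call (the function and field values are illustrative; only the fields this constructor reads are set):

#include "fbgemm/Fbgemm.h"
#include <cstdint>

void prepackedWithParamsSketch(std::int8_t* prepacked, int K, int N) {
  fbgemm::BlockingFactors params{};
  params.KCB = 256;            // must match the values used at pack time
  params.NCB = 32;
  params.ROW_INTERLEAVE = 4;
  fbgemm::PackBMatrix<std::int8_t, std::int32_t> packedB(
      fbgemm::matrix_op_t::NoTranspose, K, N, prepacked, /*ld=*/N,
      /*groups=*/1, &params);
}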
diff --git a/src/QuantUtilsAvx2.cc b/src/QuantUtilsAvx2.cc
index 8e36855..a1e6858 100644
--- a/src/QuantUtilsAvx2.cc
+++ b/src/QuantUtilsAvx2.cc
@@ -163,7 +163,8 @@ void FindMinMax(const float* a, float* min, float* max, int len) {
////////////////////////////////////////////////////////////////////////////////
// Requantization (with floats)
-#ifdef __AVX2__
+//#ifdef __AVX2__
+#if 1
void RequantizeAvx2(
const int32_t* src,
uint8_t* dst,
diff --git a/src/TransposeUtilsAvx2.h b/src/TransposeUtilsAvx2.h
index a405745..9925eb6 100644
--- a/src/TransposeUtilsAvx2.h
+++ b/src/TransposeUtilsAvx2.h
@@ -13,7 +13,8 @@ namespace fbgemm {
namespace internal {
-#ifdef __AVX2__
+//#ifdef __AVX2__
+#if 1
// NOTE: Make sure every function defined in here has static linkage because
// this header file is included by UtilsAvx512.cc compiled with -mavx512f option