Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bench/GEMMsTunableBenchmark.cc 2
-rw-r--r-- src/ExecuteKernelU8S8.cc 76
-rw-r--r-- src/Fbgemm.cc 8
-rw-r--r-- src/GroupwiseConvAcc32Avx2.cc 3
-rw-r--r-- src/PackAMatrix.cc 8
-rw-r--r-- src/PackAWithIm2Col.cc 24
-rw-r--r-- src/PackAWithQuantRowOffset.cc 3
-rw-r--r-- src/PackAWithRowOffset.cc 22
-rw-r--r-- src/PackBMatrix.cc 8
-rw-r--r-- src/PackMatrix.cc 14
-rw-r--r-- src/QuantUtils.cc 10
-rw-r--r-- src/QuantUtilsAvx2.cc 13
12 files changed, 122 insertions, 69 deletions
diff --git a/bench/GEMMsTunableBenchmark.cc b/bench/GEMMsTunableBenchmark.cc
index 04eed8a..a65b51f 100644
--- a/bench/GEMMsTunableBenchmark.cc
+++ b/bench/GEMMsTunableBenchmark.cc
@@ -291,6 +291,8 @@ int main(int /* unused */, char** /* unused */) {
assert(0 && "architecture not supported");
return 0;
}
+ } else {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
}
set<vector<int>> incorrect_configs;
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index 4175d65..f7292fd 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -45,11 +45,19 @@ ExecuteKernel<
outputProcess_(outputProcess),
thread_id_(thread_id),
num_threads_(num_threads) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
if (params) {
- mbSize_ = params->MCB;
- nbSize_ = params->NCB;
- nrMinSize_ = params->NR_MIN;
- nrSize_ = params->NR;
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
+ mbSize_ = params->MCB;
+ nbSize_ = params->NCB;
+ nrMinSize_ = params->NR_MIN;
+ nrSize_ = params->NR;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
mbSize_ = PackingTraits<
@@ -110,28 +118,24 @@ void ExecuteKernel<
typename BaseType::jit_micro_kernel_fp fn;
- if (cpuinfo_initialize()) {
- if (fbgemmHasAvx512Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx512>(
- accum,
- packed_rows_A,
- packedB_.blockColSize(),
- packedA_.numPackedCols(),
- nbSize_);
- } else if (fbgemmHasAvx2Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx2>(
- accum,
- packed_rows_A,
- packedB_.blockColSize(),
- packedA_.numPackedCols(),
- nbSize_);
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecture");
- return;
- }
+ if (fbgemmHasAvx512Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx512>(
+ accum,
+ packed_rows_A,
+ packedB_.blockColSize(),
+ packedA_.numPackedCols(),
+ nbSize_);
+ } else if (fbgemmHasAvx2Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx2>(
+ accum,
+ packed_rows_A,
+ packedB_.blockColSize(),
+ packedA_.numPackedCols(),
+ nbSize_);
} else {
- throw std::runtime_error("Failed to initialize cpuinfo!");
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecture");
+ return;
}
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
@@ -144,20 +148,16 @@ void ExecuteKernel<
if (jb == bColBlocks - 1) {
int nc = ((packedB_.lastBcol() - 1) / nrMinSize_ + 1) * nrMinSize_;
if (nc != nbSize_) {
- if (cpuinfo_initialize()) {
- if (fbgemmHasAvx512Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx512>(
- accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
- } else if (fbgemmHasAvx2Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx2>(
- accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecture");
- return;
- }
+ if (fbgemmHasAvx512Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx512>(
+ accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
+ } else if (fbgemmHasAvx2Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx2>(
+ accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
} else {
- throw std::runtime_error("Failed to initialize cpuinfo!");
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecture");
+ return;
}
}
}
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc
index a40f38a..523f556 100644
--- a/src/Fbgemm.cc
+++ b/src/Fbgemm.cc
@@ -50,9 +50,11 @@ void fbgemmPacked(
// Run time CPU detection
if (cpuinfo_initialize()) {
if (blocking_params) {
- MCB = blocking_params->MCB;
- KCB = blocking_params->KCB;
- MR = blocking_params->MR;
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
+ MCB = blocking_params->MCB;
+ KCB = blocking_params->KCB;
+ MR = blocking_params->MR;
+ }
} else {
if (fbgemmHasAvx512Support()) {
MCB = PackingTraits<
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index 0e1b85b..0032e72 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -1789,6 +1789,9 @@ void fbgemmGroupwiseConv(
int num_threads) {
typedef ReQuantizeOutput<FUSE_RELU, Q_GRAN> processOutputType;
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
if (!fbgemmOptimizedGConv<SPATIAL_DIM>(conv_param) ||
(!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
return fbgemmGroupwiseConvBase_<
diff --git a/src/PackAMatrix.cc b/src/PackAMatrix.cc
index 89ec13e..87adaba 100644
--- a/src/PackAMatrix.cc
+++ b/src/PackAMatrix.cc
@@ -31,10 +31,18 @@ PackAMatrix<T, accT>::PackAMatrix(
trans_(trans),
smat_(smat),
ld_(ld) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
if (params) {
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
BaseType::brow_ = params->MCB;
BaseType::bcol_ = params->KCB;
row_interleave_B_ = params->ROW_INTERLEAVE;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
index fb4556c..e55dd4e 100644
--- a/src/PackAWithIm2Col.cc
+++ b/src/PackAWithIm2Col.cc
@@ -46,11 +46,19 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
a_zero_pt_(a_zero_pt) {
static_assert(
SPATIAL_DIM == 2 || SPATIAL_DIM == 3, "unsupported conv dimension ");
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
if (params) {
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
BaseType::brow_ = params->MCB;
BaseType::bcol_ = params->KCB;
row_interleave_B_ = params->ROW_INTERLEAVE;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
@@ -470,19 +478,19 @@ template <typename T, typename accT, int SPATIAL_DIM>
int PackAWithIm2Col<T, accT, SPATIAL_DIM>::rowOffsetBufferSize(
const BlockingFactors* params) {
if (cpuinfo_initialize()) {
- if (params){
+ if (params) {
return params->MCB;
} else {
- if (fbgemmHasAvx512Support()) {
- return PackingTraits<T, accT, inst_set_t::avx512>::MCB;
- } else if (fbgemmHasAvx2Support()) {
+ if (fbgemmHasAvx512Support()) {
+ return PackingTraits<T, accT, inst_set_t::avx512>::MCB;
+ } else if (fbgemmHasAvx2Support()) {
return PackingTraits<T, accT, inst_set_t::avx2>::MCB;
} else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecture");
- return -1;
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecture");
+ return -1;
+ }
}
- }
} else {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc
index 175425f..7572a51 100644
--- a/src/PackAWithQuantRowOffset.cc
+++ b/src/PackAWithQuantRowOffset.cc
@@ -42,6 +42,9 @@ PackAWithQuantRowOffset<T, accT>::PackAWithQuantRowOffset(
scale_(scale),
zero_pt_(zero_pt),
row_offset_(row_offset) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
rowOffsetAllocatedHere = false;
if (params) {
if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc
index 139a6d3..b791817 100644
--- a/src/PackAWithRowOffset.cc
+++ b/src/PackAWithRowOffset.cc
@@ -36,11 +36,19 @@ PackAWithRowOffset<T, accT>::PackAWithRowOffset(
smat_(smat),
ld_(ld),
row_offset_(row_offset) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
rowOffsetAllocatedHere = false;
if (params) {
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
BaseType::brow_ = params->MCB;
BaseType::bcol_ = params->KCB;
row_interleave_B_ = params->ROW_INTERLEAVE;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
@@ -179,19 +187,19 @@ template <typename T, typename accT>
int PackAWithRowOffset<T, accT>::rowOffsetBufferSize(
const BlockingFactors* params) {
if (cpuinfo_initialize()) {
- if (params){
+ if (params) {
return params->MCB;
} else {
- if (fbgemmHasAvx512Support()) {
+ if (fbgemmHasAvx512Support()) {
return PackingTraits<T, accT, inst_set_t::avx512>::MCB;
} else if (fbgemmHasAvx2Support()) {
return PackingTraits<T, accT, inst_set_t::avx2>::MCB;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecture");
- return -1;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecture");
+ return -1;
+ }
}
- }
} else {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc
index 472c802..970a741 100644
--- a/src/PackBMatrix.cc
+++ b/src/PackBMatrix.cc
@@ -185,10 +185,18 @@ PackBMatrix<T, accT>::PackBMatrix(
trans_(trans),
smat_(smat),
ld_(ld) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
if (params) {
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
BaseType::brow_ = params->KCB;
BaseType::bcol_ = params->NCB;
row_interleave_ = params->ROW_INTERLEAVE;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
diff --git a/src/PackMatrix.cc b/src/PackMatrix.cc
index e93b97c..c9a68a6 100644
--- a/src/PackMatrix.cc
+++ b/src/PackMatrix.cc
@@ -33,11 +33,19 @@ int PackMatrix<PT, inpType, accType>::packedBufferSize(
int rows,
int cols,
const BlockingFactors* params) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
int MCB, KCB, NCB;
if (params) {
- MCB = params->MCB;
- NCB = params->NCB;
- KCB = params->KCB;
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
+ MCB = params->MCB;
+ NCB = params->NCB;
+ KCB = params->KCB;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
MCB = PackingTraits<inpType, accType, inst_set_t::avx512>::MCB;
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
index 82f8c2a..1ab00d1 100644
--- a/src/QuantUtils.cc
+++ b/src/QuantUtils.cc
@@ -176,7 +176,7 @@ void Quantize<uint8_t>(
uint8_t* dst,
int len,
const TensorQuantizationParams& qparams) {
- bool avx2_support = fbgemmHasAvx2Support();
+ bool avx2_support = cpuinfo_initialize() && fbgemmHasAvx2Support();
bool fma_support = cpuinfo_has_x86_fma3();
if (avx2_support && fma_support && qparams.precision == 8) {
// fast path
@@ -221,7 +221,8 @@ void Requantize<uint8_t>(
uint8_t* dst,
const int len,
const RequantizationParams& params) {
- if (params.target_qparams.precision == 8 && fbgemmHasAvx2Support()) {
+ if (params.target_qparams.precision == 8 && cpuinfo_initialize() &&
+ fbgemmHasAvx2Support()) {
RequantizeAvx2(src, dst, len, params);
} else {
for (int i = 0; i < len; ++i) {
@@ -237,7 +238,7 @@ void RequantizeFixedPoint(
int len,
const RequantizationParams& params) {
if (std::is_same<T, uint8_t>::value && params.target_qparams.precision == 8 &&
- fbgemmHasAvx2Support()) {
+ cpuinfo_initialize() && fbgemmHasAvx2Support()) {
RequantizeFixedPointAvx2(src, dst, len, params);
} else {
for (int i = 0; i < len; ++i) {
@@ -267,7 +268,8 @@ void RequantizeFixedPoint<uint8_t>(
uint8_t* dst,
const int len,
const RequantizationParams& params) {
- if (params.target_qparams.precision == 8 && fbgemmHasAvx2Support()) {
+ if (params.target_qparams.precision == 8 && cpuinfo_initialize() &&
+ fbgemmHasAvx2Support()) {
RequantizeFixedPointAvx2(src, dst, len, params);
} else {
for (int i = 0; i < len; ++i) {
diff --git a/src/QuantUtilsAvx2.cc b/src/QuantUtilsAvx2.cc
index 875c9e1..7f43ced 100644
--- a/src/QuantUtilsAvx2.cc
+++ b/src/QuantUtilsAvx2.cc
@@ -142,16 +142,17 @@ void RequantizeAvx2(
int len,
const RequantizationParams& params) {
DoNothing<> doNothingObj{};
+ int32_t Bq_zero_point[] = { 0 };
ReQuantizeOutput<false /* FUSE_RELU */> requantizeObj(
doNothingObj,
&params.real_multiplier,
params.target_qparams.zero_point,
- 0,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- len);
+ 0, // Aq_zero_point
+ Bq_zero_point, // Bq_zero_point
+ nullptr, // row_offsets
+ nullptr, // col_offsets
+ nullptr, // bias
+ len); // ncol
requantizeObj.f<inst_set_t::avx2>(dst, src, {0, 1, 0, len}, 0, 0);
}