Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJongsoo Park <jongsoo@fb.com>2019-04-19 03:51:07 +0300
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2019-04-19 03:54:36 +0300
commit6ec218e6ed5dcb9b5397a608a3b5b8027b236819 (patch)
tree0151bf346fb3cfa2708ad070ed295d7bc3fcfa11 /src/ExecuteKernelU8S8.cc
parentc6e86067e41a363af718dae7f8d7494068aad868 (diff)
Make sure cpuinfo_initialize is called before fbgemmHasAvx2/512Support (#94)
Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/94 If we don't call cpuinfo_initialize beforehand, fbgemmHasAvx2/512Support will always return false. We should be really careful about this. Reviewed By: jianyuh Differential Revision: D14994129 fbshipit-source-id: b78028f0543d05595caaa627be2feb743d0694b1
Diffstat (limited to 'src/ExecuteKernelU8S8.cc')
-rw-r--r--src/ExecuteKernelU8S8.cc76
1 files changed, 38 insertions, 38 deletions
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index 4175d65..f7292fd 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -45,11 +45,19 @@ ExecuteKernel<
outputProcess_(outputProcess),
thread_id_(thread_id),
num_threads_(num_threads) {
+ if (!cpuinfo_initialize()) {
+ throw std::runtime_error("Failed to initialize cpuinfo!");
+ }
if (params) {
- mbSize_ = params->MCB;
- nbSize_ = params->NCB;
- nrMinSize_ = params->NR_MIN;
- nrSize_ = params->NR;
+ if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
+ mbSize_ = params->MCB;
+ nbSize_ = params->NCB;
+ nrMinSize_ = params->NR_MIN;
+ nrSize_ = params->NR;
+ } else {
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecure");
+ }
} else {
if (fbgemmHasAvx512Support()) {
mbSize_ = PackingTraits<
@@ -110,28 +118,24 @@ void ExecuteKernel<
typename BaseType::jit_micro_kernel_fp fn;
- if (cpuinfo_initialize()) {
- if (fbgemmHasAvx512Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx512>(
- accum,
- packed_rows_A,
- packedB_.blockColSize(),
- packedA_.numPackedCols(),
- nbSize_);
- } else if (fbgemmHasAvx2Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx2>(
- accum,
- packed_rows_A,
- packedB_.blockColSize(),
- packedA_.numPackedCols(),
- nbSize_);
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecture");
- return;
- }
+ if (fbgemmHasAvx512Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx512>(
+ accum,
+ packed_rows_A,
+ packedB_.blockColSize(),
+ packedA_.numPackedCols(),
+ nbSize_);
+ } else if (fbgemmHasAvx2Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx2>(
+ accum,
+ packed_rows_A,
+ packedB_.blockColSize(),
+ packedA_.numPackedCols(),
+ nbSize_);
} else {
- throw std::runtime_error("Failed to initialize cpuinfo!");
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecture");
+ return;
}
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
@@ -144,20 +148,16 @@ void ExecuteKernel<
if (jb == bColBlocks - 1) {
int nc = ((packedB_.lastBcol() - 1) / nrMinSize_ + 1) * nrMinSize_;
if (nc != nbSize_) {
- if (cpuinfo_initialize()) {
- if (fbgemmHasAvx512Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx512>(
- accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
- } else if (fbgemmHasAvx2Support()) {
- fn = BaseType::template getOrCreate<inst_set_t::avx2>(
- accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecture");
- return;
- }
+ if (fbgemmHasAvx512Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx512>(
+ accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
+ } else if (fbgemmHasAvx2Support()) {
+ fn = BaseType::template getOrCreate<inst_set_t::avx2>(
+ accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
} else {
- throw std::runtime_error("Failed to initialize cpuinfo!");
+ // TODO: Have default slower path
+ assert(0 && "unsupported architecture");
+ return;
}
}
}