diff options
author | Jianyu Huang <jianyuhuang@fb.com> | 2019-03-06 21:18:56 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-03-06 21:21:45 +0300 |
commit | 66b41357561f2ff9895d2b4638273f07c49dbe29 (patch) | |
tree | 1d0014e483a9fe5beb0cce3b1394b0eb550c12ba /src/ExecuteKernelU8S8.cc | |
parent | 2eb84b8912f8340d8ffc54a3ef7653291f64f6f8 (diff) |
Add Avx512BW/VL/DQ check (#84)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/84
Add AVX512BW Check:
AVX-512 Byte and Word Instructions add support for 8-bit and 16-bit integer operations such as vpmaddubsw.
Similarly, add AVX512VL/DQ check.
Reviewed By: jspark1105
Differential Revision: D14321050
fbshipit-source-id: bd34745fd488ce4efe3248aeb78c54e1c2d91d47
Diffstat (limited to 'src/ExecuteKernelU8S8.cc')
-rw-r--r-- | src/ExecuteKernelU8S8.cc | 20 |
1 file changed, 10 insertions, 10 deletions
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc index 0dfc531..f2b028d 100644 --- a/src/ExecuteKernelU8S8.cc +++ b/src/ExecuteKernelU8S8.cc @@ -42,7 +42,7 @@ ExecuteKernel< outputProcess_(outputProcess), thread_id_(thread_id), num_threads_(num_threads) { - if (cpuinfo_has_x86_avx512f()) { + if (fbgemmHasAvx512Support()) { mbSize_ = PackingTraits< int8_t, typename packingAMatrix::accType, @@ -55,7 +55,7 @@ ExecuteKernel< int8_t, typename packingAMatrix::accType, inst_set_t::avx512>::NR; - } else if (cpuinfo_has_x86_avx2()) { + } else if (fbgemmHasAvx2Support()) { mbSize_ = PackingTraits< int8_t, typename packingAMatrix::accType, @@ -101,14 +101,14 @@ void ExecuteKernel< typename BaseType::jit_micro_kernel_fp fn; if (cpuinfo_initialize()) { - if (cpuinfo_has_x86_avx512f()) { + if (fbgemmHasAvx512Support()) { fn = BaseType::template getOrCreate<inst_set_t::avx512>( accum, packed_rows_A, packedB_.blockColSize(), packedA_.numPackedCols(), nbSize_); - } else if (cpuinfo_has_x86_avx2()) { + } else if (fbgemmHasAvx2Support()) { fn = BaseType::template getOrCreate<inst_set_t::avx2>( accum, packed_rows_A, @@ -135,10 +135,10 @@ void ExecuteKernel< int nc = ((packedB_.lastBcol() - 1) / nrSize_ + 1) * nrSize_; if (nc != nbSize_) { if (cpuinfo_initialize()) { - if (cpuinfo_has_x86_avx512f()) { + if (fbgemmHasAvx512Support()) { fn = BaseType::template getOrCreate<inst_set_t::avx512>( accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_); - } else if (cpuinfo_has_x86_avx2()) { + } else if (fbgemmHasAvx2Support()) { fn = BaseType::template getOrCreate<inst_set_t::avx2>( accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_); } else { @@ -203,7 +203,7 @@ void ExecuteKernel< int32_t nSize = C_buffer_start == C_tile_ ? 
jb * nbSize_ : packedB_.numCols(); if (nSize) { - if (cpuinfo_has_x86_avx512f()) { + if (fbgemmHasAvx512Support()) { // TODO: avx512 path // Currently use avx2 code outputProcess_.template f<inst_set_t::avx2>( @@ -212,7 +212,7 @@ void ExecuteKernel< {row_start_A, packed_rows_A, NDim * group, nSize}, ldc_, ldc_); - } else if (cpuinfo_has_x86_avx2()) { + } else if (fbgemmHasAvx2Support()) { outputProcess_.template f<inst_set_t::avx2>( matC_, C_buffer_row_start, @@ -228,7 +228,7 @@ void ExecuteKernel< if (C_buffer_start == C_tile_) { // When C_tile_ scratchpad was used to avoid accessing memory past // C_buffer_ . - if (cpuinfo_has_x86_avx512f()) { + if (fbgemmHasAvx512Support()) { // TODO: avx512 path // Currently use avx2 code outputProcess_.template f<inst_set_t::avx2>( @@ -240,7 +240,7 @@ void ExecuteKernel< packedB_.lastBcol()}, ldc_, leadingDim); - } else if (cpuinfo_has_x86_avx2()) { + } else if (fbgemmHasAvx2Support()) { outputProcess_.template f<inst_set_t::avx2>( matC_, C_tile_, |