From 66b41357561f2ff9895d2b4638273f07c49dbe29 Mon Sep 17 00:00:00 2001
From: Jianyu Huang <jianyuhuang@fb.com>
Date: Wed, 6 Mar 2019 10:18:56 -0800
Subject: Add Avx512BW/VL/DQ check (#84)

Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/84

Add AVX512BW Check:
AVX-512 Byte and Word Instructions add support for 8-bit and 16-bit integer operations such as vpmaddubsw.

Similarly, add AVX512VL/DQ check.

Reviewed By: jspark1105

Differential Revision: D14321050

fbshipit-source-id: bd34745fd488ce4efe3248aeb78c54e1c2d91d47
---
 src/ExecuteKernelU8S8.cc | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'src/ExecuteKernelU8S8.cc')

diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index 0dfc531..f2b028d 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -42,7 +42,7 @@ ExecuteKernel<
       outputProcess_(outputProcess),
       thread_id_(thread_id),
       num_threads_(num_threads) {
-  if (cpuinfo_has_x86_avx512f()) {
+  if (fbgemmHasAvx512Support()) {
     mbSize_ = PackingTraits<
         int8_t,
         typename packingAMatrix::accType,
@@ -55,7 +55,7 @@ ExecuteKernel<
         int8_t,
         typename packingAMatrix::accType,
         inst_set_t::avx512>::NR;
-  } else if (cpuinfo_has_x86_avx2()) {
+  } else if (fbgemmHasAvx2Support()) {
     mbSize_ = PackingTraits<
         int8_t,
         typename packingAMatrix::accType,
@@ -101,14 +101,14 @@ void ExecuteKernel<
   typename BaseType::jit_micro_kernel_fp fn;
 
   if (cpuinfo_initialize()) {
-    if (cpuinfo_has_x86_avx512f()) {
+    if (fbgemmHasAvx512Support()) {
       fn = BaseType::template getOrCreate<inst_set_t::avx512>(
           accum,
           packed_rows_A,
           packedB_.blockColSize(),
           packedA_.numPackedCols(),
           nbSize_);
-    } else if (cpuinfo_has_x86_avx2()) {
+    } else if (fbgemmHasAvx2Support()) {
       fn = BaseType::template getOrCreate<inst_set_t::avx2>(
           accum,
           packed_rows_A,
@@ -135,10 +135,10 @@ void ExecuteKernel<
       int nc = ((packedB_.lastBcol() - 1) / nrSize_ + 1) * nrSize_;
       if (nc != nbSize_) {
         if (cpuinfo_initialize()) {
-          if (cpuinfo_has_x86_avx512f()) {
+          if (fbgemmHasAvx512Support()) {
             fn = BaseType::template getOrCreate<inst_set_t::avx512>(
                 accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
-          } else if (cpuinfo_has_x86_avx2()) {
+          } else if (fbgemmHasAvx2Support()) {
             fn = BaseType::template getOrCreate<inst_set_t::avx2>(
                 accum, packed_rows_A, nc, packedA_.numPackedCols(), nbSize_);
           } else {
@@ -203,7 +203,7 @@ void ExecuteKernel<
       int32_t nSize =
           C_buffer_start == C_tile_ ? jb * nbSize_ : packedB_.numCols();
       if (nSize) {
-        if (cpuinfo_has_x86_avx512f()) {
+        if (fbgemmHasAvx512Support()) {
           // TODO: avx512 path
           // Currently use avx2 code
           outputProcess_.template f<inst_set_t::avx2>(
@@ -212,7 +212,7 @@ void ExecuteKernel<
               {row_start_A, packed_rows_A, NDim * group, nSize},
               ldc_,
               ldc_);
-        } else if (cpuinfo_has_x86_avx2()) {
+        } else if (fbgemmHasAvx2Support()) {
           outputProcess_.template f<inst_set_t::avx2>(
               matC_,
               C_buffer_row_start,
@@ -228,7 +228,7 @@ void ExecuteKernel<
       if (C_buffer_start == C_tile_) {
         // When C_tile_ scratchpad was used to avoid accessing memory past
         // C_buffer_ .
-        if (cpuinfo_has_x86_avx512f()) {
+        if (fbgemmHasAvx512Support()) {
           // TODO: avx512 path
           // Currently use avx2 code
           outputProcess_.template f<inst_set_t::avx2>(
@@ -240,7 +240,7 @@ void ExecuteKernel<
                packedB_.lastBcol()},
               ldc_,
               leadingDim);
-        } else if (cpuinfo_has_x86_avx2()) {
+        } else if (fbgemmHasAvx2Support()) {
           outputProcess_.template f<inst_set_t::avx2>(
               matC_,
               C_tile_,
-- 
cgit v1.2.3