Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jongsoo Park <jongsoo@fb.com> 2018-12-11 09:59:28 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-12-11 10:01:30 +0300
commit: ebbe4f4fca119e9787f47b769591643ddfc8c4a7 (patch)
tree: 6c5c19671f00aa00266b9bbf17e969f9fd42ffad /src/ExecuteKernelU8S8.cc
parent: 895646cfe2b68e42a506c49217a635270d42bd09 (diff)
instantiate more kernels for PackAMatrix (#47)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/47
PackAMatrix (compared to PackAWithRowOffset) can be a faster alternative when B_zero_point = 0.
Reviewed By: jianyuh
Differential Revision: D13413605
fbshipit-source-id: 2cac4560e8f166d19c58c65ae25400d1b0795b19
Diffstat (limited to 'src/ExecuteKernelU8S8.cc')
-rw-r--r-- src/ExecuteKernelU8S8.cc 39
1 files changed, 19 insertions, 20 deletions
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index 152d7f1..3b6e059 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -243,25 +243,30 @@ void ExecuteKernel<
////////////////////////////////////////////////////////////////////////////////
// ReQuantizeOutput
-#define INSTANTIATE_BASE(ACC_T, RELU, Q_GRAN) \
- template class ExecuteKernel< \
- PackAWithRowOffset<uint8_t, ACC_T>, \
- PackBMatrix<int8_t, ACC_T>, \
- uint8_t, \
+#define INSTANTIATE_BASE(PACK_A, ACC_T, RELU, Q_GRAN) \
+ template class ExecuteKernel< \
+ PACK_A<uint8_t, ACC_T>, \
+ PackBMatrix<int8_t, ACC_T>, \
+ uint8_t, \
ReQuantizeOutput<RELU, Q_GRAN>>;
-#define INSTANTIATE_Q_GRANS(ACC_T, RELU) \
- INSTANTIATE_BASE(ACC_T, RELU, QuantizationGranularity::TENSOR); \
- INSTANTIATE_BASE(ACC_T, RELU, QuantizationGranularity::GROUP); \
- INSTANTIATE_BASE(ACC_T, RELU, QuantizationGranularity::OUT_CHANNEL);
+#define INSTANTIATE_Q_GRANS(PACK_A, ACC_T, RELU) \
+ INSTANTIATE_BASE(PACK_A, ACC_T, RELU, QuantizationGranularity::TENSOR); \
+ INSTANTIATE_BASE(PACK_A, ACC_T, RELU, QuantizationGranularity::GROUP); \
+ INSTANTIATE_BASE(PACK_A, ACC_T, RELU, QuantizationGranularity::OUT_CHANNEL);
-#define INSTANTIATE_RELU(ACC_T) \
- INSTANTIATE_Q_GRANS(ACC_T, false); \
- INSTANTIATE_Q_GRANS(ACC_T, true);
+#define INSTANTIATE_RELU(PACK_A, ACC_T) \
+ INSTANTIATE_Q_GRANS(PACK_A, ACC_T, false); \
+ INSTANTIATE_Q_GRANS(PACK_A, ACC_T, true);
-INSTANTIATE_RELU(int32_t);
-INSTANTIATE_RELU(int16_t);
+#define INSTANTIATE_ACC_T(PACK_A) \
+ INSTANTIATE_RELU(PACK_A, int32_t); \
+ INSTANTIATE_RELU(PACK_A, int16_t);
+
+INSTANTIATE_ACC_T(PackAMatrix);
+INSTANTIATE_ACC_T(PackAWithRowOffset);
+#undef INSTANTIATE_ACC_T
#undef INSTANTIATE_RELU
#undef INSTANTIATE_Q_GRANS
#undef INSTANTIATE_BASE
@@ -295,12 +300,6 @@ INSTANTIATE_RELU(int16_t);
#undef INSTANTIATE_Q_GRANS
#undef INSTANTIATE_BASE
-template class ExecuteKernel<
- PackAMatrix<uint8_t, int16_t>,
- PackBMatrix<int8_t, int16_t>,
- uint8_t,
- ReQuantizeOutput<false>>;
-
////////////////////////////////////////////////////////////////////////////////
// ReQuantizeForFloat
#define INSTANTIATE_BASE(PACK_A, RELU, Q_GRAN) \