Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jongsoo Park <jongsoo@fb.com> 2018-11-29 06:41:59 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-11-29 06:44:08 +0300
commit: 027de07a11a0460fd1daffb026d50dba0e56eb79 (patch)
tree: 5e0f497059d6a22b18de8508c2031ffdbc9f52d3 /src/ExecuteKernelU8S8.cc
parent: 90535d3da35f9d3da6a8dbd62da0c68d01696924 (diff)
sparse convolution output processing (#27)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/27

DoSpmdmOnInpBuffer can't be used together with PackAWithIm2Col because DoSpmdmOnInpBuffer expects an im2col'ed A matrix. This diff implements DoSConvOnInpBuffer, which does sparse convolution directly on the A input without im2col. The performance is well optimized, and we need to see whether this implementation is good enough to get good resnet50 performance.

Reviewed By: dskhudia

Differential Revision: D13192336

fbshipit-source-id: 2076555ba9749e111afbaec408a2bfa0f55bd5bc
Diffstat (limited to 'src/ExecuteKernelU8S8.cc')
-rw-r--r-- src/ExecuteKernelU8S8.cc 18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index f1ec882..152d7f1 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -381,6 +381,24 @@ INSTANTIATE_Q_GRANS(true);
#undef INSTANTIATE_Q_GRANS
#undef INSTANTIATE_BASE
+#define INSTANTIATE_BASE(RELU, Q_GRAN) \
+ template class ExecuteKernel< \
+ PackAWithIm2Col<uint8_t, int16_t>, \
+ PackBMatrix<int8_t, int16_t>, \
+ uint8_t, \
+ DoSConvOnInpBuffer<uint8_t, int32_t, ReQuantizeOutput<RELU, Q_GRAN>>>;
+
+#define INSTANTIATE_Q_GRANS(RELU) \
+ INSTANTIATE_BASE(RELU, QuantizationGranularity::TENSOR); \
+ INSTANTIATE_BASE(RELU, QuantizationGranularity::GROUP); \
+ INSTANTIATE_BASE(RELU, QuantizationGranularity::OUT_CHANNEL);
+
+INSTANTIATE_Q_GRANS(false);
+INSTANTIATE_Q_GRANS(true);
+
+#undef INSTANTIATE_Q_GRANS
+#undef INSTANTIATE_BASE
+
template class ExecuteKernel<
PackAWithRowOffset<uint8_t, int16_t>,
PackBMatrix<int8_t, int16_t>,