diff options
author | Jongsoo Park <jongsoo@fb.com> | 2018-11-29 06:41:59 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-11-29 06:44:08 +0300 |
commit | 027de07a11a0460fd1daffb026d50dba0e56eb79 (patch) | |
tree | 5e0f497059d6a22b18de8508c2031ffdbc9f52d3 /src/Fbgemm.cc | |
parent | 90535d3da35f9d3da6a8dbd62da0c68d01696924 (diff) |
sparse convolution output processing (#27)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/27
DoSpmdmOnInpBuffer can't be used together with PackAWithIm2Col because DoSpmdmOnInpBuffer expects an im2col'ed A matrix. This diff implements DoSConvOnInpBuffer that does sparse convolution directly on the A input without im2col. The performance is well optimized, and we need to see if this implementation is good enough to get good resnet50 performance.
Reviewed By: dskhudia
Differential Revision: D13192336
fbshipit-source-id: 2076555ba9749e111afbaec408a2bfa0f55bd5bc
Diffstat (limited to 'src/Fbgemm.cc')
-rw-r--r-- | src/Fbgemm.cc | 25 |
1 file changed, 25 insertions, 0 deletions
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index a8bf02f..6623fe7 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -376,6 +376,31 @@ INSTANTIATE_Q_GRANS(true); #undef INSTANTIATE_Q_GRANS #undef INSTANTIATE_BASE +#define INSTANTIATE_BASE(RELU, Q_GRAN) \ + template void fbgemmPacked( \ + PackMatrix<PackAWithIm2Col<uint8_t, int16_t>, uint8_t, int16_t>& packA, \ + PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, \ + uint8_t* C, \ + int32_t* C_buffer, \ + uint32_t ldc, \ + const DoSConvOnInpBuffer< \ + uint8_t, \ + int32_t, \ + ReQuantizeOutput<RELU, Q_GRAN>>& outProcess, \ + int thread_id, \ + int num_threads); + +#define INSTANTIATE_Q_GRANS(RELU) \ + INSTANTIATE_BASE(RELU, QuantizationGranularity::TENSOR); \ + INSTANTIATE_BASE(RELU, QuantizationGranularity::GROUP); \ + INSTANTIATE_BASE(RELU, QuantizationGranularity::OUT_CHANNEL); + +INSTANTIATE_Q_GRANS(false); +INSTANTIATE_Q_GRANS(true); + +#undef INSTANTIATE_Q_GRANS +#undef INSTANTIATE_BASE + template void fbgemmPacked( PackMatrix<PackAWithRowOffset<uint8_t, int16_t>, uint8_t, int16_t>& packA, PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, |