diff options
author | Jongsoo Park <jongsoo@fb.com> | 2018-11-29 06:41:59 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-11-29 06:44:08 +0300 |
commit | 027de07a11a0460fd1daffb026d50dba0e56eb79 (patch) | |
tree | 5e0f497059d6a22b18de8508c2031ffdbc9f52d3 /src/Fbgemm.cc | |
parent | 90535d3da35f9d3da6a8dbd62da0c68d01696924 (diff) |
sparse convolution output processing (#27)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/27
DoSpmdmOnInpBuffer can't be used together with PackAWithIm2Col because DoSpmdmOnInpBuffer expects an im2col'ed A matrix. This diff implements DoSConvOnInpBuffer that does sparse convolution directly on the A input without im2col. The performance is well optimized, and we need to see if this implementation is good enough to get good resnet50 performance.
Reviewed By: dskhudia
Differential Revision: D13192336
fbshipit-source-id: 2076555ba9749e111afbaec408a2bfa0f55bd5bc
Diffstat (limited to 'src/Fbgemm.cc')
-rw-r--r-- | src/Fbgemm.cc | 25 |
1 file changed, 25 insertions, 0 deletions
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index a8bf02f..6623fe7 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -376,6 +376,31 @@ INSTANTIATE_Q_GRANS(true); #undef INSTANTIATE_Q_GRANS #undef INSTANTIATE_BASE +#define INSTANTIATE_BASE(RELU, Q_GRAN) \ + template void fbgemmPacked( \ + PackMatrix<PackAWithIm2Col<uint8_t, int16_t>, uint8_t, int16_t>& packA, \ + PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, \ + uint8_t* C, \ + int32_t* C_buffer, \ + uint32_t ldc, \ + const DoSConvOnInpBuffer< \ + uint8_t, \ + int32_t, \ + ReQuantizeOutput<RELU, Q_GRAN>>& outProcess, \ + int thread_id, \ + int num_threads); + +#define INSTANTIATE_Q_GRANS(RELU) \ + INSTANTIATE_BASE(RELU, QuantizationGranularity::TENSOR); \ + INSTANTIATE_BASE(RELU, QuantizationGranularity::GROUP); \ + INSTANTIATE_BASE(RELU, QuantizationGranularity::OUT_CHANNEL); + +INSTANTIATE_Q_GRANS(false); +INSTANTIATE_Q_GRANS(true); + +#undef INSTANTIATE_Q_GRANS +#undef INSTANTIATE_BASE + template void fbgemmPacked( PackMatrix<PackAWithRowOffset<uint8_t, int16_t>, uint8_t, int16_t>& packA, PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, |