diff options
author | Jianyu Huang <jianyuhuang@fb.com> | 2018-11-08 22:09:04 +0300 |
---|---|---|
committer | Jianyu Huang <jianyuhuang@fb.com> | 2018-11-08 22:09:04 +0300 |
commit | 428a0b6cede232eb5c4e9c3bbd8e9d74d8e34500 (patch) | |
tree | 8597c3316e00e00fa5d0fc8939c9ae5c518f767a /src/Fbgemm.cc | |
parent | d90e2e1659f9f991319d05bfc58640aeafa733aa (diff) |
Sync with internal copy: Asymmetric padding; fbgemm2 -> fbgemm
Diffstat (limited to 'src/Fbgemm.cc')
-rw-r--r-- | src/Fbgemm.cc | 46 |
1 file changed, 43 insertions, 3 deletions
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index 9195a05..f8f0d34 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -15,9 +15,9 @@ double computing_time = 0.0; double run_time = 0.0; #endif -using namespace fbgemm2; +using namespace fbgemm; -namespace fbgemm2 { +namespace fbgemm { template < typename packingAMatrix, @@ -246,6 +246,26 @@ template void fbgemmPacked( int num_threads); template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int32_t>, uint8_t, int32_t>& packA, + PackMatrix<PackBMatrix<int8_t, int32_t>, int8_t, int32_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int32_t, 3>, uint8_t, int32_t>& packA, + PackMatrix<PackBMatrix<int8_t, int32_t>, int8_t, int32_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( PackMatrix<PackAWithQuantRowOffset<uint8_t, int32_t>, uint8_t, int32_t>& packA, PackMatrix<PackBMatrix<int8_t, int32_t>, int8_t, int32_t>& packB, @@ -361,6 +381,26 @@ template void fbgemmPacked( int num_threads); template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int16_t>, uint8_t, int16_t>& packA, + PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int16_t, 3>, uint8_t, int16_t>& packA, + PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( PackMatrix<PackAMatrix<uint8_t, int16_t>, uint8_t, int16_t>& packA, PackMatrix<PackBMatrix<int8_t, int16_t>, 
int8_t, int16_t>& packB, int32_t* C, @@ -380,4 +420,4 @@ template void fbgemmPacked( int thread_id, int num_threads); -} // namespace fbgemm2 +} // namespace fbgemm |