diff options
Diffstat (limited to 'src/Fbgemm.cc')
-rw-r--r-- | src/Fbgemm.cc | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index 9195a05..f8f0d34 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -15,9 +15,9 @@ double computing_time = 0.0; double run_time = 0.0; #endif -using namespace fbgemm2; +using namespace fbgemm; -namespace fbgemm2 { +namespace fbgemm { template < typename packingAMatrix, @@ -246,6 +246,26 @@ template void fbgemmPacked( int num_threads); template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int32_t>, uint8_t, int32_t>& packA, + PackMatrix<PackBMatrix<int8_t, int32_t>, int8_t, int32_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int32_t, 3>, uint8_t, int32_t>& packA, + PackMatrix<PackBMatrix<int8_t, int32_t>, int8_t, int32_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( PackMatrix<PackAWithQuantRowOffset<uint8_t, int32_t>, uint8_t, int32_t>& packA, PackMatrix<PackBMatrix<int8_t, int32_t>, int8_t, int32_t>& packB, @@ -361,6 +381,26 @@ template void fbgemmPacked( int num_threads); template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int16_t>, uint8_t, int16_t>& packA, + PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( + PackMatrix<PackAWithIm2Col<uint8_t, int16_t, 3>, uint8_t, int16_t>& packA, + PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, + uint8_t* C, + int32_t* C_buffer, + uint32_t ldc, + const ReQuantizeOutput<false>& outProcess, + int thread_id, + int num_threads); + +template void fbgemmPacked( PackMatrix<PackAMatrix<uint8_t, int16_t>, uint8_t, int16_t>& packA, PackMatrix<PackBMatrix<int8_t, int16_t>, int8_t, int16_t>& packB, int32_t* C, @@ -380,4 +420,4 @@ template void fbgemmPacked( int thread_id, int num_threads); -} // namespace fbgemm2 +} // namespace fbgemm |