diff options
author | Jongsoo Park <jongsoo@fb.com> | 2018-11-27 21:05:28 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-11-27 21:13:17 +0300 |
commit | d4ee77f5a851879f4a778f122656158663b766b5 (patch) | |
tree | 1d1db56c63c55753bf237bc6fb4cd59810732001 /bench/PackedFloatInOutBenchmark.cc | |
parent | db52c82306e7aa10e2dde706b7205c30eec31cd5 (diff) |
per-group and per-channel quantization (#14340)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14340
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/25
Per-group and per-channel quantization in fbgemm
This diff also cleans up explicit template instantiation by using macro expansion
This diff also changes the randFill interface, which previously made it easy to mistakenly generate integer random numbers for floating-point vectors.
Using this in DNNLOWP operators will be done in a separate diff.
Reviewed By: dskhudia
Differential Revision: D13176386
fbshipit-source-id: e46c53e31e21520bded71b8ed86e8b19e010e2dd
Diffstat (limited to 'bench/PackedFloatInOutBenchmark.cc')
-rw-r--r-- | bench/PackedFloatInOutBenchmark.cc | 35 |
1 file changed, 17 insertions, 18 deletions
diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc index badbda0..79a750e 100644 --- a/bench/PackedFloatInOutBenchmark.cc +++ b/bench/PackedFloatInOutBenchmark.cc @@ -86,27 +86,27 @@ void performance_test() { int k = shape[2]; float alpha = 1.f, beta = 0.f; - aligned_vector<float> Afp32(m * k, 0.0f); - aligned_vector<uint8_t> Aint8(m * k, 0); + aligned_vector<float> Afp32(m * k); + aligned_vector<uint8_t> Aint8(Afp32.size()); - aligned_vector<float> Bfp32(k * n, 0.0f); - aligned_vector<int8_t> Bint8(k * n, 0); + aligned_vector<float> Bfp32(k * n); + aligned_vector<int8_t> Bint8(Bfp32.size()); - aligned_vector<float> Cfp32_mkl(m * n, 0.0f); - aligned_vector<float> Cfp32_fb(m * n, 0.0f); + aligned_vector<float> Cfp32_mkl(m * n); + aligned_vector<float> Cfp32_fb(Cfp32_mkl.size()); - aligned_vector<uint8_t> Cint8_fb(m * n, 0); - aligned_vector<int32_t> Cint32_buffer(m * n, 0); + aligned_vector<uint8_t> Cint8_fb(Cfp32_mkl.size()); + aligned_vector<int32_t> Cint32_buffer(Cfp32_mkl.size()); // A matrix - randFill(Aint8, 0, 255); + randFill<uint8_t>(Aint8, 0, 255); float Aint8_scale = 0.11; int32_t Aint8_zero_point = 43; for (auto i = 0; i < Afp32.size(); ++i) { Afp32[i] = Aint8_scale * (Aint8[i] - Aint8_zero_point); } - randFill(Bint8, -128, 127); + randFill<int8_t>(Bint8, -128, 127); avoidOverflow(m, n, k, Aint8.data(), Bint8.data()); float Bint8_scale = 0.49; @@ -116,10 +116,9 @@ void performance_test() { } // computing column offset - vector<int32_t> col_offsets; - col_offsets.resize(n); + vector<int32_t> col_offsets(n); col_offsets_with_zero_pt_s8acc32_ref( - k, n, n, Bint8.data(), Bint8_zero_point, col_offsets.data()); + k, n, n, Bint8.data(), &Bint8_zero_point, col_offsets.data(), n); double ttot = 0; std::string type; @@ -172,8 +171,7 @@ void performance_test() { // printMatrix(matrix_op_t::NoTranspose, col_offsets.data(), 1, n, n, "col // offsets before"); - vector<int32_t> row_offset_buf; - row_offset_buf.resize( + 
vector<int32_t> row_offset_buf( PackAWithQuantRowOffset<uint8_t>::rowOffsetBufferSize()); PackAWithQuantRowOffset<uint8_t> packAN( @@ -201,12 +199,13 @@ void performance_test() { ReQuantizeForFloat<false> outputProcObj( doNothingObj, Aint8_scale, - Bint8_scale, + &Bint8_scale, Aint8_zero_point, - Bint8_zero_point, + &Bint8_zero_point, packAN.getRowOffsetBuffer(), col_offsets.data(), - nullptr); + nullptr, + n); ttot = 0; type = "FBGEMM_i8_acc32"; |