Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJongsoo Park <jongsoo@fb.com>2018-11-27 21:05:28 +0300
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2018-11-27 21:13:17 +0300
commitd4ee77f5a851879f4a778f122656158663b766b5 (patch)
tree1d1db56c63c55753bf237bc6fb4cd59810732001 /bench/Im2ColFusedRequantizeAcc16Benchmark.cc
parentdb52c82306e7aa10e2dde706b7205c30eec31cd5 (diff)
per-group and per-channel quantization (#14340)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14340 Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/25 Per-group and per-channel quantization in fbgemm. This diff also cleans up explicit template instantiation using macro expansion. This diff also changes the randFill interface, which previously made it easy to mistakenly generate integer random numbers for floating-point vectors. Using this in DNNLOWP operators will be done in a separate diff. Reviewed By: dskhudia Differential Revision: D13176386 fbshipit-source-id: e46c53e31e21520bded71b8ed86e8b19e010e2dd
Diffstat (limited to 'bench/Im2ColFusedRequantizeAcc16Benchmark.cc')
-rw-r--r--bench/Im2ColFusedRequantizeAcc16Benchmark.cc37
1 file changed, 11 insertions, 26 deletions
diff --git a/bench/Im2ColFusedRequantizeAcc16Benchmark.cc b/bench/Im2ColFusedRequantizeAcc16Benchmark.cc
index 2115863..cb2edf5 100644
--- a/bench/Im2ColFusedRequantizeAcc16Benchmark.cc
+++ b/bench/Im2ColFusedRequantizeAcc16Benchmark.cc
@@ -125,43 +125,29 @@ void performance_test() {
chrono::time_point<chrono::high_resolution_clock> begin, end;
for (auto conv_p : shapes) {
- aligned_vector<float> Afp32(
- conv_p.MB * conv_p.IN_DIM[0] * conv_p.IN_DIM[1] * conv_p.IC, 0.0f);
aligned_vector<uint8_t> Aint8(
- conv_p.MB * conv_p.IN_DIM[0] * conv_p.IN_DIM[1] * conv_p.IC, 0);
-
+ conv_p.MB * conv_p.IN_DIM[0] * conv_p.IN_DIM[1] * conv_p.IC);
aligned_vector<uint8_t> Aint8_out(
conv_p.MB * conv_p.OUT_DIM[0] * conv_p.OUT_DIM[1] * conv_p.K[0] *
- conv_p.K[1] * conv_p.IC,
- 0);
+ conv_p.K[1] * conv_p.IC);
- aligned_vector<float> Bfp32(
- conv_p.K[0] * conv_p.K[1] * conv_p.IC * conv_p.OC, 0.0f);
aligned_vector<int8_t> Bint8(
- conv_p.K[0] * conv_p.K[1] * conv_p.IC * conv_p.OC, 0);
+ conv_p.K[0] * conv_p.K[1] * conv_p.IC * conv_p.OC);
aligned_vector<int32_t> Cint32_ref(
- conv_p.MB * conv_p.OUT_DIM[0] * conv_p.OUT_DIM[1] * conv_p.OC, 0);
-
- aligned_vector<int32_t> Cint32_fb(
- conv_p.MB * conv_p.OUT_DIM[0] * conv_p.OUT_DIM[1] * conv_p.OC, 0);
-
- aligned_vector<int32_t> Cint32_fb2(
- conv_p.MB * conv_p.OUT_DIM[0] * conv_p.OUT_DIM[1] * conv_p.OC, 0);
+ conv_p.MB * conv_p.OUT_DIM[0] * conv_p.OUT_DIM[1] * conv_p.OC);
+ aligned_vector<int32_t> Cint32_fb(Cint32_ref.size());
+ aligned_vector<int32_t> Cint32_fb2(Cint32_ref.size());
// A matrix (input activations)
- randFill(Afp32, 0, 5);
+ randFill<uint8_t>(Aint8, 0, 5);
int32_t Aint8_zero_point = 4;
- for (auto i = 0; i < Afp32.size(); ++i) {
- Aint8[i] = static_cast<uint8_t>(Afp32[i]);
- }
+ aligned_vector<float> Afp32(Aint8.begin(), Aint8.end());
// B matrix (weights)
- randFill(Bfp32, -4, 4);
+ randFill<int8_t>(Bint8, -4, 4);
// int32_t Bint8_zero_point = -3;
- for (auto i = 0; i < Bfp32.size(); ++i) {
- Bint8[i] = static_cast<int8_t>(Bfp32[i]);
- }
+ aligned_vector<float> Bfp32(Bint8.begin(), Bint8.end());
// reference implementation
conv_ref(
@@ -184,8 +170,7 @@ void performance_test() {
double ttot = 0.0;
string runType;
- vector<int32_t> row_offset_buf;
- row_offset_buf.resize(
+ vector<int32_t> row_offset_buf(
PackAWithIm2Col<uint8_t, int16_t>::rowOffsetBufferSize());
PackAWithIm2Col<uint8_t, int16_t> packA(