Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Jongsoo Park <jongsoo@fb.com> 2019-02-01 22:50:44 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-02-01 22:53:50 +0300
commit: d90dbbacf0307681fef2c7d5f86685c219f433c0 (patch)
tree: d6e16041eb915352a5860dffb432dde1173c6126 /src
parent: 3373890bdeb69129ae1f37d3e764bcbea462806d (diff)
make G slowest moving dim of packed weight of gconv (#62)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/62

This more naturally aligns with the access pattern.

Reviewed By: dskhudia

Differential Revision: D13833672

fbshipit-source-id: 2b383b5aaf2f60201c8d433f86b64c9cf32960a2
Diffstat (limited to 'src')
-rw-r--r-- src/GroupwiseConvAcc32Avx2.cc | 8
-rw-r--r-- src/PackWeightMatrixForGConv.cc | 8
2 files changed, 9 insertions, 7 deletions
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index ce93b87..7906c04 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -185,7 +185,7 @@ void GenConvKernel<int32_t>::genForLoadingWeights<inst_set_t::avx2>(
WRegs_avx2_[r * S_ + s],
x86::dword_ptr(
wghts_R_,
- (r * S_ + s) * G_ * K_per_G_ * C_per_G_ * sizeof(int8_t)));
+ (r * S_ + s) * 2 * K_per_G_ * C_per_G_ * sizeof(int8_t)));
}
}
}
@@ -1387,7 +1387,8 @@ void fbgemmGroupwiseConvBase_(
fpConv(
actStartGroup,
- packed_weights.getBuf() + g * K_per_G * C_per_G,
+ packed_weights.getBuf() +
+ g * conv_param.K[0] * conv_param.K[1] * K_per_G * C_per_G,
currOutBuf,
a_zero_point,
H,
@@ -1574,7 +1575,8 @@ void fbgemmGroupwiseConv(
fpConv(
actStartGroup,
- packed_weights.getBuf() + g * K_per_G * C_per_G,
+ packed_weights.getBuf() +
+ g * conv_param.K[0] * conv_param.K[1] * K_per_G * C_per_G,
currOutBuf,
a_zero_point,
H,
diff --git a/src/PackWeightMatrixForGConv.cc b/src/PackWeightMatrixForGConv.cc
index e6c9b7d..e38fba9 100644
--- a/src/PackWeightMatrixForGConv.cc
+++ b/src/PackWeightMatrixForGConv.cc
@@ -42,10 +42,10 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
* Let IC_per_G be number of input channels per group and OC_per_G be number of
* output channels per group.
*
- * For IC_per_G == 4 && OC_per_G == 4 optimized
+ * For IC_per_G == 4 && OC_per_G == 4 optimized
* kernel works on 2 groups at a time hence input channels for g and g+1 group
- * are laid out sequentially for each output channel, i.e., the layout is R S
- * (G/2) K (2C)
+ * are laid out sequentially for each output channel, i.e., the layout is (G/2)
+ * R S K (2C) and K (2C) is in each 32B vector.
* We work on two groups at a time to fully utilize the avx2 SIMD width of
* 256-bits.
*
@@ -78,7 +78,7 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
: sdata_
[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k];
pdata_
- [((((r * S + s) * (G / 2) + (g / 2)) * OC_per_G + k) * 2 +
+ [(((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 +
(g % 2)) *
IC_per_G +
c] = b;