Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/PackWeightMatrixForGConv.cc')
-rw-r--r--src/PackWeightMatrixForGConv.cc141
1 files changed, 112 insertions, 29 deletions
diff --git a/src/PackWeightMatrixForGConv.cc b/src/PackWeightMatrixForGConv.cc
index 0fb0e2c..ba6adf3 100644
--- a/src/PackWeightMatrixForGConv.cc
+++ b/src/PackWeightMatrixForGConv.cc
@@ -36,8 +36,61 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
}
/**
- * @brief Pack weight tensor in a suitable format required for the optimized
- * kernel.
+ * @brief Flat offset into the unpacked weight buffer for kernel position (r, s), output channel k and input channel c within group g.
+ *
+ * Non-transposed (tr == false): G (R S C/G) K/G
+ * Transposed (tr == true): G K/G (R S C/G)
+ * Declared inline because pack_unpack_() calls it once per weight element
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpacked_index_(
+ int r, int s, int k, int g, int c, bool tr) {
+ // R, S: kernel extents K[0], K[1]; G: group count; IC_per_G / OC_per_G: channels per group
+ int R = conv_param_.K[0];
+ int S = conv_param_.K[1];
+ int G = conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
+
+ int idx;
+ if (tr) {
+ idx = (((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c;
+ } else {
+ idx = (((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k;
+ }
+ return idx;
+}
+
+/**
+ * @brief Flat offset into the packed weight buffer for kernel position (r, s), output channel k and input channel c within group g.
+ *
+ * IC_per_G == 4 uses layout (G/2) R S K 2 C; any other IC_per_G uses G (C/4) R S K 4.
+ * Declared inline because pack_unpack_() calls it once per weight element
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::packed_index_(
+ int r, int s, int k, int g, int c) {
+ // R, S: kernel extents K[0], K[1]; G: group count; IC_per_G / OC_per_G: channels per group
+ int R = conv_param_.K[0];
+ int S = conv_param_.K[1];
+ int G = conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
+
+ int idx;
+ // For IC_per_G == 4, groups are handled in pairs: g / 2 picks the pair, g % 2 the slot inside it
+ if (IC_per_G == 4) {
+ idx = (((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 + (g % 2))
+ * IC_per_G + c;
+ } else {
+ idx = ((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) * OC_per_G + k)
+ * 4 + (c % 4);
+ }
+ return idx;
+}
+
+/**
+ * @brief Pack or unpack the weight matrix
*
* Let IC_per_G be number of input channels per group and OC_per_G be number of
* output channels per group.
@@ -53,15 +106,17 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
* on 2 groups at a time and full SIMD width can be efficiently utilized even
* while working on 1 group at a time.
* In this case, the layout is G (C/4) R S K 4
- */
+*/
+
template <typename T, typename accT, int SPATIAL_DIM>
-void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack_unpack_(
+ const T* src, T* dst, bool ispack) {
// filters are assumed to be in G RS C/G K/G format
int R = conv_param_.K[0];
int S = conv_param_.K[1];
int G = conv_param_.G;
- int IC_per_G = conv_param_.IC / conv_param_.G;
- int OC_per_G = conv_param_.OC / conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
// If transpose option is set, the weight matrix is in layout G K/G (R S C/G)
// instead of G (R S C/G) K/G
@@ -73,25 +128,13 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
for (int k = 0; k < OC_per_G; ++k) {
for (int g = 0; g < G; ++g) {
for (int c = 0; c < IC_per_G; ++c) {
- inpType b = tr
- ? sdata_
- [(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c]
- : sdata_
- [(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k];
- if (IC_per_G == 4) {
- // For IC_per_G == 4, we need to work on 2 groups at a time
- pdata_
- [(((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 +
- (g % 2)) *
- IC_per_G +
- c] = b;
+ int p_idx = packed_index_(r, s, k, g, c);
+ int up_idx = unpacked_index_(r, s, k, g, c, tr);
+ // Pack: src (unpacked) -> dst (packed)
+ if (ispack) {
+ dst[p_idx] = src[up_idx];
} else {
- pdata_
- [((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) *
- OC_per_G +
- k) *
- 4 +
- (c % 4)] = b;
+ dst[up_idx] = src[p_idx];
}
}
}
@@ -99,14 +142,54 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
}
}
} else {
+ // For pack & transposed, call transposeConvWeights()
+ // G K/G (R S C/G) => G (R S C/G) K/G
if (tr) {
- // conv_ref expects weights to be in G (R S C/G) K/G format
- transposeConvWeights(conv_param_, sdata_, pdata_);
+ if (ispack) {
+ transposeConvWeights(conv_param_, src, dst);
+ } else {
+ // TODO: Wrap this as an inverseTransposeConvWeights()?
+ // For unpack & transposed, undo transposeConvWeights() by hand
+ // G (R S C/G) K/G => G K/G (R S C/G)
+ for (int r = 0; r < R; ++r) {
+ for (int s = 0; s < S; ++s) {
+ for (int k = 0; k < OC_per_G; ++k) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < IC_per_G; ++c) {
+ dst[(((g * OC_per_G + k) * R + r) * S + s)
+ * IC_per_G + c] =
+ src[(((g * R + r) * S + s) * IC_per_G + c)
+ * OC_per_G + k];
+ }
+ }
+ }
+ }
+ }
+ } // end if(ispack)
} else {
// just copy the data for not supported cases
- memcpy(pdata_, sdata_, G * R * S * OC_per_G * IC_per_G * sizeof(inpType));
- }
- }
+ memcpy(dst, src,
+ G * R * S * OC_per_G * IC_per_G * sizeof(inpType));
+ } // end if(tr)
+ } // end if(fbgemmOptimizedGConv(conv_param_))
+}
+
+/**
+ * @brief Pack weight tensor in a suitable format required for the optimized
+ * kernel: runs pack_unpack_() from sdata_ into pdata_ with ispack = true.
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
+ pack_unpack_(sdata_, pdata_, true);
+}
+
+/**
+ * @brief Unpack the packed weight tensor (for the optimized kernel)
+ * to the original form: runs pack_unpack_() from pdata_ into origin_buf with ispack = false.
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpack(T* origin_buf) {
+ pack_unpack_(const_cast<const T*>(pdata_), origin_buf, false);
+}
template class PackWeightMatrixForGConv<int8_t, int32_t, 2>;