Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jaewon Lee <jaewon@fb.com> 2019-07-06 00:58:49 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-07-06 01:03:34 +0300
commit: 64a2c73a425e4113839f2c2b596ea28d632d20f6 (patch)
tree: 228cbd60a528309c4eb6884b03cb78ca8a73b7ba
parent: b0cf97df8e2f368d8e0c1d2e9e1cacbd7638f79d (diff)
Implement ::unpack() for PackWeightMatrixForGConv
Summary: Implement ::unpack() for PackWeightMatrixForGConv. The unpack index calculation is the inverse of ::pack().

Reviewed By: dskhudia

Differential Revision: D16085552

fbshipit-source-id: b8866365dc425fee2cb985b3e48c627198ebc29a
-rw-r--r-- include/fbgemm/Fbgemm.h 21
-rw-r--r-- src/PackWeightMatrixForGConv.cc 141
-rw-r--r-- test/GConvTest.cc 57
3 files changed, 190 insertions, 29 deletions
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index 721b12f..87f0907 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -504,6 +504,11 @@ class FBGEMM_API PackWeightMatrixForGConv {
void pack();
/**
+ * @brief Unpacks the packed buffer (pdata_) into origin_buf, using the source matrix layout.
+ */
+ void unpack(T* origin_buf);
+
+ /**
* @brief Return packed data
*/
inpType* getBuf() {
@@ -522,6 +527,22 @@ class FBGEMM_API PackWeightMatrixForGConv {
const T* sdata_;
T* pdata_;
bool bufAllocatedHere_;
+
+ /**
+ * @brief Internal function performing both pack & unpack
+ */
+ void pack_unpack_(const T* src, T* dst, bool ispack);
+
+ /**
+ * @brief Get the index of the unpacked data
+ */
+ int unpacked_index_(int r, int s, int k, int g, int c, bool tr);
+
+ /**
+ * @brief Get the index of the packed data
+ */
+ int packed_index_(int r, int s, int k, int g, int c);
+
};
/**
diff --git a/src/PackWeightMatrixForGConv.cc b/src/PackWeightMatrixForGConv.cc
index 0fb0e2c..ba6adf3 100644
--- a/src/PackWeightMatrixForGConv.cc
+++ b/src/PackWeightMatrixForGConv.cc
@@ -36,8 +36,61 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
}
/**
- * @brief Pack weight tensor in a suitable format required for the optimized
- * kernel.
+ * @brief Get the index of the unpacked data for a given <r, s, k, g, c, tr>
+ *
+ * Non-transposed: G (R S C/G) K/G
+ * Transposed: G K/G (R S C/G)
+ * Using inline as this will be called frequently
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpacked_index_(
+ int r, int s, int k, int g, int c, bool tr) {
+ // Get the full dimensions
+ int R = conv_param_.K[0];
+ int S = conv_param_.K[1];
+ int G = conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
+
+ int idx;
+ if (tr) {
+ idx = (((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c;
+ } else {
+ idx = (((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k;
+ }
+ return idx;
+}
+
+/**
+ * @brief Get the index of the packed data for a given <r, s, k, g, c>
+ *
+ * The index may differ depending on IC_per_G.
+ * Using inline as this will be called frequently
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::packed_index_(
+ int r, int s, int k, int g, int c) {
+ // Get the full dimensions
+ int R = conv_param_.K[0];
+ int S = conv_param_.K[1];
+ int G = conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
+
+ int idx;
+ // For IC_per_G == 4, we need to work on 2 groups at a time
+ if (IC_per_G == 4) {
+ idx = (((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 + (g % 2))
+ * IC_per_G + c;
+ } else {
+ idx = ((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) * OC_per_G + k)
+ * 4 + (c % 4);
+ }
+ return idx;
+}
+
+/**
+ * @brief Pack or unpack matrix
*
* Let IC_per_G be number of input channels per group and OC_per_G be number of
* output channels per group.
@@ -53,15 +106,17 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
* on 2 groups at a time and full SIMD width can be efficiently utilized even
* while working on 1 group at a time.
* In this case, the layout is G (C/4) R S K 4
- */
+*/
+
template <typename T, typename accT, int SPATIAL_DIM>
-void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack_unpack_(
+ const T* src, T* dst, bool ispack) {
// filters are assumed to be in G RS C/G K/G format
int R = conv_param_.K[0];
int S = conv_param_.K[1];
int G = conv_param_.G;
- int IC_per_G = conv_param_.IC / conv_param_.G;
- int OC_per_G = conv_param_.OC / conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
// If transpose option is set, the weight matrix is in layout G K/G (R S C/G)
// instead of G (R S C/G) K/G
@@ -73,25 +128,13 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
for (int k = 0; k < OC_per_G; ++k) {
for (int g = 0; g < G; ++g) {
for (int c = 0; c < IC_per_G; ++c) {
- inpType b = tr
- ? sdata_
- [(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c]
- : sdata_
- [(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k];
- if (IC_per_G == 4) {
- // For IC_per_G == 4, we need to work on 2 groups at a time
- pdata_
- [(((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 +
- (g % 2)) *
- IC_per_G +
- c] = b;
+ int p_idx = packed_index_(r, s, k, g, c);
+ int up_idx = unpacked_index_(r, s, k, g, c, tr);
+ // Pack: src (unpacked) -> dst (packed)
+ if (ispack) {
+ dst[p_idx] = src[up_idx];
} else {
- pdata_
- [((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) *
- OC_per_G +
- k) *
- 4 +
- (c % 4)] = b;
+ dst[up_idx] = src[p_idx];
}
}
}
@@ -99,14 +142,54 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
}
}
} else {
+ // For pack & transposed, call transposeConvWeights()
+ // G K/G (R S C/G) => G (R S C/G) K/G
if (tr) {
- // conv_ref expects weights to be in G (R S C/G) K/G format
- transposeConvWeights(conv_param_, sdata_, pdata_);
+ if (ispack) {
+ transposeConvWeights(conv_param_, src, dst);
+ } else {
+ // TODO: Wrap this as an inverseTransposeConvWeights()?
+ // For unpack & transposed, apply the inverse of transposeConvWeights()
+ // G (R S C/G) K/G => G K/G (R S C/G)
+ for (int r = 0; r < R; ++r) {
+ for (int s = 0; s < S; ++s) {
+ for (int k = 0; k < OC_per_G; ++k) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < IC_per_G; ++c) {
+ dst[(((g * OC_per_G + k) * R + r) * S + s)
+ * IC_per_G + c] =
+ src[(((g * R + r) * S + s) * IC_per_G + c)
+ * OC_per_G + k];
+ }
+ }
+ }
+ }
+ }
+ } // end if(ispack)
} else {
// just copy the data for not supported cases
- memcpy(pdata_, sdata_, G * R * S * OC_per_G * IC_per_G * sizeof(inpType));
- }
- }
+ memcpy(dst, src,
+ G * R * S * OC_per_G * IC_per_G * sizeof(inpType));
+ } // end if(tr)
+ } // end if(fbgemmOptimizedGConv(conv_param_))
+}
+
+/**
+ * @brief Pack weight tensor in a suitable format required for the optimized
+ * kernel.
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
+ pack_unpack_(sdata_, pdata_, true);
+}
+
+/**
+ * @brief Unpack the packed weight tensor (for the optimized kernel)
+ * to the original form.
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpack(T* origin_buf) {
+ pack_unpack_(const_cast<const T*>(pdata_), origin_buf, false);
}
template class PackWeightMatrixForGConv<int8_t, int32_t, 2>;
diff --git a/test/GConvTest.cc b/test/GConvTest.cc
index 84f0d52..0074535 100644
--- a/test/GConvTest.cc
+++ b/test/GConvTest.cc
@@ -43,6 +43,8 @@ class fbgemmGConvAcc32WithQuantGranularityTest
QuantizationGranularity,
bool,
bool>> {};
+class fbgemmGConvPackTest
+ : public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t>> {};
}; // namespace
INSTANTIATE_TEST_CASE_P(
@@ -61,6 +63,13 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(qGranularityVals),
::testing::Bool(), // A symmetric
::testing::Bool())); // B symmetric
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ fbgemmGConvPackTest,
+ ::testing::Combine(
+ ::testing::Values(matrix_op_t::NoTranspose),
+ ::testing::ValuesIn(transposeVals)));
/**
* @brief Shapes for unit test.
*/
@@ -413,3 +422,51 @@ TEST_P(fbgemmGConvAcc32Test, NoRequantizeTest) {
static_cast<int32_t>(0));
} // for each shape
}
+
+/**
+ * @brief Unit test for packing and unpacking the weight tensor
+ */
+TEST_P(fbgemmGConvPackTest, PackUnpackTest) {
+ vector<conv_param_t<>> shapes(GetShapes_());
+ matrix_op_t atrans, btrans;
+ tie(atrans, btrans) = GetParam();
+
+ for (auto conv_p : shapes) {
+ int R = conv_p.K[0];
+ int S = conv_p.K[1];
+ int IC_per_G = conv_p.IC / conv_p.G;
+ int OC_per_G = conv_p.OC / conv_p.G;
+
+ // Weights -- test the packing/unpacking of only the weights
+ // when btrans == Transpose, the weight matrix is in layout G K/G (R S C/G)
+ // instead of G (R S C/G) K/G
+ int weight_len = R * S * conv_p.G * IC_per_G * OC_per_G;
+ aligned_vector<int8_t> Bint8(weight_len, 0);
+
+ // Random fill the weights
+ randFill<int8_t>(Bint8, -4, 4);
+
+ // Instantiate the object
+ PackWeightMatrixForGConv<int8_t> packedWeights(
+ btrans, conv_p, Bint8.data(), nullptr);
+
+ // Setup a buffer to get pack -> unpacked results
+ aligned_vector<int8_t> unpack_buf(weight_len, 0);
+
+ // START Actual pack-unpack operations
+ // Perform packing first. This should populate pdata_ of packedWeights
+ packedWeights.pack();
+
+ // Next perform unpacking
+ packedWeights.unpack(unpack_buf.data());
+ // END actual pack-unpack operations
+
+ // Sanity check
+ for (int i = 0; i < weight_len; ++i) {
+ EXPECT_EQ(Bint8.data()[i], unpack_buf.data()[i])
+ << "Pack/Unpack results differ at index " << i
+ << ", Reference: " << static_cast<int> (Bint8.data()[i])
+ << ", Pack-Unpacked: " << static_cast<int> (unpack_buf.data()[i]);
+ }
+ } // for each shape
+}