diff options
author | Jaewon Lee <jaewon@fb.com> | 2019-07-06 00:58:49 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-07-06 01:03:34 +0300 |
commit | 64a2c73a425e4113839f2c2b596ea28d632d20f6 (patch) | |
tree | 228cbd60a528309c4eb6884b03cb78ca8a73b7ba | |
parent | b0cf97df8e2f368d8e0c1d2e9e1cacbd7638f79d (diff) |
Implement ::unpack() for PackWeightMatrixForGConv
Summary: Implement ::unpack() for PackWeightMatrixForGConv. Unpack index calculation is the inverse of ::pack().
Reviewed By: dskhudia
Differential Revision: D16085552
fbshipit-source-id: b8866365dc425fee2cb985b3e48c627198ebc29a
-rw-r--r-- | include/fbgemm/Fbgemm.h | 21 | ||||
-rw-r--r-- | src/PackWeightMatrixForGConv.cc | 141 | ||||
-rw-r--r-- | test/GConvTest.cc | 57 |
3 files changed, 190 insertions, 29 deletions
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h index 721b12f..87f0907 100644 --- a/include/fbgemm/Fbgemm.h +++ b/include/fbgemm/Fbgemm.h @@ -504,6 +504,11 @@ class FBGEMM_API PackWeightMatrixForGConv { void pack(); /** + * @brief Unpacks a pmat buffer into source matrix. + */ + void unpack(T* origin_buf); + + /** * @brief Return packed data */ inpType* getBuf() { @@ -522,6 +527,22 @@ class FBGEMM_API PackWeightMatrixForGConv { const T* sdata_; T* pdata_; bool bufAllocatedHere_; + + /** + * @brief Internal function performing both pack & unpack + */ + void pack_unpack_(const T* src, T* dst, bool ispack); + + /** + * @brief Get the index of the unpacked data + */ + int unpacked_index_(int r, int s, int k, int g, int c, bool tr); + + /** + * @brief Get the index of the packed data + */ + int packed_index_(int r, int s, int k, int g, int c); + }; /** diff --git a/src/PackWeightMatrixForGConv.cc b/src/PackWeightMatrixForGConv.cc index 0fb0e2c..ba6adf3 100644 --- a/src/PackWeightMatrixForGConv.cc +++ b/src/PackWeightMatrixForGConv.cc @@ -36,8 +36,61 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv( } /** - * @brief Pack weight tensor in a suitable format required for the optimized - * kernel. + * @brief Get the index of the unpacked data for a given <r, s, k, g, c, tr> + * + * Non-transposed: G (R S C/G) K/G + * Transposed: G K/G (R S C/G) + * Using inline as this will be called frequently + */ +template <typename T, typename accT, int SPATIAL_DIM> +inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpacked_index_( + int r, int s, int k, int g, int c, bool tr) { + // Get the full dimensions + int R = conv_param_.K[0]; + int S = conv_param_.K[1]; + int G = conv_param_.G; + int IC_per_G = conv_param_.IC / G; + int OC_per_G = conv_param_.OC / G; + + int idx; + if (tr) { + idx = (((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c; + } else { + idx = (((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k; + } + return idx; +} + +/** + * @brief Get the index of the packed data for a given <r, s, k, g, c> + * + * The index may differ depending on IC_per_G. + * Using inline as this will be called frequently + */ +template <typename T, typename accT, int SPATIAL_DIM> +inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::packed_index_( + int r, int s, int k, int g, int c) { + // Get the full dimensions + int R = conv_param_.K[0]; + int S = conv_param_.K[1]; + int G = conv_param_.G; + int IC_per_G = conv_param_.IC / G; + int OC_per_G = conv_param_.OC / G; + + int idx; + // For IC_per_G == 4, we need to work on 2 groups at a time + if (IC_per_G == 4) { + idx = (((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 + (g % 2)) + * IC_per_G + c; + } else { + idx = ((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) * OC_per_G + k) + * 4 + (c % 4); + } + return idx; +} + +/** + * @ brief Pack or unpack matrix * * Let IC_per_G be number of input channels per group and OC_per_G be number of * output channels per group. @@ -53,15 +106,17 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv( * on 2 groups at a time and full SIMD width can be efficiently utilized even * while working on 1 group at a time. * In this case, the layout is G (C/4) R S K 4 - */ +*/ + template <typename T, typename accT, int SPATIAL_DIM> -void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() { +void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack_unpack_( + const T* src, T* dst, bool ispack) { // filters are assumed to be in G RS C/G K/G format int R = conv_param_.K[0]; int S = conv_param_.K[1]; int G = conv_param_.G; - int IC_per_G = conv_param_.IC / conv_param_.G; - int OC_per_G = conv_param_.OC / conv_param_.G; + int IC_per_G = conv_param_.IC / G; + int OC_per_G = conv_param_.OC / G; // If transpose option is set, the weight matrix is in layout G K/G (R S C/G) // instead of G (R S C/G) K/G @@ -73,25 +128,13 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() { for (int k = 0; k < OC_per_G; ++k) { for (int g = 0; g < G; ++g) { for (int c = 0; c < IC_per_G; ++c) { - inpType b = tr - ? sdata_ - [(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c] - : sdata_ - [(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k]; - if (IC_per_G == 4) { - // For IC_per_G == 4, we need to work on 2 groups at a time - pdata_ - [(((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 + - (g % 2)) * - IC_per_G + - c] = b; + int p_idx = packed_index_(r, s, k, g, c); + int up_idx = unpacked_index_(r, s, k, g, c, tr); + // Pack: src (unpacked) -> dst (packed) + if (ispack) { + dst[p_idx] = src[up_idx]; } else { - pdata_ - [((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) * - OC_per_G + - k) * - 4 + - (c % 4)] = b; + dst[up_idx] = src[p_idx]; } } } @@ -99,14 +142,54 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() { } } } else { + // For pack & transposed, call transposeConvWeights() + // G K/G (R S C/G) => G (R S C/G) K/G if (tr) { - // conv_ref expects weights to be in G (R S C/G) K/G format - transposeConvWeights(conv_param_, sdata_, pdata_); + if (ispack) { + transposeConvWeights(conv_param_, src, dst); + } else { + // TODO: Wrap this as a inverseTransposeConvWeights()? + // For unpack & transposed, call transposeConvWeights() + // G (R S C/G) K/G => G K/G (R S C/G) + for (int r = 0; r < R; ++r) { + for (int s = 0; s < S; ++s) { + for (int k = 0; k < OC_per_G; ++k) { + for (int g = 0; g < G; ++g) { + for (int c = 0; c < IC_per_G; ++c) { + dst[(((g * OC_per_G + k) * R + r) * S + s) + * IC_per_G + c] = + src[(((g * R + r) * S + s) * IC_per_G + c) + * OC_per_G + k]; + } + } + } + } + } + } // end if(ispack) } else { // just copy the data for not supported cases - memcpy(pdata_, sdata_, G * R * S * OC_per_G * IC_per_G * sizeof(inpType)); - } - } + memcpy(dst, src, + G * R * S * OC_per_G * IC_per_G * sizeof(inpType)); + } //end if(tr) + } // end if(fbgemmOptimizedGConv(conv_param_) +} + +/** + * @brief Pack weight tensor in a suitable format required for the optimized + * kernel. + */ +template <typename T, typename accT, int SPATIAL_DIM> +void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() { + pack_unpack_(sdata_, pdata_, true); +} + +/** + * @brief Unpack the packed weight tensor (for the optimized kernel) + * to the original form. + */ +template <typename T, typename accT, int SPATIAL_DIM> +void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpack(T* origin_buf) { + pack_unpack_(const_cast<const T*>(pdata_), origin_buf, false); } template class PackWeightMatrixForGConv<int8_t, int32_t, 2>; diff --git a/test/GConvTest.cc b/test/GConvTest.cc index 84f0d52..0074535 100644 --- a/test/GConvTest.cc +++ b/test/GConvTest.cc @@ -43,6 +43,8 @@ class fbgemmGConvAcc32WithQuantGranularityTest QuantizationGranularity, bool, bool>> {}; +class fbgemmGConvPackTest + : public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t>> {}; }; // namespace INSTANTIATE_TEST_CASE_P( @@ -61,6 +63,13 @@ INSTANTIATE_TEST_CASE_P( ::testing::ValuesIn(qGranularityVals), ::testing::Bool(), // A symmetric ::testing::Bool())); // B symmetric + +INSTANTIATE_TEST_CASE_P( + InstantiationName, + fbgemmGConvPackTest, + ::testing::Combine( + ::testing::Values(matrix_op_t::NoTranspose), + ::testing::ValuesIn(transposeVals))); /** * @brief Shapes for unit test. */ @@ -413,3 +422,51 @@ TEST_P(fbgemmGConvAcc32Test, NoRequantizeTest) { static_cast<int32_t>(0)); } // for each shape } + +/** + * @brief Unit test for packing and unpacking the weight tensor + */ +TEST_P(fbgemmGConvPackTest, PackUnpackTest) { + vector<conv_param_t<>> shapes(GetShapes_()); + matrix_op_t atrans, btrans; + tie(atrans, btrans) = GetParam(); + + for (auto conv_p : shapes) { + int R = conv_p.K[0]; + int S = conv_p.K[1]; + int IC_per_G = conv_p.IC / conv_p.G; + int OC_per_G = conv_p.OC / conv_p.G; + + // Weights -- test the packing/unpacking of only the weights + // when btrans == Transpose, the weight matrix is in layout G K/G (R S C/G) + // instead of G (R S C/G) K/G + int weight_len = R * S * conv_p.G * IC_per_G * OC_per_G; + aligned_vector<int8_t> Bint8(weight_len, 0); + + // Random fill the weights + randFill<int8_t>(Bint8, -4, 4); + + // Instantiate the object + PackWeightMatrixForGConv<int8_t> packedWeights( + btrans, conv_p, Bint8.data(), nullptr); + + // Setup a buffer to get pack -> unpacked results + aligned_vector<int8_t> unpack_buf(weight_len, 0); + + // START Actual pack-unpack operations + // Perform packing first. This should populate pdata_ of packedWeights + packedWeights.pack(); + + // Next perform unpacking + packedWeights.unpack(unpack_buf.data()); + // END actual pack-unpack operations + + // Sanity check + for (int i = 0; i < weight_len; ++i) { + EXPECT_EQ(Bint8.data()[i], unpack_buf.data()[i]) + << "Pack/Unpack results differ at index " << i + << ", Reference: " << static_cast<int> (Bint8.data()[i]) + << ", Pack-Unpacked: " << static_cast<int> (unpack_buf.data()[i]); + } + } // for each shape +} |