github.com/marian-nmt/FBGEMM.git
author     Young Jin Kim <youki@microsoft.com>   2019-08-01 22:38:23 +0300
committer  Young Jin Kim <youki@microsoft.com>   2019-08-01 22:38:23 +0300
commit     eb8fede25bd048da6fd396654936703a474f0504 (patch)
tree       943fd29e7e173fb1075b9886b0309765f5f4b114
parent     e4ed5196cbec0d0a485578996b09912e92927e02 (diff)
parent     f712cb2328a2b29424bdaeecb9c0731da2cd997b (diff)
Merge upstream master
-rw-r--r--  README.md                              |   3
-rw-r--r--  bench/ConvUnifiedBenchmark.cc          |  39
-rw-r--r--  include/fbgemm/Fbgemm.h                |  72
-rw-r--r--  include/fbgemm/FbgemmI8DepthwiseAvx2.h |  20
-rw-r--r--  include/fbgemm/Utils.h                 |   2
-rw-r--r--  src/FbgemmConv.cc                      |  57
-rw-r--r--  src/FbgemmI8DepthwiseAvx2.cc           |  46
-rw-r--r--  src/PackAMatrix.cc                     |  22
-rw-r--r--  src/PackAWithIm2Col.cc                 |  21
-rw-r--r--  src/PackAWithQuantRowOffset.cc         |  25
-rw-r--r--  src/PackAWithRowOffset.cc              |  25
-rw-r--r--  src/PackBMatrix.cc                     | 139
-rw-r--r--  src/PackMatrix.cc                      |  40
-rw-r--r--  src/PackWeightMatrixForGConv.cc        | 141
-rw-r--r--  src/PackWeightsForConv.cc              |  68
-rw-r--r--  src/RefImplementations.cc              |  54
-rw-r--r--  test/GConvTest.cc                      |  57
-rw-r--r--  test/I8DepthwiseTest.cc                |  69
-rw-r--r--  test/PackedRequantizeAcc16Test.cc      |  72
-rw-r--r--  test/PackedRequantizeTest.cc           |  70
-rw-r--r--  test/QuantUtilsTest.cc                 | 183
-rw-r--r--  test/UniConvPackingTest.cc             | 148
-rw-r--r--  test/UniConvTest.cc                    | 325
23 files changed, 1322 insertions(+), 376 deletions(-)
diff --git a/README.md b/README.md
index 2335b81..d287c44 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,9 @@ General build instructions are as follows:
```
git clone --recursive https://github.com/pytorch/FBGEMM.git
cd FBGEMM
+# if you are updating an existing checkout
+git submodule sync
+git submodule update --init --recursive
mkdir build && cd build
cmake ..
make
diff --git a/bench/ConvUnifiedBenchmark.cc b/bench/ConvUnifiedBenchmark.cc
index 6bc2cf4..b450beb 100644
--- a/bench/ConvUnifiedBenchmark.cc
+++ b/bench/ConvUnifiedBenchmark.cc
@@ -26,16 +26,18 @@ using namespace fbgemm;
// 2D conv shapes
vector<conv_param_t<2>> shapes_2d = {
- // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
- // pad_h_top, pad_w_left, pad_h_bottom, pad_w_right
- // 2D convolutions
- // regular
- conv_param_t<>(1, 128, 128, {56, 56}, 1, {3, 3}, {1, 1}, {1, 1, 1, 1}),
- // groupwise
- conv_param_t<>(1, 128, 128, {56, 56}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1}),
-
- // DW
- conv_param_t<>(1, 272, 272, {47, 125}, 272, {3, 3}, {1, 1}, {1, 1, 1, 1}),
+ // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
+ // pad_h_top, pad_w_left, pad_h_bottom, pad_w_right
+ // 2D convolutions
+ // regular
+ conv_param_t<>(1, 128, 128, {56, 56}, 1, {3, 3}, {1, 1}, {1, 1, 1, 1}),
+ // groupwise
+ conv_param_t<>(1, 128, 128, {56, 56}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1}),
+ // DW
+ conv_param_t<>(1, 272, 272, {47, 125}, 272, {3, 3}, {1, 1}, {1, 1, 1, 1}),
+ // Pointwise
+ conv_param_t<>(1, 128, 128, {56, 56}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0})
+
};
// 3D conv shapes
@@ -43,9 +45,11 @@ vector<conv_param_t<3>> shapes_3d = {
// MB, IC, OC, {IT, IH, IW}, G, {KT, KH, KW}, {stride_t, stride_h, stride_w},
// {pad_prev, pad_h_top, pad_w_left, pad_next, pad_h_bottom, pad_w_right}
// Regular
- conv_param_t<3>(1, 64, 64, {32, 56, 56}, 1, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
+ conv_param_t<3>(1, 64, 64, {8, 14, 14}, 1, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
// Depthwise
- conv_param_t<3>(1, 64, 64, {32, 56, 56}, 64, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1})
+ conv_param_t<3>(1, 64, 64, {8, 14, 14}, 64, {3, 3, 3}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}),
+ // Pointwise
+ conv_param_t<3>(1, 128, 128, {8, 14, 14}, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0})
};
template <int SPATIAL_DIM, typename Acc_t>
@@ -110,6 +114,9 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
aligned_vector<int8_t> Bint8(
kernel_dim * conv_p.IC * (conv_p.OC / conv_p.G));
+ aligned_vector<int8_t> Bint8_tr(
+ kernel_dim * conv_p.IC * (conv_p.OC / conv_p.G));
+
int im_out_dim = accumulate(
conv_p.OUT_DIM.begin(), conv_p.OUT_DIM.end(), 1, multiplies<int>());
aligned_vector<int32_t> Cint32_ref(conv_p.MB * im_out_dim * conv_p.OC);
@@ -132,14 +139,14 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
randFill(C_multiplier, 0.1234f / 2, 0.1234f * 3 / 2);
int32_t C_zero_point = 5;
- aligned_vector<float> Bfp32(Bint8.begin(), Bint8.end());
-
// reference implementation
+ // conv_ref expects weights to be in G (R S C/G) K/G
+ transposeConvWeights<SPATIAL_DIM>(conv_p, Bint8.data(), Bint8_tr.data());
conv_ref(
conv_p,
Aint8.data(),
Aint8_zero_point,
- Bint8.data(),
+ Bint8_tr.data(),
Cint32_ref.data());
// matrix dimensions after im2col
@@ -162,7 +169,7 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
KDimPerGroup,
OC_per_G,
OC_per_G,
- Bint8.data() + g * KDimPerGroup * OC_per_G,
+ Bint8_tr.data() + g * KDimPerGroup * OC_per_G,
Bint8_zero_point.data(),
col_offsets.data() + g * OC_per_G,
conv_p.OC);
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index bdcf308..7f428ed 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -489,6 +489,15 @@ class FBGEMM_API PackBMatrix final
const T* smat_;
std::int32_t ld_;
std::int32_t row_interleave_;
+
+ /**
+ * @brief Internal function performing both pack & unpack
+ */
+ void pack_unpack_(
+ const block_type_t& block,
+ T* unpack_buf,
+ T* pack_buf,
+ bool ispack);
};
/**
@@ -521,6 +530,11 @@ class FBGEMM_API PackWeightMatrixForGConv {
void pack();
/**
+ * @brief Unpacks a pmat buffer into source matrix.
+ */
+ void unpack(T* origin_buf);
+
+ /**
* @brief Return packed data
*/
inpType* getBuf() {
@@ -543,6 +557,22 @@ class FBGEMM_API PackWeightMatrixForGConv {
const T* sdata_;
T* pdata_;
bool bufAllocatedHere_;
+
+ /**
+ * @brief Internal function performing both pack & unpack
+ */
+ void pack_unpack_(const T* src, T* dst, bool ispack);
+
+ /**
+ * @brief Get the index of the unpacked data
+ */
+ int unpacked_index_(int r, int s, int k, int g, int c, bool tr);
+
+ /**
+ * @brief Get the index of the packed data
+ */
+ int packed_index_(int r, int s, int k, int g, int c);
+
};
/**
@@ -588,7 +618,40 @@ class FBGEMM_API PackWeightsForConv {
return W_gconv_packed_;
}
+ std::shared_ptr<PackBMatrix<T, accT>> getPackedWForPointwise() {
+ return W_pointwise_packed_;
+ }
+
+ int inputChannels() {
+ return conv_param_.IC;
+ }
+
+ int outputChannels() {
+ return conv_param_.OC;
+ }
+
+ std::array<int, SPATIAL_DIM> kernelDims() {
+ return conv_param_.K;
+ }
+
+ int groups() {
+ return conv_param_.G;
+ }
+
+ /**
+ * @brief Returns true if the packed weights would work for the given
+ * convolution parameters, and false otherwise
+ */
+ bool isPackingCompliant(const conv_param_t<SPATIAL_DIM>& conv_p);
+
+ /**
+ * @brief Unpack packed matrix into origin_buf (Used for the serialization to
+ * recover weight matrix).
+ */
+ void unpack(T* origin_buf);
+
private:
+ const conv_param_t<SPATIAL_DIM> conv_param_;
// Packed weights if we use im2col based convolution implementation
std::shared_ptr<PackBMatrix<T, accT>> W_im2col_packed_;
// Packed weights if we use 2D depthwise convolution implementation
@@ -599,6 +662,8 @@ class FBGEMM_API PackWeightsForConv {
// implementation
std::shared_ptr<PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>>
W_gconv_packed_;
+ // Packed weights if we use direct gemm for pointwise convolution
+ std::shared_ptr<PackBMatrix<T, accT>> W_pointwise_packed_;
};
/**
@@ -1374,6 +1439,13 @@ template <int SPATIAL_DIM>
FBGEMM_API bool fbgemmOptimizedGConv(const conv_param_t<SPATIAL_DIM>& conv_p);
/**
+ * @brief Is this convolution a direct matrix-matrix multiplication, i.e., 1x1
+ * (aka pointwise) with the right padding, stride, and dilation?
+ */
+template <int SPATIAL_DIM>
+FBGEMM_API bool takePointWiseFastPath(const conv_param_t<SPATIAL_DIM>& conv_p);
+
+/**
* @brief Allocate __size bytes of uninitialized storage whose alignment is
* specified by __align.
*/
diff --git a/include/fbgemm/FbgemmI8DepthwiseAvx2.h b/include/fbgemm/FbgemmI8DepthwiseAvx2.h
index 069ff77..e7b0ec4 100644
--- a/include/fbgemm/FbgemmI8DepthwiseAvx2.h
+++ b/include/fbgemm/FbgemmI8DepthwiseAvx2.h
@@ -16,7 +16,7 @@ namespace fbgemm {
template <int KERNEL_PROD>
class FBGEMM_API PackedDepthWiseConvMatrix {
public:
- // smat in RSG layout
+ // smat in GRS layout
PackedDepthWiseConvMatrix(int K, const std::int8_t* smat);
virtual ~PackedDepthWiseConvMatrix();
@@ -24,6 +24,17 @@ class FBGEMM_API PackedDepthWiseConvMatrix {
return pmat_;
}
+ /**
+ * @brief Unpacks pmat_ into unpack_data.
+ * Used for recovering the weight matrix into the original format
+ */
+ void unpack(std::int8_t* unpacked_data);
+
+ /**
+ * @brief returns the index into pmat_ given the row and column for smat
+ */
+ int addr(int r, int c);
+
private:
int K_;
std::int8_t* pmat_;
@@ -31,6 +42,13 @@ class FBGEMM_API PackedDepthWiseConvMatrix {
using Packed3x3ConvMatrix = PackedDepthWiseConvMatrix<3 * 3>;
using Packed3x3x3ConvMatrix = PackedDepthWiseConvMatrix<3 * 3 * 3>;
+using Packed1ConvMatrix = PackedDepthWiseConvMatrix<1>;
+using Packed2ConvMatrix = PackedDepthWiseConvMatrix<2>;
+using Packed3ConvMatrix = PackedDepthWiseConvMatrix<3>;
+using Packed4ConvMatrix = PackedDepthWiseConvMatrix<4>;
+using Packed5ConvMatrix = PackedDepthWiseConvMatrix<5>;
+using Packed10ConvMatrix = PackedDepthWiseConvMatrix<10>;
+using Packed11ConvMatrix = PackedDepthWiseConvMatrix<11>;
/**
* Depth-wise 3x3 convolution with pad=1 and stride=1 and K a multiple of 8
diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h
index 68fe177..eac0bcd 100644
--- a/include/fbgemm/Utils.h
+++ b/include/fbgemm/Utils.h
@@ -44,7 +44,7 @@ enum class inst_set_t { anyarch, avx2, avx512 };
/**
* @brief Typed enum for optimized paths for convolutions
*/
-enum class optimized_conv_t { depthwise, groupwise, im2col };
+enum class optimized_conv_t { depthwise, groupwise, pointwise, im2col };
/**
* @brief Typed enum for implementation type.
diff --git a/src/FbgemmConv.cc b/src/FbgemmConv.cc
index 5db63f6..027e6c5 100644
--- a/src/FbgemmConv.cc
+++ b/src/FbgemmConv.cc
@@ -6,8 +6,9 @@
*/
#include <algorithm>
-#include <iostream>
+#include <numeric>
#include <vector>
+#include <functional>
#include "fbgemm/Fbgemm.h"
namespace fbgemm {
@@ -33,12 +34,24 @@ bool takeDepthWiseFastPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
});
}
+template <int SPATIAL_DIM>
+bool takePointWiseFastPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
+ return std::accumulate(conv_p.K.begin(), conv_p.K.end(), 0) == SPATIAL_DIM &&
+ std::accumulate(conv_p.stride.begin(), conv_p.stride.end(), 0) ==
+ SPATIAL_DIM &&
+ std::accumulate(conv_p.dilation.begin(), conv_p.dilation.end(), 0) ==
+ SPATIAL_DIM &&
+ std::accumulate(conv_p.pad.begin(), conv_p.pad.end(), 0) == 0;
+}
+
template <int SPATIAL_DIM, typename ACC_T>
optimized_conv_t ConvFastPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
if (takeDepthWiseFastPath<SPATIAL_DIM, ACC_T>(conv_p)) {
return optimized_conv_t::depthwise;
} else if (fbgemmOptimizedGConv<SPATIAL_DIM>(conv_p)) {
return optimized_conv_t::groupwise;
+ } else if (takePointWiseFastPath<SPATIAL_DIM>(conv_p)) {
+ return optimized_conv_t::pointwise;
} else {
return optimized_conv_t::im2col;
}
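Because kernel, stride, and dilation entries are positive integers, each sum equals SPATIAL_DIM exactly when every entry is 1, and the pad sum is 0 exactly when all pads are 0 — i.e. the convolution is a plain GEMM over spatial positions. A sketch of which shapes qualify, reusing the conv_param_t constructor from the benchmark above (dilation defaults to 1; the function is the one this patch declares in Fbgemm.h):
```cpp
#include <cassert>
#include "fbgemm/Fbgemm.h"
using namespace fbgemm;

void pointwise_path_examples() {
  // 1x1 kernel, stride 1, no padding: taken as a direct GEMM.
  conv_param_t<2> pw(1, 128, 128, {56, 56}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0});
  assert(takePointWiseFastPath<2>(pw));
  // Padded 3x3 kernel: kernel sum is 6 != SPATIAL_DIM, falls through.
  conv_param_t<2> k3(1, 128, 128, {56, 56}, 1, {3, 3}, {1, 1}, {1, 1, 1, 1});
  assert(!takePointWiseFastPath<2>(k3));
}
```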
@@ -58,6 +71,13 @@ int fbgemmConv(
static_assert(
SPATIAL_DIM == 2 || SPATIAL_DIM == 3,
"Only 2D and 3D convolutions are supported");
+
+ if (!packed_weights.isPackingCompliant(conv_p)) {
+ throw std::logic_error(
+ "[FBGEMM_CONV_ERROR] Prepacked weights can't be used"
+ " with these convolution parameters!");
+ }
+
switch (ConvFastPath<SPATIAL_DIM, ACC_T>(conv_p)) {
case optimized_conv_t::depthwise: {
// 2D and 3D depthwise fast path
@@ -134,11 +154,44 @@ int fbgemmConv(
num_threads);
break;
}
+ case optimized_conv_t::pointwise: {
+ std::vector<int32_t> row_offset_buf(
+ PackAWithRowOffset<uint8_t>::rowOffsetBufferSize(blocking_params));
+ int image_dim = std::accumulate(
+ conv_p.IN_DIM.begin(),
+ conv_p.IN_DIM.end(),
+ 1,
+ std::multiplies<int>());
+ PackAWithRowOffset<uint8_t, ACC_T> packA(
+ matrix_op_t::NoTranspose,
+ conv_p.MB * image_dim,
+ conv_p.IC,
+ activations,
+ conv_p.IC,
+ nullptr,
+ conv_p.G,
+ row_offset_buf.data(),
+ blocking_params);
+
+ outProcess.setRowOffsets(row_offset_buf.data());
+ fbgemmPacked(
+ packA,
+ *(packed_weights.getPackedWForPointwise()),
+ out,
+ outBuffer,
+ conv_p.OC,
+ outProcess,
+ thread_id,
+ num_threads,
+ blocking_params);
+ break;
+ }
case optimized_conv_t::im2col: {
// All other convolutions go through im2col-based implementation
// std::cout << "Im2col path" << std::endl;
std::vector<int32_t> row_offset_buf(
- PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>::rowOffsetBufferSize());
+ PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>
+ ::rowOffsetBufferSize(blocking_params));
const std::int32_t* b_zero_point = outProcess.getBZeroPoint();
bool b_symmetric = b_zero_point[0] == 0;
diff --git a/src/FbgemmI8DepthwiseAvx2.cc b/src/FbgemmI8DepthwiseAvx2.cc
index f96d1d2..183a8a9 100644
--- a/src/FbgemmI8DepthwiseAvx2.cc
+++ b/src/FbgemmI8DepthwiseAvx2.cc
@@ -170,6 +170,45 @@ PackedDepthWiseConvMatrix<KERNEL_PROD>::PackedDepthWiseConvMatrix(
}
template <int KERNEL_PROD>
+int PackedDepthWiseConvMatrix<KERNEL_PROD>::addr(int r, int c) {
+ constexpr int KERNEL_PROD_ALIGNED = (KERNEL_PROD + 1) / 2 * 2;
+ if (c >= KERNEL_PROD / 4 * 4 &&
+ (KERNEL_PROD % 4 == 1 || KERNEL_PROD % 4 == 2)) {
+ int kBlock = r / 32;
+ int reg_idx = (r % 16) / 8 + c / 4 * 4;
+
+ int blk_idx = kBlock * KERNEL_PROD_ALIGNED + reg_idx;
+
+ int r_ = r % 8;
+ int c_ = c % 4;
+
+ int in_blk_idx = (r % 32) / 16 * 16 + 2 * r_ + c_;
+ return blk_idx * 32 + in_blk_idx;
+
+ } else {
+ int kBlock = r / 32;
+ int reg_idx = (r % 16) / 4 + c / 4 * 4;
+
+ int blk_idx = kBlock * KERNEL_PROD_ALIGNED + reg_idx;
+
+ int r_ = r % 4;
+ int c_ = c % 4;
+
+ int in_blk_idx = (r % 32) / 16 * 16 + 4 * r_ + c_;
+ return blk_idx * 32 + in_blk_idx;
+ }
+}
+
+template <int KERNEL_PROD>
+void PackedDepthWiseConvMatrix<KERNEL_PROD>::unpack(int8_t* unpacked_data) {
+ for (int r = 0; r < K_; ++r) {
+ for (int c = 0; c < KERNEL_PROD; ++c) {
+ unpacked_data[r * KERNEL_PROD + c] = pmat_[addr(r, c)];
+ }
+ }
+}
+
+template <int KERNEL_PROD>
PackedDepthWiseConvMatrix<KERNEL_PROD>::~PackedDepthWiseConvMatrix() {
#ifdef _MSC_VER
_aligned_free(pmat_);
@@ -180,6 +219,13 @@ PackedDepthWiseConvMatrix<KERNEL_PROD>::~PackedDepthWiseConvMatrix() {
template class PackedDepthWiseConvMatrix<3 * 3>;
template class PackedDepthWiseConvMatrix<3 * 3 * 3>;
+template class PackedDepthWiseConvMatrix<1>;
+template class PackedDepthWiseConvMatrix<2>;
+template class PackedDepthWiseConvMatrix<3>;
+template class PackedDepthWiseConvMatrix<4>;
+template class PackedDepthWiseConvMatrix<5>;
+template class PackedDepthWiseConvMatrix<5 * 2>;
+template class PackedDepthWiseConvMatrix<11 * 1>;
// c = a0 * b0 + a1 * b1 + a2 * b2 + a3 * b3
// A is in uint8_t
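addr() is what lets unpack() stay a two-line loop: it replays the register interleaving the constructor applied, one element at a time. A hypothetical property check — addr() must be injective over (row, col) or some weight would be unrecoverable; check_addr_injective is an illustrative name, and K must be a multiple of 8 as the depthwise kernels require:
```cpp
#include <cassert>
#include <set>
#include <vector>
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"
using namespace fbgemm;

void check_addr_injective(const std::vector<std::int8_t>& B, int K) {
  Packed3x3ConvMatrix Bp(K, B.data());  // KERNEL_PROD = 9
  std::set<int> seen;
  for (int r = 0; r < K; ++r)
    for (int c = 0; c < 9; ++c)
      assert(seen.insert(Bp.addr(r, c)).second);  // no packed slot reused
}
```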
diff --git a/src/PackAMatrix.cc b/src/PackAMatrix.cc
index 87adaba..143e11d 100644
--- a/src/PackAMatrix.cc
+++ b/src/PackAMatrix.cc
@@ -34,31 +34,29 @@ PackAMatrix<T, accT>::PackAMatrix(
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
+ if ((!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
+ assert(0 && "unknown architecture");
+ }
+
if (params) {
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- BaseType::brow_ = params->MCB;
- BaseType::bcol_ = params->KCB;
- row_interleave_B_ = params->ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- }
+ BaseType::brow_ = params->MCB;
+ BaseType::bcol_ = params->KCB;
+ row_interleave_B_ = params->ROW_INTERLEAVE;
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx512>::ROW_INTERLEAVE;
- } else if (fbgemmHasAvx2Support()) {
+ } else {
+ // AVX2
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx2>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx2>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx2>::ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
}
}
+
if (BaseType::numCols() % groups != 0) {
throw std::runtime_error(
"groups = " + std::to_string(groups) +
diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
index e55dd4e..d731654 100644
--- a/src/PackAWithIm2Col.cc
+++ b/src/PackAWithIm2Col.cc
@@ -49,32 +49,29 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
+ if ((!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
+ assert(0 && "unknown architecture");
+ }
if (params) {
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- BaseType::brow_ = params->MCB;
- BaseType::bcol_ = params->KCB;
- row_interleave_B_ = params->ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- }
+ BaseType::brow_ = params->MCB;
+ BaseType::bcol_ = params->KCB;
+ row_interleave_B_ = params->ROW_INTERLEAVE;
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx512>::ROW_INTERLEAVE;
- } else if (fbgemmHasAvx2Support()) {
+ } else {
+ // AVX2
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx2>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx2>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx2>::ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
}
}
+
if (BaseType::numCols() % conv_p.G != 0) {
throw std::runtime_error(
"groups = " + std::to_string(conv_p.G) +
diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc
index 305a298..52caed4 100644
--- a/src/PackAWithQuantRowOffset.cc
+++ b/src/PackAWithQuantRowOffset.cc
@@ -45,32 +45,31 @@ PackAWithQuantRowOffset<T, accT>::PackAWithQuantRowOffset(
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
- rowOffsetAllocatedHere = false;
+ if ((!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
+ assert(0 && "unknown architecture");
+ }
+
if (params) {
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- BaseType::brow_ = params->MCB;
- BaseType::bcol_ = params->KCB;
- row_interleave_B_ = params->ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- }
+ BaseType::brow_ = params->MCB;
+ BaseType::bcol_ = params->KCB;
+ row_interleave_B_ = params->ROW_INTERLEAVE;
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx512>::ROW_INTERLEAVE;
- } else if (fbgemmHasAvx2Support()) {
+ } else {
+ // AVX2
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx2>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx2>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx2>::ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unknown architecure");
}
}
+
+ rowOffsetAllocatedHere = false;
+
if (BaseType::numCols() % groups != 0) {
throw std::runtime_error(
"groups = " + std::to_string(groups) +
diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc
index b791817..733bf5c 100644
--- a/src/PackAWithRowOffset.cc
+++ b/src/PackAWithRowOffset.cc
@@ -39,32 +39,31 @@ PackAWithRowOffset<T, accT>::PackAWithRowOffset(
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
- rowOffsetAllocatedHere = false;
+ if ((!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
+ assert(0 && "unknown architecture");
+ }
+
if (params) {
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- BaseType::brow_ = params->MCB;
- BaseType::bcol_ = params->KCB;
- row_interleave_B_ = params->ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- }
+ BaseType::brow_ = params->MCB;
+ BaseType::bcol_ = params->KCB;
+ row_interleave_B_ = params->ROW_INTERLEAVE;
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx512>::ROW_INTERLEAVE;
- } else if (fbgemmHasAvx2Support()) {
+ } else {
+ // AVX2
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx2>::MCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx2>::KCB;
row_interleave_B_ =
PackingTraits<T, accT, inst_set_t::avx2>::ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unknown architecure");
}
}
+
+ rowOffsetAllocatedHere = false;
+
if (BaseType::numCols() % groups != 0) {
throw std::runtime_error(
"groups = " + std::to_string(groups) +
diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc
index 4d86d45..b19b5d4 100644
--- a/src/PackBMatrix.cc
+++ b/src/PackBMatrix.cc
@@ -188,31 +188,29 @@ PackBMatrix<T, accT>::PackBMatrix(
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
+ if ((!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
+ assert(0 && "unknown architecture");
+ }
+
if (params) {
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- BaseType::brow_ = params->KCB;
- BaseType::bcol_ = params->NCB;
- row_interleave_ = params->ROW_INTERLEAVE;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- }
+ BaseType::brow_ = params->KCB;
+ BaseType::bcol_ = params->NCB;
+ row_interleave_ = params->ROW_INTERLEAVE;
} else {
if (fbgemmHasAvx512Support()) {
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx512>::KCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx512>::NCB;
row_interleave_ =
PackingTraits<T, accT, inst_set_t::avx512>::ROW_INTERLEAVE;
- } else if (fbgemmHasAvx2Support()) {
+ } else {
+ // AVX2
BaseType::brow_ = PackingTraits<T, accT, inst_set_t::avx2>::KCB;
BaseType::bcol_ = PackingTraits<T, accT, inst_set_t::avx2>::NCB;
row_interleave_ =
PackingTraits<T, accT, inst_set_t::avx2>::ROW_INTERLEAVE;
- } else {
- // Error
- assert(0 && "unknown architecure");
}
}
+
if (BaseType::numRows() % groups != 0) {
throw std::runtime_error(
"groups = " + std::to_string(groups) +
@@ -292,7 +290,11 @@ PackBMatrix<T, accT>::PackBMatrix(
}
template <typename T, typename accT>
-void PackBMatrix<T, accT>::pack(const block_type_t& block) {
+void PackBMatrix<T, accT>::pack_unpack_(
+ const block_type_t& block,
+ T* unpack_buf,
+ T* pack_buf,
+ bool ispack) {
assert((BaseType::blockRowSize() % row_interleave_) == 0);
assert((block.row_start % BaseType::blockRowSize()) == 0);
assert((block.col_start % BaseType::blockColSize()) == 0);
@@ -300,7 +302,7 @@ void PackBMatrix<T, accT>::pack(const block_type_t& block) {
BaseType::packedBlock(block);
bool tr = (trans_ == matrix_op_t::Transpose);
for (int g = 0; g < BaseType::numGroups(); ++g) {
- T* out = BaseType::getBuf() +
+ T* pack_buf_cur = pack_buf +
g * BaseType::packedBufferSize(block.row_size, block.col_size);
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
int r_offset = ((i / BaseType::blockRowSize()) * BaseType::blockCols()) *
@@ -326,10 +328,16 @@ void PackBMatrix<T, accT>::pack(const block_type_t& block) {
c_blk_offset * BaseType::blockRowSize() * BaseType::blockColSize() +
c_idx_offset * row_interleave_;
- int out_idx = r_offset + c_offset;
- T val = tr ? smat_[i + (g * block.col_size + j) * ld_]
- : smat_[(g * block.row_size + i) * ld_ + j];
- out[out_idx] = val;
+ if (ispack) {
+ pack_buf_cur[r_offset + c_offset] = tr
+ ? unpack_buf[i + (g * block.col_size + j) * ld_]
+ : unpack_buf[(g * block.row_size + i) * ld_ + j];
+ } else {
+ T* unpack_buf_cur = tr
+ ? &(unpack_buf[i + (g * block.col_size + j) * ld_])
+ : &(unpack_buf[(g * block.row_size + i) * ld_ + j]);
+ *unpack_buf_cur = pack_buf_cur[r_offset + c_offset];
+ }
c_idx_offset++;
if (c_idx_offset == BaseType::blockColSize()) {
@@ -338,78 +346,45 @@ void PackBMatrix<T, accT>::pack(const block_type_t& block) {
}
}
}
- // fill the remaining with zero.
- // Please see the comment in PackAMatrix.cc on zero vs zero_pt fill.
- for (int i = block.row_start + block.row_size;
- i < (block.row_start + block.row_size + row_interleave_ - 1) /
- row_interleave_ * row_interleave_;
- ++i) {
- int r_offset = ((i / BaseType::blockRowSize()) * BaseType::blockCols()) *
- (BaseType::blockRowSize() * BaseType::blockColSize()) +
- (i % BaseType::blockRowSize() / row_interleave_) *
- BaseType::blockColSize() * row_interleave_ +
- i % row_interleave_;
- for (int j = block.col_start; j < block.col_start + block.col_size; j++) {
- int c_offset = (j / BaseType::blockColSize()) *
- BaseType::blockRowSize() * BaseType::blockColSize() +
- (j % BaseType::blockColSize()) * row_interleave_;
+ if (ispack) {
+ // fill the remaining with zero.
+ // Please see the comment in PackAMatrix.cc on zero vs zero_pt fill.
+ for (int i = block.row_start + block.row_size;
+ i < (block.row_start + block.row_size + row_interleave_ - 1) /
+ row_interleave_ * row_interleave_;
+ ++i) {
+ int r_offset =
+ ((i / BaseType::blockRowSize()) * BaseType::blockCols()) *
+ (BaseType::blockRowSize() * BaseType::blockColSize()) +
+ (i % BaseType::blockRowSize() / row_interleave_) *
+ BaseType::blockColSize() * row_interleave_ +
+ i % row_interleave_;
+ for (int j = block.col_start; j < block.col_start + block.col_size;
+ j++) {
+ int c_offset = (j / BaseType::blockColSize()) *
+ BaseType::blockRowSize() * BaseType::blockColSize() +
+ (j % BaseType::blockColSize()) * row_interleave_;
- int out_idx = r_offset + c_offset;
- out[out_idx] = 0;
+ int out_idx = r_offset + c_offset;
+ pack_buf_cur[out_idx] = 0;
+ }
}
}
} // for each group
}
template <typename T, typename accT>
-void PackBMatrix<T, accT>::unpack(T* origin_buf) {
- bool tr = (trans_ == matrix_op_t::Transpose);
- for (int g = 0; g < this->numGroups(); ++g) {
- T* out = BaseType::getBuf() +
- g *
- BaseType::packedBufferSize(
- BaseType::numPackedRows(), BaseType::numPackedCols());
- for (int i = BaseType::packedRowStart();
- i < BaseType::packedRowStart() + BaseType::numPackedRows();
- ++i) {
- int r_offset = ((i / BaseType::blockRowSize()) * BaseType::blockCols()) *
- (BaseType::blockRowSize() * BaseType::blockColSize()) +
- (i % BaseType::blockRowSize() / row_interleave_) *
- BaseType::blockColSize() * row_interleave_ +
- i % row_interleave_;
-
- int c_start_offset =
- (BaseType::packedColStart() / BaseType::blockColSize()) *
- BaseType::blockRowSize() * BaseType::blockColSize() +
- (BaseType::packedColStart() % BaseType::blockColSize()) *
- row_interleave_;
-
- int c_idx_offset = 0;
- int c_blk_offset = 0;
- for (int j = BaseType::packedColStart();
- j < BaseType::packedColStart() + BaseType::numPackedCols();
- ++j) {
- int c_offset = c_start_offset +
- c_blk_offset * BaseType::blockRowSize() * BaseType::blockColSize() +
- c_idx_offset * row_interleave_;
-
- int out_idx = r_offset + c_offset;
-
- T val = out[out_idx];
- if (tr) {
- origin_buf[i + (g * BaseType::numPackedCols() + j) * ld_] = val;
- } else {
- origin_buf[(g * BaseType::numPackedRows() + i) * ld_ + j] = val;
- }
+void PackBMatrix<T, accT>::pack(const block_type_t& block) {
+ pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true);
+}
- c_idx_offset++;
- if (c_idx_offset == BaseType::blockColSize()) {
- c_idx_offset = 0;
- c_blk_offset++;
- }
- }
- }
- } // for each group
+template <typename T, typename accT>
+void PackBMatrix<T, accT>::unpack(T* origin_buf) {
+ block_type_t blockB{BaseType::packedRowStart(),
+ BaseType::numPackedRows(),
+ BaseType::packedColStart(),
+ BaseType::numPackedCols()};
+ pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false);
}
template <typename T, typename accT>
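Folding pack() and unpack() into pack_unpack_() keeps the interleaved-offset arithmetic in one place so the two directions cannot drift apart: the flag only chooses which side of the copy is the packed buffer, and the zero-fill of padding rows runs only in the pack direction. The round trip this buys is what the new PackedRequantize tests below assert; in miniature (round_trip is an illustrative name):
```cpp
#include <vector>
#include "fbgemm/Fbgemm.h"
using namespace fbgemm;

void round_trip(const std::vector<std::int8_t>& B, int K, int N) {
  PackBMatrix<std::int8_t> Bp(
      matrix_op_t::NoTranspose, K, N, B.data(), /*ld=*/N);
  std::vector<std::int8_t> Bu(K * N);
  Bp.unpack(Bu.data());  // Bu should equal B element-wise
}
```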
diff --git a/src/PackMatrix.cc b/src/PackMatrix.cc
index 33227fb..c7503dd 100644
--- a/src/PackMatrix.cc
+++ b/src/PackMatrix.cc
@@ -36,45 +36,37 @@ int PackMatrix<PT, inpType, accType>::packedBufferSize(
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
}
+ if ((!fbgemmHasAvx512Support() && !fbgemmHasAvx2Support())) {
+ assert(0 && "unknown architecture");
+ }
+
int MCB, KCB, NCB;
if (params) {
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- MCB = params->MCB;
- NCB = params->NCB;
- KCB = params->KCB;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- }
+ MCB = params->MCB;
+ NCB = params->NCB;
+ KCB = params->KCB;
} else {
if (fbgemmHasAvx512Support()) {
MCB = PackingTraits<inpType, accType, inst_set_t::avx512>::MCB;
NCB = PackingTraits<inpType, accType, inst_set_t::avx512>::NCB;
KCB = PackingTraits<inpType, accType, inst_set_t::avx512>::KCB;
- } else if (fbgemmHasAvx2Support()) {
+ } else {
+ // AVX2
MCB = PackingTraits<inpType, accType, inst_set_t::avx2>::MCB;
NCB = PackingTraits<inpType, accType, inst_set_t::avx2>::NCB;
KCB = PackingTraits<inpType, accType, inst_set_t::avx2>::KCB;
- } else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
- return -1;
}
}
- if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
- if (isA()) {
- return MCB * KCB;
- } else {
- int rowBlock = KCB;
- int colBlock = NCB;
- return (((rows + rowBlock - 1) / rowBlock) * rowBlock) *
- (((cols + colBlock - 1) / colBlock) * colBlock);
- }
+ if (isA()) {
+ return MCB * KCB;
} else {
- // TODO: Have default slower path
- assert(0 && "unsupported architecure");
+ int rowBlock = KCB;
+ int colBlock = NCB;
+ return (((rows + rowBlock - 1) / rowBlock) * rowBlock) *
+ (((cols + colBlock - 1) / colBlock) * colBlock);
}
+
return -1;
}
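A worked instance of the rounding above, with illustrative block sizes (not the library defaults): rowBlock = KCB = 64 and colBlock = NCB = 16 pad a 100 x 30 B matrix up to 128 x 32.
```cpp
// ((rows + rowBlock - 1) / rowBlock) * rowBlock rounds rows up to the next
// multiple of rowBlock; likewise for columns.
constexpr int rows = 100, cols = 30, rowBlock = 64, colBlock = 16;
constexpr int paddedRows = ((rows + rowBlock - 1) / rowBlock) * rowBlock;  // 128
constexpr int paddedCols = ((cols + colBlock - 1) / colBlock) * colBlock;  //  32
static_assert(paddedRows * paddedCols == 4096,
              "packed B buffer holds 4096 elements");
```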
diff --git a/src/PackWeightMatrixForGConv.cc b/src/PackWeightMatrixForGConv.cc
index 0fb0e2c..ba6adf3 100644
--- a/src/PackWeightMatrixForGConv.cc
+++ b/src/PackWeightMatrixForGConv.cc
@@ -36,8 +36,61 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
}
/**
- * @brief Pack weight tensor in a suitable format required for the optimized
- * kernel.
+ * @brief Get the index of the unpacked data for a given <r, s, k, g, c, tr>
+ *
+ * Non-transposed: G (R S C/G) K/G
+ * Transposed: G K/G (R S C/G)
+ * Using inline as this will be called frequently
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpacked_index_(
+ int r, int s, int k, int g, int c, bool tr) {
+ // Get the full dimensions
+ int R = conv_param_.K[0];
+ int S = conv_param_.K[1];
+ int G = conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
+
+ int idx;
+ if (tr) {
+ idx = (((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c;
+ } else {
+ idx = (((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k;
+ }
+ return idx;
+}
+
+/**
+ * @brief Get the index of the packed data for a given <r, s, k, g, c>
+ *
+ * The index may differ depending on IC_per_G.
+ * Using inline as this will be called frequently
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+inline int PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::packed_index_(
+ int r, int s, int k, int g, int c) {
+ // Get the full dimensions
+ int R = conv_param_.K[0];
+ int S = conv_param_.K[1];
+ int G = conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
+
+ int idx;
+ // For IC_per_G == 4, we need to work on 2 groups at a time
+ if (IC_per_G == 4) {
+ idx = (((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 + (g % 2))
+ * IC_per_G + c;
+ } else {
+ idx = ((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) * OC_per_G + k)
+ * 4 + (c % 4);
+ }
+ return idx;
+}
+
+/**
+ * @brief Pack or unpack matrix
*
* Let IC_per_G be number of input channels per group and OC_per_G be number of
* output channels per group.
@@ -53,15 +106,17 @@ PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::PackWeightMatrixForGConv(
* on 2 groups at a time and full SIMD width can be efficiently utilized even
* while working on 1 group at a time.
* In this case, the layout is G (C/4) R S K 4
- */
+*/
+
template <typename T, typename accT, int SPATIAL_DIM>
-void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack_unpack_(
+ const T* src, T* dst, bool ispack) {
// filters are assumed to be in G RS C/G K/G format
int R = conv_param_.K[0];
int S = conv_param_.K[1];
int G = conv_param_.G;
- int IC_per_G = conv_param_.IC / conv_param_.G;
- int OC_per_G = conv_param_.OC / conv_param_.G;
+ int IC_per_G = conv_param_.IC / G;
+ int OC_per_G = conv_param_.OC / G;
// If transpose option is set, the weight matrix is in layout G K/G (R S C/G)
// instead of G (R S C/G) K/G
@@ -73,25 +128,13 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
for (int k = 0; k < OC_per_G; ++k) {
for (int g = 0; g < G; ++g) {
for (int c = 0; c < IC_per_G; ++c) {
- inpType b = tr
- ? sdata_
- [(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c]
- : sdata_
- [(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k];
- if (IC_per_G == 4) {
- // For IC_per_G == 4, we need to work on 2 groups at a time
- pdata_
- [(((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 +
- (g % 2)) *
- IC_per_G +
- c] = b;
+ int p_idx = packed_index_(r, s, k, g, c);
+ int up_idx = unpacked_index_(r, s, k, g, c, tr);
+ // Pack: src (unpacked) -> dst (packed)
+ if (ispack) {
+ dst[p_idx] = src[up_idx];
} else {
- pdata_
- [((((g * (IC_per_G / 4) + (c / 4)) * R + r) * S + s) *
- OC_per_G +
- k) *
- 4 +
- (c % 4)] = b;
+ dst[up_idx] = src[p_idx];
}
}
}
@@ -99,14 +142,54 @@ void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
}
}
} else {
+ // For pack & transposed, call transposeConvWeights()
+ // G K/G (R S C/G) => G (R S C/G) K/G
if (tr) {
- // conv_ref expects weights to be in G (R S C/G) K/G format
- transposeConvWeights(conv_param_, sdata_, pdata_);
+ if (ispack) {
+ transposeConvWeights(conv_param_, src, dst);
+ } else {
+ // TODO: Wrap this as an inverseTransposeConvWeights()?
+ // For unpack & transposed, call transposeConvWeights()
+ // G (R S C/G) K/G => G K/G (R S C/G)
+ for (int r = 0; r < R; ++r) {
+ for (int s = 0; s < S; ++s) {
+ for (int k = 0; k < OC_per_G; ++k) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < IC_per_G; ++c) {
+ dst[(((g * OC_per_G + k) * R + r) * S + s)
+ * IC_per_G + c] =
+ src[(((g * R + r) * S + s) * IC_per_G + c)
+ * OC_per_G + k];
+ }
+ }
+ }
+ }
+ }
+ } // end if(ispack)
} else {
// just copy the data for not supported cases
- memcpy(pdata_, sdata_, G * R * S * OC_per_G * IC_per_G * sizeof(inpType));
- }
- }
+ memcpy(dst, src,
+ G * R * S * OC_per_G * IC_per_G * sizeof(inpType));
+ } //end if(tr)
+ } // end if(fbgemmOptimizedGConv(conv_param_)
+}
+
+/**
+ * @brief Pack weight tensor in a suitable format required for the optimized
+ * kernel.
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::pack() {
+ pack_unpack_(sdata_, pdata_, true);
+}
+
+/**
+ * @brief Unpack the packed weight tensor (for the optimized kernel)
+ * to the original form.
+ */
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>::unpack(T* origin_buf) {
+ pack_unpack_(const_cast<const T*>(pdata_), origin_buf, false);
}
template class PackWeightMatrixForGConv<int8_t, int32_t, 2>;
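A worked instance of the two index helpers, for R = S = 3, G = 8, IC_per_G = OC_per_G = 4 (the branch that processes two groups together). The numbers below just evaluate the formulas from packed_index_() and unpacked_index_():
```cpp
// Element (r=1, s=2, k=3, g=5, c=1) under both layouts.
constexpr int R = 3, S = 3, IC_per_G = 4, OC_per_G = 4;
constexpr int r = 1, s = 2, k = 3, g = 5, c = 1;
// Unpacked, non-transposed G (R S C/G) K/G layout:
constexpr int up = (((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k;
// Packed, IC_per_G == 4 branch (groups 4 and 5 interleaved in pairs):
constexpr int pk =
    (((((g / 2) * R + r) * S + s) * OC_per_G + k) * 2 + (g % 2)) * IC_per_G + c;
static_assert(up == 807 && pk == 765, "weight moves from slot 807 to slot 765");
```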
diff --git a/src/PackWeightsForConv.cc b/src/PackWeightsForConv.cc
index c811144..25b04af 100644
--- a/src/PackWeightsForConv.cc
+++ b/src/PackWeightsForConv.cc
@@ -4,6 +4,7 @@
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
+#include <algorithm>
#include <memory>
#include "fbgemm/Fbgemm.h"
@@ -13,7 +14,8 @@ template <int SPATIAL_DIM, typename T, typename accT>
PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
const conv_param_t<SPATIAL_DIM>& conv_p,
const T* sdata,
- const BlockingFactors* blocking_params) {
+ const BlockingFactors* blocking_params)
+ : conv_param_(conv_p) {
static_assert(
SPATIAL_DIM == 2 || SPATIAL_DIM == 3,
"Only 2D and 3D convolutions are supported");
@@ -42,18 +44,36 @@ PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
W_dw_3D_packed_ = nullptr;
W_gconv_packed_ =
std::make_shared<PackWeightMatrixForGConv<T, accT, SPATIAL_DIM>>(
- matrix_op_t::NoTranspose, conv_p, sdata, nullptr);
+ matrix_op_t::Transpose, conv_p, sdata, nullptr);
+ break;
+ }
+ case optimized_conv_t::pointwise: {
+ W_im2col_packed_ = nullptr;
+ W_dw_2D_packed_ = nullptr;
+ W_dw_3D_packed_ = nullptr;
+ W_gconv_packed_ = nullptr;
+ int NDim = conv_p.OC / conv_p.G;
+ int KDim = conv_p.K[0] * conv_p.K[1] * conv_p.IC;
+ W_pointwise_packed_ = std::make_shared<PackBMatrix<T, accT>>(
+ matrix_op_t::Transpose,
+ KDim,
+ NDim,
+ sdata,
+ KDim / conv_p.G,
+ nullptr,
+ conv_p.G,
+ blocking_params);
break;
}
case optimized_conv_t::im2col: {
int NDim = conv_p.OC / conv_p.G;
int KDim = conv_p.K[0] * conv_p.K[1] * conv_p.IC;
W_im2col_packed_ = std::make_shared<PackBMatrix<T, accT>>(
- matrix_op_t::NoTranspose,
+ matrix_op_t::Transpose,
KDim,
NDim,
sdata,
- NDim,
+ KDim / conv_p.G,
nullptr,
conv_p.G,
blocking_params);
@@ -65,6 +85,46 @@ PackWeightsForConv<SPATIAL_DIM, T, accT>::PackWeightsForConv(
} // switch
}
+template <int SPATIAL_DIM, typename T, typename accT>
+void PackWeightsForConv<SPATIAL_DIM, T, accT>::unpack(T* origin_buf) {
+ if (W_dw_2D_packed_) {
+ W_dw_2D_packed_->unpack(origin_buf);
+ } else if (W_dw_3D_packed_) {
+ W_dw_3D_packed_->unpack(origin_buf);
+ } else if (W_gconv_packed_) {
+ W_gconv_packed_->unpack(origin_buf);
+ } else if (W_im2col_packed_) {
+ W_im2col_packed_->unpack(origin_buf);
+ } else if (W_pointwise_packed_) {
+ W_pointwise_packed_->unpack(origin_buf);
+ } else {
+ assert(false && "At least one packed weights object should exist");
+ }
+}
+
+template <int SPATIAL_DIM, typename T, typename accT>
+bool PackWeightsForConv<SPATIAL_DIM, T, accT>::isPackingCompliant(
+ const conv_param_t<SPATIAL_DIM>& test_conv_p) {
+ return conv_param_.IC == test_conv_p.IC && conv_param_.OC == test_conv_p.OC &&
+ conv_param_.G == test_conv_p.G &&
+ std::equal(
+ conv_param_.K.begin(),
+ conv_param_.K.end(),
+ test_conv_p.K.begin()) &&
+ std::equal(
+ conv_param_.stride.begin(),
+ conv_param_.stride.end(),
+ test_conv_p.stride.begin()) &&
+ std::equal(
+ conv_param_.pad.begin(),
+ conv_param_.pad.end(),
+ test_conv_p.pad.begin()) &&
+ std::equal(
+ conv_param_.dilation.begin(),
+ conv_param_.dilation.end(),
+ test_conv_p.dilation.begin());
+}
+
template class PackWeightsForConv<2, int8_t, int32_t>;
template class PackWeightsForConv<3, int8_t, int32_t>;
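Note what isPackingCompliant() deliberately leaves out: MB and IN_DIM are not compared, because packed weights are reusable across batch and image sizes — but not across channel counts, groups, kernel shapes, strides, pads, or dilations. A hedged sketch of the guard (compliance_example is an illustrative name; W must hold 3*3*64*64 int8 weights):
```cpp
#include <cassert>
#include <vector>
#include "fbgemm/Fbgemm.h"
using namespace fbgemm;

void compliance_example(const std::vector<std::int8_t>& W) {
  conv_param_t<2> p1(1, 64, 64, {28, 28}, 1, {3, 3}, {1, 1}, {1, 1, 1, 1});
  conv_param_t<2> p2(1, 64, 64, {28, 28}, 1, {3, 3}, {2, 2}, {1, 1, 1, 1});
  PackWeightsForConv<2> packed(p1, W.data());
  assert(packed.isPackingCompliant(p1));
  assert(!packed.isPackingCompliant(p2));  // stride differs: fbgemmConv throws
}
```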
diff --git a/src/RefImplementations.cc b/src/RefImplementations.cc
index b4b0c2b..e3c0eac 100644
--- a/src/RefImplementations.cc
+++ b/src/RefImplementations.cc
@@ -181,8 +181,7 @@ void cblas_sgemm_ref(
int ldb,
float beta,
float* Cfp32,
- int ldc
- ) {
+ int ldc) {
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
float sum = 0;
@@ -204,7 +203,6 @@ void cblas_sgemm_ref(
}
}
-
void row_offsets_u8acc32_ref(
int M,
int K,
@@ -542,21 +540,49 @@ void transposeConvWeights(
const conv_param_t<SPATIAL_DIM>& conv_p,
const std::int8_t* src,
std::int8_t* dest) {
- assert(SPATIAL_DIM == 2 && "Only 2D supported currently");
- int R = conv_p.K[0];
- int S = conv_p.K[1];
int G = conv_p.G;
int IC_per_G = conv_p.IC / conv_p.G;
int OC_per_G = conv_p.OC / conv_p.G;
- // Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
- for (int r = 0; r < R; ++r) {
- for (int s = 0; s < S; ++s) {
- for (int k = 0; k < OC_per_G; ++k) {
- for (int g = 0; g < G; ++g) {
- for (int c = 0; c < IC_per_G; ++c) {
- dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
- src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
+ assert(
+ (SPATIAL_DIM == 3 || SPATIAL_DIM == 2) &&
+ "Only 2D and 3D convolutions are supported");
+ if (SPATIAL_DIM == 2) {
+ int R = conv_p.K[0];
+ int S = conv_p.K[1];
+ // Transforms weights from G K/G (R S C/G) to G (R S C/G) K/G format.
+ for (int r = 0; r < R; ++r) {
+ for (int s = 0; s < S; ++s) {
+ for (int k = 0; k < OC_per_G; ++k) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < IC_per_G; ++c) {
+ dest[(((g * R + r) * S + s) * IC_per_G + c) * OC_per_G + k] =
+ src[(((g * OC_per_G + k) * R + r) * S + s) * IC_per_G + c];
+ }
+ }
+ }
+ }
+ }
+ } else {
+ // Transforms weights from G K/G (T R S C/G) to G (T R S C/G) K/G format.
+ int T = conv_p.K[0];
+ int R = conv_p.K[1];
+ int S = conv_p.K[2];
+ for (int t = 0; t < T; ++t) {
+ for (int r = 0; r < R; ++r) {
+ for (int s = 0; s < S; ++s) {
+ for (int k = 0; k < OC_per_G; ++k) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < IC_per_G; ++c) {
+ dest
+ [((((g * T + t) * R + r) * S + s) * IC_per_G + c) *
+ OC_per_G +
+ k] =
+ src[((((g * OC_per_G + k) * T + t) * R + r) * S + s) *
+ IC_per_G +
+ c];
+ }
+ }
}
}
}
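Since the spatial indices keep their relative order, the transform is, per group, a transpose of the OC_per_G x (T R S IC_per_G) weight block. With a 1x1 kernel it degenerates to a plain per-group matrix transpose — a minimal sketch, assuming the helper is reachable via the reference-implementation header the tests include:
```cpp
#include <cstdint>
#include "src/RefImplementations.h"
using namespace fbgemm;

// G = 2 groups, 1x1 kernel, IC_per_G = OC_per_G = 2: element (g, k, c) moves
// from src[(g*2 + k)*2 + c] to dest[(g*2 + c)*2 + k], e.g. src[5] -> dest[6].
void tiny_transpose_example(const std::int8_t* src, std::int8_t* dest) {
  conv_param_t<2> p(1, 4, 4, {8, 8}, /*G=*/2, {1, 1}, {1, 1}, {0, 0, 0, 0});
  transposeConvWeights<2>(p, src, dest);
}
```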
diff --git a/test/GConvTest.cc b/test/GConvTest.cc
index 84f0d52..0074535 100644
--- a/test/GConvTest.cc
+++ b/test/GConvTest.cc
@@ -43,6 +43,8 @@ class fbgemmGConvAcc32WithQuantGranularityTest
QuantizationGranularity,
bool,
bool>> {};
+class fbgemmGConvPackTest
+ : public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t>> {};
}; // namespace
INSTANTIATE_TEST_CASE_P(
@@ -61,6 +63,13 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(qGranularityVals),
::testing::Bool(), // A symmetric
::testing::Bool())); // B symmetric
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ fbgemmGConvPackTest,
+ ::testing::Combine(
+ ::testing::Values(matrix_op_t::NoTranspose),
+ ::testing::ValuesIn(transposeVals)));
/**
* @brief Shapes for unit test.
*/
@@ -413,3 +422,51 @@ TEST_P(fbgemmGConvAcc32Test, NoRequantizeTest) {
static_cast<int32_t>(0));
} // for each shape
}
+
+/**
+ * @brief Unit test for packing and unpacking the weight tensor
+ */
+TEST_P(fbgemmGConvPackTest, PackUnpackTest) {
+ vector<conv_param_t<>> shapes(GetShapes_());
+ matrix_op_t atrans, btrans;
+ tie(atrans, btrans) = GetParam();
+
+ for (auto conv_p : shapes) {
+ int R = conv_p.K[0];
+ int S = conv_p.K[1];
+ int IC_per_G = conv_p.IC / conv_p.G;
+ int OC_per_G = conv_p.OC / conv_p.G;
+
+ // Weights -- test the packing/unpacking of only the weights
+ // when btrans == Transpose, the weight matrix is in layout G K/G (R S C/G)
+ // instead of G (R S C/G) K/G
+ int weight_len = R * S * conv_p.G * IC_per_G * OC_per_G;
+ aligned_vector<int8_t> Bint8(weight_len, 0);
+
+ // Random fill the weights
+ randFill<int8_t>(Bint8, -4, 4);
+
+ // Instantiate the object
+ PackWeightMatrixForGConv<int8_t> packedWeights(
+ btrans, conv_p, Bint8.data(), nullptr);
+
+ // Setup a buffer to get pack -> unpacked results
+ aligned_vector<int8_t> unpack_buf(weight_len, 0);
+
+ // START Actual pack-unpack operations
+ // Perform packing first. This should populate pdata_ of packedWeights
+ packedWeights.pack();
+
+ // Next perform unpacking
+ packedWeights.unpack(unpack_buf.data());
+ // END actual pack-unpack operations
+
+ // Sanity check
+ for (int i = 0; i < weight_len; ++i) {
+ EXPECT_EQ(Bint8.data()[i], unpack_buf.data()[i])
+ << "Pack/Unpack results differ at index " << i
+ << ", Reference: " << static_cast<int> (Bint8.data()[i])
+ << ", Pack-Unpacked: " << static_cast<int> (unpack_buf.data()[i]);
+ }
+ } // for each shape
+}
diff --git a/test/I8DepthwiseTest.cc b/test/I8DepthwiseTest.cc
index 11bd625..0604879 100644
--- a/test/I8DepthwiseTest.cc
+++ b/test/I8DepthwiseTest.cc
@@ -69,8 +69,16 @@ static vector<vector<int>> shapes = {
};
namespace {
-class FBGemmDepthWiseTest
- : public testing::TestWithParam<tuple<bool, bool>> {};
+
+class FBGemmDepthWiseTest : public testing::TestWithParam<tuple<bool, bool>> {};
+
+// Two parameters are K (or Groups) and kernel_prod, i.e.,
+// (output_channels)(kernel_prod)
+// output_channels == Groups.
+// For example, kernel_prod for 3x3 convolution is 9
+class FBGemmDepthWisePackUnpackTest
+ : public testing::TestWithParam<tuple<int, int>> {};
+
} // namespace
INSTANTIATE_TEST_CASE_P(
@@ -78,6 +86,13 @@ INSTANTIATE_TEST_CASE_P(
FBGemmDepthWiseTest,
::testing::Combine(::testing::Bool(), ::testing::Bool()));
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ FBGemmDepthWisePackUnpackTest,
+ ::testing::Combine(
+ ::testing::ValuesIn({8, 16, 24, 32, 40, 64, 72}),
+ ::testing::ValuesIn({1, 2, 3, 4, 5, 9, 10, 11, 27})));
+
TEST_P(FBGemmDepthWiseTest, Test3x3) {
bool a_symmetric, b_symmetric;
tie(a_symmetric, b_symmetric) = GetParam();
@@ -297,8 +312,8 @@ TEST_P(FBGemmDepthWiseTest, Test3x3x3) {
for (int k = 0; k < K; ++k) {
int32_t expected = C_uint8_ref
[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k];
- int32_t actual = C_uint8
- [(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k];
+ int32_t actual =
+ C_uint8[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + k];
EXPECT_EQ(expected, actual)
<< "Depthwise 3x3 results differ at (" << n << ", " << t
<< ", " << h << ", " << w << ", " << k << ").";
@@ -561,4 +576,50 @@ TEST(FBGemmDepthWiseTest, Test3x3x3PerChannelQuantization) {
} // for each shape
} // Test3x3PerChannelQuantization
+TEST_P(FBGemmDepthWisePackUnpackTest, TestPackUnpack) {
+ int K, kernel_prod;
+ tie(K, kernel_prod) = GetParam();
+
+ ASSERT_EQ(K % 8, 0)
+ << "output channels (== groups) should be a multiple of 8";
+ aligned_vector<int8_t> B(K * kernel_prod);
+ randFill<int8_t>(B, -16, 16);
+
+ aligned_vector<int8_t> BUnpacked(K * kernel_prod);
+
+ if (kernel_prod == 1) {
+ Packed1ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 2) {
+ Packed2ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 3) {
+ Packed3ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 4) {
+ Packed4ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 5) {
+ Packed5ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 9) {
+ Packed3x3ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 10) {
+ Packed10ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 11) {
+ Packed11ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else if (kernel_prod == 27) {
+ Packed3x3x3ConvMatrix BPacked(K, B.data());
+ BPacked.unpack(BUnpacked.data());
+ } else {
+ ASSERT_TRUE(false);
+ }
+
+ ASSERT_EQ(B, BUnpacked)
+ << "Original and unpacked data elements are not the same";
+} // TestPackUnpack
+
} // namespace fbgemm
diff --git a/test/PackedRequantizeAcc16Test.cc b/test/PackedRequantizeAcc16Test.cc
index 20f860e..23af3eb 100644
--- a/test/PackedRequantizeAcc16Test.cc
+++ b/test/PackedRequantizeAcc16Test.cc
@@ -27,7 +27,7 @@ using namespace std;
using namespace fbgemm;
vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
- matrix_op_t::Transpose};
+ matrix_op_t::Transpose};
vector<QuantizationGranularity> qGranularityVals{
QuantizationGranularity::TENSOR,
@@ -40,6 +40,8 @@ class fbgemmu8s8acc16WithQuantGranularityTest
tuple<matrix_op_t, matrix_op_t, bool, QuantizationGranularity>> {};
class fbgemmu8s8acc16Test
: public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t, bool>> {};
+class fbgemmPackUnpackAcc16Test
+ : public testing::TestWithParam<tuple<matrix_op_t, bool>> {};
}; // namespace
INSTANTIATE_TEST_CASE_P(
@@ -59,6 +61,11 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(transposeVals),
::testing::Bool()));
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ fbgemmPackUnpackAcc16Test,
+ ::testing::Combine(::testing::ValuesIn(transposeVals), ::testing::Bool()));
+
/**
* @brief Shapes for unit test.
*/
@@ -810,3 +817,66 @@ TEST_P(fbgemmu8s8acc16Test, NoRequantizeTest) {
} // for each groups
} // for each shape
}
+
+/**
+ * @brief Unit test for packing and unpacking the weight tensor.
+ */
+TEST_P(fbgemmPackUnpackAcc16Test, TestPackUnpack) {
+ vector<vector<int>> shapes(GetShapes_());
+ matrix_op_t btrans;
+ bool test_ld;
+ tie(btrans, test_ld) = GetParam();
+
+ for (auto shape : shapes) {
+ for (int groups : {1, 3, 4}) {
+ int n = shape[1];
+ int k = shape[2];
+
+ if (k % groups != 0) {
+ continue;
+ }
+ int k_per_group = k / groups;
+
+ // kxn matrix
+ aligned_vector<int8_t> Bint8(k * n);
+ randFill<int8_t>(Bint8, -128, 127);
+
+ // To test lda != k , we just reduce k by half and use the original k
+ // as lda.
+ int n_adjusted = n;
+ if (test_ld) {
+ if (btrans == matrix_op_t::NoTranspose) {
+ n_adjusted = std::max(n / 2, 1);
+ }
+ }
+
+ // Note that packing for weight is performed during the constructor
+ // stage.
+ PackBMatrix<int8_t, int16_t> packedWeights(
+ btrans,
+ k,
+ n_adjusted,
+ Bint8.data(),
+ (btrans == matrix_op_t::Transpose) ? k_per_group : n,
+ nullptr,
+ groups);
+
+ // Setup a buffer to get pack -> unpacked results
+ aligned_vector<int8_t> unpack_buf(k * n, 0);
+
+ // Perform unpacking
+ packedWeights.unpack(unpack_buf.data());
+
+ // Sanity check
+ for (int i = 0; i < k; i++) {
+ for (int j = 0; j < n_adjusted; j++) {
+ EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
+ << "Pack/Unpack results differ at index (" << i << ", " << j
+ << ", Reference: " << static_cast<int>(Bint8.data()[i * n + j])
+ << ", Pack-Unpacked: "
+ << static_cast<int>(unpack_buf.data()[i * n + j]);
+ }
+ }
+ }
+ }
+}
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index fd827b0..11ef6ff 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -39,6 +39,8 @@ class fbgemmu8s8acc32WithQuantGranularityTest
tuple<matrix_op_t, matrix_op_t, bool, QuantizationGranularity>> {};
class fbgemmu8s8acc32Test
: public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t, bool>> {};
+class fbgemmPackUnpackAcc32Test
+ : public testing::TestWithParam<tuple<matrix_op_t, bool>> {};
}; // namespace
INSTANTIATE_TEST_CASE_P(
@@ -58,6 +60,11 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(transposeVals),
::testing::Bool()));
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ fbgemmPackUnpackAcc32Test,
+ ::testing::Combine(::testing::ValuesIn(transposeVals), ::testing::Bool()));
+
/**
* @brief Shapes for unit test.
*/
@@ -749,3 +756,66 @@ TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) {
} // for each groups
} // for each shape
}
+
+/**
+ * @brief Unit test for packing and unpacking the weight tensor.
+ */
+TEST_P(fbgemmPackUnpackAcc32Test, TestPackUnpack) {
+ vector<vector<int>> shapes(GetShapes_());
+ matrix_op_t btrans;
+ bool test_ld;
+ tie(btrans, test_ld) = GetParam();
+
+ for (auto shape : shapes) {
+ for (int groups : {1, 3, 4}) {
+ int n = shape[1];
+ int k = shape[2];
+
+ if (k % groups != 0) {
+ continue;
+ }
+ int k_per_group = k / groups;
+
+ // kxn matrix
+ aligned_vector<int8_t> Bint8(k * n);
+ randFill<int8_t>(Bint8, -128, 127);
+
+ // To test lda != k , we just reduce k by half and use the original k
+ // as lda.
+ int n_adjusted = n;
+ if (test_ld) {
+ if (btrans == matrix_op_t::NoTranspose) {
+ n_adjusted = std::max(n / 2, 1);
+ }
+ }
+
+ // Note that packing for weight is performed during the constructor
+ // stage.
+ PackBMatrix<int8_t> packedWeights(
+ btrans,
+ k,
+ n_adjusted,
+ Bint8.data(),
+ (btrans == matrix_op_t::Transpose) ? k_per_group : n,
+ nullptr,
+ groups);
+
+ // Setup a buffer to get pack -> unpacked results
+ aligned_vector<int8_t> unpack_buf(k * n, 0);
+
+ // Perform unpacking
+ packedWeights.unpack(unpack_buf.data());
+
+ // Sanity check
+ for (int i = 0; i < k; i++) {
+ for (int j = 0; j < n_adjusted; j++) {
+ EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
+ << "Pack/Unpack results differ at index (" << i << ", " << j
+ << ", Reference: " << static_cast<int>(Bint8.data()[i * n + j])
+ << ", Pack-Unpacked: "
+ << static_cast<int>(unpack_buf.data()[i * n + j]);
+ }
+ }
+ }
+ }
+}
diff --git a/test/QuantUtilsTest.cc b/test/QuantUtilsTest.cc
new file mode 100644
index 0000000..ddb1f91
--- /dev/null
+++ b/test/QuantUtilsTest.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <algorithm>
+#include <limits>
+#include <random>
+
+#include <gtest/gtest.h>
+
+#include "fbgemm/QuantUtils.h"
+#include "fbgemm/Utils.h"
+
+using namespace std;
+using namespace fbgemm;
+
+// tuple represents K, C, X, G, layout_t
+// layout_t can be KCX or KXC
+class QuantizeGroupwiseTest
+ : public testing::TestWithParam<tuple<int, int, int, int, layout_t>> {};
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ QuantizeGroupwiseTest,
+ ::testing::Combine(
+ ::testing::ValuesIn({4, 12, 64}), // K
+ ::testing::ValuesIn({12, 16, 32}), // C
+ ::testing::ValuesIn({1, 10, 15, 30}), // X
+ ::testing::ValuesIn({1, 4}), // G
+ ::testing::ValuesIn({layout_t::KCX, layout_t::KXC})));
+
+template <typename T, layout_t LT>
+void ref_impl(
+ const vector<float>& src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const vector<float>& scales,
+ const vector<int>& zero_points,
+ vector<T>& dst) {
+ int C_per_G = C / G;
+ for (int i = 0; i < K; ++i) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < C / G; ++c) {
+ for (int x = 0; x < X; ++x) {
+ float num;
+ if (LT == layout_t::KCX) {
+ num = src[(i * C + g * C_per_G + c) * X + x];
+ } else {
+ num = src[(i * X + x) * C + g * C_per_G + c];
+ }
+ int res = nearbyint(zero_points[g] + num / scales[g]);
+ T final_res = min<T>(
+ max<T>(res, numeric_limits<T>::min()), numeric_limits<T>::max());
+ if (LT == layout_t::KCX) {
+ dst[(i * C + g * C_per_G + c) * X + x] = final_res;
+ } else {
+ dst[(i * X + x) * C + g * C_per_G + c] = final_res;
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T, layout_t LT>
+void runTests(
+ const vector<float>& src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const vector<float>& scales,
+ const vector<int>& zero_points,
+ vector<T>& dst,
+ vector<T>& dst_ref) {
+ QuantizeGroupwise<T, LT>(
+ src.data(), K, C, X, G, scales.data(), zero_points.data(), dst.data());
+
+ ref_impl<T, LT>(src, K, C, X, G, scales, zero_points, dst_ref);
+}
+
+/**
+ * There can be off-by-one error in quantized values due to how the mid-point
+ * cases are rounded-off in vectorized vs scalar codes and due to adding of
+ * zero_point before rounding vs after rounding. We ignore such differences
+ * while comparing results.
+ */
+template <typename T>
+::testing::AssertionResult isNear(
+ const vector<T>& res,
+ const vector<T>& res_ref) {
+ bool match = true;
+ if (res.size() == res_ref.size()) {
+ for (int i = 0; i < res.size(); ++i) {
+ if (!(res[i] == res_ref[i] || res[i] == res_ref[i] + 1 ||
+ res[i] == res_ref[i] - 1)) {
+ match = false;
+ break;
+ }
+ }
+ }
+ if (match)
+ return ::testing::AssertionSuccess();
+ else
+ return ::testing::AssertionFailure() << " Quantized results do not match";
+}
+
+/**
+ * Test for QuantizeGroupwise
+ */
+TEST_P(QuantizeGroupwiseTest, quantizeTest) {
+ int K, C, X, G;
+ layout_t layout;
+ tie(K, C, X, G, layout) = GetParam();
+
+ random_device rd;
+ mt19937 gen(rd());
+
+ uniform_real_distribution<float> disFP(0.1, 1.1);
+
+ vector<float> inp(K * C * X);
+ generate(inp.begin(), inp.end(), [&, disFP]() mutable { return disFP(gen); });
+
+ vector<float> scales(G);
+ generate(scales.begin(), scales.end(), [&, disFP]() mutable {
+ return disFP(gen);
+ });
+
+ uniform_int_distribution<> disUInt8(0, 8);
+ vector<int> zero_points_uint8(G);
+ generate(
+ zero_points_uint8.begin(),
+ zero_points_uint8.end(),
+ [&, disUInt8]() mutable { return disUInt8(gen); });
+
+ uniform_int_distribution<> disInt8(-64, 63);
+ vector<int> zero_points_int8(G);
+ generate(
+ zero_points_int8.begin(), zero_points_int8.end(), [&, disInt8]() mutable {
+ return disInt8(gen);
+ });
+
+ uniform_int_distribution<> disInt32(-512, 512);
+ vector<int> zero_points_int32(G);
+ generate(
+ zero_points_int32.begin(),
+ zero_points_int32.end(),
+ [&, disInt32]() mutable { return disInt32(gen); });
+
+ vector<uint8_t> dstuint8(K * C * X);
+ vector<uint8_t> dstuint8_ref(K * C * X);
+
+ vector<int8_t> dstint8(K * C * X);
+ vector<int8_t> dstint8_ref(K * C * X);
+
+ vector<int32_t> dstint32(K * C * X);
+ vector<int32_t> dstint32_ref(K * C * X);
+
+ if (layout == layout_t::KCX) {
+ runTests<uint8_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref);
+ runTests<int8_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref);
+ runTests<int32_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref);
+ } else {
+ runTests<uint8_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref);
+ runTests<int8_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref);
+ runTests<int32_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref);
+ }
+
+ EXPECT_TRUE(isNear(dstuint8, dstuint8_ref));
+ EXPECT_TRUE(isNear(dstint8, dstint8_ref));
+ EXPECT_TRUE(isNear(dstint32, dstint32_ref));
+}
diff --git a/test/UniConvPackingTest.cc b/test/UniConvPackingTest.cc
deleted file mode 100644
index 77552af..0000000
--- a/test/UniConvPackingTest.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc. and its affiliates.
- * All rights reserved.
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-#include <algorithm>
-#include <random>
-#include <iostream>
-
-
-#include <gtest/gtest.h>
-
-#include "QuantizationHelpers.h"
-#include "TestUtils.h"
-#include "bench/BenchUtils.h"
-#include "fbgemm/Fbgemm.h"
-#include "src/RefImplementations.h"
-
-using namespace std;
-using namespace fbgemm;
-
-namespace {
-
-// tuple represents MB, IC, OC, IT, IH, IW, KH/KW, stride, pad
-class convPackingTest
- : public testing::TestWithParam<
- tuple<int, int, int, int, int, int, int, int, int, int>> {};
-
-}; // namespace
-
-INSTANTIATE_TEST_CASE_P(
- InstantiationName,
- convPackingTest,
- ::testing::Combine(
- ::testing::ValuesIn({1, 2}), // MB
- ::testing::ValuesIn({16, 32}), // IC
- ::testing::ValuesIn({16, 32}), // OC
- ::testing::ValuesIn({17}), // IT
- ::testing::ValuesIn({10, 30, 55}), // IH
- ::testing::ValuesIn({10, 30, 55}), // IW
- ::testing::ValuesIn({1, 4, 16}), // G
- ::testing::ValuesIn({3, 7}), // kernel
- ::testing::ValuesIn({1, 2}), // stride
- ::testing::ValuesIn({1, 2}))); // pad
-
-/**
- * Test for conv packing
- */
-TEST_P(convPackingTest, packingTest) {
- int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
- tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();
-
- conv_param_t<2> conv_p_2d(
- MB,
- IC,
- OC,
- {IH, IW},
- G,
- {kernel, kernel},
- {stride, stride},
- {pad, pad, pad, pad});
-
- int kernel_dim_2d = kernel * kernel;
- aligned_vector<int8_t> Bint8_2d(
- kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
- PackWeightsForConv<2> packedB_2D(conv_p_2d, Bint8_2d.data());
-
- switch (ConvFastPath<2, int32_t>(conv_p_2d)) {
- case optimized_conv_t::depthwise: {
- ASSERT_NE(packedB_2D.getPackedWFor2DDW(), nullptr)
- << "2D depthwise packed matrix is null";
- ASSERT_EQ(packedB_2D.getPackedWForIm2col(), nullptr)
- << "im2col packed matrix should be null";
- ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
- << "3D depthwise packed matrix should be null";
- ASSERT_EQ(packedB_2D.getPackedWForGroupwise(), nullptr)
- << "groupwise packed matrix should be null";
- break;
- }
- case optimized_conv_t::groupwise: {
- ASSERT_EQ(packedB_2D.getPackedWForIm2col(), nullptr)
- << "im2col packed matrix should be null";
- ASSERT_EQ(packedB_2D.getPackedWFor2DDW(), nullptr)
- << "2D depthwise packed matrix is null";
- ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
- << "3D depthwise packed matrix should be null";
- ASSERT_NE(packedB_2D.getPackedWForGroupwise(), nullptr)
- << "Groupwise packed matrix is null";
- break;
- }
- case optimized_conv_t::im2col: {
- ASSERT_EQ(packedB_2D.getPackedWFor2DDW(), nullptr)
- << "2D depthwise packed matrix is null";
- ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
- << "3D depthwise packed matrix should be null";
- ASSERT_EQ(packedB_2D.getPackedWForGroupwise(), nullptr)
- << "groupwise packed matrix should be null";
- ASSERT_NE(packedB_2D.getPackedWForIm2col(), nullptr)
- << "im2col packed matrix is null";
- break;
- }
- }
-
- conv_param_t<3> conv_p_3d(
- MB,
- IC,
- OC,
- {IT, IH, IW},
- G,
- {kernel, kernel, kernel},
- {stride, stride, stride},
- {pad, pad, pad, pad, pad, pad});
-
- int kernel_dim_3d = kernel * kernel * kernel;
- aligned_vector<int8_t> Bint8_3d(
- kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));
- PackWeightsForConv<3> packedB_3D(conv_p_3d, Bint8_3d.data());
-
- switch (ConvFastPath<3, int32_t>(conv_p_3d)) {
- case optimized_conv_t::depthwise: {
- ASSERT_EQ(packedB_3D.getPackedWFor2DDW(), nullptr)
- << "2D depthwise packed matrix is null";
- ASSERT_EQ(packedB_3D.getPackedWForIm2col(), nullptr)
- << "im2col packed matrix should be null";
- ASSERT_NE(packedB_3D.getPackedWFor3DDW(), nullptr)
- << "3D depthwise packed matrix should be null";
- ASSERT_EQ(packedB_3D.getPackedWForGroupwise(), nullptr)
- << "groupwise packed matrix should be null";
- break;
- }
- case optimized_conv_t::groupwise: {
- ASSERT_TRUE(false) << "groupwise are not supported for 3D";
- break;
- }
- case optimized_conv_t::im2col: {
- ASSERT_EQ(packedB_3D.getPackedWFor2DDW(), nullptr)
- << "2D depthwise packed matrix is null";
- ASSERT_EQ(packedB_3D.getPackedWFor3DDW(), nullptr)
- << "3D depthwise packed matrix should be null";
- ASSERT_EQ(packedB_3D.getPackedWForGroupwise(), nullptr)
- << "groupwise packed matrix should be null";
- ASSERT_NE(packedB_3D.getPackedWForIm2col(), nullptr)
- << "im2col packed matrix is null";
- break;
- }
- }
-}
diff --git a/test/UniConvTest.cc b/test/UniConvTest.cc
new file mode 100644
index 0000000..91bf578
--- /dev/null
+++ b/test/UniConvTest.cc
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <algorithm>
+#include <random>
+#include <iostream>
+#include <stdexcept>
+
+
+#include <gtest/gtest.h>
+
+#include "QuantizationHelpers.h"
+#include "TestUtils.h"
+#include "bench/BenchUtils.h"
+#include "fbgemm/Fbgemm.h"
+#include "src/RefImplementations.h"
+
+using namespace std;
+using namespace fbgemm;
+
+namespace {
+
+// tuple represents MB, IC, OC, IT, IH, IW, G, KH/KW, stride, pad
+class uniConvTest
+ : public testing::TestWithParam<
+ tuple<int, int, int, int, int, int, int, int, int, int>> {};
+
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ uniConvTest,
+ ::testing::Combine(
+ ::testing::ValuesIn({1, 2}), // MB
+ ::testing::ValuesIn({16, 32}), // IC
+ ::testing::ValuesIn({16, 32}), // OC
+ ::testing::ValuesIn({17}), // IT
+ ::testing::ValuesIn({10, 30, 55}), // IH
+ ::testing::ValuesIn({10, 30, 55}), // IW
+ ::testing::ValuesIn({1, 4, 16}), // G
+ ::testing::ValuesIn({1, 3, 7}), // kernel
+ ::testing::ValuesIn({1, 2}), // stride
+ ::testing::ValuesIn({0, 1, 2}))); // pad
+
+/**
+ * Test for conv packing
+ */
+TEST_P(uniConvTest, packingTest) {
+ int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
+ tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();
+
+ conv_param_t<2> conv_p_2d(
+ MB,
+ IC,
+ OC,
+ {IH, IW},
+ G,
+ {kernel, kernel},
+ {stride, stride},
+ {pad, pad, pad, pad});
+
+ int kernel_dim_2d = kernel * kernel;
+ aligned_vector<int8_t> Bint8_2d(
+ kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
+ PackWeightsForConv<2> packedB_2D(conv_p_2d, Bint8_2d.data());
+
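+  // ConvFastPath selects the specialized kernel for these parameters;
+  // exactly the packed-weight buffer belonging to the selected path should
+  // be non-null, and all others should be null.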
+ switch (ConvFastPath<2, int32_t>(conv_p_2d)) {
+ case optimized_conv_t::depthwise: {
+ ASSERT_EQ(packedB_2D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
+ << "3D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWForGroupwise(), nullptr)
+ << "groupwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix should be null";
+ ASSERT_NE(packedB_2D.getPackedWFor2DDW(), nullptr)
+ << "2D depthwise packed matrix is null";
+ break;
+ }
+ case optimized_conv_t::groupwise: {
+ ASSERT_EQ(packedB_2D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix should be null";
+      ASSERT_EQ(packedB_2D.getPackedWFor2DDW(), nullptr)
+          << "2D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
+ << "3D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix should be null";
+ ASSERT_NE(packedB_2D.getPackedWForGroupwise(), nullptr)
+ << "Groupwise packed matrix is null";
+ break;
+ }
+ case optimized_conv_t::pointwise: {
+ ASSERT_EQ(packedB_2D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix should be null";
+      ASSERT_EQ(packedB_2D.getPackedWFor2DDW(), nullptr)
+          << "2D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
+ << "3D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWForGroupwise(), nullptr)
+ << "Groupwise packed matrix should be null";
+ ASSERT_NE(packedB_2D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix is null";
+ break;
+ }
+ case optimized_conv_t::im2col: {
+      ASSERT_EQ(packedB_2D.getPackedWFor2DDW(), nullptr)
+          << "2D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWFor3DDW(), nullptr)
+ << "3D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWForGroupwise(), nullptr)
+ << "groupwise packed matrix should be null";
+ ASSERT_EQ(packedB_2D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix should be null";
+ ASSERT_NE(packedB_2D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix is null";
+ break;
+ }
+ }
+
+ conv_param_t<3> conv_p_3d(
+ MB,
+ IC,
+ OC,
+ {IT, IH, IW},
+ G,
+ {kernel, kernel, kernel},
+ {stride, stride, stride},
+ {pad, pad, pad, pad, pad, pad});
+
+ int kernel_dim_3d = kernel * kernel * kernel;
+ aligned_vector<int8_t> Bint8_3d(
+ kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));
+ PackWeightsForConv<3> packedB_3D(conv_p_3d, Bint8_3d.data());
+
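+  // Same invariant for the 3D case (there is no groupwise fast path in 3D).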
+ switch (ConvFastPath<3, int32_t>(conv_p_3d)) {
+ case optimized_conv_t::depthwise: {
+      ASSERT_EQ(packedB_3D.getPackedWFor2DDW(), nullptr)
+          << "2D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForGroupwise(), nullptr)
+ << "groupwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix should be null";
+      ASSERT_NE(packedB_3D.getPackedWFor3DDW(), nullptr)
+          << "3D depthwise packed matrix is null";
+ break;
+ }
+ case optimized_conv_t::groupwise: {
+      ASSERT_TRUE(false) << "groupwise convolution is not supported for 3D";
+ break;
+ }
+ case optimized_conv_t::pointwise: {
+      ASSERT_EQ(packedB_3D.getPackedWFor2DDW(), nullptr)
+          << "2D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWFor3DDW(), nullptr)
+ << "3D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForGroupwise(), nullptr)
+ << "groupwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix should be null";
+ ASSERT_NE(packedB_3D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix is null";
+ break;
+ }
+ case optimized_conv_t::im2col: {
+      ASSERT_EQ(packedB_3D.getPackedWFor2DDW(), nullptr)
+          << "2D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWFor3DDW(), nullptr)
+ << "3D depthwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForGroupwise(), nullptr)
+ << "groupwise packed matrix should be null";
+ ASSERT_EQ(packedB_3D.getPackedWForPointwise(), nullptr)
+ << "pointwise packed matrix should be null";
+ ASSERT_NE(packedB_3D.getPackedWForIm2col(), nullptr)
+ << "im2col packed matrix is null";
+ break;
+ }
+ }
+}
+
+/**
+ * Test for packing/unpacking
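+ * unpack() is expected to exactly invert the packing layout for every fast
+ * path, reproducing the original weight values.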
+ */
+TEST_P(uniConvTest, packUnpackTest) {
+ int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad;
+ tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam();
+
+ conv_param_t<2> conv_p_2d(
+ MB,
+ IC,
+ OC,
+ {IH, IW},
+ G,
+ {kernel, kernel},
+ {stride, stride},
+ {pad, pad, pad, pad});
+
+ int kernel_dim_2d = kernel * kernel;
+
+ aligned_vector<int8_t> Bint8_2d(
+ kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
+ aligned_vector<int8_t> Bint8_2d_unpacked(
+ kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
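+
+  // Randomize the weights before packing so the round trip is not trivially
+  // satisfied by zero-initialized buffers (randFill is the same helper used
+  // in the cornerCases test below).
+  randFill<int8_t>(Bint8_2d, -4, 4);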
+
+ PackWeightsForConv<2> packedB_2D(conv_p_2d, Bint8_2d.data());
+
+ packedB_2D.unpack(Bint8_2d_unpacked.data());
+
+ ASSERT_EQ(Bint8_2d, Bint8_2d_unpacked)
+ << "Original and unpacked data elements are not the same [2D]";
+
+ conv_param_t<3> conv_p_3d(
+ MB,
+ IC,
+ OC,
+ {IT, IH, IW},
+ G,
+ {kernel, kernel, kernel},
+ {stride, stride, stride},
+ {pad, pad, pad, pad, pad, pad});
+
+ int kernel_dim_3d = kernel * kernel * kernel;
+
+ aligned_vector<int8_t> Bint8_3d(
+ kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));
+
+ aligned_vector<int8_t> Bint8_3d_unpacked(
+ kernel_dim_3d * conv_p_3d.IC * (conv_p_3d.OC / conv_p_3d.G));
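+
+  // Same as above: randomize the 3D weights before packing.
+  randFill<int8_t>(Bint8_3d, -4, 4);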
+
+ PackWeightsForConv<3> packedB_3D(conv_p_3d, Bint8_3d.data());
+
+ packedB_3D.unpack(Bint8_3d_unpacked.data());
+
+ ASSERT_EQ(Bint8_3d, Bint8_3d_unpacked)
+ << "Original and unpacked data elements are not the same [3D]";
+}
+
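+/**
+ * Test for the error path when the conv parameters passed to fbgemmConv do
+ * not match the parameters the weights were packed with.
+ */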
+TEST(uniConvTest, cornerCases) {
+ int stride = 1;
+ conv_param_t<2> conv_p_2d(
+ 1, // mini-batch
+ 16, // input channels
+ 32, // output channels
+ {28, 28}, // input height/width
+ 4, // groups
+ {3, 3}, // kernel height/width
+ {stride, stride}, // strides
+ {1, 1, 1, 1}); // padding
+
+ int kernel_dim_2d = conv_p_2d.K[0] * conv_p_2d.K[1];
+
+ aligned_vector<uint8_t> Aint8(
+ conv_p_2d.MB * conv_p_2d.IN_DIM[0] * conv_p_2d.IN_DIM[1] * conv_p_2d.IC);
+ aligned_vector<int8_t> Bint8_2d(
+ kernel_dim_2d * conv_p_2d.IC * (conv_p_2d.OC / conv_p_2d.G));
+ aligned_vector<int32_t> Cint32_fb(
+ conv_p_2d.MB * conv_p_2d.OUT_DIM[0] * conv_p_2d.OUT_DIM[1] *
+ conv_p_2d.OC);
+ aligned_vector<uint8_t> Cint8_fb(Cint32_fb.size(), 0);
+
+ // A matrix (input activations)
+ randFill<uint8_t>(Aint8, 0, 5);
+ int32_t Aint8_zero_point = 4;
+
+ // B matrix (weights)
+ randFill<int8_t>(Bint8_2d, -4, 4);
+ aligned_vector<int32_t> Bint8_zero_point(1);
+ randFill(Bint8_zero_point, -3, -1);
+
+ aligned_vector<float> C_multiplier(Bint8_zero_point.size());
+ randFill(C_multiplier, 0.1234f / 2, 0.1234f * 3 / 2);
+ int32_t C_zero_point = 5;
+
+ PackWeightsForConv<2> packedB_2D(conv_p_2d, Bint8_2d.data());
+
+ vector<int32_t> col_offsets(conv_p_2d.OC);
+
+ DoNothing<> doNothingObj{};
+ ReQuantizeOutput<false, QuantizationGranularity::TENSOR> outputProcObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_point,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, // row offsets
+ col_offsets.data(),
+ nullptr, // bias
+ conv_p_2d.OC,
+ conv_p_2d.G);
+
+ try {
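+    // The weights were packed for stride 1; calling fbgemmConv with
+    // stride 2 should throw a logic_error whose message starts with
+    // "[FBGEMM_CONV_ERROR]".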
+ conv_p_2d.stride[0] = 2;
+ fbgemmConv(
+ conv_p_2d,
+ Aint8.data(),
+ packedB_2D,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ outputProcObj,
+ 0,
+ 1);
+ } catch (std::logic_error const& err) {
+ std::string s(err.what());
+ EXPECT_TRUE(s.rfind("[FBGEMM_CONV_ERROR]", 0) == 0);
+ }
+
+ // reset
+ conv_p_2d.stride[0] = stride;
+ // this should run fine
+ fbgemmConv(
+ conv_p_2d,
+ Aint8.data(),
+ packedB_2D,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ outputProcObj,
+ 0,
+ 1);
+}