Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: dskhudia <dskhudia@fb.com> 2018-11-06 00:17:52 +0300
committer: dskhudia <dskhudia@fb.com> 2018-11-06 00:17:52 +0300
commit: b96bc0bf311f7abdc83ffd3af0a485b4aef53f7c (patch)
tree: 2a6c276d20753abe94c526aab7b109305e3d1d78 /src/PackAWithIm2Col.cc
parent: 14adee1ac506e067489406af689ae9b73fb581bd (diff)
generalized conv_param_t and download third party libraries in build dir
Diffstat (limited to 'src/PackAWithIm2Col.cc')
-rw-r--r-- src/PackAWithIm2Col.cc 143
1 files changed, 118 insertions, 25 deletions
diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
index e067a3e..8dde696 100644
--- a/src/PackAWithIm2Col.cc
+++ b/src/PackAWithIm2Col.cc
@@ -4,26 +4,37 @@
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
-#include <cpuinfo.h>
+#include <algorithm>
#include <cassert>
#include <iomanip>
#include <iostream>
-#include "fbgemm/Fbgemm.h"
+#include <numeric>
+#include <cpuinfo.h>
-#include <algorithm>
+#include "fbgemm/Fbgemm.h"
namespace fbgemm2 {
-template <typename T, typename accT>
-PackAWithIm2Col<T, accT>::PackAWithIm2Col(
- const conv_param_t& conv_p,
+template <typename T, typename accT, int SPATIAL_DIM>
+PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
+ const conv_param_t<SPATIAL_DIM>& conv_p,
const T* sdata,
inpType* pmat,
int32_t zero_pt,
int32_t* row_offset)
- : PackMatrix<PackAWithIm2Col<T, accT>, T, accT>(
- conv_p.MB * conv_p.OH * conv_p.OW,
- conv_p.KH * conv_p.KW * conv_p.IC,
+ : PackMatrix<PackAWithIm2Col<T, accT, SPATIAL_DIM>, T, accT>(
+ conv_p.MB *
+ std::accumulate(
+ conv_p.OUT_DIM.begin(),
+ conv_p.OUT_DIM.end(),
+ 1,
+ std::multiplies<int>()),
+ std::accumulate(
+ conv_p.K.begin(),
+ conv_p.K.end(),
+ 1,
+ std::multiplies<int>()) *
+ conv_p.IC,
pmat,
zero_pt),
conv_p_(conv_p),
@@ -62,8 +73,8 @@ PackAWithIm2Col<T, accT>::PackAWithIm2Col(
}
}
-template <typename T, typename accT>
-void PackAWithIm2Col<T, accT>::pack(const block_type_t& block) {
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
block_type_t block_p = {block.row_start,
block.row_size,
block.col_start,
@@ -72,11 +83,87 @@ void PackAWithIm2Col<T, accT>::pack(const block_type_t& block) {
BaseType::packedBlock(block_p);
T* out = BaseType::getBuf();
+ if (SPATIAL_DIM == 3) { // static if
+ for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
+ int n =
+ i / (conv_p_.OUT_DIM[0] * conv_p_.OUT_DIM[1] * conv_p_.OUT_DIM[2]);
+ int thw =
+ i % (conv_p_.OUT_DIM[0] * conv_p_.OUT_DIM[1] * conv_p_.OUT_DIM[2]);
+ int w = thw % conv_p_.OUT_DIM[2];
+ int h = thw / conv_p_.OUT_DIM[2] % conv_p_.OUT_DIM[1];
+ int t = thw / conv_p_.OUT_DIM[2] / conv_p_.OUT_DIM[1];
+ for (int j = block.col_start;
+ j < block.col_start + block.col_size + conv_p_.IC - 1;
+ j += conv_p_.IC) {
+ int j_blk_id = j / conv_p_.IC;
+ // max( j_blk_id * IC, START) -> min( END, (j_blk_id + 1) * IC )
+ int j_blk_start = std::max(j_blk_id * conv_p_.IC, block.col_start);
+ int j_blk_end = std::min(
+ (j_blk_id + 1) * conv_p_.IC, block.col_start + block.col_size);
+ if (j_blk_start >= j_blk_end) {
+ break;
+ }
+
+ int qrs = j / conv_p_.IC;
+ int s = qrs % conv_p_.K[2];
+ int r = qrs / conv_p_.K[2] % conv_p_.K[1];
+ int q = qrs / conv_p_.K[2] / conv_p_.K[1];
+
+ int t_in = -conv_p_.pad[0] + t * conv_p_.stride[0] + q;
+ int h_in = -conv_p_.pad[1] + h * conv_p_.stride[1] + r;
+ int w_in = -conv_p_.pad[2] + w * conv_p_.stride[2] + s;
+
+ if (t_in < 0 || t_in >= conv_p_.IN_DIM[0] || h_in < 0 ||
+ h_in >= conv_p_.IN_DIM[1] || w_in < 0 ||
+ w_in >= conv_p_.IN_DIM[2]) {
+ // Please note that padding for convolution should be filled with
+ // zero_pt
+ std::memset(
+ &out
+ [(i - block.row_start) * BaseType::blockColSize() +
+ (j_blk_start - block.col_start)],
+ BaseType::zeroPoint(),
+ sizeof(T) * (j_blk_end - j_blk_start));
+ } else {
+ std::memcpy(
+ &out
+ [(i - block.row_start) * BaseType::blockColSize() +
+ j_blk_start - block.col_start],
+ &sdata_
+ [(((n * conv_p_.IN_DIM[0] + t_in) * conv_p_.IN_DIM[1] +
+ h_in) *
+ conv_p_.IN_DIM[2] +
+ w_in) *
+ conv_p_.IC +
+ (j_blk_start % conv_p_.IC)],
+ sizeof(T) * (j_blk_end - j_blk_start));
+ }
+ }
+ // zero fill
+ // Please see the comment in PackAMatrix.cc for zero vs zero_pt fill.
+ if ((block_p.col_start + block_p.col_size) -
+ (block.col_start + block.col_size) >
+ 0) {
+ std::memset(
+ &out
+ [(i - block.row_start) * BaseType::blockColSize() +
+ (block.col_size)],
+ 0,
+ sizeof(T) *
+ ((block_p.col_start + block_p.col_size) -
+ (block.col_start + block.col_size)));
+ }
+ }
+ return;
+ }
+
+ assert(SPATIAL_DIM == 2 && "unsupported conv dimension");
+
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
- int n = i / (conv_p_.OH * conv_p_.OW);
- int hw = i % (conv_p_.OH * conv_p_.OW);
- int w = hw % conv_p_.OW;
- int h = hw / conv_p_.OW;
+ int n = i / (conv_p_.OUT_DIM[0] * conv_p_.OUT_DIM[1]);
+ int hw = i % (conv_p_.OUT_DIM[0] * conv_p_.OUT_DIM[1]);
+ int w = hw % conv_p_.OUT_DIM[1];
+ int h = hw / conv_p_.OUT_DIM[1];
for (int j = block.col_start;
j < block.col_start + block.col_size + conv_p_.IC - 1;
j += conv_p_.IC) {
@@ -90,13 +177,14 @@ void PackAWithIm2Col<T, accT>::pack(const block_type_t& block) {
}
int rs = j / conv_p_.IC;
- int s = rs % conv_p_.KW;
- int r = rs / conv_p_.KW;
+ int s = rs % conv_p_.K[1];
+ int r = rs / conv_p_.K[1];
- int w_in = -conv_p_.pad_w + w * conv_p_.stride_w + s;
- int h_in = -conv_p_.pad_h + h * conv_p_.stride_h + r;
+ int h_in = -conv_p_.pad[0] + h * conv_p_.stride[0] + r;
+ int w_in = -conv_p_.pad[1] + w * conv_p_.stride[1] + s;
- if (h_in < 0 || h_in >= conv_p_.IH || w_in < 0 || w_in >= conv_p_.IW) {
+ if (h_in < 0 || h_in >= conv_p_.IN_DIM[0] || w_in < 0 ||
+ w_in >= conv_p_.IN_DIM[1]) {
// Please note that padding for convolution should be filled with
// zero_pt
std::memset(
@@ -111,7 +199,8 @@ void PackAWithIm2Col<T, accT>::pack(const block_type_t& block) {
[(i - block.row_start) * BaseType::blockColSize() +
j_blk_start - block.col_start],
&sdata_
- [((n * conv_p_.IH + h_in) * conv_p_.IW + w_in) * conv_p_.IC +
+ [((n * conv_p_.IN_DIM[0] + h_in) * conv_p_.IN_DIM[1] + w_in) *
+ conv_p_.IC +
(j_blk_start % conv_p_.IC)],
sizeof(T) * (j_blk_end - j_blk_start));
}
@@ -133,8 +222,9 @@ void PackAWithIm2Col<T, accT>::pack(const block_type_t& block) {
}
}
-template <typename T, typename accT>
-void PackAWithIm2Col<T, accT>::printPackedMatrix(std::string name) {
+template <typename T, typename accT, int SPATIAL_DIM>
+void PackAWithIm2Col<T, accT, SPATIAL_DIM>::printPackedMatrix(
+ std::string name) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;
@@ -155,8 +245,8 @@ void PackAWithIm2Col<T, accT>::printPackedMatrix(std::string name) {
std::cout << std::endl;
}
-template <typename T, typename accT>
-int PackAWithIm2Col<T, accT>::rowOffsetBufferSize() {
+template <typename T, typename accT, int SPATIAL_DIM>
+int PackAWithIm2Col<T, accT, SPATIAL_DIM>::rowOffsetBufferSize() {
if (cpuinfo_initialize()) {
if (cpuinfo_has_x86_avx512f()) {
return PackingTraits<T, accT, inst_set_t::avx512>::MCB;
@@ -174,4 +264,7 @@ int PackAWithIm2Col<T, accT>::rowOffsetBufferSize() {
template class PackAWithIm2Col<uint8_t, int32_t>;
template class PackAWithIm2Col<uint8_t, int16_t>;
+template class PackAWithIm2Col<uint8_t, int32_t, 3>;
+template class PackAWithIm2Col<uint8_t, int16_t, 3>;
+
} // namespace fbgemm2