Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYoung Jin Kim <youki@microsoft.com>2019-09-26 19:17:42 +0300
committerYoung Jin Kim <youki@microsoft.com>2019-09-26 19:17:42 +0300
commit21f93c950b8b27918cd59c8f3139fb41ad1bd2c6 (patch)
tree4ee82a52176dcd658db70612e2d8c045cac66298
parent13347764fc33d3220856185c4758059a8e43cd14 (diff)
Linux memory fix
-rw-r--r--src/PackDepthwiseConvMatrixAvx2.cc22
1 files changed, 7 insertions, 15 deletions
diff --git a/src/PackDepthwiseConvMatrixAvx2.cc b/src/PackDepthwiseConvMatrixAvx2.cc
index 840a654..a84c469 100644
--- a/src/PackDepthwiseConvMatrixAvx2.cc
+++ b/src/PackDepthwiseConvMatrixAvx2.cc
@@ -46,15 +46,7 @@ PackedDepthWiseConvMatrix::PackedDepthWiseConvMatrix(
// Allocate packed arrays
int kernel_prod_aligned = (kernel_prod + 1) / 2 * 2;
- //pmat_ = static_cast<int8_t *>(fbgemmAlignedAlloc(64, ((K + 31) / 32) * kernel_prod_aligned * 32 * sizeof(int8_t)));
-#ifdef _MSC_VER
- pmat_ = (int8_t*)_aligned_malloc(((K + 31) / 32) * kernel_prod_aligned * 32 * sizeof(int8_t), 64);
-#else
- posix_memalign(
- (void**)&pmat_,
- 64,
- ((K + 31) / 32) * kernel_prod_aligned * 32 * sizeof(int8_t));
-#endif
+ pmat_ = static_cast<int8_t *>(fbgemmAlignedAlloc(64, ((K + 31) / 32) * kernel_prod_aligned * 32 * sizeof(int8_t)));
// Pack input matrix
// The layout is optimized to use vpmaddubsw efficiently (see
@@ -109,7 +101,7 @@ PackedDepthWiseConvMatrix::PackedDepthWiseConvMatrix(
// (12, 8), (12, 9), (12, 10), zero, ..., (15, 8), (15, 9), (15, 10), zero
// (28, 8), (28, 9), (28, 10), zero, ..., (31, 8), (31, 9), (31, 10), zero
for (int k1 = 0; k1 < K; k1 += 32) {
- __m256i* b_v = new __m256i[kernel_prod];
+ __m256i* b_v = static_cast<__m256i*>(ALIGNED_MALLOC(kernel_prod * sizeof(__m256i), 64));
int remainder = K - k1;
if (remainder < 32) {
__m256i mask_v = _mm256_loadu_si256(
@@ -126,7 +118,7 @@ PackedDepthWiseConvMatrix::PackedDepthWiseConvMatrix(
}
// Interleave 2 SIMD registers
- __m256i* b_interleaved_epi16 = new __m256i[kernel_prod_aligned];
+ __m256i* b_interleaved_epi16 = static_cast<__m256i*>(ALIGNED_MALLOC(kernel_prod_aligned * sizeof(__m256i), 64));
__m256i zero_v = _mm256_setzero_si256();
for (int i = 0; i < kernel_prod_aligned / 2; ++i) {
if (2 * i + 1 >= kernel_prod) {
@@ -142,7 +134,7 @@ PackedDepthWiseConvMatrix::PackedDepthWiseConvMatrix(
}
// Interleave 4 SIMD registers
- __m256i* b_interleaved_epi32 = new __m256i[kernel_prod_aligned];
+ __m256i* b_interleaved_epi32 = static_cast<__m256i*>(ALIGNED_MALLOC(kernel_prod_aligned * sizeof(__m256i), 64));
for (int i = 0; i < kernel_prod_aligned / 4; ++i) {
b_interleaved_epi32[4 * i] = _mm256_unpacklo_epi16(
b_interleaved_epi16[4 * i], b_interleaved_epi16[4 * i + 2]);
@@ -164,9 +156,9 @@ PackedDepthWiseConvMatrix::PackedDepthWiseConvMatrix(
b_interleaved_epi32[i]);
}
- delete[] b_v;
- delete[] b_interleaved_epi16;
- delete[] b_interleaved_epi32;
+ FREE(b_v);
+ FREE(b_interleaved_epi16);
+ FREE(b_interleaved_epi32);
}
FREE(smat_transposed);
}