Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/PackWithQuantRowOffset.cc')
-rw-r--r-- src/PackWithQuantRowOffset.cc 46
1 file changed, 37 insertions, 9 deletions
diff --git a/src/PackWithQuantRowOffset.cc b/src/PackWithQuantRowOffset.cc
index 74eaade..5f60faa 100644
--- a/src/PackWithQuantRowOffset.cc
+++ b/src/PackWithQuantRowOffset.cc
@@ -60,13 +60,13 @@ PackAWithQuantRowOffset<T, accT>::PackAWithQuantRowOffset(
BaseType::buf_ = pmat;
} else {
BaseType::bufAllocatedHere_ = true;
- BaseType::buf_ =
- (T*)aligned_alloc(64, BaseType::brow_ * BaseType::bcol_ * sizeof(T));
+ BaseType::buf_ = (T*)fbgemmAlignedAlloc(
+ 64, BaseType::brow_ * BaseType::bcol_ * sizeof(T));
}
if (!row_offset_) {
rowOffsetAllocatedHere = true;
row_offset_ = reinterpret_cast<int32_t*>(
- aligned_alloc(64, BaseType::brow_ * sizeof(accT)));
+ fbgemmAlignedAlloc(64, BaseType::brow_ * sizeof(accT)));
}
}
@@ -109,12 +109,40 @@ void PackAWithQuantRowOffset<T, accT>::pack(const block_type_t& block) {
constexpr int VLEN = 8;
__m256 inverse_scale_v = _mm256_set1_ps(1.0f / scale_);
__m256i shuffle_mask_v = _mm256_set_epi8(
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0x0c, 0x08, 0x04, 0x00,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0x0c, 0x08, 0x04, 0x00);
- __m256i permute_mask_v = _mm256_set_epi32(
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00);
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0x0c,
+ 0x08,
+ 0x04,
+ 0x00,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0x0c,
+ 0x08,
+ 0x04,
+ 0x00);
+ __m256i permute_mask_v =
+ _mm256_set_epi32(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00);
#endif
for (int i = 0; i < block.row_size; ++i) {