Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mateusz Chudyk <mateuszchudyk@gmail.com> 2019-09-05 21:02:49 +0300
committer Mateusz Chudyk <mateuszchudyk@gmail.com> 2019-09-13 18:02:28 +0300
commit 30c7e3ab2d11723977ee2402c28f75b92280650b (patch)
tree b53d3c8da8b2715f6b791051913067e47e6b82c5
parent 41dbd6bf88522fd30f0d92b4fe36f89f67898edf (diff)
Change order of loops in multiplication
-rw-r--r-- multiply.h 6
1 files changed, 3 insertions, 3 deletions
diff --git a/multiply.h b/multiply.h
index a13437a..db19e5b 100644
--- a/multiply.h
+++ b/multiply.h
@@ -501,11 +501,11 @@ INTGEMM_SSSE3 inline static void InnerINTGEMM_SSSE3(
assert(reinterpret_cast<uintptr_t>(B) % sizeof(Integer) == 0); \
const int simd_width = width / sizeof(Integer); \
auto callback_impl = callbacks::CallbackImpl<cpu_type, Callback>(callback); \
- const Integer *B0_col = reinterpret_cast<const Integer*>(B); \
/*Go over 8 columns of B at a time.*/ \
- for (Index B0_colidx = 0; B0_colidx != B_cols; B0_col += 8 * simd_width, B0_colidx += 8) { \
+ for (Index A_rowidx = 0; A_rowidx < A_rows; ++A_rowidx) { \
+ const Integer *B0_col = reinterpret_cast<const Integer*>(B); \
+ for (Index B0_colidx = 0; B0_colidx != B_cols; B0_col += 8 * simd_width, B0_colidx += 8) { \
/*Process one row of A at a time. Doesn't seem to be faster to do multiple rows of A at once.*/ \
- for (Index A_rowidx = 0; A_rowidx < A_rows; ++A_rowidx) { \
/*Iterate over shared (inner) dimension.*/ \
const Integer *A_live = reinterpret_cast<const Integer *>(A + A_rowidx * width); \
const Integer *A_end = A_live + simd_width; \