Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jongsoo Park <jongsoo@fb.com>, 2018-11-12 23:45:50 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com>, 2018-11-13 18:23:55 +0300
commit: 56d9537efc00d7741486b7539f13e0abae6ef1df (patch)
tree: 4bb869cf66c1e991ff1465582b870f3ccb30cdb5 /src/Utils.cc
parent: e1b3a99806a41cd4a54994568779b87f41ac2557 (diff)
refactoring transpose code and comment
Summary: Move ISA-specific code to the fbgemm::internal namespace. Add comments. Reviewed By: jianyuh Differential Revision: D13025640 fbshipit-source-id: d06be4a9f2f1687110497f30517db094aee578fc
Diffstat (limited to 'src/Utils.cc')
-rw-r--r--src/Utils.cc14
1 files changed, 9 insertions, 5 deletions
diff --git a/src/Utils.cc b/src/Utils.cc
index 45aafd3..d8b7b1d 100644
--- a/src/Utils.cc
+++ b/src/Utils.cc
@@ -174,11 +174,11 @@ inline void transpose_ref(
int ld_src,
float* dst,
int ld_dst) {
- for (int i = 0; i < M; i++) {
- for (int j = 0; j < N; j++) {
+ for (int j = 0; j < N; j++) {
+ for (int i = 0; i < M; i++) {
dst[i + j * ld_dst] = src[i * ld_src + j];
}
- }
+ } // for each output row
}
inline void
@@ -314,6 +314,8 @@ inline void transpose_kernel_8x8_avx2(
_mm256_storeu_ps(&dst[7 * ld_dst], h);
}
+namespace internal {
+
void transpose_8x8(
int M,
int N,
@@ -332,6 +334,8 @@ void transpose_8x8(
transpose_4x4(M - ib, N, &src[ib * ld_src], ld_src, &dst[ib], ld_dst);
}
+} // namespace internal
+
void transpose_simd(
int M,
int N,
@@ -342,9 +346,9 @@ void transpose_simd(
// Run time CPU detection
if (cpuinfo_initialize()) {
if (cpuinfo_has_x86_avx512f()) {
- transpose_16x16(M, N, src, ld_src, dst, ld_dst);
+ internal::transpose_16x16(M, N, src, ld_src, dst, ld_dst);
} else if (cpuinfo_has_x86_avx2()) {
- transpose_8x8(M, N, src, ld_src, dst, ld_dst);
+ internal::transpose_8x8(M, N, src, ld_src, dst, ld_dst);
} else {
transpose_ref(M, N, src, ld_src, dst, ld_dst);
return;