Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Zafar Takhirov <zaf@fb.com>, 2020-04-11 01:36:53 +0300
committer: Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>, 2020-04-11 01:39:59 +0300
commit: 41e97f33032e4a6a01b648f15c2971ceebe7030c (patch)
tree: e321a0eb13a61da50555741ba6962f2bce143db2
parent: 15e3b9c3ada9b027931211f03c434a9b73ca7740 (diff)
Revert D20956364: Move `transpose_simd` to TransposeUtils.cc
Differential Revision: D20956364
Original commit changeset: 5ec1c8261a81
fbshipit-source-id: fa1875d9b9d0837c9115f6a624056984c92b2aeb
-rw-r--r-- BUILD.bazel 30
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- defs.bzl 15
-rw-r--r-- src/FbgemmBfloat16Convert.cc 15
-rw-r--r-- src/FbgemmFloat16Convert.cc 24
-rw-r--r-- src/RefImplementations.cc 40
-rw-r--r-- src/TransposeUtils.cc 52
-rw-r--r-- src/Utils.cc 39
8 files changed, 98 insertions, 119 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 4f75644..6173642 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1,19 +1,13 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
-load("defs.bzl", "get_fbgemm_avx2_srcs", "get_fbgemm_avx512_srcs", "get_fbgemm_base_srcs", "get_fbgemm_generic_srcs", "get_fbgemm_public_headers", "get_fbgemm_tests")
+load("defs.bzl", "get_fbgemm_generic_srcs", "get_fbgemm_avx2_srcs", "get_fbgemm_avx512_srcs", "get_fbgemm_public_headers")
cc_library(
- name = "fbgemm_base",
- srcs = get_fbgemm_base_srcs() + glob(["src/*.h"]),
- includes = [
- "src",
- ],
- deps = [
- ":fbgemm_headers",
- "@cpuinfo",
- "@asmjit",
+ name = "fbgemm_src_headers",
+ hdrs = [
+ "src/RefImplementations.h",
],
- linkstatic = 1,
+ include_prefix = "fbgemm",
)
cc_library(
@@ -26,7 +20,10 @@ cc_library(
deps = [
":fbgemm_avx2",
":fbgemm_avx512",
- ":fbgemm_base",
+ ":fbgemm_headers",
+ ":fbgemm_src_headers",
+ "@asmjit",
+ "@cpuinfo",
],
linkstatic = 1,
)
@@ -42,7 +39,8 @@ cc_library(
"-masm=intel",
],
deps = [
- ":fbgemm_base",
+ ":fbgemm_headers",
+ "@asmjit",
],
linkstatic = 1,
)
@@ -60,7 +58,7 @@ cc_library(
"-masm=intel",
],
deps = [
- ":fbgemm_base",
+ ":fbgemm_headers",
],
linkstatic = 1,
)
@@ -99,12 +97,12 @@ cc_library(
[
cc_test(
name = paths.split_extension(paths.basename(filename))[0],
- size = "medium",
+ size = "small",
srcs = [
filename,
],
deps = [
":test_utils",
],
- ) for filename in get_fbgemm_tests()
+ ) for filename in ["test/GConvTest.cc", "test/TransposeTest.cc", "test/QuantUtilsTest.cc", "test/I64Test.cc"]
]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 946855c..ce27196 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,7 +68,7 @@ else()
endif()
# Define file lists
-get_filelist("get_fbgemm_generic_srcs(with_base=True)" FBGEMM_GENERIC_SRCS)
+get_filelist("get_fbgemm_generic_srcs()" FBGEMM_GENERIC_SRCS)
get_filelist("get_fbgemm_avx2_srcs(msvc=${MSVC_BOOL})" FBGEMM_AVX2_SRCS)
get_filelist("get_fbgemm_avx512_srcs(msvc=${MSVC_BOOL})" FBGEMM_AVX512_SRCS)
get_filelist("get_fbgemm_public_headers()" FBGEMM_PUBLIC_HEADERS)
diff --git a/defs.bzl b/defs.bzl
index bf67a3b..f46b118 100644
--- a/defs.bzl
+++ b/defs.bzl
@@ -1,11 +1,4 @@
-def get_fbgemm_base_srcs():
- return [
- "src/GenerateI8Depthwise.cc",
- "src/RefImplementations.cc",
- "src/Utils.cc",
- ]
-
-def get_fbgemm_generic_srcs(with_base = False):
+def get_fbgemm_generic_srcs():
return [
"src/EmbeddingSpMDM.cc",
"src/EmbeddingSpMDMNBit.cc",
@@ -18,6 +11,7 @@ def get_fbgemm_generic_srcs(with_base = False):
"src/FbgemmFloat16Convert.cc",
"src/FbgemmI64.cc",
"src/FbgemmI8Spmdm.cc",
+ "src/GenerateI8Depthwise.cc",
"src/GenerateKernelU8S8S32ACC16.cc",
"src/GenerateKernelU8S8S32ACC16Avx512.cc", # Acc16 AVX512 JIT code gen
"src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc",
@@ -34,10 +28,11 @@ def get_fbgemm_generic_srcs(with_base = False):
"src/PackWeightMatrixForGConv.cc",
"src/PackWeightsForConv.cc",
"src/QuantUtils.cc",
+ "src/RefImplementations.cc",
"src/RowWiseSparseAdagradFused.cc",
"src/SparseAdagrad.cc",
- "src/TransposeUtils.cc",
- ] + (get_fbgemm_base_srcs() if with_base else [])
+ "src/Utils.cc",
+ ]
def get_fbgemm_public_headers():
return [
diff --git a/src/FbgemmBfloat16Convert.cc b/src/FbgemmBfloat16Convert.cc
index b0b6591..1a528d3 100644
--- a/src/FbgemmBfloat16Convert.cc
+++ b/src/FbgemmBfloat16Convert.cc
@@ -42,6 +42,21 @@ using namespace std;
namespace fbgemm {
+void FloatToBfloat16_ref(const float* src, bfloat16* dst, int size) {
+ for (int i = 0; i < size; i++) {
+ // Add 2^15 and right shift 16 to do round-nearest
+ dst[i] = (*reinterpret_cast<const uint32_t*>(src + i) + (1 << 15)) >> 16;
+ }
+}
+
+void Bfloat16ToFloat_ref(const bfloat16* src, float* dst, int size) {
+ for (int i = 0; i < size; i++) {
+ uint32_t val_fp32 =
+ static_cast<uint32_t>(reinterpret_cast<const uint16_t*>(src)[i]) << 16;
+ reinterpret_cast<uint32_t*>(dst)[i] = val_fp32;
+ }
+}
+
void FloatToBfloat16_simd(const float* src, bfloat16* dst, int size) {
// Run time CPU detection
if (cpuinfo_initialize()) {
diff --git a/src/FbgemmFloat16Convert.cc b/src/FbgemmFloat16Convert.cc
index a714dff..79e40a4 100644
--- a/src/FbgemmFloat16Convert.cc
+++ b/src/FbgemmFloat16Convert.cc
@@ -32,6 +32,30 @@ using namespace std;
namespace fbgemm {
+void FloatToFloat16_ref(
+ const float* src,
+ float16* dst,
+ int size,
+ bool do_clip) {
+ constexpr float FP16_MAX = 65504.f;
+ if (do_clip) {
+ for (int i = 0; i < size; i++) {
+ float cur_src = std::max(-FP16_MAX, std::min(src[i], FP16_MAX));
+ dst[i] = cpu_float2half_rn(cur_src);
+ }
+ } else {
+ for (int i = 0; i < size; i++) {
+ dst[i] = cpu_float2half_rn(src[i]);
+ }
+ }
+}
+
+void Float16ToFloat_ref(const float16* src, float* dst, int size) {
+ for (int i = 0; i < size; i++) {
+ dst[i] = cpu_half2float(src[i]);
+ }
+}
+
void FloatToFloat16_simd(
const float* src,
float16* dst,
diff --git a/src/RefImplementations.cc b/src/RefImplementations.cc
index 86663ed..d0809db 100644
--- a/src/RefImplementations.cc
+++ b/src/RefImplementations.cc
@@ -8,7 +8,6 @@
#include "./RefImplementations.h"
#include "fbgemm/FbgemmBuild.h"
-#include "fbgemm/FbgemmConvert.h"
#include "fbgemm/Types.h"
#include <algorithm>
@@ -20,45 +19,6 @@ using namespace std;
namespace fbgemm {
-void FloatToFloat16_ref(
- const float* src,
- float16* dst,
- int size,
- bool do_clip) {
- constexpr float FP16_MAX = 65504.f;
- if (do_clip) {
- for (int i = 0; i < size; i++) {
- float cur_src = std::max(-FP16_MAX, std::min(src[i], FP16_MAX));
- dst[i] = cpu_float2half_rn(cur_src);
- }
- } else {
- for (int i = 0; i < size; i++) {
- dst[i] = cpu_float2half_rn(src[i]);
- }
- }
-}
-
-void Float16ToFloat_ref(const float16* src, float* dst, int size) {
- for (int i = 0; i < size; i++) {
- dst[i] = cpu_half2float(src[i]);
- }
-}
-
-void FloatToBfloat16_ref(const float* src, bfloat16* dst, int size) {
- for (int i = 0; i < size; i++) {
- // Add 2^15 and right shift 16 to do round-nearest
- dst[i] = (*reinterpret_cast<const uint32_t*>(src + i) + (1 << 15)) >> 16;
- }
-}
-
-void Bfloat16ToFloat_ref(const bfloat16* src, float* dst, int size) {
- for (int i = 0; i < size; i++) {
- uint32_t val_fp32 =
- static_cast<uint32_t>(reinterpret_cast<const uint16_t*>(src)[i]) << 16;
- reinterpret_cast<uint32_t*>(dst)[i] = val_fp32;
- }
-}
-
void requantize_u8acc32_ref(
int M,
int N,
diff --git a/src/TransposeUtils.cc b/src/TransposeUtils.cc
deleted file mode 100644
index 27afa62..0000000
--- a/src/TransposeUtils.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc. and its affiliates.
- * All rights reserved.
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-#define FBGEMM_EXPORTS
-#include "./TransposeUtils.h"
-#include "fbgemm/Utils.h"
-#include <cstring>
-
-namespace fbgemm {
-
-void transpose_ref(
- int M,
- int N,
- const float* src,
- int ld_src,
- float* dst,
- int ld_dst) {
- for (int j = 0; j < N; j++) {
- for (int i = 0; i < M; i++) {
- dst[i + j * ld_dst] = src[i * ld_src + j];
- }
- } // for each output row
-}
-
-void transpose_simd(
- int M,
- int N,
- const float* src,
- int ld_src,
- float* dst,
- int ld_dst) {
- if ((M == 1 && ld_dst == 1) || (N == 1 && ld_src == 1)) {
- if (dst != src) {
- memcpy(dst, src, M * N * sizeof(float));
- }
- return;
- }
- static const auto iset = fbgemmInstructionSet();
- // Run time CPU detection
- if (isZmm(iset)) {
- internal::transpose_avx512(M, N, src, ld_src, dst, ld_dst);
- } else if (isYmm(iset)) {
- internal::transpose_avx2(M, N, src, ld_src, dst, ld_dst);
- } else {
- transpose_ref(M, N, src, ld_src, dst, ld_dst);
- }
-}
-
-} // namespace fbgemm
diff --git a/src/Utils.cc b/src/Utils.cc
index 51fa2a8..266da6f 100644
--- a/src/Utils.cc
+++ b/src/Utils.cc
@@ -18,6 +18,7 @@
#include <stdexcept>
#include <unordered_map>
#include <unordered_set>
+#include "./TransposeUtils.h"
namespace fbgemm {
@@ -169,6 +170,44 @@ template void printMatrix<int32_t>(
size_t ld,
std::string name);
+void transpose_ref(
+ int M,
+ int N,
+ const float* src,
+ int ld_src,
+ float* dst,
+ int ld_dst) {
+ for (int j = 0; j < N; j++) {
+ for (int i = 0; i < M; i++) {
+ dst[i + j * ld_dst] = src[i * ld_src + j];
+ }
+ } // for each output row
+}
+
+void transpose_simd(
+ int M,
+ int N,
+ const float* src,
+ int ld_src,
+ float* dst,
+ int ld_dst) {
+ if ((M == 1 && ld_dst == 1) || (N == 1 && ld_src == 1)) {
+ if (dst != src) {
+ memcpy(dst, src, M * N * sizeof(float));
+ }
+ return;
+ }
+ static const auto iset = fbgemmInstructionSet();
+ // Run time CPU detection
+ if (isZmm(iset)) {
+ internal::transpose_avx512(M, N, src, ld_src, dst, ld_dst);
+ } else if (isYmm(iset)) {
+ internal::transpose_avx2(M, N, src, ld_src, dst, ld_dst);
+ } else {
+ transpose_ref(M, N, src, ld_src, dst, ld_dst);
+ }
+}
+
namespace {
inst_set_t g_forced_isa = inst_set_t::anyarch;
bool g_Avx512_Ymm_enabled = false;