Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/google/ruy.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/ruy/BUILD
diff options
context:
space:
mode:
author: Benoit Jacob <benoitjacob@google.com> 2020-07-13 20:27:06 +0300
committer: Copybara-Service <copybara-worker@google.com> 2020-07-13 20:27:26 +0300
commit: 7784e18d9f29e01ce16a62dfa05d58007f1c021c (patch)
tree: 82553fd6961e328da9800e2c3bd134890155961d /ruy/BUILD
parent: 27d16d0b47ad31a81aa1d7b044a4a2162159d928 (diff)
FMA is technically a separate ISA extension from AVX2.
In practice, at least Intel CPUs supporting AVX2 also support FMA. We have always chosen to implement a code path only for AVX2+FMA, not for AVX2 without FMA. At some point we had also fixed our internal ruy_copts_avx2() to pass -mfma in addition to -mavx2, so our code was technically correct. But it was a bit misleading, because this AVX2+FMA path was named just AVX2. One area where this has led to confusion is benchmarking against other libraries that rely on the user manually passing copts to enable ISA extensions (header-only libraries) and that are rigorous about using FMA instructions only if they are enabled, without assuming that AVX2 implies FMA. Concretely, benchmarking against Eigen with -mavx2 gives the false impression that ruy is 2x faster in its AVX2 code path, while benchmarking with -mavx2 -mfma paints the correct picture: ruy is only about 5% faster.
PiperOrigin-RevId: 320982698
Diffstat (limited to 'ruy/BUILD')
-rw-r--r-- ruy/BUILD 26
1 files changed, 13 insertions, 13 deletions
diff --git a/ruy/BUILD b/ruy/BUILD
index f595139..a8e6717 100644
--- a/ruy/BUILD
+++ b/ruy/BUILD
@@ -1,7 +1,7 @@
# Ruy is not BLAS
load("@bazel_skylib//lib:selects.bzl", "selects")
-load(":build_defs.bzl", "ruy_copts", "ruy_copts_avx2", "ruy_copts_avx512")
+load(":build_defs.bzl", "ruy_copts", "ruy_copts_avx2_fma", "ruy_copts_avx512")
load(":build_defs.oss.bzl", "ruy_linkopts_thread_standard_library")
load(":ruy_test_ext.oss.bzl", "ruy_test_ext_defines", "ruy_test_ext_deps")
load(":ruy_test.bzl", "ruy_benchmark", "ruy_test")
@@ -624,14 +624,14 @@ cc_library(
)
cc_library(
- name = "kernel_avx2",
+ name = "kernel_avx2_fma",
srcs = [
- "kernel_avx2.cc",
+ "kernel_avx2_fma.cc",
],
hdrs = [
"kernel_x86.h",
],
- copts = ruy_copts() + ruy_copts_avx2(),
+ copts = ruy_copts() + ruy_copts_avx2_fma(),
deps = [
":check_macros",
":kernel_common",
@@ -646,14 +646,14 @@ cc_library(
)
cc_library(
- name = "pack_avx2",
+ name = "pack_avx2_fma",
srcs = [
- "pack_avx2.cc",
+ "pack_avx2_fma.cc",
],
hdrs = [
"pack_x86.h",
],
- copts = ruy_copts() + ruy_copts_avx2(),
+ copts = ruy_copts() + ruy_copts_avx2_fma(),
deps = [
":check_macros",
":mat",
@@ -667,14 +667,14 @@ cc_library(
)
cc_library(
- name = "have_built_path_for_avx2",
+ name = "have_built_path_for_avx2_fma",
srcs = [
- "have_built_path_for_avx2.cc",
+ "have_built_path_for_avx2_fma.cc",
],
hdrs = [
"have_built_path_for.h",
],
- copts = ruy_copts() + ruy_copts_avx2(),
+ copts = ruy_copts() + ruy_copts_avx2_fma(),
deps = [
":opt_set",
":platform",
@@ -691,7 +691,7 @@ cc_library(
":apply_multiplier",
":check_macros",
":kernel_arm", # fixdeps: keep
- ":kernel_avx2", # fixdeps: keep
+ ":kernel_avx2_fma", # fixdeps: keep
":kernel_avx512", # fixdeps: keep
":kernel_common",
":mat",
@@ -719,7 +719,7 @@ cc_library(
":matrix",
":opt_set",
":pack_arm", # fixdeps: keep
- ":pack_avx2", # fixdeps: keep
+ ":pack_avx2_fma", # fixdeps: keep
":pack_avx512", # fixdeps: keep
":pack_common",
":path",
@@ -735,7 +735,7 @@ cc_library(
"have_built_path_for.h",
],
deps = [
- ":have_built_path_for_avx2",
+ ":have_built_path_for_avx2_fma",
":have_built_path_for_avx512",
":platform",
],