diff options
author | Benoit Jacob <benoitjacob@google.com> | 2020-07-13 20:27:06 +0300 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2020-07-13 20:27:26 +0300 |
commit | 7784e18d9f29e01ce16a62dfa05d58007f1c021c (patch) | |
tree | 82553fd6961e328da9800e2c3bd134890155961d /ruy/BUILD | |
parent | 27d16d0b47ad31a81aa1d7b044a4a2162159d928 (diff) |
FMA is technically a separate ISA extension from AVX2.
In practice, at least Intel CPUs supporting AVX2 also support FMA.
We have always chosen to only implement a code path for AVX2+FMA, not AVX2 without FMA. At some point we had also fixed our internal ruy_copts_avx2() to pass -mfma in addition to -mavx2. So our code was technically correct. But it was a bit misleading because this AVX2+FMA path was named just AVX2.
One area where this has led to confusion is benchmarking against other libraries that rely on the user manually passing copts to enable ISA extensions (header-only libraries) and that are rigorous about only using FMA instructions when FMA is explicitly enabled, without assuming that AVX2 implies it. Concretely, benchmarking against Eigen with -mavx2 leads to the false impression that ruy is 2x faster in its AVX2 code path, while benchmarking with -mavx2 -mfma paints the correct picture that ruy is only about 5% faster.
PiperOrigin-RevId: 320982698
Diffstat (limited to 'ruy/BUILD')
-rw-r--r-- | ruy/BUILD | 26 |
1 files changed, 13 insertions, 13 deletions
@@ -1,7 +1,7 @@ # Ruy is not BLAS load("@bazel_skylib//lib:selects.bzl", "selects") -load(":build_defs.bzl", "ruy_copts", "ruy_copts_avx2", "ruy_copts_avx512") +load(":build_defs.bzl", "ruy_copts", "ruy_copts_avx2_fma", "ruy_copts_avx512") load(":build_defs.oss.bzl", "ruy_linkopts_thread_standard_library") load(":ruy_test_ext.oss.bzl", "ruy_test_ext_defines", "ruy_test_ext_deps") load(":ruy_test.bzl", "ruy_benchmark", "ruy_test") @@ -624,14 +624,14 @@ cc_library( ) cc_library( - name = "kernel_avx2", + name = "kernel_avx2_fma", srcs = [ - "kernel_avx2.cc", + "kernel_avx2_fma.cc", ], hdrs = [ "kernel_x86.h", ], - copts = ruy_copts() + ruy_copts_avx2(), + copts = ruy_copts() + ruy_copts_avx2_fma(), deps = [ ":check_macros", ":kernel_common", @@ -646,14 +646,14 @@ cc_library( ) cc_library( - name = "pack_avx2", + name = "pack_avx2_fma", srcs = [ - "pack_avx2.cc", + "pack_avx2_fma.cc", ], hdrs = [ "pack_x86.h", ], - copts = ruy_copts() + ruy_copts_avx2(), + copts = ruy_copts() + ruy_copts_avx2_fma(), deps = [ ":check_macros", ":mat", @@ -667,14 +667,14 @@ cc_library( ) cc_library( - name = "have_built_path_for_avx2", + name = "have_built_path_for_avx2_fma", srcs = [ - "have_built_path_for_avx2.cc", + "have_built_path_for_avx2_fma.cc", ], hdrs = [ "have_built_path_for.h", ], - copts = ruy_copts() + ruy_copts_avx2(), + copts = ruy_copts() + ruy_copts_avx2_fma(), deps = [ ":opt_set", ":platform", @@ -691,7 +691,7 @@ cc_library( ":apply_multiplier", ":check_macros", ":kernel_arm", # fixdeps: keep - ":kernel_avx2", # fixdeps: keep + ":kernel_avx2_fma", # fixdeps: keep ":kernel_avx512", # fixdeps: keep ":kernel_common", ":mat", @@ -719,7 +719,7 @@ cc_library( ":matrix", ":opt_set", ":pack_arm", # fixdeps: keep - ":pack_avx2", # fixdeps: keep + ":pack_avx2_fma", # fixdeps: keep ":pack_avx512", # fixdeps: keep ":pack_common", ":path", @@ -735,7 +735,7 @@ cc_library( "have_built_path_for.h", ], deps = [ - ":have_built_path_for_avx2", + ":have_built_path_for_avx2_fma", 
":have_built_path_for_avx512", ":platform", ], |