diff options
author | Benoit Jacob <benoitjacob@google.com> | 2020-10-28 04:56:14 +0300 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2020-10-28 04:56:37 +0300 |
commit | d79362c24fd70eab3196672273dbfd8f0dd6124c (patch) | |
tree | 2da03f4beccf06cee1a8e825b07247f1ca4e3bfc | |
parent | 7e1d379cd05679d86c0113ce271a6df8c51fed3a (diff) |
MSVC fixes:
1. Do not pass -O3 on MSVC (the need for such custom flags is only on mobile builds anyway).
2. Pass /arch:AVX* flags, actually enabling SIMD paths on MSVC.
3. Fix the #if logic to detect AVX2+FMA on MSVC: /arch:AVX2 enables FMA but does not define __FMA__.
PiperOrigin-RevId: 339375591
-rw-r--r-- | ruy/BUILD | 25 | ||||
-rw-r--r-- | ruy/build_defs.bzl | 25 | ||||
-rw-r--r-- | ruy/platform.h | 2 |
3 files changed, 37 insertions, 15 deletions
@@ -40,8 +40,11 @@ config_setting( values = {"cpu": "haswell"}, ) +# MSVC toolchains define a different "cpu" value, which helps us as we need +# to pass different flags on MSVC vs GCC-compatible toolchains to enable +# x86 SIMD extensions. selects.config_setting_group( - name = "x86_64", + name = "x86_64_and_not_msvc", match_any = [ ":x86_64_k8", ":x86_64_haswell", @@ -68,12 +71,28 @@ config_setting( ) config_setting( - name = "optimized", + name = "dbg_build", values = { - "compilation_mode": "opt", + "compilation_mode": "dbg", }, ) +config_setting( + name = "fastbuild_build", + values = { + "compilation_mode": "fastbuild", + }, +) + +selects.config_setting_group( + name = "do_not_want_O3", + match_any = [ + "@bazel_tools//src/conditions:windows_msvc", + ":dbg_build", + ":fastbuild_build", + ], +) + cc_library( name = "platform", hdrs = ["platform.h"], diff --git a/ruy/build_defs.bzl b/ruy/build_defs.bzl index e2fc325..836f47a 100644 --- a/ruy/build_defs.bzl +++ b/ruy/build_defs.bzl @@ -46,31 +46,34 @@ def ruy_copts_optimize(): # optimizing for speed is the better compromise, so we override that. # Careful to keep debug builds debuggable, whence the select based # on the compilation mode. - "//ruy:optimized": ["-O3"], - "//conditions:default": [], + "//ruy:do_not_want_O3": [], + "//conditions:default": ["-O3"], }) # Returns compiler flags to use for all ruy code. def ruy_copts(): return ruy_copts_warnings() + ruy_copts_neon() + ruy_copts_optimize() -def ruy_copts_avx512(): - # In some clang-based toolchains, in the default compilation mode (not -c opt), - # heavy spillage in the AVX512 kernels results in stack frames > 50k. This issue does not exist - # in optimized builds (-c opt). 
+def ruy_copts_avx(): return select({ - "//ruy:x86_64": ["$(STACK_FRAME_UNLIMITED)", "-mavx512f", "-mavx512vl", "-mavx512cd", "-mavx512bw", "-mavx512dq"], + "//ruy:x86_64_and_not_msvc": ["-mavx"], + "@bazel_tools//src/conditions:windows_msvc": ["/arch:AVX"], "//conditions:default": [], }) -def ruy_copts_avx(): +def ruy_copts_avx2_fma(): return select({ - "//ruy:x86_64": ["-mavx"], + "//ruy:x86_64_and_not_msvc": ["-mavx2", "-mfma"], + "@bazel_tools//src/conditions:windows_msvc": ["/arch:AVX2"], "//conditions:default": [], }) -def ruy_copts_avx2_fma(): +def ruy_copts_avx512(): + # In some clang-based toolchains, in the default compilation mode (not -c opt), + # heavy spillage in the AVX512 kernels results in stack frames > 50k. This issue does not exist + # in optimized builds (-c opt). return select({ - "//ruy:x86_64": ["-mavx2", "-mfma"], + "//ruy:x86_64_and_not_msvc": ["$(STACK_FRAME_UNLIMITED)", "-mavx512f", "-mavx512vl", "-mavx512cd", "-mavx512bw", "-mavx512dq"], + "@bazel_tools//src/conditions:windows_msvc": ["/arch:AVX512"], "//conditions:default": [], }) diff --git a/ruy/platform.h b/ruy/platform.h index 942feac..ffffeb1 100644 --- a/ruy/platform.h +++ b/ruy/platform.h @@ -137,7 +137,7 @@ limitations under the License. #endif #if RUY_PLATFORM_X86_ENHANCEMENTS && RUY_PLATFORM_X86 && defined(__AVX2__) && \ - defined(__FMA__) + (defined(__FMA__) || defined(_MSC_VER)) #define RUY_PLATFORM_AVX2_FMA 1 #else #define RUY_PLATFORM_AVX2_FMA 0 |