Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/google/ruy.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Benoit Jacob <benoitjacob@google.com> 2020-10-28 04:56:14 +0300
committer: Copybara-Service <copybara-worker@google.com> 2020-10-28 04:56:37 +0300
commit: d79362c24fd70eab3196672273dbfd8f0dd6124c (patch)
tree: 2da03f4beccf06cee1a8e825b07247f1ca4e3bfc
parent: 7e1d379cd05679d86c0113ce271a6df8c51fed3a (diff)
MSVC fixes:
1. Do not pass -O3 on MSVC (the need for such custom flags is only on mobile builds anyway).
2. Pass /arch:AVX* flags, actually enabling SIMD paths on MSVC.
3. Fix the #if logic to detect AVX2+FMA on MSVC: /arch:AVX2 enables FMA but does not define __FMA__.

PiperOrigin-RevId: 339375591
-rw-r--r-- ruy/BUILD 25
-rw-r--r-- ruy/build_defs.bzl 25
-rw-r--r-- ruy/platform.h 2
3 files changed, 37 insertions, 15 deletions
diff --git a/ruy/BUILD b/ruy/BUILD
index 59218d4..2bdfd41 100644
--- a/ruy/BUILD
+++ b/ruy/BUILD
@@ -40,8 +40,11 @@ config_setting(
values = {"cpu": "haswell"},
)
+# MSVC toolchains define a different "cpu" value, which helps us as we need
+# to pass different flags on MSVC vs GCC-compatible toolchains to enable
+# x86 SIMD extensions.
selects.config_setting_group(
- name = "x86_64",
+ name = "x86_64_and_not_msvc",
match_any = [
":x86_64_k8",
":x86_64_haswell",
@@ -68,12 +71,28 @@ config_setting(
)
config_setting(
- name = "optimized",
+ name = "dbg_build",
values = {
- "compilation_mode": "opt",
+ "compilation_mode": "dbg",
},
)
+config_setting(
+ name = "fastbuild_build",
+ values = {
+ "compilation_mode": "fastbuild",
+ },
+)
+
+selects.config_setting_group(
+ name = "do_not_want_O3",
+ match_any = [
+ "@bazel_tools//src/conditions:windows_msvc",
+ ":dbg_build",
+ ":fastbuild_build",
+ ],
+)
+
cc_library(
name = "platform",
hdrs = ["platform.h"],
diff --git a/ruy/build_defs.bzl b/ruy/build_defs.bzl
index e2fc325..836f47a 100644
--- a/ruy/build_defs.bzl
+++ b/ruy/build_defs.bzl
@@ -46,31 +46,34 @@ def ruy_copts_optimize():
# optimizing for speed is the better compromise, so we override that.
# Careful to keep debug builds debuggable, whence the select based
# on the compilation mode.
- "//ruy:optimized": ["-O3"],
- "//conditions:default": [],
+ "//ruy:do_not_want_O3": [],
+ "//conditions:default": ["-O3"],
})
# Returns compiler flags to use for all ruy code.
def ruy_copts():
return ruy_copts_warnings() + ruy_copts_neon() + ruy_copts_optimize()
-def ruy_copts_avx512():
- # In some clang-based toolchains, in the default compilation mode (not -c opt),
- # heavy spillage in the AVX512 kernels results in stack frames > 50k. This issue does not exist
- # in optimized builds (-c opt).
+def ruy_copts_avx():
return select({
- "//ruy:x86_64": ["$(STACK_FRAME_UNLIMITED)", "-mavx512f", "-mavx512vl", "-mavx512cd", "-mavx512bw", "-mavx512dq"],
+ "//ruy:x86_64_and_not_msvc": ["-mavx"],
+ "@bazel_tools//src/conditions:windows_msvc": ["/arch:AVX"],
"//conditions:default": [],
})
-def ruy_copts_avx():
+def ruy_copts_avx2_fma():
return select({
- "//ruy:x86_64": ["-mavx"],
+ "//ruy:x86_64_and_not_msvc": ["-mavx2", "-mfma"],
+ "@bazel_tools//src/conditions:windows_msvc": ["/arch:AVX2"],
"//conditions:default": [],
})
-def ruy_copts_avx2_fma():
+def ruy_copts_avx512():
+ # In some clang-based toolchains, in the default compilation mode (not -c opt),
+ # heavy spillage in the AVX512 kernels results in stack frames > 50k. This issue does not exist
+ # in optimized builds (-c opt).
return select({
- "//ruy:x86_64": ["-mavx2", "-mfma"],
+ "//ruy:x86_64_and_not_msvc": ["$(STACK_FRAME_UNLIMITED)", "-mavx512f", "-mavx512vl", "-mavx512cd", "-mavx512bw", "-mavx512dq"],
+ "@bazel_tools//src/conditions:windows_msvc": ["/arch:AVX512"],
"//conditions:default": [],
})
diff --git a/ruy/platform.h b/ruy/platform.h
index 942feac..ffffeb1 100644
--- a/ruy/platform.h
+++ b/ruy/platform.h
@@ -137,7 +137,7 @@ limitations under the License.
#endif
#if RUY_PLATFORM_X86_ENHANCEMENTS && RUY_PLATFORM_X86 && defined(__AVX2__) && \
- defined(__FMA__)
+ (defined(__FMA__) || defined(_MSC_VER))
#define RUY_PLATFORM_AVX2_FMA 1
#else
#define RUY_PLATFORM_AVX2_FMA 0