Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorMichael Jones <michael_jones>2022-04-12 21:36:55 +0300
committerMichael Jones <michael_p_jones@apple.com>2022-04-12 21:43:47 +0300
commit869a46df2980818644db4823fb1d29e9d525b645 (patch)
tree9a049e1c41af9c4e3d830d12f157c453eed12b3e /intern
parent2d472b70e5ae22ece199003f51742a72f485fd87 (diff)
Cycles fp consistency for Apple Silicon CPUs
Propagate the FP settings from the main thread to all the worker threads (the FP settings include the FZ setting, among other things). This guarantees consistent execution of floating-point math regardless of whether it is executed in a TBB thread arena or on the main thread. Add FZ (flush-to-zero) mode for arm64/aarch64, in parallel to the way it has been done on Intel processors. Currently, compiling for an ARM target does not set this mode at all, hence it potentially runs slower and with possible result mismatches compared to Intel x86. Reviewed By: brecht Differential Revision: https://developer.blender.org/D14454
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/integrator/path_trace.cpp3
-rw-r--r--intern/cycles/util/simd.h21
2 files changed, 24 insertions, 0 deletions
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index ab134179602..f1e70b7f28f 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -355,6 +355,9 @@ void PathTrace::path_trace(RenderWork &render_work)
const int num_works = path_trace_works_.size();
+ tbb::task_group_context *tbb_ctx = tbb::task::self().group();
+ tbb_ctx->capture_fp_settings();
+
tbb::parallel_for(0, num_works, [&](int i) {
const double work_start_time = time_dt();
const int num_samples = render_work.path_trace.num_samples;
diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h
index 15dda4e76a8..6772025d1de 100644
--- a/intern/cycles/util/simd.h
+++ b/intern/cycles/util/simd.h
@@ -32,6 +32,12 @@
# define SIMD_SET_FLUSH_TO_ZERO \
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define _MM_FLUSH_ZERO_ON 24
+#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr))
+#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr))
+# define SIMD_SET_FLUSH_TO_ZERO set_fz(_MM_FLUSH_ZERO_ON);
+# define SIMD_GET_FLUSH_TO_ZERO get_fz(_MM_FLUSH_ZERO_ON)
#else
# define SIMD_SET_FLUSH_TO_ZERO
#endif
@@ -105,6 +111,21 @@ static struct StepTy {
} step ccl_attr_maybe_unused;
#endif
+#if defined(__aarch64__) || defined(_M_ARM64)
+__forceinline int set_fz(uint32_t flag) {
+ uint64_t old_fpcr, new_fpcr;
+ __get_fpcr(old_fpcr);
+ new_fpcr = old_fpcr | (1ULL << flag);
+ __set_fpcr(new_fpcr);
+ __get_fpcr(old_fpcr);
+ return old_fpcr == new_fpcr;
+}
+__forceinline int get_fz(uint32_t flag) {
+ uint64_t cur_fpcr;
+ __get_fpcr(cur_fpcr);
+ return (cur_fpcr & (1ULL<< flag)) > 0 ? 1 : 0 ;
+}
+#endif
/* Utilities used by Neon */
#if defined(__KERNEL_NEON__)