Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- intern/cycles/integrator/path_trace.cpp | 3
-rw-r--r-- intern/cycles/util/simd.h | 21
2 files changed, 24 insertions, 0 deletions
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index ab134179602..f1e70b7f28f 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -355,6 +355,9 @@ void PathTrace::path_trace(RenderWork &render_work)
const int num_works = path_trace_works_.size();
+ tbb::task_group_context *tbb_ctx = tbb::task::self().group();
+ tbb_ctx->capture_fp_settings();
+
tbb::parallel_for(0, num_works, [&](int i) {
const double work_start_time = time_dt();
const int num_samples = render_work.path_trace.num_samples;
diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h
index 15dda4e76a8..6772025d1de 100644
--- a/intern/cycles/util/simd.h
+++ b/intern/cycles/util/simd.h
@@ -32,6 +32,12 @@
# define SIMD_SET_FLUSH_TO_ZERO \
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#elif defined(__aarch64__) || defined(_M_ARM64) /* NOTE(review): the asm below is GNU-style; confirm every _M_ARM64 toolchain in use accepts it. */
+# define _MM_FLUSH_ZERO_ON 24 /* FPCR bit index handed to set_fz()/get_fz(), which shift 1ULL by it. */
+# define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr)) /* Read FPCR into __fpcr. */
+# define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"r" (__fpcr)) /* Write __fpcr to FPCR; register-only operand, since MSR to FPCR has no immediate form. */
+# define SIMD_SET_FLUSH_TO_ZERO set_fz(_MM_FLUSH_ZERO_ON);
+# define SIMD_GET_FLUSH_TO_ZERO get_fz(_MM_FLUSH_ZERO_ON) /* NOTE(review): only this branch defines the GET macro in the visible code. */
#else
# define SIMD_SET_FLUSH_TO_ZERO
#endif
@@ -105,6 +111,21 @@ static struct StepTy {
} step ccl_attr_maybe_unused;
#endif
+#if defined(__aarch64__) || defined(_M_ARM64)
+__forceinline int set_fz(uint32_t flag) { /* Sets FPCR bit `flag`; returns 1 when the read-back equals the value written, else 0. */
+ uint64_t fpcr_value;
+ __get_fpcr(fpcr_value);
+ const uint64_t requested = fpcr_value | (1ULL << flag); /* every other bit is written back as read */
+ __set_fpcr(requested);
+ __get_fpcr(fpcr_value); /* read again to compare against what was written */
+ return requested == fpcr_value;
+}
+__forceinline int get_fz(uint32_t flag) { /* Returns 1 when FPCR bit `flag` is currently set, 0 otherwise. */
+ uint64_t fpcr_value;
+ __get_fpcr(fpcr_value);
+ return (fpcr_value & (1ULL << flag)) != 0; /* the comparison yields exactly 0 or 1 */
+}
+#endif
/* Utilities used by Neon */
#if defined(__KERNEL_NEON__)