diff options
-rw-r--r-- | intern/cycles/integrator/path_trace.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/util/simd.h | 21 |
2 files changed, 24 insertions, 0 deletions
/* Review notes for this patch (comments only; the patched code itself is unchanged):
 * - path_trace.cpp: fetches the task_group_context of tbb::task::self() and calls
 *   capture_fp_settings() on it right before the tbb::parallel_for over the path trace works.
 * - simd.h: adds a 64-bit Arm branch for SIMD_SET_FLUSH_TO_ZERO, a new SIMD_GET_FLUSH_TO_ZERO,
 *   and the set_fz()/get_fz() helpers that set and test one bit of the FPCR register through
 *   inline assembly. */
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index ab134179602..f1e70b7f28f 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -355,6 +355,9 @@ void PathTrace::path_trace(RenderWork &render_work)
 
   const int num_works = path_trace_works_.size();
 
+  tbb::task_group_context *tbb_ctx = tbb::task::self().group(); /* Group context of tbb::task::self(). NOTE(review): dereferenced on the next line without a null check. */
+  tbb_ctx->capture_fp_settings(); /* Presumably lets the parallel_for tasks below inherit this thread's FP control state (e.g. flush-to-zero) -- confirm against the TBB docs. */
+
   tbb::parallel_for(0, num_works, [&](int i) {
     const double work_start_time = time_dt();
     const int num_samples = render_work.path_trace.num_samples;
diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h
index 15dda4e76a8..6772025d1de 100644
--- a/intern/cycles/util/simd.h
+++ b/intern/cycles/util/simd.h
@@ -32,6 +32,12 @@
 # define SIMD_SET_FLUSH_TO_ZERO \
     _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \
     _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#elif defined(__aarch64__) || defined(_M_ARM64) /* 64-bit Arm branch. NOTE(review): _M_ARM64 is the MSVC macro, but the helpers below use GCC-style __asm__ -- verify that configuration builds. */
+#define _MM_FLUSH_ZERO_ON 24 /* Bit index, not a mask: set_fz()/get_fz() use it as a shift count. Presumably the FPCR.FZ bit -- confirm against the Arm ARM. NOTE(review): reuses the name the x86 branch above passes to _MM_SET_FLUSH_ZERO_MODE. */
+#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr)) /* Reads the FPCR system register into __fpcr (mrs). */
+#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr)) /* Writes __fpcr to FPCR (msr). NOTE(review): the "ri" constraint also admits an immediate operand; confirm msr fpcr accepts one, otherwise "r" is the safe constraint. */
+# define SIMD_SET_FLUSH_TO_ZERO set_fz(_MM_FLUSH_ZERO_ON); /* Expansion ends in ';', matching the x86 variant above. */
+# define SIMD_GET_FLUSH_TO_ZERO get_fz(_MM_FLUSH_ZERO_ON) /* Expression form (no ';'). NOTE(review): not defined in the #else fallback below. */
 #else
 # define SIMD_SET_FLUSH_TO_ZERO
 #endif
@@ -105,6 +111,21 @@ static struct StepTy {
 } step ccl_attr_maybe_unused;
 
 #endif
+#if defined(__aarch64__) || defined(_M_ARM64) /* Same guard as the SIMD_*_FLUSH_TO_ZERO macros that call these helpers. */
+__forceinline int set_fz(uint32_t flag) { /* Sets bit `flag` of FPCR; returns 1 if the register then reads back as the value written, else 0. */
+  uint64_t old_fpcr, new_fpcr;
+  __get_fpcr(old_fpcr);
+  new_fpcr = old_fpcr | (1ULL << flag); /* Keep every other control bit, turn on bit `flag`. */
+  __set_fpcr(new_fpcr);
+  __get_fpcr(old_fpcr); /* Re-read (reusing old_fpcr) to check that the write took effect. */
+  return old_fpcr == new_fpcr;
+}
+__forceinline int get_fz(uint32_t flag) { /* Returns 1 if bit `flag` of FPCR is currently set, 0 otherwise. */
+  uint64_t cur_fpcr;
+  __get_fpcr(cur_fpcr);
+  return (cur_fpcr & (1ULL<< flag)) > 0 ? 1 : 0 ; /* The mask result is non-negative, so "> 0" is simply "bit is set". */
+}
+#endif
 
 /* Utilities used by Neon */
 #if defined(__KERNEL_NEON__) |