From a3abb020e37a072eb71fd30de9ab125d1c16623a Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 2 Oct 2016 14:48:39 +0200 Subject: Fix Cycles CUDA performance on CUDA 8.0. Mostly this is making inlining match CUDA 7.5 in a few performance critical places. The end result is that performance is now better than before, possibly due to less register spilling or other CUDA 8.0 compiler improvements. On benchmarks scenes, there are 3% to 35% render time reductions. Stack memory usage is reduced a little too. Reviewed By: sergey Differential Revision: https://developer.blender.org/D2269 --- intern/cycles/kernel/closure/bssrdf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'intern/cycles/kernel/closure/bssrdf.h') diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 35c95768b69..af0bbd861a9 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -141,7 +141,7 @@ ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r) } /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ -ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi) +ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi) { /* newton-raphson iteration, usually succeeds in 2-4 iterations, except * outside 0.02 ... 0.98 where it can go up to 10, so overall performance @@ -255,7 +255,7 @@ ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r) * Returns scaled radius, meaning the result is to be scaled up by d. * Since there's no closed form solution we do Newton-Raphson method to find it. */ -ccl_device_inline float bssrdf_burley_root_find(float xi) +ccl_device_forceinline float bssrdf_burley_root_find(float xi) { const float tolerance = 1e-6f; const int max_iteration_count = 10; @@ -389,7 +389,7 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float bssrdf_burley_sample(sc, xi, r, h); } -ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r) +ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r) { if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) return bssrdf_cubic_pdf(sc, r); -- cgit v1.2.3