diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-08-01 16:40:46 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-08-01 16:54:29 +0300 |
commit | 6353ecb996898b4ce2fe8065130ed1f5ea3b6989 (patch) | |
tree | b6d620152e4ff7920465d8396fe443dc9b3ffc56 /intern/cycles/kernel/closure/bssrdf.h | |
parent | 7065022f7aa23ba13d2999e1e40162a8f480af0e (diff) |
Cycles: Tweaks to support CUDA 8 toolkit
All the changes are mainly giving explicit tips on inlining functions,
so they match how inlining worked with previous toolkit.
This make kernel compiled by CUDA 8 render in average with same speed
as previous kernels. Some scenes are somewhat faster, some of them are
somewhat slower. But slowdown is within 1% so far.
On a positive side it allows us to enable newer generation cards on
buildbots (so GTX 10x0 will be officially supported soon).
Diffstat (limited to 'intern/cycles/kernel/closure/bssrdf.h')
-rw-r--r-- | intern/cycles/kernel/closure/bssrdf.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index a260ae9a31b..35c95768b69 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -141,7 +141,7 @@ ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r) } /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ -ccl_device float bssrdf_cubic_quintic_root_find(float xi) +ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi) { /* newton-raphson iteration, usually succeeds in 2-4 iterations, except * outside 0.02 ... 0.98 where it can go up to 10, so overall performance @@ -255,7 +255,7 @@ ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r) * Returns scaled radius, meaning the result is to be scaled up by d. * Since there's no closed form solution we do Newton-Raphson method to find it. */ -ccl_device float bssrdf_burley_root_find(float xi) +ccl_device_inline float bssrdf_burley_root_find(float xi) { const float tolerance = 1e-6f; const int max_iteration_count = 10; @@ -389,7 +389,7 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float bssrdf_burley_sample(sc, xi, r, h); } -ccl_device float bssrdf_pdf(const ShaderClosure *sc, float r) +ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r) { if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) return bssrdf_cubic_pdf(sc, r); |