diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-08-01 16:40:46 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-08-01 16:54:29 +0300 |
commit | 6353ecb996898b4ce2fe8065130ed1f5ea3b6989 (patch) | |
tree | b6d620152e4ff7920465d8396fe443dc9b3ffc56 /intern/cycles/kernel/kernel_shader.h | |
parent | 7065022f7aa23ba13d2999e1e40162a8f480af0e (diff) |
Cycles: Tweaks to support CUDA 8 toolkit
All the changes are mainly giving explicit tips on inlining functions,
so they match how inlining worked with previous toolkit.
This make kernel compiled by CUDA 8 render in average with same speed
as previous kernels. Some scenes are somewhat faster, some of them are
somewhat slower. But slowdown is within 1% so far.
On a positive side it allows us to enable newer generation cards on
buildbots (so GTX 10x0 will be officially supported soon).
Diffstat (limited to 'intern/cycles/kernel/kernel_shader.h')
-rw-r--r-- | intern/cycles/kernel/kernel_shader.h | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b7641c37d93..bb3fe933b2c 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -149,8 +149,11 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, /* ShaderData setup from BSSRDF scatter */ #ifdef __SUBSURFACE__ -ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd, - const Intersection *isect, const Ray *ray) +ccl_device void shader_setup_from_subsurface( + KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { bool backfacing = sd->flag & SD_BACKFACING; @@ -226,14 +229,14 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat /* ShaderData setup from position sampled on mesh */ -ccl_device void shader_setup_from_sample(KernelGlobals *kg, - ShaderData *sd, - const float3 P, - const float3 Ng, - const float3 I, - int shader, int object, int prim, - float u, float v, float t, - float time) +ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, + ShaderData *sd, + const float3 P, + const float3 Ng, + const float3 I, + int shader, int object, int prim, + float u, float v, float t, + float time) { /* vectors */ ccl_fetch(sd, P) = P; @@ -445,7 +448,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s /* Merging */ #if defined(__BRANCHED_PATH__) || defined(__VOLUME__) -ccl_device void shader_merge_closures(ShaderData *sd) +ccl_device_inline void shader_merge_closures(ShaderData *sd) { /* merge identical closures, better when we sample a single closure at a time */ for(int i = 0; i < sd->num_closure; i++) { @@ -554,9 +557,13 @@ ccl_device void shader_bsdf_eval(KernelGlobals *kg, } } -ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd, - float randu, float randv, BsdfEval *bsdf_eval, - float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, + ShaderData *sd, + float randu, float randv, + BsdfEval *bsdf_eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { int sampled = 0; @@ -991,8 +998,12 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData * /* Volume Evaluation */ -ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, - PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx) +ccl_device_inline void shader_eval_volume(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + VolumeStack *stack, + int path_flag, + ShaderContext ctx) { /* reset closures once at the start, we will be accumulating the closures * for all volumes in the stack into a single array of closures */ |