From a3abb020e37a072eb71fd30de9ab125d1c16623a Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 2 Oct 2016 14:48:39 +0200 Subject: Fix Cycles CUDA performance on CUDA 8.0. Mostly this is making inlining match CUDA 7.5 in a few performance critical places. The end result is that performance is now better than before, possibly due to less register spilling or other CUDA 8.0 compiler improvements. On benchmarks scenes, there are 3% to 35% render time reductions. Stack memory usage is reduced a little too. Reviewed By: sergey Differential Revision: https://developer.blender.org/D2269 --- intern/cycles/kernel/kernel_shadow.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'intern/cycles/kernel/kernel_shadow.h') diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 95b57404a77..2981f6ac566 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -155,7 +155,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, } else { Intersection isect; - blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); + blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); } #ifdef __VOLUME__ @@ -205,7 +205,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, Intersection *isect = &isect_object; #endif - bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); + bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); #ifdef __TRANSPARENT_SHADOWS__ if(blocked && kernel_data.integrator.transparent_shadows) { @@ -221,7 +221,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg, if(bounce >= kernel_data.integrator.transparent_max_bounce) return true; - if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) + if(!scene_intersect(kg, *ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) { #ifdef __VOLUME__ /* attenuation for last line segment towards light */ -- cgit v1.2.3