diff options
author | Thomas Dinges <blender@dingto.org> | 2014-10-05 05:53:51 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2014-10-05 05:53:51 +0400 |
commit | dde740bcd723f3fc149b44d06155396fb425ce4a (patch) | |
tree | f0b01994e1cb302506f1c32440b8797f8c391438 /intern/cycles/kernel | |
parent | 24ddfe0c63e314f53c2d2555b123e0aa379474ed (diff) |
Cycles / CUDA: Change inline rules for BVH intersection functions.
* On sm_30 and above there is no change (these functions were already not inlined before); this just fixes a speed regression from yesterday (6359c36ba407).
* On sm_2x (tested with sm_21), I get a nice 8% speedup in the bmw scene with this. As a bonus, cubin compilation time and memory usage are significantly reduced. Regular cubin size went from 2.5MB to 2.0MB, and the Experimental one from 3.8MB to 2.5MB.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/geom/geom_bvh.h | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h index 8c472028699..c5336e086b7 100644 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -28,6 +28,13 @@ CCL_NAMESPACE_BEGIN +/* Don't inline intersect functions on GPU, this is faster */ +#ifdef __KERNEL_GPU__ +#define ccl_device_intersect ccl_device_noinline +#else +#define ccl_device_intersect ccl_device_inline +#endif + /* BVH intersection function variations */ #define BVH_INSTANCING 1 @@ -161,7 +168,7 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_volume.h" #endif -ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, +ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax) { #ifdef __OBJECT_MOTION__ @@ -200,7 +207,7 @@ ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const } #ifdef __SUBSURFACE__ -ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) +ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { @@ -239,7 +246,7 @@ ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray * #endif #ifdef __SHADOW_RECORD_ALL__ -ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { @@ -267,7 +274,7 @@ ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray * 
#endif #ifdef __VOLUME__ -ccl_device_inline bool scene_intersect_volume(KernelGlobals *kg, +ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, const Ray *ray, Intersection *isect) { |