Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Dinges <blender@dingto.org>2014-10-05 05:53:51 +0400
committerThomas Dinges <blender@dingto.org>2014-10-05 05:53:51 +0400
commitdde740bcd723f3fc149b44d06155396fb425ce4a (patch)
treef0b01994e1cb302506f1c32440b8797f8c391438
parent24ddfe0c63e314f53c2d2555b123e0aa379474ed (diff)
Cycles / CUDA: Change inline rules for BVH intersection functions.
* On sm_30 and above there is no change (was not inlined already before), this just fixes a speed regression from yesterday. 6359c36ba407 * On sm_2x (tested with sm_21), I get a nice 8% speedup in the bmw scene with this. As a bonus, cubin compilation time and memory usage is significantly reduced. Regular cubin size went from 2.5MB to 2.0MB, Experimental one from 3.8MB to 2.5MB.
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h15
1 files changed, 11 insertions, 4 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 8c472028699..c5336e086b7 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -28,6 +28,13 @@
CCL_NAMESPACE_BEGIN
+/* Don't inline intersect functions on GPU, this is faster */
+#ifdef __KERNEL_GPU__
+#define ccl_device_intersect ccl_device_noinline
+#else
+#define ccl_device_intersect ccl_device_inline
+#endif
+
/* BVH intersection function variations */
#define BVH_INSTANCING 1
@@ -161,7 +168,7 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_volume.h"
#endif
-ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
+ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
uint *lcg_state, float difl, float extmax)
{
#ifdef __OBJECT_MOTION__
@@ -200,7 +207,7 @@ ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const
}
#ifdef __SUBSURFACE__
-ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -239,7 +246,7 @@ ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *
#endif
#ifdef __SHADOW_RECORD_ALL__
-ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -267,7 +274,7 @@ ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *
#endif
#ifdef __VOLUME__
-ccl_device_inline bool scene_intersect_volume(KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
const Ray *ray,
Intersection *isect)
{