diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2016-10-02 15:48:39 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2016-10-03 23:15:25 +0300 |
commit | a3abb020e37a072eb71fd30de9ab125d1c16623a (patch) | |
tree | b525be7f8a0792eedecb2b95802ede88dc3f330e /intern/cycles/kernel/geom/geom_curve.h | |
parent | 49ad4215baf16d850d0e367f003ab688e4a3d08e (diff) |
Fix Cycles CUDA performance on CUDA 8.0.
Mostly this is making inlining match CUDA 7.5 in a few performance critical
places. The end result is that performance is now better than before, possibly
due to less register spilling or other CUDA 8.0 compiler improvements.
On benchmarks scenes, there are 3% to 35% render time reductions. Stack memory
usage is reduced a little too.
Reviewed By: sergey
Differential Revision: https://developer.blender.org/D2269
Diffstat (limited to 'intern/cycles/kernel/geom/geom_curve.h')
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index aa9cd295452..84aaaab7453 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -222,10 +222,10 @@ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a) #ifdef __KERNEL_SSE2__ /* Pass P and dir by reference to aligned vector */ -ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, +ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, const float3 &P, const float3 &dir, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax) #else -ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, +ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax) #endif { @@ -621,7 +621,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect return hit; } -ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, +ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, float3 P, float3 direction, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax) { /* define few macros to minimize code duplication for SSE */ |