From a3abb020e37a072eb71fd30de9ab125d1c16623a Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brechtvanlommel@gmail.com>
Date: Sun, 2 Oct 2016 14:48:39 +0200
Subject: Fix Cycles CUDA performance on CUDA 8.0.

Mostly this is making inlining match CUDA 7.5 in a few performance critical
places. The end result is that performance is now better than before, possibly
due to less register spilling or other CUDA 8.0 compiler improvements.

On benchmarks scenes, there are 3% to 35% render time reductions. Stack memory
usage is reduced a little too.

Reviewed By: sergey

Differential Revision: https://developer.blender.org/D2269
---
 intern/cycles/kernel/kernel_accumulate.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'intern/cycles/kernel/kernel_accumulate.h')

diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index dd5752f4c91..623c1dcaaa1 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -54,13 +54,7 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
 	}
 }
 
-/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. */
-#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 500
-ccl_device_inline
-#else
-ccl_device_noinline
-#endif
-void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
+ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
 {
 #ifdef __PASSES__
 	if(eval->use_light_pass) {
-- 
cgit v1.2.3