From 6353ecb996898b4ce2fe8065130ed1f5ea3b6989 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Mon, 1 Aug 2016 15:40:46 +0200 Subject: Cycles: Tweaks to support CUDA 8 toolkit All the changes mainly give explicit inlining hints for functions, so that inlining matches how it worked with the previous toolkit. This makes kernels compiled with CUDA 8 render, on average, at the same speed as previous kernels. Some scenes are somewhat faster and some are somewhat slower, but the slowdown is within 1% so far. On the positive side, it allows us to enable newer-generation cards on the buildbots (so GTX 10x0 will be officially supported soon). --- intern/cycles/kernel/svm/svm_ramp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'intern/cycles/kernel/svm/svm_ramp.h') diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index f959d90f309..368740f64c7 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg, - int offset, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg, + int offset, + float f, + bool interpolate, + bool extrapolate, + int table_size) { if((f < 0.0f || f > 1.0f) && extrapolate) { float4 t0, dy; -- cgit v1.2.3