Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Sharybin <sergey.vfx@gmail.com> 2016-01-14 12:53:05 +0300
committer Sergey Sharybin <sergey.vfx@gmail.com> 2016-01-14 12:53:05 +0300
commit 1f273cec00feddb1065847e3c8163cdcf8a6d89a (patch)
tree 5d24030df25f02e3122340eabcc591767ef05576 /intern/cycles/kernel/kernel_path.h
parent 5af103fe008aac0cb20631871bbee16f319835ed (diff)
Cycles: Tweak inline policy for some functions
The goal is to bring the Experimental kernel closer in performance to the official kernel, avoiding spills and such. There should not be a big impact on the official kernel; my own tests showed a performance drop of a few percent on a laptop's GPU. CPU speed was always the same on the AVX, AVX2 and SSE4.1 CPUs I've been testing on here. This seems to be the last essential step before we can get rid of the Experimental kernel and officially enable SSS on GPU without causing major performance issues. Surely some more tweaks may still be required, but we can keep doing those until the cows come home anyway.
Diffstat (limited to 'intern/cycles/kernel/kernel_path.h')
-rw-r--r--intern/cycles/kernel/kernel_path.h13
1 files changed, 11 insertions, 2 deletions
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index fc32201596f..650e3b047f0 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -391,7 +391,12 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
}
-ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ float3 throughput)
{
/* todo: solve correlation */
float bsdf_u, bsdf_v;
@@ -586,7 +591,11 @@ ccl_device void kernel_path_subsurface_setup_indirect(
#endif /* __SUBSURFACE__ */
-ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
+ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
+ RNG *rng,
+ int sample,
+ Ray ray,
+ ccl_global float *buffer)
{
/* initialize */
PathRadiance L;