Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-08-01 16:40:46 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-08-01 16:54:29 +0300
commit6353ecb996898b4ce2fe8065130ed1f5ea3b6989 (patch)
treeb6d620152e4ff7920465d8396fe443dc9b3ffc56 /intern/cycles/kernel/kernel_shader.h
parent7065022f7aa23ba13d2999e1e40162a8f480af0e (diff)
Cycles: Tweaks to support CUDA 8 toolkit
All the changes are mainly giving explicit tips on inlining functions, so they match how inlining worked with previous toolkit. This make kernel compiled by CUDA 8 render in average with same speed as previous kernels. Some scenes are somewhat faster, some of them are somewhat slower. But slowdown is within 1% so far. On a positive side it allows us to enable newer generation cards on buildbots (so GTX 10x0 will be officially supported soon).
Diffstat (limited to 'intern/cycles/kernel/kernel_shader.h')
-rw-r--r--intern/cycles/kernel/kernel_shader.h43
1 files changed, 27 insertions, 16 deletions
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b7641c37d93..bb3fe933b2c 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -149,8 +149,11 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* ShaderData setup from BSSRDF scatter */
#ifdef __SUBSURFACE__
-ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd,
- const Intersection *isect, const Ray *ray)
+ccl_device void shader_setup_from_subsurface(
+ KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
bool backfacing = sd->flag & SD_BACKFACING;
@@ -226,14 +229,14 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
/* ShaderData setup from position sampled on mesh */
-ccl_device void shader_setup_from_sample(KernelGlobals *kg,
- ShaderData *sd,
- const float3 P,
- const float3 Ng,
- const float3 I,
- int shader, int object, int prim,
- float u, float v, float t,
- float time)
+ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
+ ShaderData *sd,
+ const float3 P,
+ const float3 Ng,
+ const float3 I,
+ int shader, int object, int prim,
+ float u, float v, float t,
+ float time)
{
/* vectors */
ccl_fetch(sd, P) = P;
@@ -445,7 +448,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
/* Merging */
#if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
-ccl_device void shader_merge_closures(ShaderData *sd)
+ccl_device_inline void shader_merge_closures(ShaderData *sd)
{
/* merge identical closures, better when we sample a single closure at a time */
for(int i = 0; i < sd->num_closure; i++) {
@@ -554,9 +557,13 @@ ccl_device void shader_bsdf_eval(KernelGlobals *kg,
}
}
-ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd,
- float randu, float randv, BsdfEval *bsdf_eval,
- float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
+ ShaderData *sd,
+ float randu, float randv,
+ BsdfEval *bsdf_eval,
+ float3 *omega_in,
+ differential3 *domega_in,
+ float *pdf)
{
int sampled = 0;
@@ -991,8 +998,12 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
/* Volume Evaluation */
-ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
- PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx)
+ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
+ ShaderData *sd,
+ PathState *state,
+ VolumeStack *stack,
+ int path_flag,
+ ShaderContext ctx)
{
/* reset closures once at the start, we will be accumulating the closures
* for all volumes in the stack into a single array of closures */