Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-08-02 16:04:34 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-08-02 16:13:34 +0300
commit500e0e9a3d5a9e7982d4df31a1477b8732c76e71 (patch)
tree92a4f5f1559143efdfeb2b087549c587477bba25
parentb416168d85a7cf5a27370f5c933fd1ad41ee333d (diff)
Cycles: Some more inline policy tweaks for CUDA 8
Makes it so toolkit does exactly the same decision about what to inline, but unfortunately it has really barely visible difference on GTX-980.
-rw-r--r--intern/cycles/kernel/closure/bsdf.h11
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h14
-rw-r--r--intern/cycles/kernel/kernel_shader.h25
-rw-r--r--intern/cycles/kernel/svm/svm_attribute.h20
-rw-r--r--intern/cycles/kernel/svm/svm_wireframe.h10
5 files changed, 55 insertions, 25 deletions
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 55bdf3ecbb4..86e1a7f317f 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
return label;
}
-ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_inline
+#endif
+float3 bsdf_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ const ShaderClosure *sc,
+ const float3 omega_in,
+ float *pdf)
{
float3 eval;
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 56516967d8f..64f1468eacf 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
- ShaderData *sd,
- ShaderData *emission_sd,
- PathRadiance *L,
- PathState *state,
- RNG *rng,
- float3 throughput)
+ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
+ ShaderData *sd,
+ ShaderData *emission_sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ float3 throughput)
{
int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples;
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index bb3fe933b2c..98d321c9c16 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* ShaderData setup from BSSRDF scatter */
#ifdef __SUBSURFACE__
-ccl_device void shader_setup_from_subsurface(
+# ifndef __KERNEL_CUDS__
+ccl_device
+# else
+ccl_device_inline
+# endif
+void shader_setup_from_subsurface(
KernelGlobals *kg,
ShaderData *sd,
const Intersection *isect,
@@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
}
#endif
-ccl_device void shader_bsdf_eval(KernelGlobals *kg,
- ShaderData *sd,
- const float3 omega_in,
- BsdfEval *eval,
- float light_pdf,
- bool use_mis)
+
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_inline
+#endif
+void shader_bsdf_eval(KernelGlobals *kg,
+ ShaderData *sd,
+ const float3 omega_in,
+ BsdfEval *eval,
+ float light_pdf,
+ bool use_mis)
{
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index ff92920c610..bd6013e9205 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
-ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
NodeAttributeType type, mesh_type;
AttributeElement elem;
@@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
}
}
-ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
- ShaderData *sd,
- float *stack,
- uint4 node)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+void svm_node_attr_bump_dy(KernelGlobals *kg,
+ ShaderData *sd,
+ float *stack,
+ uint4 node)
{
NodeAttributeType type, mesh_type;
AttributeElement elem;
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 30ccd523add..6eed9bc1a99 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
/* Wireframe Node */
-ccl_device float wireframe(KernelGlobals *kg,
- ShaderData *sd,
- float size,
- int pixel_size,
- float3 *P)
+ccl_device_inline float wireframe(KernelGlobals *kg,
+ ShaderData *sd,
+ float size,
+ int pixel_size,
+ float3 *P)
{
#ifdef __HAIR__
if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)