diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-08-01 16:40:46 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-08-01 16:54:29 +0300 |
commit | 6353ecb996898b4ce2fe8065130ed1f5ea3b6989 (patch) | |
tree | b6d620152e4ff7920465d8396fe443dc9b3ffc56 /intern/cycles/kernel/geom | |
parent | 7065022f7aa23ba13d2999e1e40162a8f480af0e (diff) |
Cycles: Tweaks to support CUDA 8 toolkit
The changes mainly give explicit hints about which functions to inline,
so that inlining matches how it worked with the previous toolkit.
This makes kernels compiled with CUDA 8 render, on average, at the same speed
as previous kernels. Some scenes are somewhat faster and some are
somewhat slower, but the slowdown is within 1% so far.
On the positive side, it allows us to enable newer-generation cards on
the buildbots (so GTX 10x0 will be officially supported soon).
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r-- | intern/cycles/kernel/geom/geom_primitive.h | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_volume.h | 4 |
2 files changed, 16 insertions, 5 deletions
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 44734d1b70d..b16f0c9a99b 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -23,7 +23,11 @@ CCL_NAMESPACE_BEGIN /* Generic primitive attribute reading functions */ -ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + AttributeElement elem, + int offset, + float *dx, float *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) @@ -48,7 +52,12 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData * } } -ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + AttributeElement elem, + int offset, + float3 *dx, + float3 *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) @@ -75,7 +84,7 @@ ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData /* Default UV coordinate */ -ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) +ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { AttributeElement elem_uv; int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv); @@ -144,7 +153,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) /* Motion vector for motion pass */ -ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) +ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) { /* center position */ float3 center; diff --git a/intern/cycles/kernel/geom/geom_volume.h 
b/intern/cycles/kernel/geom/geom_volume.h index 2044aafc877..7c8182bc430 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -44,7 +44,9 @@ ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z) } #endif /* __KERNEL_GPU__ */ -ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) +ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, + const ShaderData *sd, + float3 P) { /* todo: optimize this so it's just a single matrix multiplication when * possible (not motion blur), or perhaps even just translation + scale */ |