diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-04-27 22:13:03 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-04-27 22:38:32 +0300 |
commit | ae7d84dbc1936ef7ddd00f9c22d074389f97f04f (patch) | |
tree | 7103596bc78785ed769e90c1570d654cc14a839d /intern/cycles/kernel/kernel_film.h | |
parent | 278ff15c7f3c4a91ba049b7a13ecf8a4183ebf67 (diff) |
Cycles: Use native saturate function for CUDA
This is more of a workaround for the CUDA optimizer, which can't optimize clamp(x, 0, 1)
into a single instruction and uses 4 instructions instead.
Original patch by @lockal, with my own modification:
Don't make changes outside of the kernel. They don't make any difference
anyway, and the term saturate() has a slightly different meaning outside of the kernel.
This gives around a 2% speedup in the Barcelona file, but in more complex shader
setups with lots of math nodes with clamping, the speedup could be much greater.
Subscribers: dingto
Projects: #cycles
Differential Revision: https://developer.blender.org/D1224
Diffstat (limited to 'intern/cycles/kernel/kernel_film.h')
-rw-r--r-- | intern/cycles/kernel/kernel_film.h | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index 4668b40b86d..f9e9b413898 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -27,7 +27,7 @@ ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale) result.z = color_scene_linear_to_srgb(result.z*exposure); /* clamp since alpha might be > 1.0 due to russian roulette */ - result.w = clamp(result.w, 0.0f, 1.0f); + result.w = saturate(result.w); return result; } @@ -37,10 +37,10 @@ ccl_device uchar4 film_float_to_byte(float4 color) uchar4 result; /* simple float to byte conversion */ - result.x = (uchar)clamp(color.x*255.0f, 0.0f, 255.0f); - result.y = (uchar)clamp(color.y*255.0f, 0.0f, 255.0f); - result.z = (uchar)clamp(color.z*255.0f, 0.0f, 255.0f); - result.w = (uchar)clamp(color.w*255.0f, 0.0f, 255.0f); + result.x = (uchar)(saturate(color.x)*255.0f); + result.y = (uchar)(saturate(color.y)*255.0f); + result.z = (uchar)(saturate(color.z)*255.0f); + result.w = (uchar)(saturate(color.w)*255.0f); return result; } |