Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2015-03-13 10:14:43 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2015-03-13 10:38:14 +0300
commit61eab743f1377fdfcf44f2e4928290a3fc4ccfea (patch)
tree6dff417678cc61e7096c03f2a6c05dd5e33d42c5 /intern
parentaa4cb95a5c8569704f166cfd6d8f65606502ea40 (diff)
Cycles: Optimization for CMJ in CUDA kernels
Two things: - Use intrinsics for clz/ctz (ctz is implemented via ffs()). - Use faster sqrt() function which precision is enough for integer values.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/kernel/kernel_jitter.h8
1 files changed, 8 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 6aa29311ee6..6953f005ea9 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -47,6 +47,8 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b)
# else
return a >> __builtin_ctz(b);
# endif
+#elif defined(__KERNEL_CUDA__)
+ return a >> (__ffs(b) - 1);
#else
return a/b;
#endif
@@ -63,6 +65,8 @@ ccl_device_inline uint cmj_w_mask(uint w)
# else
return ((1 << (32 - __builtin_clz(w))) - 1);
# endif
+#elif defined(__KERNEL_CUDA__)
+ return ((1 << (32 - __clz(w))) - 1);
#else
w |= w >> 1;
w |= w >> 2;
@@ -167,7 +171,11 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
{
kernel_assert(s < N);
+#if defined(__KERNEL_CUDA__)
+ int m = float_to_int(__fsqrt_ru(N));
+#else
int m = float_to_int(sqrtf(N));
+#endif
int n = (N + m - 1)/m;
float invN = 1.0f/N;
float invm = 1.0f/m;