Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSv. Lockal <lockalsash@gmail.com>2014-04-03 23:34:53 +0400
committerSv. Lockal <lockalsash@gmail.com>2014-04-03 23:35:10 +0400
commitab32a1807dd153723d26a7d53895ed071233dafc (patch)
treef4e27eac2ecab3353ff558f346c1be1ad780c41f /intern/cycles/kernel/svm
parentc019ae5ea3a8eb49010de76c14a359c4729bbcf0 (diff)
Cycles: SSE optimization for Voronoi cells texture
Gives a 5-6% speedup for Caterpillar_PatazStudio.blend. Reviewed By: brecht, dingto. Differential Revision: https://developer.blender.org/D419
Diffstat (limited to 'intern/cycles/kernel/svm')
-rw-r--r--intern/cycles/kernel/svm/svm_noise.h14
-rw-r--r--intern/cycles/kernel/svm/svm_texture.h93
-rw-r--r--intern/cycles/kernel/svm/svm_voronoi.h19
3 files changed, 103 insertions, 23 deletions
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 282ad191470..91dda8972f9 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -357,15 +357,13 @@ ccl_device float3 cellnoise_color(float3 p)
return make_float3(r, g, b);
}
#else
-ccl_device float3 cellnoise_color(const float3& p)
+ccl_device __m128 cellnoise_color(const __m128& p)
{
- __m128i v_yxz = quick_floor_sse(_mm_setr_ps(p.y, p.x, p.z, 0.0f));
- __m128i v_xyy = shuffle<1, 0, 0, 3>(v_yxz);
- __m128i v_zzx = shuffle<2, 2, 1, 3>(v_yxz);
- __m128 rgb = bits_to_01_sse(hash_sse(v_xyy, v_yxz, v_zzx));
-
- float3 result = *(float3*)&rgb;
- return result;
+ __m128i ip = quick_floor_sse(p);
+ __m128i ip_yxz = shuffle<1, 0, 2, 3>(ip);
+ __m128i ip_xyy = shuffle<0, 1, 1, 3>(ip);
+ __m128i ip_zzx = shuffle<2, 2, 0, 3>(ip);
+ return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx));
}
#endif
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index 8ced8390b0b..5fd9204cbf6 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Voronoi Distances */
+#if 0
ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, float e)
{
#if 0
@@ -43,8 +44,7 @@ ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d,
}
/* Voronoi / Worley like */
-
-ccl_device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
+ccl_device_inline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
{
float da[4];
float3 pa[4];
@@ -119,7 +119,95 @@ ccl_device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
return result;
}
+#endif
+
+ccl_device float voronoi_F1_distance(float3 p)
+{
+ /* Returns the squared distance from p to the closest candidate point; the running minimum is kept in da. */
+ float da = 1e10f; /* large starting value; every candidate distance is folded in with min() below */
+
+#ifndef __KERNEL_SSE2__
+ int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); /* integer cell coordinates derived from p */
+
+ for (int xx = -1; xx <= 1; xx++) { /* the three nested loops visit all 27 offsets in {-1, 0, 1}^3 around (ix, iy, iz) */
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ float3 ip = make_float3(ix + xx, iy + yy, iz + zz); /* integer coordinates of the neighbouring cell, as floats */
+ float3 vp = ip + cellnoise_color(ip); /* candidate point: cell coordinates plus that cell's cellnoise_color() value */
+ float d = len_squared(p - vp); /* squared length, so no square root is taken per candidate */
+ da = min(d, da);
+ }
+ }
+ }
+#else
+ __m128 vec_p = load_m128(p); /* presumably p.x, p.y, p.z in the low three lanes - confirm against load_m128 */
+ __m128i xyzi = quick_floor_sse(vec_p); /* vector counterpart of the floor_to_int() calls in the scalar branch */
+
+ for (int xx = -1; xx <= 1; xx++) { /* same 27-offset neighbourhood as the scalar branch */
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0))); /* add the offset to lanes 0..2 (lane 3 gets +0), then convert int32 to float */
+ __m128 vp = _mm_add_ps(ip, cellnoise_color(ip)); /* candidate point, computed for all lanes at once */
+ float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp)); /* NOTE(review): template mask presumably keeps x, y, z and drops lane 3 - confirm */
+ da = min(d, da);
+ }
+ }
+ }
+#endif
+
+ return da; /* still a squared distance; the caller receives it unmodified */
+}
+
+ccl_device float3 voronoi_F1_color(float3 p)
+{
+ /* Returns cellnoise_color() evaluated at the candidate point closest to p; da tracks the smallest squared distance seen. */
+ float da = 1e10f; /* large starting value so that ordinary distances compare below it */
+
+#ifndef __KERNEL_SSE2__
+ float3 pa; /* NOTE(review): no initializer; only assigned inside the d < da branch, so it stays unset if that test never passes (e.g. d is NaN) */
+ int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); /* integer cell coordinates derived from p */
+
+ for (int xx = -1; xx <= 1; xx++) { /* the three nested loops visit all 27 offsets in {-1, 0, 1}^3 around (ix, iy, iz) */
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ float3 ip = make_float3(ix + xx, iy + yy, iz + zz); /* integer coordinates of the neighbouring cell, as floats */
+ float3 vp = ip + cellnoise_color(ip); /* candidate point: cell coordinates plus that cell's cellnoise_color() value */
+ float d = len_squared(p - vp);
+
+ if(d < da) { /* strict comparison: on equal distances the earlier candidate is kept */
+ da = d;
+ pa = vp; /* remember the position of the closest candidate so far */
+ }
+ }
+ }
+ }
+
+ return cellnoise_color(pa); /* the output colour is cellnoise_color() of the winning point's position */
+#else
+ __m128 pa, vec_p = load_m128(p); /* NOTE(review): pa has no initializer here either; vec_p presumably holds p in the low three lanes - confirm */
+ __m128i xyzi = quick_floor_sse(vec_p); /* vector counterpart of the floor_to_int() calls in the scalar branch */
+
+ for (int xx = -1; xx <= 1; xx++) { /* same 27-offset neighbourhood as the scalar branch */
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0))); /* add the offset to lanes 0..2 (lane 3 gets +0), then convert int32 to float */
+ __m128 vp = _mm_add_ps(ip, cellnoise_color(ip)); /* candidate point, computed for all lanes at once */
+ float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp)); /* NOTE(review): template mask presumably keeps x, y, z and drops lane 3 - confirm */
+
+ if(d < da) { /* strict comparison: on equal distances the earlier candidate is kept */
+ da = d;
+ pa = vp; /* remember the position of the closest candidate so far */
+ }
+ }
+ }
+ }
+
+ __m128 color = cellnoise_color(pa); /* named local so the cast below can bind a reference to it */
+ return (float3 &)color; /* reinterprets the __m128 storage as float3; assumes float3 matches the low lanes' layout - TODO confirm */
+#endif
+}
+#if 0
ccl_device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; }
ccl_device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; }
ccl_device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; }
@@ -139,6 +227,7 @@ ccl_device float voronoi_F3S(float3 p) { return 2.0f*voronoi_F3(p) - 1.0f; }
ccl_device float voronoi_F4S(float3 p) { return 2.0f*voronoi_F4(p) - 1.0f; }
ccl_device float voronoi_F1F2S(float3 p) { return 2.0f*voronoi_F1F2(p) - 1.0f; }
ccl_device float voronoi_CrS(float3 p) { return 2.0f*voronoi_Cr(p) - 1.0f; }
+#endif
/* Noise Bases */
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 7f597dc8bff..083a2f30e06 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -20,23 +20,16 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p)
{
- /* compute distance and point coordinate of 4 nearest neighbours */
- float4 dpa0 = voronoi_Fn(p, 1.0f, 0, -1);
-
- /* output */
- float fac;
- float3 color;
-
if(coloring == NODE_VORONOI_INTENSITY) {
- fac = fabsf(dpa0.w);
- color = make_float3(fac, fac, fac);
+ /* compute squared distance to the nearest neighbour */
+ float fac = voronoi_F1_distance(p);
+ return make_float4(fac, fac, fac, fac);
}
else {
- color = cellnoise_color(float4_to_float3(dpa0));
- fac = average(color);
+ /* compute color of the nearest neighbour */
+ float3 color = voronoi_F1_color(p);
+ return make_float4(color.x, color.y, color.z, average(color));
}
-
- return make_float4(color.x, color.y, color.z, fac);
}
ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)