Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-07-14 16:38:58 +0300
committer Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-07-15 00:58:20 +0300
commit 4697604331482c394c8a148c54a8e942120b634f (patch)
tree 126cece12b7282389460a224706019fca327d349
parent ceba8e28b7d1d00e9201fc626a8cd936893ea9d7 (diff)
Cleanup: use float3 SSE instead of ssef for voronoi texture.
-rw-r--r-- intern/cycles/kernel/svm/svm_noise.h 57
-rw-r--r-- intern/cycles/kernel/svm/svm_voronoi.h 57
-rw-r--r-- intern/cycles/util/util_math.h 10
-rw-r--r-- intern/cycles/util/util_math_float3.h 12
-rw-r--r-- intern/cycles/util/util_math_int3.h 18
5 files changed, 67 insertions, 87 deletions
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 38074f0faff..8c425ecf326 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -32,12 +32,7 @@
CCL_NAMESPACE_BEGIN
-#ifndef __KERNEL_SSE2__
-ccl_device int quick_floor(float x)
-{
- return float_to_int(x) - ((x < 0) ? 1 : 0);
-}
-#else
+#ifdef __KERNEL_SSE2__
ccl_device_inline ssei quick_floor_sse(const ssef& x)
{
ssei b = truncatei(x);
@@ -46,18 +41,6 @@ ccl_device_inline ssei quick_floor_sse(const ssef& x)
}
#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float bits_to_01(uint bits)
-{
- return bits * (1.0f/(float)0xFFFFFFFF);
-}
-#else
-ccl_device_inline ssef bits_to_01_sse(const ssei& bits)
-{
- return uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF);
-}
-#endif
-
ccl_device uint hash(uint kx, uint ky, uint kz)
{
// define some handy macros
@@ -129,7 +112,7 @@ ccl_device uint phash(int kx, int ky, int kz, int3 p)
#ifndef __KERNEL_SSE2__
ccl_device float floorfrac(float x, int* i)
{
- *i = quick_floor(x);
+ *i = quick_floor_to_int(x);
return x - *i;
}
#else
@@ -304,33 +287,27 @@ ccl_device float snoise(float3 p)
}
/* cell noise */
-#ifndef __KERNEL_SSE2__
-ccl_device_noinline float cellnoise(float3 p)
+ccl_device float cellnoise(float3 p)
{
- uint ix = quick_floor(p.x);
- uint iy = quick_floor(p.y);
- uint iz = quick_floor(p.z);
-
- return bits_to_01(hash(ix, iy, iz));
+ int3 ip = quick_floor_to_int3(p);
+ return bits_to_01(hash(ip.x, ip.y, ip.z));
}
-ccl_device float3 cellnoise_color(float3 p)
+ccl_device float3 cellnoise3(float3 p)
{
- float r = cellnoise(p);
- float g = cellnoise(make_float3(p.y, p.x, p.z));
- float b = cellnoise(make_float3(p.y, p.z, p.x));
-
+ int3 ip = quick_floor_to_int3(p);
+#ifndef __KERNEL_SSE__
+ float r = bits_to_01(hash(ip.x, ip.y, ip.z));
+ float g = bits_to_01(hash(ip.y, ip.x, ip.z));
+ float b = bits_to_01(hash(ip.y, ip.z, ip.x));
return make_float3(r, g, b);
-}
#else
-ccl_device ssef cellnoise_color(const ssef& p)
-{
- ssei ip = quick_floor_sse(p);
- ssei ip_yxz = shuffle<1, 0, 2, 3>(ip);
- ssei ip_xyy = shuffle<0, 1, 1, 3>(ip);
- ssei ip_zzx = shuffle<2, 2, 0, 3>(ip);
- return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx));
-}
+ ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
+ ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
+ ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
+ ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
+ return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF));
#endif
+}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 9bfb182544b..5d0b8a2a406 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -23,34 +23,19 @@ ccl_device float voronoi_F1_distance(float3 p)
/* returns squared distance in da */
float da = 1e10f;
-#ifndef __KERNEL_SSE2__
- int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
+ int3 xyzi = quick_floor_to_int3(p);
for(int xx = -1; xx <= 1; xx++) {
for(int yy = -1; yy <= 1; yy++) {
for(int zz = -1; zz <= 1; zz++) {
- float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
- float3 vp = ip + cellnoise_color(ip);
+ int3 ip = xyzi + make_int3(xx, yy, zz);
+ float3 fp = make_float3(ip.x, ip.y, ip.z);
+ float3 vp = fp + cellnoise3(fp);
float d = len_squared(p - vp);
da = min(d, da);
}
}
}
-#else
- ssef vec_p = load4f(p);
- ssei xyzi = quick_floor_sse(vec_p);
-
- for(int xx = -1; xx <= 1; xx++) {
- for(int yy = -1; yy <= 1; yy++) {
- for(int zz = -1; zz <= 1; zz++) {
- ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
- ssef vp = ip + cellnoise_color(ip);
- float d = len_squared<1, 1, 1, 0>(vec_p - vp);
- da = min(d, da);
- }
- }
- }
-#endif
return da;
}
@@ -59,37 +44,17 @@ ccl_device float3 voronoi_F1_color(float3 p)
{
/* returns color of the nearest point */
float da = 1e10f;
-
-#ifndef __KERNEL_SSE2__
float3 pa;
- int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
- for(int xx = -1; xx <= 1; xx++) {
- for(int yy = -1; yy <= 1; yy++) {
- for(int zz = -1; zz <= 1; zz++) {
- float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
- float3 vp = ip + cellnoise_color(ip);
- float d = len_squared(p - vp);
-
- if(d < da) {
- da = d;
- pa = vp;
- }
- }
- }
- }
-
- return cellnoise_color(pa);
-#else
- ssef pa, vec_p = load4f(p);
- ssei xyzi = quick_floor_sse(vec_p);
+ int3 xyzi = quick_floor_to_int3(p);
for(int xx = -1; xx <= 1; xx++) {
for(int yy = -1; yy <= 1; yy++) {
for(int zz = -1; zz <= 1; zz++) {
- ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
- ssef vp = ip + cellnoise_color(ip);
- float d = len_squared<1, 1, 1, 0>(vec_p - vp);
+ int3 ip = xyzi + make_int3(xx, yy, zz);
+ float3 fp = make_float3(ip.x, ip.y, ip.z);
+ float3 vp = fp + cellnoise3(fp);
+ float d = len_squared(p - vp);
if(d < da) {
da = d;
@@ -99,9 +64,7 @@ ccl_device float3 voronoi_F1_color(float3 p)
}
}
- ssef color = cellnoise_color(pa);
- return (float3 &)color;
-#endif
+ return cellnoise3(pa);
}
ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p)
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index fd3199f209f..85cbd18b7ba 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -266,6 +266,11 @@ ccl_device_inline int floor_to_int(float f)
return float_to_int(floorf(f));
}
+ccl_device_inline int quick_floor_to_int(float x)
+{
+ return float_to_int(x) - ((x < 0) ? 1 : 0);
+}
+
ccl_device_inline int ceil_to_int(float f)
{
return float_to_int(ceilf(f));
@@ -550,6 +555,11 @@ ccl_device_inline float xor_signmask(float x, int y)
return __int_as_float(__float_as_int(x) ^ y);
}
+ccl_device float bits_to_01(uint bits)
+{
+ return bits * (1.0f/(float)0xFFFFFFFF);
+}
+
/* projections */
ccl_device_inline float2 map_to_tube(const float3 co)
{
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index f5149fe13ed..e42ded76c75 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -377,6 +377,18 @@ ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
#endif
}
+ccl_device_inline int3 quick_floor_to_int3(const float3 a)
+{
+#ifdef __KERNEL_SSE__
+ int3 b = int3(_mm_cvttps_epi32(a.m128));
+ int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f))));
+ /* Unsaturated add 0xffffffff is the same as adding -1, i.e. subtracting 1. */
+ return b + isneg;
+#else
+ return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z));
+#endif
+}
+
ccl_device_inline bool isfinite3_safe(float3 v)
{
return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z);
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
index 6eef8517665..81b10f31f4a 100644
--- a/intern/cycles/util/util_math_int3.h
+++ b/intern/cycles/util/util_math_int3.h
@@ -91,6 +91,24 @@ ccl_device_inline bool operator<(const int3 &a, const int3 &b)
{
return a.x < b.x && a.y < b.y && a.z < b.z;
}
+
+ccl_device_inline int3 operator+(const int3 &a, const int3 &b)
+{
+#ifdef __KERNEL_SSE__
+ return int3(_mm_add_epi32(a.m128, b.m128));
+#else
+ return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
+#endif
+}
+
+ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
+{
+#ifdef __KERNEL_SSE__
+ return int3(_mm_sub_epi32(a.m128, b.m128));
+#else
+ return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
+#endif
+}
#endif /* !__KERNEL_OPENCL__ */
CCL_NAMESPACE_END