diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2018-07-14 16:38:58 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2018-07-15 00:58:20 +0300 |
commit | 4697604331482c394c8a148c54a8e942120b634f (patch) | |
tree | 126cece12b7282389460a224706019fca327d349 /intern | |
parent | ceba8e28b7d1d00e9201fc626a8cd936893ea9d7 (diff) |
Cleanup: use float3 SSE instead of ssef for voronoi texture.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/svm/svm_noise.h | 57 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_voronoi.h | 57 | ||||
-rw-r--r-- | intern/cycles/util/util_math.h | 10 | ||||
-rw-r--r-- | intern/cycles/util/util_math_float3.h | 12 | ||||
-rw-r--r-- | intern/cycles/util/util_math_int3.h | 18 |
5 files changed, 67 insertions, 87 deletions
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 38074f0faff..8c425ecf326 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -32,12 +32,7 @@ CCL_NAMESPACE_BEGIN -#ifndef __KERNEL_SSE2__ -ccl_device int quick_floor(float x) -{ - return float_to_int(x) - ((x < 0) ? 1 : 0); -} -#else +#ifdef __KERNEL_SSE2__ ccl_device_inline ssei quick_floor_sse(const ssef& x) { ssei b = truncatei(x); @@ -46,18 +41,6 @@ ccl_device_inline ssei quick_floor_sse(const ssef& x) } #endif -#ifndef __KERNEL_SSE2__ -ccl_device float bits_to_01(uint bits) -{ - return bits * (1.0f/(float)0xFFFFFFFF); -} -#else -ccl_device_inline ssef bits_to_01_sse(const ssei& bits) -{ - return uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF); -} -#endif - ccl_device uint hash(uint kx, uint ky, uint kz) { // define some handy macros @@ -129,7 +112,7 @@ ccl_device uint phash(int kx, int ky, int kz, int3 p) #ifndef __KERNEL_SSE2__ ccl_device float floorfrac(float x, int* i) { - *i = quick_floor(x); + *i = quick_floor_to_int(x); return x - *i; } #else @@ -304,33 +287,27 @@ ccl_device float snoise(float3 p) } /* cell noise */ -#ifndef __KERNEL_SSE2__ -ccl_device_noinline float cellnoise(float3 p) +ccl_device float cellnoise(float3 p) { - uint ix = quick_floor(p.x); - uint iy = quick_floor(p.y); - uint iz = quick_floor(p.z); - - return bits_to_01(hash(ix, iy, iz)); + int3 ip = quick_floor_to_int3(p); + return bits_to_01(hash(ip.x, ip.y, ip.z)); } -ccl_device float3 cellnoise_color(float3 p) +ccl_device float3 cellnoise3(float3 p) { - float r = cellnoise(p); - float g = cellnoise(make_float3(p.y, p.x, p.z)); - float b = cellnoise(make_float3(p.y, p.z, p.x)); - + int3 ip = quick_floor_to_int3(p); +#ifndef __KERNEL_SSE__ + float r = bits_to_01(hash(ip.x, ip.y, ip.z)); + float g = bits_to_01(hash(ip.y, ip.x, ip.z)); + float b = bits_to_01(hash(ip.y, ip.z, ip.x)); return make_float3(r, g, b); -} #else -ccl_device ssef 
cellnoise_color(const ssef& p) -{ - ssei ip = quick_floor_sse(p); - ssei ip_yxz = shuffle<1, 0, 2, 3>(ip); - ssei ip_xyy = shuffle<0, 1, 1, 3>(ip); - ssei ip_zzx = shuffle<2, 2, 0, 3>(ip); - return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx)); -} + ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128)); + ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128)); + ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128)); + ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx); + return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF)); #endif +} CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 9bfb182544b..5d0b8a2a406 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -23,34 +23,19 @@ ccl_device float voronoi_F1_distance(float3 p) /* returns squared distance in da */ float da = 1e10f; -#ifndef __KERNEL_SSE2__ - int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); + int3 xyzi = quick_floor_to_int3(p); for(int xx = -1; xx <= 1; xx++) { for(int yy = -1; yy <= 1; yy++) { for(int zz = -1; zz <= 1; zz++) { - float3 ip = make_float3(ix + xx, iy + yy, iz + zz); - float3 vp = ip + cellnoise_color(ip); + int3 ip = xyzi + make_int3(xx, yy, zz); + float3 fp = make_float3(ip.x, ip.y, ip.z); + float3 vp = fp + cellnoise3(fp); float d = len_squared(p - vp); da = min(d, da); } } } -#else - ssef vec_p = load4f(p); - ssei xyzi = quick_floor_sse(vec_p); - - for(int xx = -1; xx <= 1; xx++) { - for(int yy = -1; yy <= 1; yy++) { - for(int zz = -1; zz <= 1; zz++) { - ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0)); - ssef vp = ip + cellnoise_color(ip); - float d = len_squared<1, 1, 1, 0>(vec_p - vp); - da = min(d, da); - } - } - } -#endif return da; } @@ -59,37 +44,17 @@ ccl_device float3 voronoi_F1_color(float3 p) { /* returns color of the nearest point */ float da = 1e10f; - -#ifndef __KERNEL_SSE2__ float3 pa; - int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = 
floor_to_int(p.z); - for(int xx = -1; xx <= 1; xx++) { - for(int yy = -1; yy <= 1; yy++) { - for(int zz = -1; zz <= 1; zz++) { - float3 ip = make_float3(ix + xx, iy + yy, iz + zz); - float3 vp = ip + cellnoise_color(ip); - float d = len_squared(p - vp); - - if(d < da) { - da = d; - pa = vp; - } - } - } - } - - return cellnoise_color(pa); -#else - ssef pa, vec_p = load4f(p); - ssei xyzi = quick_floor_sse(vec_p); + int3 xyzi = quick_floor_to_int3(p); for(int xx = -1; xx <= 1; xx++) { for(int yy = -1; yy <= 1; yy++) { for(int zz = -1; zz <= 1; zz++) { - ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0)); - ssef vp = ip + cellnoise_color(ip); - float d = len_squared<1, 1, 1, 0>(vec_p - vp); + int3 ip = xyzi + make_int3(xx, yy, zz); + float3 fp = make_float3(ip.x, ip.y, ip.z); + float3 vp = fp + cellnoise3(fp); + float d = len_squared(p - vp); if(d < da) { da = d; @@ -99,9 +64,7 @@ ccl_device float3 voronoi_F1_color(float3 p) } } - ssef color = cellnoise_color(pa); - return (float3 &)color; -#endif + return cellnoise3(pa); } ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p) diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index fd3199f209f..85cbd18b7ba 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -266,6 +266,11 @@ ccl_device_inline int floor_to_int(float f) return float_to_int(floorf(f)); } +ccl_device_inline int quick_floor_to_int(float x) +{ + return float_to_int(x) - ((x < 0) ? 
1 : 0); +} + ccl_device_inline int ceil_to_int(float f) { return float_to_int(ceilf(f)); @@ -550,6 +555,11 @@ ccl_device_inline float xor_signmask(float x, int y) return __int_as_float(__float_as_int(x) ^ y); } +ccl_device float bits_to_01(uint bits) +{ + return bits * (1.0f/(float)0xFFFFFFFF); +} + /* projections */ ccl_device_inline float2 map_to_tube(const float3 co) { diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h index f5149fe13ed..e42ded76c75 100644 --- a/intern/cycles/util/util_math_float3.h +++ b/intern/cycles/util/util_math_float3.h @@ -377,6 +377,18 @@ ccl_device_inline bool isequal_float3(const float3 a, const float3 b) #endif } +ccl_device_inline int3 quick_floor_to_int3(const float3 a) +{ +#ifdef __KERNEL_SSE__ + int3 b = int3(_mm_cvttps_epi32(a.m128)); + int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); + /* Unsaturated add of 0xffffffff (i.e. -1) is the same as subtracting 1. */ + return b + isneg; +#else + return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); +#endif +} + ccl_device_inline bool isfinite3_safe(float3 v) { return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h index 6eef8517665..81b10f31f4a 100644 --- a/intern/cycles/util/util_math_int3.h +++ b/intern/cycles/util/util_math_int3.h @@ -91,6 +91,24 @@ ccl_device_inline bool operator<(const int3 &a, const int3 &b) { return a.x < b.x && a.y < b.y && a.z < b.z; } + +ccl_device_inline int3 operator+(const int3 &a, const int3 &b) +{ +#ifdef __KERNEL_SSE__ + return int3(_mm_add_epi32(a.m128, b.m128)); +#else + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +#endif +} + +ccl_device_inline int3 operator-(const int3 &a, const int3 &b) +{ +#ifdef __KERNEL_SSE__ + return int3(_mm_sub_epi32(a.m128, b.m128)); +#else + return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); +#endif +} #endif /* 
!__KERNEL_OPENCL__ */ CCL_NAMESPACE_END |