diff options
Diffstat (limited to 'intern/cycles/kernel/svm/svm_noise.h')
-rw-r--r-- | intern/cycles/kernel/svm/svm_noise.h | 330 |
1 files changed, 176 insertions, 154 deletions
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 8c425ecf326..322579ccfe3 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -33,280 +33,302 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ -ccl_device_inline ssei quick_floor_sse(const ssef& x) +ccl_device_inline ssei quick_floor_sse(const ssef &x) { - ssei b = truncatei(x); - ssei isneg = cast((x < ssef(0.0f)).m128); - return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1 + ssei b = truncatei(x); + ssei isneg = cast((x < ssef(0.0f)).m128); + return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1 } #endif ccl_device uint hash(uint kx, uint ky, uint kz) { - // define some handy macros -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - // now hash the data! - uint a, b, c, len = 3; - a = b = c = 0xdeadbeef + (len << 2) + 13; - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; - // macros not needed anymore + // define some handy macros +#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) +#define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + b -= rot(a, 25); \ + c ^= b; \ + c -= rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } + // now hash the data! + uint a, b, c, len = 3; + a = b = c = 0xdeadbeef + (len << 2) + 13; + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; + // macros not needed anymore #undef rot #undef final } #ifdef __KERNEL_SSE2__ -ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz) +ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz) { -# define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k)))) -# define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0) - - uint len = 3; - ssei magic = ssei(0xdeadbeef + (len << 2) + 13); - ssei a = magic + kx; - ssei b = magic + ky; - ssei c = magic + kz; - - xor_rot(c, b, 14); - xor_rot(a, c, 11); - xor_rot(b, a, 25); - xor_rot(c, b, 16); - xor_rot(a, c, 4); - xor_rot(b, a, 14); - xor_rot(c, b, 24); - - return c; +# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k)))) +# define xor_rot(a, b, c) \ + do { \ + a = a ^ b; \ + a = a - rot(b, c); \ + } while (0) + + uint len = 3; + ssei magic = ssei(0xdeadbeef + (len << 2) + 13); + ssei a = magic + kx; + ssei b = magic + ky; + ssei c = magic + kz; + + xor_rot(c, b, 14); + xor_rot(a, c, 11); + xor_rot(b, a, 25); + xor_rot(c, b, 16); + xor_rot(a, c, 4); + xor_rot(b, a, 14); + xor_rot(c, b, 24); + + return c; # undef rot # undef xor_rot } #endif -#if 0 // unused +#if 0 // unused ccl_device int imod(int a, int b) { - a %= b; - return a < 0 ? a + b : a; + a %= b; + return a < 0 ? a + b : a; } ccl_device uint phash(int kx, int ky, int kz, int3 p) { - return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z)); + return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z)); } #endif #ifndef __KERNEL_SSE2__ -ccl_device float floorfrac(float x, int* i) +ccl_device float floorfrac(float x, int *i) { - *i = quick_floor_to_int(x); - return x - *i; + *i = quick_floor_to_int(x); + return x - *i; } #else -ccl_device_inline ssef floorfrac_sse(const ssef& x, ssei *i) +ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i) { - *i = quick_floor_sse(x); - return x - ssef(*i); + *i = quick_floor_sse(x); + return x - ssef(*i); } #endif #ifndef __KERNEL_SSE2__ ccl_device float fade(float t) { - return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); + return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); } #else ccl_device_inline ssef fade_sse(const ssef *t) { - ssef a = madd(*t, ssef(6.0f), ssef(-15.0f)); - ssef b = madd(*t, a, ssef(10.0f)); - return ((*t) * (*t)) * ((*t) * b); + ssef a = madd(*t, ssef(6.0f), ssef(-15.0f)); + ssef b = madd(*t, a, ssef(10.0f)); + return ((*t) * (*t)) * ((*t) * b); } #endif #ifndef __KERNEL_SSE2__ ccl_device float nerp(float t, float a, float b) { - return (1.0f - t) * a + t * b; + return (1.0f - t) * a + t * b; } #else -ccl_device_inline ssef nerp_sse(const ssef& t, const ssef& a, const ssef& b) +ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b) { - ssef x1 = (ssef(1.0f) - t) * a; - return madd(t, b, x1); + ssef x1 = (ssef(1.0f) - t) * a; + return madd(t, b, x1); } #endif #ifndef __KERNEL_SSE2__ ccl_device float grad(int hash, float x, float y, float z) { - // use vectors pointing to the edges of the cube - int h = hash & 15; - float u = h<8 ? x : y; - float vt = ((h == 12) | (h == 14)) ? x : z; - float v = h < 4 ? y : vt; - return ((h&1) ? -u : u) + ((h&2) ? -v : v); + // use vectors pointing to the edges of the cube + int h = hash & 15; + float u = h < 8 ? x : y; + float vt = ((h == 12) | (h == 14)) ? x : z; + float v = h < 4 ? y : vt; + return ((h & 1) ? -u : u) + ((h & 2) ? -v : v); } #else -ccl_device_inline ssef grad_sse(const ssei& hash, const ssef& x, const ssef& y, const ssef& z) +ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z) { - ssei c1 = ssei(1); - ssei c2 = ssei(2); + ssei c1 = ssei(1); + ssei c2 = ssei(2); - ssei h = hash & ssei(15); // h = hash & 15 + ssei h = hash & ssei(15); // h = hash & 15 - sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0 + sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0 - ssef u = select(case_ux, x, y); // u = h<8 ? x : y + ssef u = select(case_ux, x, y); // u = h<8 ? x : y - sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0 + sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0 - sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0 - sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0 + sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0 + sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0 - sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0 + sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0 - ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z + ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z - ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0 - ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0 - ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign) + ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0 + ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0 + ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign) - ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0 - ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0 - ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign) + ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0 + ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0 + ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign) - ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v) - return r; + ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v) + return r; } #endif #ifndef __KERNEL_SSE2__ ccl_device float scale3(float result) { - return 0.9820f * result; + return 0.9820f * result; } #else -ccl_device_inline ssef scale3_sse(const ssef& result) +ccl_device_inline ssef scale3_sse(const ssef &result) { - return ssef(0.9820f) * result; + return ssef(0.9820f) * result; } #endif #ifndef __KERNEL_SSE2__ ccl_device_noinline float perlin(float x, float y, float z) { - int X; float fx = floorfrac(x, &X); - int Y; float fy = floorfrac(y, &Y); - int Z; float fz = floorfrac(z, &Z); - - float u = fade(fx); - float v = fade(fy); - float w = fade(fz); - - float result; - - result = nerp (w, nerp (v, nerp (u, grad (hash (X , Y , Z ), fx , fy , fz ), - grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz )), - nerp (u, grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ), - grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ))), - nerp (v, nerp (u, grad (hash (X , Y , Z+1), fx , fy , fz-1.0f ), - grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f )), - nerp (u, grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f ), - grad (hash (X+1, Y+1, Z+1), fx-1.0f, fy-1.0f, fz-1.0f )))); - float r = scale3(result); - - /* can happen for big coordinates, things even out to 0.0 then anyway */ - return (isfinite(r))? r: 0.0f; + int X; + float fx = floorfrac(x, &X); + int Y; + float fy = floorfrac(y, &Y); + int Z; + float fz = floorfrac(z, &Z); + + float u = fade(fx); + float v = fade(fy); + float w = fade(fz); + + float result; + + result = nerp( + w, + nerp(v, + nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)), + nerp(u, + grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz), + grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))), + nerp(v, + nerp(u, + grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f), + grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)), + nerp(u, + grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f), + grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f)))); + float r = scale3(result); + + /* can happen for big coordinates, things even out to 0.0 then anyway */ + return (isfinite(r)) ? r : 0.0f; } #else ccl_device_noinline float perlin(float x, float y, float z) { - ssef xyz = ssef(x, y, z, 0.0f); - ssei XYZ; + ssef xyz = ssef(x, y, z, 0.0f); + ssei XYZ; - ssef fxyz = floorfrac_sse(xyz, &XYZ); + ssef fxyz = floorfrac_sse(xyz, &XYZ); - ssef uvw = fade_sse(&fxyz); - ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw); + ssef uvw = fade_sse(&fxyz); + ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw); - ssei XYZ_ofc = XYZ + ssei(1); - ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1 - ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1 + ssei XYZ_ofc = XYZ + ssei(1); + ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1 + ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1 - ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011 - ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111 + ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011 + ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111 - ssef fxyz_ofc = fxyz - ssef(1.0f); - ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc); - ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc)); + ssef fxyz_ofc = fxyz - ssef(1.0f); + ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc); + ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc)); - ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz); - ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz); - ssef n1 = nerp_sse(u, g1, g2); + ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz); + ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz); + ssef n1 = nerp_sse(u, g1, g2); - ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector - ssef n2 = nerp_sse(v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _] + ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector + ssef n2 = nerp_sse( + v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _] - ssef n2_second = shuffle<1>(n2); // extract b to a separate vector - ssef result = nerp_sse(w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _] + ssef n2_second = shuffle<1>(n2); // extract b to a separate vector + ssef result = nerp_sse( + w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _] - ssef r = scale3_sse(result); + ssef r = scale3_sse(result); - ssef infmask = cast(ssei(0x7f800000)); - ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0 - ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r - return extract<0>(rfinite); + ssef infmask = cast(ssei(0x7f800000)); + ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0 + ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r + return extract<0>(rfinite); } #endif /* perlin noise in range 0..1 */ ccl_device float noise(float3 p) { - float r = perlin(p.x, p.y, p.z); - return 0.5f*r + 0.5f; + float r = perlin(p.x, p.y, p.z); + return 0.5f * r + 0.5f; } /* perlin noise in range -1..1 */ ccl_device float snoise(float3 p) { - return perlin(p.x, p.y, p.z); + return perlin(p.x, p.y, p.z); } /* cell noise */ ccl_device float cellnoise(float3 p) { - int3 ip = quick_floor_to_int3(p); - return bits_to_01(hash(ip.x, ip.y, ip.z)); + int3 ip = quick_floor_to_int3(p); + return bits_to_01(hash(ip.x, ip.y, ip.z)); } ccl_device float3 cellnoise3(float3 p) { - int3 ip = quick_floor_to_int3(p); + int3 ip = quick_floor_to_int3(p); #ifndef __KERNEL_SSE__ - float r = bits_to_01(hash(ip.x, ip.y, ip.z)); - float g = bits_to_01(hash(ip.y, ip.x, ip.z)); - float b = bits_to_01(hash(ip.y, ip.z, ip.x)); - return make_float3(r, g, b); + float r = bits_to_01(hash(ip.x, ip.y, ip.z)); + float g = bits_to_01(hash(ip.y, ip.x, ip.z)); + float b = bits_to_01(hash(ip.y, ip.z, ip.x)); + return make_float3(r, g, b); #else - ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128)); - ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128)); - ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128)); - ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx); - return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF)); + ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128)); + ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128)); + ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128)); + ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx); + return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF)); #endif } |