diff options
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/camera/projection.h | 26 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel.cpp | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/noise.h | 268 | ||||
-rw-r--r-- | intern/cycles/kernel/types.h | 1 |
5 files changed, 178 insertions, 136 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 81c5f593974..3fbb346e94f 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -328,6 +328,7 @@ set(SRC_UTIL_HEADERS ../util/math_int2.h ../util/math_int3.h ../util/math_int4.h + ../util/math_int8.h ../util/math_matrix.h ../util/projection.h ../util/rect.h @@ -350,6 +351,8 @@ set(SRC_UTIL_HEADERS ../util/types_int3_impl.h ../util/types_int4.h ../util/types_int4_impl.h + ../util/types_int8.h + ../util/types_int8_impl.h ../util/types_spectrum.h ../util/types_uchar2.h ../util/types_uchar2_impl.h @@ -866,8 +869,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI) else() list(APPEND sycl_compiler_flags -fPIC) - # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash - # it triggers at either AoT or JIT stages gets fixed. + # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation + # crash it triggers at either AoT or JIT stages gets fixed. list(APPEND sycl_compiler_flags -fhonor-nans) # add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and @@ -881,7 +884,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI) OUTPUT ${cycles_kernel_oneapi_lib} COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib" - "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld + # `$ENV{PATH}` is for compiler to find `ld`. + "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags} DEPENDS ${cycles_oneapi_kernel_sources}) endif() diff --git a/intern/cycles/kernel/camera/projection.h b/intern/cycles/kernel/camera/projection.h index c9fe3a6c7fb..1d16aa35abe 100644 --- a/intern/cycles/kernel/camera/projection.h +++ b/intern/cycles/kernel/camera/projection.h @@ -201,11 +201,35 @@ ccl_device float2 direction_to_mirrorball(float3 dir) return make_float2(u, v); } +/* Single face of a equiangular cube map projection as described in + https://blog.google/products/google-ar-vr/bringing-pixels-front-and-center-vr-video/ */ +ccl_device float3 equiangular_cubemap_face_to_direction(float u, float v) +{ + u = (1.0f - u); + + u = tanf(u * M_PI_2_F - M_PI_4_F); + v = tanf(v * M_PI_2_F - M_PI_4_F); + + return make_float3(1.0f, u, v); +} + +ccl_device float2 direction_to_equiangular_cubemap_face(float3 dir) +{ + float u = atan2f(dir.y, dir.x) * 2.0f / M_PI_F + 0.5f; + float v = atan2f(dir.z, dir.x) * 2.0f / M_PI_F + 0.5f; + + u = 1.0f - u; + + return make_float2(u, v); +} + ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v) { switch (cam->panorama_type) { case PANORAMA_EQUIRECTANGULAR: return equirectangular_range_to_direction(u, v, cam->equirectangular_range); + case PANORAMA_EQUIANGULAR_CUBEMAP_FACE: + return equiangular_cubemap_face_to_direction(u, v); case PANORAMA_MIRRORBALL: return mirrorball_to_direction(u, v); case PANORAMA_FISHEYE_EQUIDISTANT: @@ -230,6 +254,8 @@ ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, f switch (cam->panorama_type) { case PANORAMA_EQUIRECTANGULAR: return direction_to_equirectangular_range(dir, cam->equirectangular_range); + case PANORAMA_EQUIANGULAR_CUBEMAP_FACE: + return direction_to_equiangular_cubemap_face(dir); case PANORAMA_MIRRORBALL: return direction_to_mirrorball(dir); case PANORAMA_FISHEYE_EQUIDISTANT: diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp index 01087c96dd6..558431961ab 100644 --- a/intern/cycles/kernel/device/cpu/kernel.cpp +++ b/intern/cycles/kernel/device/cpu/kernel.cpp @@ -7,6 +7,7 @@ * one with SSE2 intrinsics. */ #if defined(__x86_64__) || defined(_M_X64) +# define __KERNEL_SSE__ # define __KERNEL_SSE2__ #endif @@ -29,11 +30,15 @@ # define __KERNEL_SSE41__ # endif # ifdef __AVX__ -# define __KERNEL_SSE__ +# ifndef __KERNEL_SSE__ +# define __KERNEL_SSE__ +# endif # define __KERNEL_AVX__ # endif # ifdef __AVX2__ -# define __KERNEL_SSE__ +# ifndef __KERNEL_SSE__ +# define __KERNEL_SSE__ +# endif # define __KERNEL_AVX2__ # endif #endif diff --git a/intern/cycles/kernel/svm/noise.h b/intern/cycles/kernel/svm/noise.h index 31e77d87413..209195a03f1 100644 --- a/intern/cycles/kernel/svm/noise.h +++ b/intern/cycles/kernel/svm/noise.h @@ -39,11 +39,11 @@ ccl_device_noinline_cpu float perlin_1d(float x) } /* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if - * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not + * SSE is supported, that is, if __KERNEL_SSE__ is defined. If it is not * supported, we do a standard implementation, but if it is supported, we * do an implementation using SSE intrinsics. */ -#if !defined(__KERNEL_SSE2__) +#if !defined(__KERNEL_SSE__) /* ** Standard Implementation ** */ @@ -250,18 +250,18 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) /* SSE Bilinear Interpolation: * - * The function takes two ssef inputs: + * The function takes two float4 inputs: * - p : Contains the values at the points (v0, v1, v2, v3). * - f : Contains the values (x, y, _, _). The third and fourth values are unused. * * The interpolation is done in two steps: * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1). * (v2, v3) is generated by moving v2 and v3 to the first and second - * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and + * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and * fourth values are unused. * 2. Interpolate g0 and g1 along the y axis to get the final value. - * g1 is generated by populating an ssef with the second value of g. - * Only the first value is important in the final ssef. + * g1 is generated by populating an float4 with the second value of g. + * Only the first value is important in the final float4. * * v1 v3 g1 * @ + + + + @ @ y @@ -272,27 +272,27 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) * v0 v2 g0 * */ -ccl_device_inline ssef bi_mix(ssef p, ssef f) +ccl_device_inline float4 bi_mix(float4 p, float4 f) { - ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f)); + float4 g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f)); return mix(g, shuffle<1>(g), shuffle<1>(f)); } -ccl_device_inline ssef fade(const ssef &t) +ccl_device_inline float4 fade(const float4 t) { - ssef a = madd(t, 6.0f, -15.0f); - ssef b = madd(t, a, 10.0f); + float4 a = madd(t, make_float4(6.0f), make_float4(-15.0f)); + float4 b = madd(t, a, make_float4(10.0f)); return (t * t) * (t * b); } /* Negate val if the nth bit of h is 1. */ # define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n)))) -ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y) +ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y) { - ssei h = hash & 7; - ssef u = select(h < 4, x, y); - ssef v = 2.0f * select(h < 4, y, x); + int4 h = hash & 7; + float4 u = select(h < 4, x, y); + float4 v = 2.0f * select(h < 4, y, x); return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); } @@ -310,28 +310,28 @@ ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y) */ ccl_device_noinline_cpu float perlin_2d(float x, float y) { - ssei XY; - ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY); - ssef uv = fade(fxy); + int4 XY; + float4 fxy = floorfrac(make_float4(x, y, 0.0f, 0.0f), &XY); + float4 uv = fade(fxy); - ssei XY1 = XY + 1; - ssei X = shuffle<0, 0, 0, 0>(XY, XY1); - ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1)); + int4 XY1 = XY + make_int4(1); + int4 X = shuffle<0, 0, 0, 0>(XY, XY1); + int4 Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1)); - ssei h = hash_ssei2(X, Y); + int4 h = hash_int4_2(X, Y); - ssef fxy1 = fxy - 1.0f; - ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1); - ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1)); + float4 fxy1 = fxy - make_float4(1.0f); + float4 fx = shuffle<0, 0, 0, 0>(fxy, fxy1); + float4 fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1)); - ssef g = grad(h, fx, fy); + float4 g = grad(h, fx, fy); return extract<0>(bi_mix(g, uv)); } /* SSE Trilinear Interpolation: * - * The function takes three ssef inputs: + * The function takes three float4 inputs: * - p : Contains the values at the points (v0, v1, v2, v3). * - q : Contains the values at the points (v4, v5, v6, v7). * - f : Contains the values (x, y, z, _). The fourth value is unused. @@ -340,11 +340,11 @@ ccl_device_noinline_cpu float perlin_2d(float x, float y) * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3). * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1). * (s2, s3) is generated by moving v2 and v3 to the first and second - * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and + * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and * fourth values are unused. * 3. Interpolate g0 and g1 along the z axis to get the final value. - * g1 is generated by populating an ssef with the second value of g. - * Only the first value is important in the final ssef. + * g1 is generated by populating an float4 with the second value of g. + * Only the first value is important in the final float4. * * v3 v7 * @ + + + + + + @ s3 @ @@ -362,10 +362,10 @@ ccl_device_noinline_cpu float perlin_2d(float x, float y) * @ + + + + + + @ @ * v0 v4 s0 */ -ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f) +ccl_device_inline float4 tri_mix(float4 p, float4 q, float4 f) { - ssef s = mix(p, q, shuffle<0>(f)); - ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f)); + float4 s = mix(p, q, shuffle<0>(f)); + float4 g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f)); return mix(g, shuffle<1>(g), shuffle<2>(f)); } @@ -374,24 +374,24 @@ ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f) * supported, we do an SSE implementation, but if it is supported, * we do an implementation using AVX intrinsics. */ -# if !defined(__KERNEL_AVX__) +# if !defined(__KERNEL_AVX2__) -ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z) +ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y, const float4 z) { - ssei h = hash & 15; - ssef u = select(h < 8, x, y); - ssef vt = select((h == 12) | (h == 14), x, z); - ssef v = select(h < 4, y, vt); + int4 h = hash & 15; + float4 u = select(h < 8, x, y); + float4 vt = select((h == 12) | (h == 14), x, z); + float4 v = select(h < 4, y, vt); return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); } -ccl_device_inline ssef -grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w) +ccl_device_inline float4 +grad(const int4 hash, const float4 x, const float4 y, const float4 z, const float4 w) { - ssei h = hash & 31; - ssef u = select(h < 24, x, y); - ssef v = select(h < 16, y, z); - ssef s = select(h < 8, z, w); + int4 h = hash & 31; + float4 u = select(h < 24, x, y); + float4 v = select(h < 16, y, z); + float4 s = select(h < 8, z, w); return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2); } @@ -401,7 +401,7 @@ grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef & * between two trilinear interpolations. * */ -ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f) +ccl_device_inline float4 quad_mix(float4 p, float4 q, float4 r, float4 s, float4 f) { return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f)); } @@ -427,23 +427,23 @@ ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f) */ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) { - ssei XYZ; - ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); - ssef uvw = fade(fxyz); + int4 XYZ; + float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ); + float4 uvw = fade(fxyz); - ssei XYZ1 = XYZ + 1; - ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); + int4 XYZ1 = XYZ + make_int4(1); + int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); + int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); - ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z); - ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z); + int4 h1 = hash_int4_3(shuffle<0>(XYZ), Y, Z); + int4 h2 = hash_int4_3(shuffle<0>(XYZ1), Y, Z); - ssef fxyz1 = fxyz - 1.0f; - ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); + float4 fxyz1 = fxyz - make_float4(1.0f); + float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); + float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); - ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz); - ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz); + float4 g1 = grad(h1, shuffle<0>(fxyz), fy, fz); + float4 g2 = grad(h2, shuffle<0>(fxyz1), fy, fz); return extract<0>(tri_mix(g1, g2, uvw)); } @@ -481,29 +481,29 @@ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) */ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) { - ssei XYZW; - ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); - ssef uvws = fade(fxyzw); + int4 XYZW; + float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW); + float4 uvws = fade(fxyzw); - ssei XYZW1 = XYZW + 1; - ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); + int4 XYZW1 = XYZW + make_int4(1); + int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); + int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); - ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW)); - ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW)); + int4 h1 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW)); + int4 h2 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW)); - ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1)); - ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1)); + int4 h3 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1)); + int4 h4 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1)); - ssef fxyzw1 = fxyzw - 1.0f; - ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); + float4 fxyzw1 = fxyzw - make_float4(1.0f); + float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); + float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); - ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw)); - ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw)); + float4 g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw)); + float4 g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw)); - ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1)); - ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1)); + float4 g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1)); + float4 g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1)); return extract<0>(quad_mix(g1, g2, g3, g4, uvws)); } @@ -512,22 +512,22 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) /* AVX Implementation */ -ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z) +ccl_device_inline vfloat8 grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z) { - avxi h = hash & 15; - avxf u = select(h < 8, x, y); - avxf vt = select((h == 12) | (h == 14), x, z); - avxf v = select(h < 4, y, vt); + vint8 h = hash & 15; + vfloat8 u = select(h < 8, x, y); + vfloat8 vt = select((h == 12) | (h == 14), x, z); + vfloat8 v = select(h < 4, y, vt); return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); } -ccl_device_inline avxf -grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w) +ccl_device_inline vfloat8 +grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z, const vfloat8 w) { - avxi h = hash & 31; - avxf u = select(h < 24, x, y); - avxf v = select(h < 16, y, z); - avxf s = select(h < 8, z, w); + vint8 h = hash & 31; + vfloat8 u = select(h < 24, x, y); + vfloat8 v = select(h < 16, y, z); + vfloat8 s = select(h < 8, z, w); return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2); } @@ -537,13 +537,13 @@ grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf & * 1. Interpolate p and q along the w axis to get s. * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final * value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the - * low and high ssef from s. + * low and high float4 from s. * */ -ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f) +ccl_device_inline float4 quad_mix(vfloat8 p, vfloat8 q, float4 f) { - ssef fv = shuffle<3>(f); - avxf s = mix(p, q, avxf(fv, fv)); + float4 fv = shuffle<3>(f); + vfloat8 s = mix(p, q, make_vfloat8(fv, fv)); return tri_mix(low(s), high(s), f); } @@ -565,25 +565,25 @@ ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f) */ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) { - ssei XYZ; - ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); - ssef uvw = fade(fxyz); + int4 XYZ; + float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ); + float4 uvw = fade(fxyz); - ssei XYZ1 = XYZ + 1; - ssei X = shuffle<0>(XYZ); - ssei X1 = shuffle<0>(XYZ1); - ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); + int4 XYZ1 = XYZ + make_int4(1); + int4 X = shuffle<0>(XYZ); + int4 X1 = shuffle<0>(XYZ1); + int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); + int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); - avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z)); + vint8 h = hash_int8_3(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z)); - ssef fxyz1 = fxyz - 1.0f; - ssef fx = shuffle<0>(fxyz); - ssef fx1 = shuffle<0>(fxyz1); - ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); + float4 fxyz1 = fxyz - make_float4(1.0f); + float4 fx = shuffle<0>(fxyz); + float4 fx1 = shuffle<0>(fxyz1); + float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); + float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); - avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz)); + vfloat8 g = grad(h, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz)); return extract<0>(tri_mix(low(g), high(g), uvw)); } @@ -617,31 +617,37 @@ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) */ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) { - ssei XYZW; - ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); - ssef uvws = fade(fxyzw); - - ssei XYZW1 = XYZW + 1; - ssei X = shuffle<0>(XYZW); - ssei X1 = shuffle<0>(XYZW1); - ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); - ssei W = shuffle<3>(XYZW); - ssei W1 = shuffle<3>(XYZW1); - - avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W)); - avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1)); - - ssef fxyzw1 = fxyzw - 1.0f; - ssef fx = shuffle<0>(fxyzw); - ssef fx1 = shuffle<0>(fxyzw1); - ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); - ssef fw = shuffle<3>(fxyzw); - ssef fw1 = shuffle<3>(fxyzw1); - - avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw)); - avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1)); + int4 XYZW; + float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW); + float4 uvws = fade(fxyzw); + + int4 XYZW1 = XYZW + make_int4(1); + int4 X = shuffle<0>(XYZW); + int4 X1 = shuffle<0>(XYZW1); + int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); + int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); + int4 W = shuffle<3>(XYZW); + int4 W1 = shuffle<3>(XYZW1); + + vint8 h1 = hash_int8_4(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W, W)); + vint8 h2 = hash_int8_4( + make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W1, W1)); + + float4 fxyzw1 = fxyzw - make_float4(1.0f); + float4 fx = shuffle<0>(fxyzw); + float4 fx1 = shuffle<0>(fxyzw1); + float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); + float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); + float4 fw = shuffle<3>(fxyzw); + float4 fw1 = shuffle<3>(fxyzw1); + + vfloat8 g1 = grad( + h1, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz), make_vfloat8(fw, fw)); + vfloat8 g2 = grad(h2, + make_vfloat8(fx, fx1), + make_vfloat8(fy, fy), + make_vfloat8(fz, fz), + make_vfloat8(fw1, fw1)); return extract<0>(quad_mix(g1, g2, uvws)); } diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 8f7cfd19169..24c5a6a4540 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -490,6 +490,7 @@ enum PanoramaType { PANORAMA_FISHEYE_EQUISOLID = 2, PANORAMA_MIRRORBALL = 3, PANORAMA_FISHEYE_LENS_POLYNOMIAL = 4, + PANORAMA_EQUIANGULAR_CUBEMAP_FACE = 5, PANORAMA_NUM_TYPES, }; |