Merge branch 'master' into soc-2020-io-performance

author: Howard Trickey <howard.trickey@gmail.com> 2021-10-30 22:37:05 +0300
committer: Howard Trickey <howard.trickey@gmail.com> 2021-10-30 22:37:05 +0300
commit: e9bbfd0c8c7a508d220bf355722ff03f91e93183 (patch)
tree: 1230f26bc82f24547aeccbaa7fcd6d3db2655fd3 /intern/cycles/kernel/svm/svm_noise.h
parent: 1aa953bd1913c81b22c80a00edbf4ad88a32c52f (diff)
parent: 03a962d8cab44221650f59eb223cb0a767e05b2b (diff)
1 files changed, 0 insertions, 742 deletions
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
deleted file mode 100644
index ecb4df6afdf..00000000000
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ /dev/null
@@ -1,742 +0,0 @@
-/*
- * Adapted from Open Shading Language with this license:
- *
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in the
- *   documentation and/or other materials provided with the distribution.
- * * Neither the name of Sony Pictures Imageworks nor the names of its
- *   contributors may be used to endorse or promote products derived from
- *   this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* **** Perlin Noise **** */
-
-ccl_device float fade(float t)
-{
-  return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
-}
-
-ccl_device_inline float negate_if(float val, int condition)
-{
-  return (condition) ? -val : val;
-}
-
-ccl_device float grad1(int hash, float x)
-{
-  int h = hash & 15;
-  float g = 1 + (h & 7);
-  return negate_if(g, h & 8) * x;
-}
-
-ccl_device_noinline_cpu float perlin_1d(float x)
-{
-  int X;
-  float fx = floorfrac(x, &X);
-  float u = fade(fx);
-
-  return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u);
-}
-
-/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
- * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not
- * supported, we do a standard implementation, but if it is supported, we
- * do an implementation using SSE intrinsics.
- */
-#if !defined(__KERNEL_SSE2__)
-
-/* ** Standard Implementation ** */
-
-/* Bilinear Interpolation:
- *
- * v2          v3
- *  @ + + + + @       y
- *  +         +       ^
- *  +         +       |
- *  +         +       |
- *  @ + + + + @       @------> x
- * v0          v1
- *
- */
-ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y)
-{
-  float x1 = 1.0f - x;
-  return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
-}
-
-/* Trilinear Interpolation:
- *
- *   v6               v7
- *     @ + + + + + + @
- *     +\            +\
- *     + \           + \
- *     +  \          +  \
- *     +   \ v4      +   \ v5
- *     +    @ + + + +++ + @          z
- *     +    +        +    +      y   ^
- *  v2 @ + +++ + + + @ v3 +       \  |
- *      \   +         \   +        \ |
- *       \  +          \  +         \|
- *        \ +           \ +          +---------> x
- *         \+            \+
- *          @ + + + + + + @
- *        v0               v1
- */
-ccl_device float tri_mix(float v0,
-                         float v1,
-                         float v2,
-                         float v3,
-                         float v4,
-                         float v5,
-                         float v6,
-                         float v7,
-                         float x,
-                         float y,
-                         float z)
-{
-  float x1 = 1.0f - x;
-  float y1 = 1.0f - y;
-  float z1 = 1.0f - z;
-  return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) +
-         z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x));
-}
-
-ccl_device float quad_mix(float v0,
-                          float v1,
-                          float v2,
-                          float v3,
-                          float v4,
-                          float v5,
-                          float v6,
-                          float v7,
-                          float v8,
-                          float v9,
-                          float v10,
-                          float v11,
-                          float v12,
-                          float v13,
-                          float v14,
-                          float v15,
-                          float x,
-                          float y,
-                          float z,
-                          float w)
-{
-  return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z),
-             tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z),
-             w);
-}
-
-ccl_device float grad2(int hash, float x, float y)
-{
-  int h = hash & 7;
-  float u = h < 4 ? x : y;
-  float v = 2.0f * (h < 4 ? y : x);
-  return negate_if(u, h & 1) + negate_if(v, h & 2);
-}
-
-ccl_device float grad3(int hash, float x, float y, float z)
-{
-  int h = hash & 15;
-  float u = h < 8 ? x : y;
-  float vt = ((h == 12) || (h == 14)) ? x : z;
-  float v = h < 4 ? y : vt;
-  return negate_if(u, h & 1) + negate_if(v, h & 2);
-}
-
-ccl_device float grad4(int hash, float x, float y, float z, float w)
-{
-  int h = hash & 31;
-  float u = h < 24 ? x : y;
-  float v = h < 16 ? y : z;
-  float s = h < 8 ? z : w;
-  return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4);
-}
-
-ccl_device_noinline_cpu float perlin_2d(float x, float y)
-{
-  int X;
-  int Y;
-
-  float fx = floorfrac(x, &X);
-  float fy = floorfrac(y, &Y);
-
-  float u = fade(fx);
-  float v = fade(fy);
-
-  float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy),
-                   grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy),
-                   grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f),
-                   grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f),
-                   u,
-                   v);
-
-  return r;
-}
-
-ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
-{
-  int X;
-  int Y;
-  int Z;
-
-  float fx = floorfrac(x, &X);
-  float fy = floorfrac(y, &Y);
-  float fz = floorfrac(z, &Z);
-
-  float u = fade(fx);
-  float v = fade(fy);
-  float w = fade(fz);
-
-  float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz),
-                    grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz),
-                    grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
-                    grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz),
-                    grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
-                    grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f),
-                    grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
-                    grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f),
-                    u,
-                    v,
-                    w);
-  return r;
-}
-
-ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
-{
-  int X;
-  int Y;
-  int Z;
-  int W;
-
-  float fx = floorfrac(x, &X);
-  float fy = floorfrac(y, &Y);
-  float fz = floorfrac(z, &Z);
-  float fw = floorfrac(w, &W);
-
-  float u = fade(fx);
-  float v = fade(fy);
-  float t = fade(fz);
-  float s = fade(fw);
-
-  float r = quad_mix(
-      grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw),
-      grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw),
-      grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw),
-      grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw),
-      grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw),
-      grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw),
-      grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw),
-      grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw),
-      grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f),
-      grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f),
-      grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f),
-      grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f),
-      grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f),
-      grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f),
-      grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f),
-      grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f),
-      u,
-      v,
-      t,
-      s);
-
-  return r;
-}
-
-#else /* SSE is supported. */
-
-/* ** SSE Implementation ** */
-
-/* SSE Bilinear Interpolation:
- *
- * The function takes two ssef inputs:
- * - p : Contains the values at the points (v0, v1, v2, v3).
- * - f : Contains the values (x, y, _, _). The third and fourth values are unused.
- *
- * The interpolation is done in two steps:
- * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
- *    (v2, v3) is generated by moving v2 and v3 to the first and second
- *    places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
- *    fourth values are unused.
- * 2. Interpolate g0 and g1 along the y axis to get the final value.
- *    g1 is generated by populating an ssef with the second value of g.
- *    Only the first value is important in the final ssef.
- *
- * v1          v3          g1
- *  @ + + + + @            @                    y
- *  +         +     (1)    +    (2)             ^
- *  +         +     --->   +    --->   final    |
- *  +         +            +                    |
- *  @ + + + + @            @                    @------> x
- * v0          v2          g0
- *
- */
-ccl_device_inline ssef bi_mix(ssef p, ssef f)
-{
-  ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
-  return mix(g, shuffle<1>(g), shuffle<1>(f));
-}
-
-ccl_device_inline ssef fade(const ssef &t)
-{
-  ssef a = madd(t, 6.0f, -15.0f);
-  ssef b = madd(t, a, 10.0f);
-  return (t * t) * (t * b);
-}
-
-/* Negate val if the nth bit of h is 1. */
-#  define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
-
-ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
-{
-  ssei h = hash & 7;
-  ssef u = select(h < 4, x, y);
-  ssef v = 2.0f * select(h < 4, y, x);
-  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
-}
-
-/* We use SSE to compute and interpolate 4 gradients at once:
- *
- *    Point  Offset from v0
- *     v0       (0, 0)
- *     v1       (0, 1)
- *     v2       (1, 0)    (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1))
- *     v3       (1, 1)         ^
- *               |  |__________|       (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1)
- *               |                          ^
- *               |__________________________|
- *
- */
-ccl_device_noinline_cpu float perlin_2d(float x, float y)
-{
-  ssei XY;
-  ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY);
-  ssef uv = fade(fxy);
-
-  ssei XY1 = XY + 1;
-  ssei X = shuffle<0, 0, 0, 0>(XY, XY1);
-  ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
-
-  ssei h = hash_ssei2(X, Y);
-
-  ssef fxy1 = fxy - 1.0f;
-  ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
-  ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
-
-  ssef g = grad(h, fx, fy);
-
-  return extract<0>(bi_mix(g, uv));
-}
-
-/* SSE Trilinear Interpolation:
- *
- * The function takes three ssef inputs:
- * - p : Contains the values at the points (v0, v1, v2, v3).
- * - q : Contains the values at the points (v4, v5, v6, v7).
- * - f : Contains the values (x, y, z, _). The fourth value is unused.
- *
- * The interpolation is done in three steps:
- * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
- * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
- *    (s2, s3) is generated by moving v2 and v3 to the first and second
- *    places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
- *    fourth values are unused.
- * 3. Interpolate g0 and g1 along the z axis to get the final value.
- *    g1 is generated by populating an ssef with the second value of g.
- *    Only the first value is important in the final ssef.
- *
- *   v3               v7
- *     @ + + + + + + @               s3 @
- *     +\            +\                 +\
- *     + \           + \                + \
- *     +  \          +  \               +  \             g1
- *     +   \ v1      +   \ v5           +   \ s1         @
- *     +    @ + + + +++ + @             +    @           +                     z
- *     +    +        +    +    (1)      +    +    (2)    +   (3)           y   ^
- *  v2 @ + +++ + + + @ v6 +    --->  s2 @    +    --->   +   --->  final    \  |
- *      \   +         \   +              \   +           +                   \ |
- *       \  +          \  +               \  +           +                    \|
- *        \ +           \ +                \ +           @                     +---------> x
- *         \+            \+                 \+           g0
- *          @ + + + + + + @                  @
- *        v0               v4                 s0
- */
-ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
-{
-  ssef s = mix(p, q, shuffle<0>(f));
-  ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
-  return mix(g, shuffle<1>(g), shuffle<2>(f));
-}
-
-/* 3D and 4D noise can be accelerated using AVX, so we first check if AVX
- * is supported, that is, if __KERNEL_AVX__ is defined. If it is not
- * supported, we do an SSE implementation, but if it is supported,
- * we do an implementation using AVX intrinsics.
- */
-#  if !defined(__KERNEL_AVX__)
-
-ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
-{
-  ssei h = hash & 15;
-  ssef u = select(h < 8, x, y);
-  ssef vt = select((h == 12) | (h == 14), x, z);
-  ssef v = select(h < 4, y, vt);
-  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
-}
-
-ccl_device_inline ssef
-grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w)
-{
-  ssei h = hash & 31;
-  ssef u = select(h < 24, x, y);
-  ssef v = select(h < 16, y, z);
-  ssef s = select(h < 8, z, w);
-  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
-}
-
-/* SSE Quadrilinear Interpolation:
- *
- * Quadrilinear interpolation is as simple as a linear interpolation
- * between two trilinear interpolations.
- *
- */
-ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
-{
-  return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
-}
-
-/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8
- * gradients in 3D, we need to compute two sets of gradients at the points:
- *
- *    Point  Offset from v0
- *     v0      (0, 0, 0)
- *     v1      (0, 0, 1)
- *     v2      (0, 1, 0)    (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
- *     v3      (0, 1, 1)         ^
- *                 |  |__________|       (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
- *                 |                          ^
- *                 |__________________________|
- *
- *    Point  Offset from v0
- *     v4      (1, 0, 0)
- *     v5      (1, 0, 1)
- *     v6      (1, 1, 0)
- *     v7      (1, 1, 1)
- *
- */
-ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
-{
-  ssei XYZ;
-  ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
-  ssef uvw = fade(fxyz);
-
-  ssei XYZ1 = XYZ + 1;
-  ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
-  ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
-
-  ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z);
-  ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z);
-
-  ssef fxyz1 = fxyz - 1.0f;
-  ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
-  ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
-
-  ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
-  ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
-
-  return extract<0>(tri_mix(g1, g2, uvw));
-}
-
-/* We use SSE to compute and interpolate 4 gradients at once. Since we have 16
- * gradients in 4D, we need to compute four sets of gradients at the points:
- *
- *    Point  Offset from v0
- *     v0     (0, 0, 0, 0)
- *     v1     (0, 0, 1, 0)
- *     v2     (0, 1, 0, 0)  (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
- *     v3     (0, 1, 1, 0)    ^
- *                |  |________|    (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
- *                |                       ^
- *                |_______________________|
- *
- *    Point  Offset from v0
- *     v4     (1, 0, 0, 0)
- *     v5     (1, 0, 1, 0)
- *     v6     (1, 1, 0, 0)
- *     v7     (1, 1, 1, 0)
- *
- *    Point  Offset from v0
- *     v8     (0, 0, 0, 1)
- *     v9     (0, 0, 1, 1)
- *     v10    (0, 1, 0, 1)
- *     v11    (0, 1, 1, 1)
- *
- *    Point  Offset from v0
- *     v12    (1, 0, 0, 1)
- *     v13    (1, 0, 1, 1)
- *     v14    (1, 1, 0, 1)
- *     v15    (1, 1, 1, 1)
- *
- */
-ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
-{
-  ssei XYZW;
-  ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
-  ssef uvws = fade(fxyzw);
-
-  ssei XYZW1 = XYZW + 1;
-  ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
-  ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
-
-  ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
-  ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
-
-  ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
-  ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
-
-  ssef fxyzw1 = fxyzw - 1.0f;
-  ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
-  ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
-
-  ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
-  ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
-
-  ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
-  ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
-
-  return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
-}
-
-#  else /* AVX is supported. */
-
-/* AVX Implementation */
-
-ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z)
-{
-  avxi h = hash & 15;
-  avxf u = select(h < 8, x, y);
-  avxf vt = select((h == 12) | (h == 14), x, z);
-  avxf v = select(h < 4, y, vt);
-  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
-}
-
-ccl_device_inline avxf
-grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w)
-{
-  avxi h = hash & 31;
-  avxf u = select(h < 24, x, y);
-  avxf v = select(h < 16, y, z);
-  avxf s = select(h < 8, z, w);
-  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
-}
-
-/* SSE Quadrilinear Interpolation:
- *
- * The interpolation is done in two steps:
- * 1. Interpolate p and q along the w axis to get s.
- * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final
- *    value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the
- *    low and high ssef from s.
- *
- */
-ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
-{
-  ssef fv = shuffle<3>(f);
-  avxf s = mix(p, q, avxf(fv, fv));
-  return tri_mix(low(s), high(s), f);
-}
-
-/* We use AVX to compute and interpolate 8 gradients at once.
- *
- *    Point  Offset from v0
- *     v0      (0, 0, 0)
- *     v1      (0, 0, 1)    The full AVX type is computed by inserting the following
- *     v2      (0, 1, 0)    SSE types into both the low and high parts of the AVX.
- *     v3      (0, 1, 1)
- *     v4      (1, 0, 0)
- *     v5      (1, 0, 1)    (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
- *     v6      (1, 1, 0)         ^
- *     v7      (1, 1, 1)         |
- *                 |  |__________|       (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
- *                 |                          ^
- *                 |__________________________|
- *
- */
-ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
-{
-  ssei XYZ;
-  ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
-  ssef uvw = fade(fxyz);
-
-  ssei XYZ1 = XYZ + 1;
-  ssei X = shuffle<0>(XYZ);
-  ssei X1 = shuffle<0>(XYZ1);
-  ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
-  ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
-
-  avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z));
-
-  ssef fxyz1 = fxyz - 1.0f;
-  ssef fx = shuffle<0>(fxyz);
-  ssef fx1 = shuffle<0>(fxyz1);
-  ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
-  ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
-
-  avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz));
-
-  return extract<0>(tri_mix(low(g), high(g), uvw));
-}
-
-/* We use AVX to compute and interpolate 8 gradients at once. Since we have 16
- * gradients in 4D, we need to compute two sets of gradients at the points:
- *
- *    Point  Offset from v0
- *     v0     (0, 0, 0, 0)
- *     v1     (0, 0, 1, 0)  The full AVX type is computed by inserting the following
- *     v2     (0, 1, 0, 0)  SSE types into both the low and high parts of the AVX.
- *     v3     (0, 1, 1, 0)
- *     v4     (1, 0, 0, 0)
- *     v5     (1, 0, 1, 0)  (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
- *     v6     (1, 1, 0, 0)    ^
- *     v7     (1, 1, 1, 0)    |
- *                |  |________|    (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
- *                |                       ^
- *                |_______________________|
- *
- *    Point  Offset from v0
- *     v8     (0, 0, 0, 1)
- *     v9     (0, 0, 1, 1)
- *     v10    (0, 1, 0, 1)
- *     v11    (0, 1, 1, 1)
- *     v12    (1, 0, 0, 1)
- *     v13    (1, 0, 1, 1)
- *     v14    (1, 1, 0, 1)
- *     v15    (1, 1, 1, 1)
- *
- */
-ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
-{
-  ssei XYZW;
-  ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
-  ssef uvws = fade(fxyzw);
-
-  ssei XYZW1 = XYZW + 1;
-  ssei X = shuffle<0>(XYZW);
-  ssei X1 = shuffle<0>(XYZW1);
-  ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
-  ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
-  ssei W = shuffle<3>(XYZW);
-  ssei W1 = shuffle<3>(XYZW1);
-
-  avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W));
-  avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1));
-
-  ssef fxyzw1 = fxyzw - 1.0f;
-  ssef fx = shuffle<0>(fxyzw);
-  ssef fx1 = shuffle<0>(fxyzw1);
-  ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
-  ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
-  ssef fw = shuffle<3>(fxyzw);
-  ssef fw1 = shuffle<3>(fxyzw1);
-
-  avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw));
-  avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1));
-
-  return extract<0>(quad_mix(g1, g2, uvws));
-}
-#  endif
-
-#  undef negate_if_nth_bit
-
-#endif
-
-/* Remap the output of noise to a predictable range [-1, 1].
- * The scale values were computed experimentally by the OSL developers.
- */
-
-ccl_device_inline float noise_scale1(float result)
-{
-  return 0.2500f * result;
-}
-
-ccl_device_inline float noise_scale2(float result)
-{
-  return 0.6616f * result;
-}
-
-ccl_device_inline float noise_scale3(float result)
-{
-  return 0.9820f * result;
-}
-
-ccl_device_inline float noise_scale4(float result)
-{
-  return 0.8344f * result;
-}
-
-/* Safe Signed And Unsigned Noise */
-
-ccl_device_inline float snoise_1d(float p)
-{
-  return noise_scale1(ensure_finite(perlin_1d(p)));
-}
-
-ccl_device_inline float noise_1d(float p)
-{
-  return 0.5f * snoise_1d(p) + 0.5f;
-}
-
-ccl_device_inline float snoise_2d(float2 p)
-{
-  return noise_scale2(ensure_finite(perlin_2d(p.x, p.y)));
-}
-
-ccl_device_inline float noise_2d(float2 p)
-{
-  return 0.5f * snoise_2d(p) + 0.5f;
-}
-
-ccl_device_inline float snoise_3d(float3 p)
-{
-  return noise_scale3(ensure_finite(perlin_3d(p.x, p.y, p.z)));
-}
-
-ccl_device_inline float noise_3d(float3 p)
-{
-  return 0.5f * snoise_3d(p) + 0.5f;
-}
-
-ccl_device_inline float snoise_4d(float4 p)
-{
-  return noise_scale4(ensure_finite(perlin_4d(p.x, p.y, p.z, p.w)));
-}
-
-ccl_device_inline float noise_4d(float4 p)
-{
-  return 0.5f * snoise_4d(p) + 0.5f;
-}
-
-CCL_NAMESPACE_END
author	Howard Trickey <howard.trickey@gmail.com>	2021-10-30 22:37:05 +0300
committer	Howard Trickey <howard.trickey@gmail.com>	2021-10-30 22:37:05 +0300
commit	e9bbfd0c8c7a508d220bf355722ff03f91e93183 (patch)
tree	1230f26bc82f24547aeccbaa7fcd6d3db2655fd3 /intern/cycles/kernel/svm/svm_noise.h
parent	1aa953bd1913c81b22c80a00edbf4ad88a32c52f (diff)
parent	03a962d8cab44221650f59eb223cb0a767e05b2b (diff)