Shading: Extend Noise node to other dimensions.

This patch extends Perlin noise to operate in 1D, 2D, 3D, and 4D space. The noise code has also been refactored to be more readable. The Color output and distortion patterns changed, so this patch breaks backward compatibility. This is because we now use random offsets as noise seeds, as opposed to swizzling and constant offsets. Reviewers: brecht, JacquesLucke. Differential Revision: https://developer.blender.org/D5560
author: OmarSquircleArt <omar.squircleart@gmail.com> 2019-09-04 18:54:32 +0300
committer: OmarSquircleArt <omar.squircleart@gmail.com> 2019-09-04 18:54:32 +0300
commit: 23564583a4988778b4c43496fd21818b286f6ba1 (patch)
tree: 3dc149c4e5c26ea2aac460ed582cb31def988470 /intern/cycles/kernel/svm
parent: 45d4c925799e94c6d442a9a9066af2d3305724e1 (diff)
7 files changed, 789 insertions, 273 deletions
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index ab8570618ab..95954aaf99e 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -158,7 +158,7 @@ CCL_NAMESPACE_END
 /* Nodes */
 
 #include "kernel/svm/svm_noise.h"
-#include "svm_texture.h"
+#include "svm_fractal_noise.h"
 
 #include "kernel/svm/svm_color_util.h"
 #include "kernel/svm/svm_math_util.h"
@@ -313,7 +313,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
         svm_node_tex_image_box(kg, sd, stack, node);
         break;
       case NODE_TEX_NOISE:
-        svm_node_tex_noise(kg, sd, stack, node, &offset);
+        svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
         break;
 #  endif /* __TEXTURES__ */
 #  ifdef __EXTRA_NODES__
diff --git a/intern/cycles/kernel/svm/svm_fractal_noise.h b/intern/cycles/kernel/svm/svm_fractal_noise.h
new file mode 100644
index 00000000000..5b2e4a28fce
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_fractal_noise.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* The fractal_noise_[1-4]d functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_1d(float p, float octaves)
+{
+  float fscale = 1.0f;
+  float amp = 1.0f;
+  float sum = 0.0f;
+  octaves = clamp(octaves, 0.0f, 16.0f);
+  int n = float_to_int(octaves);
+  for (int i = 0; i <= n; i++) {
+    float t = noise_1d(fscale * p);
+    sum += t * amp;
+    amp *= 0.5f;
+    fscale *= 2.0f;
+  }
+  float rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f) {
+    float t = noise_1d(fscale * p);
+    float sum2 = sum + t * amp;
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
+    return (1.0f - rmd) * sum + rmd * sum2;
+  }
+  else {
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    return sum;
+  }
+}
+
+/* The fractal_noise_[1-4]d functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_2d(float2 p, float octaves)
+{
+  float fscale = 1.0f;
+  float amp = 1.0f;
+  float sum = 0.0f;
+  octaves = clamp(octaves, 0.0f, 16.0f);
+  int n = float_to_int(octaves);
+  for (int i = 0; i <= n; i++) {
+    float t = noise_2d(fscale * p);
+    sum += t * amp;
+    amp *= 0.5f;
+    fscale *= 2.0f;
+  }
+  float rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f) {
+    float t = noise_2d(fscale * p);
+    float sum2 = sum + t * amp;
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
+    return (1.0f - rmd) * sum + rmd * sum2;
+  }
+  else {
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    return sum;
+  }
+}
+
+/* The fractal_noise_[1-4]d functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_3d(float3 p, float octaves)
+{
+  float fscale = 1.0f;
+  float amp = 1.0f;
+  float sum = 0.0f;
+  octaves = clamp(octaves, 0.0f, 16.0f);
+  int n = float_to_int(octaves);
+  for (int i = 0; i <= n; i++) {
+    float t = noise_3d(fscale * p);
+    sum += t * amp;
+    amp *= 0.5f;
+    fscale *= 2.0f;
+  }
+  float rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f) {
+    float t = noise_3d(fscale * p);
+    float sum2 = sum + t * amp;
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
+    return (1.0f - rmd) * sum + rmd * sum2;
+  }
+  else {
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    return sum;
+  }
+}
+
+/* The fractal_noise_[1-4]d functions are all exactly the same except for the input type. */
+ccl_device_noinline float fractal_noise_4d(float4 p, float octaves)
+{
+  float fscale = 1.0f;
+  float amp = 1.0f;
+  float sum = 0.0f;
+  octaves = clamp(octaves, 0.0f, 16.0f);
+  int n = float_to_int(octaves);
+  for (int i = 0; i <= n; i++) {
+    float t = noise_4d(fscale * p);
+    sum += t * amp;
+    amp *= 0.5f;
+    fscale *= 2.0f;
+  }
+  float rmd = octaves - floorf(octaves);
+  if (rmd != 0.0f) {
+    float t = noise_4d(fscale * p);
+    float sum2 = sum + t * amp;
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
+    return (1.0f - rmd) * sum + rmd * sum2;
+  }
+  else {
+    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
+    return sum;
+  }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 9291c7e7295..db87f04581f 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -37,14 +37,14 @@ ccl_device_noinline_cpu float noise_musgrave_fBm(float3 p,
   int i;
 
   for (i = 0; i < float_to_int(octaves); i++) {
-    value += snoise(p) * pwr;
+    value += snoise_3d(p) * pwr;
     pwr *= pwHL;
     p *= lacunarity;
   }
 
   rmd = octaves - floorf(octaves);
   if (rmd != 0.0f)
-    value += rmd * snoise(p) * pwr;
+    value += rmd * snoise_3d(p) * pwr;
 
   return value;
 }
@@ -68,14 +68,14 @@ ccl_device_noinline_cpu float noise_musgrave_multi_fractal(float3 p,
   int i;
 
   for (i = 0; i < float_to_int(octaves); i++) {
-    value *= (pwr * snoise(p) + 1.0f);
+    value *= (pwr * snoise_3d(p) + 1.0f);
     pwr *= pwHL;
     p *= lacunarity;
   }
 
   rmd = octaves - floorf(octaves);
   if (rmd != 0.0f)
-    value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
+    value *= (rmd * pwr * snoise_3d(p) + 1.0f); /* correct? */
 
   return value;
 }
@@ -97,11 +97,11 @@ ccl_device_noinline_cpu float noise_musgrave_hetero_terrain(
   int i;
 
   /* first unscaled octave of function; later octaves are scaled */
-  value = offset + snoise(p);
+  value = offset + snoise_3d(p);
   p *= lacunarity;
 
   for (i = 1; i < float_to_int(octaves); i++) {
-    increment = (snoise(p) + offset) * pwr * value;
+    increment = (snoise_3d(p) + offset) * pwr * value;
     value += increment;
     pwr *= pwHL;
     p *= lacunarity;
@@ -109,7 +109,7 @@ ccl_device_noinline_cpu float noise_musgrave_hetero_terrain(
 
   rmd = octaves - floorf(octaves);
   if (rmd != 0.0f) {
-    increment = (snoise(p) + offset) * pwr * value;
+    increment = (snoise_3d(p) + offset) * pwr * value;
     value += rmd * increment;
   }
 
@@ -132,7 +132,7 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal(
   float pwr = pwHL;
   int i;
 
-  result = snoise(p) + offset;
+  result = snoise_3d(p) + offset;
   weight = gain * result;
   p *= lacunarity;
 
@@ -140,7 +140,7 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal(
     if (weight > 1.0f)
       weight = 1.0f;
 
-    signal = (snoise(p) + offset) * pwr;
+    signal = (snoise_3d(p) + offset) * pwr;
     pwr *= pwHL;
     result += weight * signal;
     weight *= gain * signal;
@@ -149,7 +149,7 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal(
 
   rmd = octaves - floorf(octaves);
   if (rmd != 0.0f)
-    result += rmd * ((snoise(p) + offset) * pwr);
+    result += rmd * ((snoise_3d(p) + offset) * pwr);
 
   return result;
 }
@@ -170,7 +170,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal(
   float pwr = pwHL;
   int i;
 
-  signal = offset - fabsf(snoise(p));
+  signal = offset - fabsf(snoise_3d(p));
   signal *= signal;
   result = signal;
   weight = 1.0f;
@@ -178,7 +178,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal(
   for (i = 1; i < float_to_int(octaves); i++) {
     p *= lacunarity;
     weight = saturate(signal * gain);
-    signal = offset - fabsf(snoise(p));
+    signal = offset - fabsf(snoise_3d(p));
     signal *= signal;
     signal *= weight;
     result += signal * pwr;
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index dd375af27e5..35b74fb4b3e 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -32,246 +32,566 @@
 
 CCL_NAMESPACE_BEGIN
 
-#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei quick_floor_sse(const ssef &x)
-{
-  ssei b = truncatei(x);
-  ssei isneg = cast((x < ssef(0.0f)).m128);
-  return b + isneg;  // unsaturated add 0xffffffff is the same as subtract -1
-}
-#endif
-
-#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
-{
-#  define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
-#  define xor_rot(a, b, c) \
-    do { \
-      a = a ^ b; \
-      a = a - rot(b, c); \
-    } while (0)
-
-  uint len = 3;
-  ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
-  ssei a = magic + kx;
-  ssei b = magic + ky;
-  ssei c = magic + kz;
-
-  xor_rot(c, b, 14);
-  xor_rot(a, c, 11);
-  xor_rot(b, a, 25);
-  xor_rot(c, b, 16);
-  xor_rot(a, c, 4);
-  xor_rot(b, a, 14);
-  xor_rot(c, b, 24);
-
-  return c;
-#  undef rot
-#  undef xor_rot
-}
-#endif
+/* **** Perlin Noise **** */
 
-#if 0  // unused
-ccl_device int imod(int a, int b)
+ccl_device float fade(float t)
 {
-  a %= b;
-  return a < 0 ? a + b : a;
+  return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
 }
 
-ccl_device uint phash(int kx, int ky, int kz, int3 p)
+ccl_device_inline float negate_if(float val, int condition)
 {
-  return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
+  return (condition) ? -val : val;
 }
-#endif
 
-#ifndef __KERNEL_SSE2__
-ccl_device float floorfrac(float x, int *i)
+ccl_device float grad1(int hash, float x)
 {
-  *i = quick_floor_to_int(x);
-  return x - *i;
+  int h = hash & 15;
+  float g = 1 + (h & 7);
+  return negate_if(g, h & 8) * x;
 }
-#else
-ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i)
+
+ccl_device_noinline_cpu float perlin_1d(float x)
 {
-  *i = quick_floor_sse(x);
-  return x - ssef(*i);
+  int X;
+  float fx = floorfrac(x, &X);
+  float u = fade(fx);
+
+  return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u);
 }
-#endif
 
+/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
+ * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not
+ * supported, we do a standard implementation, but if it is supported, we
+ * do an implementation using SSE intrinsics.
+ */
 #ifndef __KERNEL_SSE2__
-ccl_device float fade(float t)
+
+/* ** Standard Implementation ** */
+
+/* Bilinear Interpolation:
+ *
+ * v2          v3
+ *  @ + + + + @       y
+ *  +         +       ^
+ *  +         +       |
+ *  +         +       |
+ *  @ + + + + @       @------> x
+ * v0          v1
+ *
+ */
+ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y)
 {
-  return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
+  float x1 = 1.0f - x;
+  return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
 }
-#else
-ccl_device_inline ssef fade_sse(const ssef *t)
+
+/* Trilinear Interpolation:
+ *
+ *   v6               v7
+ *     @ + + + + + + @
+ *     +\            +\
+ *     + \           + \
+ *     +  \          +  \
+ *     +   \ v4      +   \ v5
+ *     +    @ + + + +++ + @          z
+ *     +    +        +    +      y   ^
+ *  v2 @ + +++ + + + @ v3 +       \  |
+ *      \   +         \   +        \ |
+ *       \  +          \  +         \|
+ *        \ +           \ +          +---------> x
+ *         \+            \+
+ *          @ + + + + + + @
+ *        v0               v1
+ */
+ccl_device float tri_mix(float v0,
+                         float v1,
+                         float v2,
+                         float v3,
+                         float v4,
+                         float v5,
+                         float v6,
+                         float v7,
+                         float x,
+                         float y,
+                         float z)
 {
-  ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
-  ssef b = madd(*t, a, ssef(10.0f));
-  return ((*t) * (*t)) * ((*t) * b);
+  float x1 = 1.0f - x;
+  float y1 = 1.0f - y;
+  float z1 = 1.0f - z;
+  return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) +
+         z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x));
 }
-#endif
 
-#ifndef __KERNEL_SSE2__
-ccl_device float nerp(float t, float a, float b)
+ccl_device float quad_mix(float v0,
+                          float v1,
+                          float v2,
+                          float v3,
+                          float v4,
+                          float v5,
+                          float v6,
+                          float v7,
+                          float v8,
+                          float v9,
+                          float v10,
+                          float v11,
+                          float v12,
+                          float v13,
+                          float v14,
+                          float v15,
+                          float x,
+                          float y,
+                          float z,
+                          float w)
 {
-  return (1.0f - t) * a + t * b;
+  return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z),
+             tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z),
+             w);
 }
-#else
-ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b)
+
+ccl_device float grad2(int hash, float x, float y)
 {
-  ssef x1 = (ssef(1.0f) - t) * a;
-  return madd(t, b, x1);
+  int h = hash & 7;
+  float u = h < 4 ? x : y;
+  float v = 2.0f * (h < 4 ? y : x);
+  return negate_if(u, h & 1) + negate_if(v, h & 2);
 }
-#endif
 
-#ifndef __KERNEL_SSE2__
-ccl_device float grad(int hash, float x, float y, float z)
+ccl_device float grad3(int hash, float x, float y, float z)
 {
-  // use vectors pointing to the edges of the cube
   int h = hash & 15;
   float u = h < 8 ? x : y;
-  float vt = ((h == 12) | (h == 14)) ? x : z;
+  float vt = ((h == 12) || (h == 14)) ? x : z;
   float v = h < 4 ? y : vt;
-  return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
+  return negate_if(u, h & 1) + negate_if(v, h & 2);
 }
-#else
-ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
-{
-  ssei c1 = ssei(1);
-  ssei c2 = ssei(2);
 
-  ssei h = hash & ssei(15);  // h = hash & 15
+ccl_device float grad4(int hash, float x, float y, float z, float w)
+{
+  int h = hash & 31;
+  float u = h < 24 ? x : y;
+  float v = h < 16 ? y : z;
+  float s = h < 8 ? z : w;
+  return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4);
+}
 
-  sseb case_ux = h < ssei(8);  // 0xffffffff if h < 8 else 0
+ccl_device_noinline_cpu float perlin_2d(float x, float y)
+{
+  int X;
+  int Y;
 
-  ssef u = select(case_ux, x, y);  // u = h<8 ? x : y
+  float fx = floorfrac(x, &X);
+  float fy = floorfrac(y, &Y);
 
-  sseb case_vy = h < ssei(4);  // 0xffffffff if h < 4 else 0
+  float u = fade(fx);
+  float v = fade(fy);
 
-  sseb case_h12 = h == ssei(12);  // 0xffffffff if h == 12 else 0
-  sseb case_h14 = h == ssei(14);  // 0xffffffff if h == 14 else 0
+  float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy),
+                   grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy),
+                   grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f),
+                   grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f),
+                   u,
+                   v);
 
-  sseb case_vx = case_h12 | case_h14;  // 0xffffffff if h == 12 or h == 14 else 0
+  return r;
+}
 
-  ssef v = select(case_vy, y, select(case_vx, x, z));  // v = h<4 ? y : h == 12 || h == 14 ? x : z
+ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
+{
+  int X;
+  int Y;
+  int Z;
 
-  ssei case_uneg = (h & c1) << 31;        // 1<<31 if h&1 else 0
-  ssef case_uneg_mask = cast(case_uneg);  // -0.0 if h&1 else +0.0
-  ssef ru = u ^ case_uneg_mask;           // -u if h&1 else u (copy float sign)
+  float fx = floorfrac(x, &X);
+  float fy = floorfrac(y, &Y);
+  float fz = floorfrac(z, &Z);
 
-  ssei case_vneg = (h & c2) << 30;        // 2<<30 if h&2 else 0
-  ssef case_vneg_mask = cast(case_vneg);  // -0.0 if h&2 else +0.0
-  ssef rv = v ^ case_vneg_mask;           // -v if h&2 else v (copy float sign)
+  float u = fade(fx);
+  float v = fade(fy);
+  float w = fade(fz);
 
-  ssef r = ru + rv;  // ((h&1) ? -u : u) + ((h&2) ? -v : v)
+  float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz),
+                    grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz),
+                    grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
+                    grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz),
+                    grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
+                    grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f),
+                    grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
+                    grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f),
+                    u,
+                    v,
+                    w);
   return r;
 }
-#endif
 
-#ifndef __KERNEL_SSE2__
-ccl_device float scale3(float result)
-{
-  return 0.9820f * result;
-}
-#else
-ccl_device_inline ssef scale3_sse(const ssef &result)
-{
-  return ssef(0.9820f) * result;
-}
-#endif
-
-#ifndef __KERNEL_SSE2__
-ccl_device_noinline_cpu float perlin(float x, float y, float z)
+ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
 {
   int X;
-  float fx = floorfrac(x, &X);
   int Y;
-  float fy = floorfrac(y, &Y);
   int Z;
+  int W;
+
+  float fx = floorfrac(x, &X);
+  float fy = floorfrac(y, &Y);
   float fz = floorfrac(z, &Z);
+  float fw = floorfrac(w, &W);
 
   float u = fade(fx);
   float v = fade(fy);
-  float w = fade(fz);
+  float t = fade(fz);
+  float s = fade(fw);
+
+  float r = quad_mix(
+      grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw),
+      grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw),
+      grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw),
+      grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw),
+      grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw),
+      grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw),
+      grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw),
+      grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw),
+      grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f),
+      grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f),
+      grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f),
+      grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f),
+      grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f),
+      grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f),
+      grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f),
+      grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f),
+      u,
+      v,
+      t,
+      s);
 
-  float result;
-
-  result = nerp(
-      w,
-      nerp(v,
-           nerp(u,
-                grad(hash_uint3(X, Y, Z), fx, fy, fz),
-                grad(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz)),
-           nerp(u,
-                grad(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
-                grad(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
-      nerp(v,
-           nerp(u,
-                grad(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
-                grad(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
-           nerp(u,
-                grad(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
-                grad(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
-  float r = scale3(result);
-
-  /* can happen for big coordinates, things even out to 0.0 then anyway */
-  return (isfinite(r)) ? r : 0.0f;
+  return r;
 }
+
 #else
-ccl_device_noinline float perlin(float x, float y, float z)
+
+/* ** SSE Implementation ** */
+
+/* SSE Bilinear Interpolation:
+ *
+ * The function takes two ssef inputs:
+ * - p : Contains the values at the points (v0, v1, v2, v3).
+ * - f : Contains the values (x, y, _, _). The third and fourth values are unused.
+ *
+ * The interpolation is done in two steps:
+ * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
+ *    (v2, v3) is generated by moving v2 and v3 to the first and second
+ *    places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ *    fourth values are unused.
+ * 2. Interpolate g0 and g1 along the y axis to get the final value.
+ *    g1 is generated by populating an ssef with the second value of g.
+ *    Only the first value is important in the final ssef.
+ *
+ * v1          v3          g1
+ *  @ + + + + @            @                    y
+ *  +         +     (1)    +    (2)             ^
+ *  +         +     --->   +    --->   final    |
+ *  +         +            +                    |
+ *  @ + + + + @            @                    @------> x
+ * v0          v2          g0
+ *
+ */
+ccl_device_inline ssef bi_mix(ssef p, ssef f)
+{
+  ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
+  return mix(g, shuffle<1>(g), shuffle<1>(f));
+}
+
+/* SSE Trilinear Interpolation:
+ *
+ * The function takes three ssef inputs:
+ * - p : Contains the values at the points (v0, v1, v2, v3).
+ * - q : Contains the values at the points (v4, v5, v6, v7).
+ * - f : Contains the values (x, y, z, _). The fourth value is unused.
+ *
+ * The interpolation is done in three steps:
+ * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
+ * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
+ *    (s2, s3) is generated by moving s2 and s3 to the first and second
+ *    places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ *    fourth values are unused.
+ * 3. Interpolate g0 and g1 along the z axis to get the final value.
+ *    g1 is generated by populating an ssef with the second value of g.
+ *    Only the first value is important in the final ssef.
+ *
+ *   v3               v7
+ *     @ + + + + + + @               s3 @
+ *     +\            +\                 +\
+ *     + \           + \                + \
+ *     +  \          +  \               +  \             g1
+ *     +   \ v1      +   \ v5           +   \ s1         @
+ *     +    @ + + + +++ + @             +    @           +                     z
+ *     +    +        +    +    (1)      +    +    (2)    +   (3)           y   ^
+ *  v2 @ + +++ + + + @ v6 +    --->  s2 @    +    --->   +   --->  final    \  |
+ *      \   +         \   +              \   +           +                   \ |
+ *       \  +          \  +               \  +           +                    \|
+ *        \ +           \ +                \ +           @                     +---------> x
+ *         \+            \+                 \+           g0
+ *          @ + + + + + + @                  @
+ *        v0               v4                 s0
+ */
+ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
+{
+  ssef s = mix(p, q, shuffle<0>(f));
+  ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
+  return mix(g, shuffle<1>(g), shuffle<2>(f));
+}
+
+/* SSE Quadrilinear Interpolation:
+ *
+ * Quadrilinear interpolation is as simple as a linear interpolation
+ * between two trilinear interpolations.
+ *
+ */
+ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
+{
+  return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
+}
+
+ccl_device_inline ssef fade(const ssef &t)
+{
+  ssef a = madd(t, 6.0f, -15.0f);
+  ssef b = madd(t, a, 10.0f);
+  return (t * t) * (t * b);
+}
+
+/* Negate val if the nth bit of h is 1. */
+#  define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
+
+ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
+{
+  ssei h = hash & 7;
+  ssef u = select(h < 4, x, y);
+  ssef v = 2.0f * select(h < 4, y, x);
+  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
+}
+
+ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
+{
+  ssei h = hash & 15;
+  ssef u = select(h < 8, x, y);
+  ssef vt = select((h == 12) | (h == 14), x, z);
+  ssef v = select(h < 4, y, vt);
+  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
+}
+
+ccl_device_inline ssef
+grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w)
+{
+  ssei h = hash & 31;
+  ssef u = select(h < 24, x, y);
+  ssef v = select(h < 16, y, z);
+  ssef s = select(h < 8, z, w);
+  return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
+}
+
+/* We use SSE to compute and interpolate 4 gradients at once:
+ *
+ *    Point  Offset from v0
+ *     v0       (0, 0)
+ *     v1       (0, 1)
+ *     v2       (1, 0)    (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1))
+ *     v3       (1, 1)         ^
+ *               |  |__________|       (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1)
+ *               |                          ^
+ *               |__________________________|
+ *
+ */
+ccl_device_noinline float perlin_2d(float x, float y)
+{
+  ssei XY;
+  ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY);
+  ssef uv = fade(fxy);
+
+  ssei XY1 = XY + 1;
+  ssei X = shuffle<0, 0, 0, 0>(XY, XY1);
+  ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
+
+  ssei h = hash_ssei2(X, Y);
+
+  ssef fxy1 = fxy - 1.0f;
+  ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
+  ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
+
+  ssef g = grad(h, fx, fy);
+
+  return extract<0>(bi_mix(g, uv));
+}
+
+/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8
+ * gradients in 3D, we need to compute two sets of gradients at the points:
+ *
+ *    Point  Offset from v0
+ *     v0      (0, 0, 0)
+ *     v1      (0, 0, 1)
+ *     v2      (0, 1, 0)    (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
+ *     v3      (0, 1, 1)         ^
+ *                 |  |__________|       (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
+ *                 |                          ^
+ *                 |__________________________|
+ *
+ *    Point  Offset from v0
+ *     v4      (1, 0, 0)
+ *     v5      (1, 0, 1)
+ *     v6      (1, 1, 0)
+ *     v7      (1, 1, 1)
+ *
+ */
+ccl_device_noinline float perlin_3d(float x, float y, float z)
 {
-  ssef xyz = ssef(x, y, z, 0.0f);
   ssei XYZ;
+  ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
+  ssef uvw = fade(fxyz);
 
-  ssef fxyz = floorfrac_sse(xyz, &XYZ);
+  ssei XYZ1 = XYZ + 1;
+  ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+  ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
 
-  ssef uvw = fade_sse(&fxyz);
-  ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
+  ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z);
+  ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z);
 
-  ssei XYZ_ofc = XYZ + ssei(1);
-  ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc);                       // +0, +0, +1, +1
-  ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc));  // +0, +1, +0, +1
+  ssef fxyz1 = fxyz - 1.0f;
+  ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+  ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
 
-  ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz);      // hash directions 000, 001, 010, 011
-  ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz);  // hash directions 100, 101, 110, 111
+  ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
+  ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
 
-  ssef fxyz_ofc = fxyz - ssef(1.0f);
-  ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
-  ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
+  return extract<0>(tri_mix(g1, g2, uvw));
+}
 
-  ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
-  ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
-  ssef n1 = nerp_sse(u, g1, g2);
+/* We use SSE to compute and interpolate 4 gradients at once. Since we have 16
+ * gradients in 4D, we need to compute four sets of gradients at the points:
+ *
+ *    Point  Offset from v0
+ *     v0     (0, 0, 0, 0)
+ *     v1     (0, 0, 1, 0)
+ *     v2     (0, 1, 0, 0)  (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
+ *     v3     (0, 1, 1, 0)    ^
+ *                |  |________|    (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
+ *                |                       ^
+ *                |_______________________|
+ *
+ *    Point  Offset from v0
+ *     v4     (1, 0, 0, 0)
+ *     v5     (1, 0, 1, 0)
+ *     v6     (1, 1, 0, 0)
+ *     v7     (1, 1, 1, 0)
+ *
+ *    Point  Offset from v0
+ *     v8     (0, 0, 0, 1)
+ *     v9     (0, 0, 1, 1)
+ *     v10    (0, 1, 0, 1)
+ *     v11    (0, 1, 1, 1)
+ *
+ *    Point  Offset from v0
+ *     v12    (1, 0, 0, 1)
+ *     v13    (1, 0, 1, 1)
+ *     v14    (1, 1, 0, 1)
+ *     v15    (1, 1, 1, 1)
+ *
+ */
+ccl_device_noinline float perlin_4d(float x, float y, float z, float w)
+{
+  ssei XYZW;
+  ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
+  ssef uvws = fade(fxyzw);
 
-  ssef n1_half = shuffle<2, 3, 2, 3>(n1);  // extract 2 floats to a separate vector
-  ssef n2 = nerp_sse(
-      v, n1, n1_half);  // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
+  ssei XYZW1 = XYZW + 1;
+  ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+  ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
 
-  ssef n2_second = shuffle<1>(n2);  // extract b to a separate vector
-  ssef result = nerp_sse(
-      w, n2, n2_second);  // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
+  ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
+  ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
 
-  ssef r = scale3_sse(result);
+  ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
+  ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
 
-  ssef infmask = cast(ssei(0x7f800000));
-  ssef rinfmask = ((r & infmask) == infmask).m128;  // 0xffffffff if r is inf/-inf/nan else 0
-  ssef rfinite = andnot(rinfmask, r);               // 0 if r is inf/-inf/nan else r
-  return extract<0>(rfinite);
+  ssef fxyzw1 = fxyzw - 1.0f;
+  ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+  ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
+
+  ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
+  ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
+
+  ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
+  ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
+
+  return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
 }
 #endif
 
-/* perlin noise in range 0..1 */
-ccl_device float noise(float3 p)
+/* Remap the output of noise to a predictable range [-1, 1].
+ * The scale values were computed experimentally by the OSL developers.
+ */
+
+ccl_device_inline float noise_scale1(float result)
+{
+  return 0.2500f * result;
+}
+
+ccl_device_inline float noise_scale2(float result)
+{
+  return 0.6616f * result;
+}
+
+ccl_device_inline float noise_scale3(float result)
+{
+  return 0.9820f * result;
+}
+
+ccl_device_inline float noise_scale4(float result)
+{
+  return 0.8344f * result;
+}
+
+/* Safe Signed And Unsigned Noise */
+
+ccl_device_inline float snoise_1d(float p)
+{
+  float r = perlin_1d(p);
+  return isinf(r) ? 0.0f : noise_scale1(r);
+}
+
+ccl_device_inline float noise_1d(float p)
+{
+  return 0.5f * snoise_1d(p) + 0.5f;
+}
+
+ccl_device_inline float snoise_2d(float2 p)
+{
+  float r = perlin_2d(p.x, p.y);
+  return isinf(r) ? 0.0f : noise_scale2(r);
+}
+
+ccl_device_inline float noise_2d(float2 p)
+{
+  return 0.5f * snoise_2d(p) + 0.5f;
+}
+
+ccl_device_inline float snoise_3d(float3 p)
+{
+  float r = perlin_3d(p.x, p.y, p.z);
+  return isinf(r) ? 0.0f : noise_scale3(r);
+}
+
+ccl_device_inline float noise_3d(float3 p)
+{
+  return 0.5f * snoise_3d(p) + 0.5f;
+}
+
+ccl_device_inline float snoise_4d(float4 p)
 {
-  float r = perlin(p.x, p.y, p.z);
-  return 0.5f * r + 0.5f;
+  float r = perlin_4d(p.x, p.y, p.z, p.w);
+  return isinf(r) ? 0.0f : noise_scale4(r);
 }
 
-/* perlin noise in range -1..1 */
-ccl_device float snoise(float3 p)
+ccl_device_inline float noise_4d(float4 p)
 {
-  return perlin(p.x, p.y, p.z);
+  return 0.5f * snoise_4d(p) + 0.5f;
 }
 
 /* cell noise */
@@ -293,7 +613,7 @@ ccl_device float3 cellnoise3(float3 p)
   ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
   ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
   ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
-  ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
+  ssei bits = hash_ssei3(ip_xyy, ip_yxz, ip_zzx);
   return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF));
 #endif
 }
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 91dc11691e6..c5a1e43a729 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -16,44 +16,172 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Noise */
+/* The following offset functions generate random offsets to be added to texture
+ * coordinates to act as a seed, since the noise functions don't have seed values.
+ * A seed value is needed for generating distortion textures and color outputs.
+ * The offset's components are in the range [100, 200]: not so high as to cause
+ * bad precision, and not too small to be noticeable. We use a float seed because
+ * OSL only supports float hashes.
+ */
+
+/* Scalar offset derived from `seed`: 100 + hash_float_to_float(seed) * 100. */
+ccl_device_inline float random_float_offset(float seed)
+{
+  const float hashed = hash_float_to_float(seed);
+  return hashed * 100.0f + 100.0f;
+}
+
+/* float2 offset derived from `seed`; component i is
+ * 100 + hash_float2_to_float((seed, i)) * 100. */
+ccl_device_inline float2 random_float2_offset(float seed)
+{
+  const float x = hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f + 100.0f;
+  const float y = hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f + 100.0f;
+  return make_float2(x, y);
+}
 
-ccl_device void svm_node_tex_noise(
-    KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
+/* float3 offset derived from `seed`; component i is
+ * 100 + hash_float2_to_float((seed, i)) * 100. */
+ccl_device_inline float3 random_float3_offset(float seed)
 {
-  uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
+  const float x = hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f + 100.0f;
+  const float y = hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f + 100.0f;
+  const float z = hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f + 100.0f;
+  return make_float3(x, y, z);
+}
 
-  svm_unpack_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
-  svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset);
+/* float4 offset derived from `seed`; component i is
+ * 100 + hash_float2_to_float((seed, i)) * 100. */
+ccl_device_inline float4 random_float4_offset(float seed)
+{
+  const float x = hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f + 100.0f;
+  const float y = hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f + 100.0f;
+  const float z = hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f + 100.0f;
+  const float w = hash_float2_to_float(make_float2(seed, 3.0f)) * 100.0f + 100.0f;
+  return make_float4(x, y, z, w);
+}
 
-  uint4 node2 = read_node(kg, offset);
+/* 1D noise texture.
+ * When `distortion` is non-zero, `co` is first displaced by offset noise scaled by
+ * `distortion`. The fractal noise at the resulting coordinate is written to `*value`.
+ * `*color` is written only when `color_is_needed`: its first channel is `*value` and
+ * the other two are fractal noise sampled at differently offset coordinates. */
+ccl_device void noise_texture_1d(
+    float co, float detail, float distortion, bool color_is_needed, float *value, float3 *color)
+{
+  float p = co;
+  if (distortion != 0.0f) {
+    const float warp = noise_1d(p + random_float_offset(0.0f));
+    p += warp * distortion;
+  }
+
+  const float fac = fractal_noise_1d(p, detail);
+  *value = fac;
+  if (!color_is_needed) {
+    return;
+  }
+
+  const float channel_y = fractal_noise_1d(p + random_float_offset(1.0f), detail);
+  const float channel_z = fractal_noise_1d(p + random_float_offset(2.0f), detail);
+  *color = make_float3(fac, channel_y, channel_z);
+}
+
+/* 2D noise texture.
+ * When `distortion` is non-zero, each component of `co` is first displaced by its own
+ * offset noise sample scaled by `distortion`. The fractal noise at the resulting
+ * coordinate is written to `*value`. `*color` is written only when `color_is_needed`:
+ * its first channel is `*value` and the other two are fractal noise sampled at
+ * differently offset coordinates. */
+ccl_device void noise_texture_2d(
+    float2 co, float detail, float distortion, bool color_is_needed, float *value, float3 *color)
+{
+  float2 p = co;
+  if (distortion != 0.0f) {
+    /* Both samples are taken at the undistorted coordinate before p is updated. */
+    const float dx = noise_2d(p + random_float2_offset(0.0f)) * distortion;
+    const float dy = noise_2d(p + random_float2_offset(1.0f)) * distortion;
+    p += make_float2(dx, dy);
+  }
 
-  float scale = stack_load_float_default(stack, scale_offset, node2.x);
-  float detail = stack_load_float_default(stack, detail_offset, node2.y);
-  float distortion = stack_load_float_default(stack, distortion_offset, node2.z);
-  float3 p = stack_load_float3(stack, co_offset) * scale;
-  int hard = 0;
+  const float fac = fractal_noise_2d(p, detail);
+  *value = fac;
+  if (!color_is_needed) {
+    return;
+  }
+
+  const float channel_y = fractal_noise_2d(p + random_float2_offset(2.0f), detail);
+  const float channel_z = fractal_noise_2d(p + random_float2_offset(3.0f), detail);
+  *color = make_float3(fac, channel_y, channel_z);
+}
 
+/* 3D noise texture.
+ * When `distortion` is non-zero, each component of `co` is first displaced by its own
+ * offset noise sample scaled by `distortion`. The fractal noise at the resulting
+ * coordinate is written to `*value`. `*color` is written only when `color_is_needed`:
+ * its first channel is `*value` and the other two are fractal noise sampled at
+ * differently offset coordinates. */
+ccl_device void noise_texture_3d(
+    float3 co, float detail, float distortion, bool color_is_needed, float *value, float3 *color)
+{
+  float3 p = co;
   if (distortion != 0.0f) {
-    float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
+    /* All samples are taken at the undistorted coordinate before p is updated. */
+    const float dx = noise_3d(p + random_float3_offset(0.0f)) * distortion;
+    const float dy = noise_3d(p + random_float3_offset(1.0f)) * distortion;
+    const float dz = noise_3d(p + random_float3_offset(2.0f)) * distortion;
+    p += make_float3(dx, dy, dz);
+  }
 
-    r.x = noise(p + offset) * distortion;
-    r.y = noise(p) * distortion;
-    r.z = noise(p - offset) * distortion;
+  const float fac = fractal_noise_3d(p, detail);
+  *value = fac;
+  if (!color_is_needed) {
+    return;
+  }
+
+  const float channel_y = fractal_noise_3d(p + random_float3_offset(3.0f), detail);
+  const float channel_z = fractal_noise_3d(p + random_float3_offset(4.0f), detail);
+  *color = make_float3(fac, channel_y, channel_z);
+}
 
-    p += r;
+/* 4D noise texture.
+ * When `distortion` is non-zero, each component of `co` is first displaced by its own
+ * offset noise sample scaled by `distortion`. The fractal noise at the resulting
+ * coordinate is written to `*value`. `*color` is written only when `color_is_needed`:
+ * its first channel is `*value` and the other two are fractal noise sampled at
+ * differently offset coordinates. */
+ccl_device void noise_texture_4d(
+    float4 co, float detail, float distortion, bool color_is_needed, float *value, float3 *color)
+{
+  float4 p = co;
+  if (distortion != 0.0f) {
+    /* All samples are taken at the undistorted coordinate before p is updated. */
+    const float dx = noise_4d(p + random_float4_offset(0.0f)) * distortion;
+    const float dy = noise_4d(p + random_float4_offset(1.0f)) * distortion;
+    const float dz = noise_4d(p + random_float4_offset(2.0f)) * distortion;
+    const float dw = noise_4d(p + random_float4_offset(3.0f)) * distortion;
+    p += make_float4(dx, dy, dz, dw);
   }
 
-  float f = noise_turbulence(p, detail, hard);
+  const float fac = fractal_noise_4d(p, detail);
+  *value = fac;
+  if (!color_is_needed) {
+    return;
+  }
+
+  const float channel_y = fractal_noise_4d(p + random_float4_offset(4.0f), detail);
+  const float channel_z = fractal_noise_4d(p + random_float4_offset(5.0f), detail);
+  *color = make_float3(fac, channel_y, channel_z);
+}
+
+/* SVM entry point for the noise texture node.
+ *
+ * `dimensions` selects the 1D, 2D, 3D or 4D evaluator. `offsets1` and `offsets2` are
+ * packed stack offsets for the inputs and outputs. `offset` is passed to read_node()
+ * to fetch the node that carries the default W, scale, detail and distortion values.
+ *
+ * The value and color outputs are stored only when their stack offsets are valid. */
+ccl_device void svm_node_tex_noise(KernelGlobals *kg,
+                                   ShaderData *sd,
+                                   float *stack,
+                                   uint dimensions,
+                                   uint offsets1,
+                                   uint offsets2,
+                                   int *offset)
+{
+  uint vector_stack_offset, w_stack_offset, scale_stack_offset, detail_stack_offset;
+  uint distortion_stack_offset, value_stack_offset, color_stack_offset;
+
+  svm_unpack_node_uchar4(
+      offsets1, &vector_stack_offset, &w_stack_offset, &scale_stack_offset, &detail_stack_offset);
+  svm_unpack_node_uchar3(
+      offsets2, &distortion_stack_offset, &value_stack_offset, &color_stack_offset);
+
+  uint4 defaults = read_node(kg, offset);
+
+  float3 vector = stack_load_float3(stack, vector_stack_offset);
+  float w = stack_load_float_default(stack, w_stack_offset, defaults.x);
+  float scale = stack_load_float_default(stack, scale_stack_offset, defaults.y);
+  float detail = stack_load_float_default(stack, detail_stack_offset, defaults.z);
+  float distortion = stack_load_float_default(stack, distortion_stack_offset, defaults.w);
+
+  /* The scale applies to every coordinate, including the separate W input. */
+  vector *= scale;
+  w *= scale;
+
+  /* Initialized so that the stores below never read indeterminate values, e.g. when
+   * `dimensions` is unsupported and kernel_assert() does not abort. */
+  float value = 0.0f;
+  float3 color = make_float3(0.0f, 0.0f, 0.0f);
+  switch (dimensions) {
+    case 1:
+      /* 1D noise is driven by W only. */
+      noise_texture_1d(w, detail, distortion, stack_valid(color_stack_offset), &value, &color);
+      break;
+    case 2:
+      /* 2D noise uses the X and Y components of the vector. */
+      noise_texture_2d(make_float2(vector.x, vector.y),
+                       detail,
+                       distortion,
+                       stack_valid(color_stack_offset),
+                       &value,
+                       &color);
+      break;
+    case 3:
+      noise_texture_3d(
+          vector, detail, distortion, stack_valid(color_stack_offset), &value, &color);
+      break;
+    case 4:
+      /* 4D noise appends W to the vector as the fourth coordinate. */
+      noise_texture_4d(make_float4(vector.x, vector.y, vector.z, w),
+                       detail,
+                       distortion,
+                       stack_valid(color_stack_offset),
+                       &value,
+                       &color);
+      break;
+    default:
+      kernel_assert(0);
+  }
 
-  if (stack_valid(fac_offset)) {
-    stack_store_float(stack, fac_offset, f);
+  if (stack_valid(value_stack_offset)) {
+    stack_store_float(stack, value_stack_offset, value);
   }
-  if (stack_valid(color_offset)) {
-    float3 color = make_float3(f,
-                               noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
-                               noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
-    stack_store_float3(stack, color_offset, color);
+  if (stack_valid(color_stack_offset)) {
+    stack_store_float3(stack, color_stack_offset, color);
   }
 }
 
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
deleted file mode 100644
index 290aa85c831..00000000000
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Turbulence */
-
-ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard)
-{
-  float fscale = 1.0f;
-  float amp = 1.0f;
-  float sum = 0.0f;
-  int i, n;
-
-  octaves = clamp(octaves, 0.0f, 16.0f);
-  n = float_to_int(octaves);
-
-  for (i = 0; i <= n; i++) {
-    float t = noise(fscale * p);
-
-    if (hard)
-      t = fabsf(2.0f * t - 1.0f);
-
-    sum += t * amp;
-    amp *= 0.5f;
-    fscale *= 2.0f;
-  }
-
-  float rmd = octaves - floorf(octaves);
-
-  if (rmd != 0.0f) {
-    float t = noise(fscale * p);
-
-    if (hard)
-      t = fabsf(2.0f * t - 1.0f);
-
-    float sum2 = sum + t * amp;
-
-    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
-    sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1));
-
-    return (1.0f - rmd) * sum + rmd * sum2;
-  }
-  else {
-    sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1));
-    return sum;
-  }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index baaa89ab0cb..402c1c87414 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -33,7 +33,7 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
     n = len(p) * 20.0f;
 
   if (distortion != 0.0f)
-    n += distortion * noise_turbulence(p * dscale, detail, 0);
+    n += distortion * fractal_noise_3d(p * dscale, detail);
 
   if (profile == NODE_WAVE_PROFILE_SIN) {
     return 0.5f + 0.5f * sinf(n);
author	OmarSquircleArt <omar.squircleart@gmail.com>	2019-09-04 18:54:32 +0300
committer	OmarSquircleArt <omar.squircleart@gmail.com>	2019-09-04 18:54:32 +0300
commit	23564583a4988778b4c43496fd21818b286f6ba1 (patch)
tree	3dc149c4e5c26ea2aac460ed582cb31def988470 /intern/cycles/kernel/svm
parent	45d4c925799e94c6d442a9a9066af2d3305724e1 (diff)