Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
path: root/source
diff options
context:
space:
mode:
author Clément Foucault <foucault.clem@gmail.com> 2017-06-23 03:52:15 +0300
committer Clément Foucault <foucault.clem@gmail.com> 2017-06-23 03:52:34 +0300
commit 3a243ad83fb5f485ecc6fdc2bcc65a93a9f5ea53 (patch)
tree c4b440decb8fa556064d477458b7ce1d2b3f6ff3 /source
parent fbffd6d3645af8364ede5b1b900eed10adb83ca3 (diff)
Eevee: Attempt to optimize GTAO shader.
Unroll the horizon search loop. Use a fast version of acos. On NVIDIA Linux, unrolling the second loop gives very high compilation times.
Diffstat (limited to 'source')
-rw-r--r-- source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl 197
-rw-r--r-- source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl 16
2 files changed, 145 insertions, 68 deletions
diff --git a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
index 65ff09c4eae..b044cacf1b1 100644
--- a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
@@ -5,28 +5,21 @@
#define MAX_PHI_STEP 32
/* NOTICE : this is multiplied by 2 */
-#define MAX_THETA_STEP 6.0
+#define MAX_THETA_STEP 12
uniform sampler2D minMaxDepthTex;
uniform float aoDistance;
uniform float aoSamples;
uniform float aoFactor;
-float sample_depth(vec2 co, int level)
+float get_max_horizon(vec2 co, vec3 x, float h, float lod)
{
- return textureLod(minMaxDepthTex, co, float(level)).g;
-}
-
-float get_max_horizon(vec2 co, vec3 x, float h, float step)
-{
- if (co.x > 1.0 || co.x < 0.0 || co.y > 1.0 || co.y < 0.0)
- return h;
-
- float depth = sample_depth(co, int(step));
+ float depth = textureLod(minMaxDepthTex, co, floor(lod)).g;
/* Background case */
- if (depth == 1.0)
- return h;
+ /* this is really slow and is only a problem
+ * if the far clip plane is near enough to notice */
+ // depth += step(1.0, depth) * 1e20;
vec3 s = get_view_space_from_depth(co, depth); /* s View coordinate */
vec3 omega_s = s - x;
@@ -39,6 +32,124 @@ float get_max_horizon(vec2 co, vec3 x, float h, float step)
return mix(h, max_h, blend);
}
+void search_step(
+ vec2 t_phi, vec3 x, vec2 x_, float rand, vec2 pixel_ratio,
+ inout float j, inout float ofs, inout float h1, inout float h2)
+{
+ ofs += ofs; /* Step size is doubled each iteration */
+
+ vec2 s_ = t_phi * ofs * rand * pixel_ratio; /* s^ Screen coordinate */
+ vec2 co;
+
+ co = x_ + s_;
+ h1 = get_max_horizon(co, x, h1, j);
+
+ co = x_ - s_;
+ h2 = get_max_horizon(co, x, h2, j);
+
+ j += 0.5;
+}
+
+void search_horizon(
+ vec2 t_phi, vec3 x, vec2 x_, float rand,
+ float max_dist, vec2 pixel_ratio, float pixel_len,
+ inout float h1, inout float h2)
+{
+ float ofs = 1.5 * pixel_len;
+ float j = 0.0;
+
+#if 0 /* manually unrolled below */
+ for (int i = 0; i < MAX_THETA_STEP; i++) {
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist)
+ return;
+ }
+#endif
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+ if (ofs > max_dist) return;
+
+ search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+}
+
+void integrate_slice(
+ float iter, vec3 x, vec3 normal, vec2 x_, vec2 noise,
+ float max_dist, vec2 pixel_ratio, float pixel_len,
+ inout float visibility, inout vec3 bent_normal)
+{
+ float phi = M_PI * ((noise.r + iter) / aoSamples);
+
+ /* Rotate with random direction to get jittered result. */
+ vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */
+
+ /* Search maximum horizon angles h1 and h2 */
+ float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */
+ search_horizon(t_phi, x, x_, noise.g, max_dist, pixel_ratio, pixel_len, h1, h2);
+
+ /* (Slide 54) */
+ h1 = -fast_acos(h1);
+ h2 = fast_acos(h2);
+
+ /* Projecting Normal to Plane P defined by t_phi and omega_o */
+ vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
+ vec3 t = vec3(-t_phi, 0.0);
+ vec3 n_proj = normal - h * dot(h, normal);
+ float n_proj_len = max(1e-16, length(n_proj));
+
+ /* Clamping thetas (slide 58) */
+ float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
+ float n = sign(dot(n_proj, t)) * fast_acos(cos_n); /* Angle between view vec and normal */
+ h1 = n + max(h1 - n, -M_PI_2);
+ h2 = n + min(h2 - n, M_PI_2);
+
+ /* Solving inner integral */
+ float sin_n = sin(n);
+ float h1_2 = 2.0 * h1;
+ float h2_2 = 2.0 * h2;
+ float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n);
+ vd *= 0.25 * n_proj_len;
+ visibility += vd;
+
+#ifdef USE_BENT_NORMAL
+ /* Finding Bent normal */
+ float b_angle = (h1 + h2) / 2.0;
+ /* The 0.5 factor below is here to equilibrate the accumulated vectors.
+ * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5).
+ * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */
+ /* Weight sample by vd */
+ bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd;
+#endif
+}
+
void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility
#ifdef USE_BENT_NORMAL
, out vec3 bent_normal
@@ -66,62 +177,12 @@ void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility
visibility = 0.0;
#ifdef USE_BENT_NORMAL
bent_normal = vec3(0.0);
+#else
+ vec3 bent_normal = vec3(0.0);
#endif
- for (float i = 0.0; i < aoSamples && i < MAX_PHI_STEP; i++) {
- float phi = M_PI * ((noise.r + i) / aoSamples);
-
- /* Rotate with random direction to get jittered result. */
- vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */
-
- /* Search maximum horizon angles h1 and h2 */
- float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */
- float ofs = 1.5 * pixel_len;
- for (float j = 0.0; ofs < max_dist && j < MAX_THETA_STEP; j += 0.5) {
- ofs += ofs; /* Step size is doubled each iteration */
-
- vec2 s_ = t_phi * ofs * noise.g * pixel_ratio; /* s^ Screen coordinate */
- vec2 co;
-
- co = x_ + s_;
- h1 = get_max_horizon(co, x, h1, j);
-
- co = x_ - s_;
- h2 = get_max_horizon(co, x, h2, j);
- }
-
- /* (Slide 54) */
- h1 = -acos(h1);
- h2 = acos(h2);
-
- /* Projecting Normal to Plane P defined by t_phi and omega_o */
- vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
- vec3 t = vec3(-t_phi, 0.0);
- vec3 n_proj = normal - h * dot(h, normal);
- float n_proj_len = max(1e-16, length(n_proj));
-
- /* Clamping thetas (slide 58) */
- float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
- float n = sign(dot(n_proj, t)) * acos(cos_n); /* Angle between view vec and normal */
- h1 = n + max(h1 - n, -M_PI_2);
- h2 = n + min(h2 - n, M_PI_2);
-
- /* Solving inner integral */
- float sin_n = sin(n);
- float h1_2 = 2.0 * h1;
- float h2_2 = 2.0 * h2;
- float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n);
- vd *= 0.25 * n_proj_len;
- visibility += vd;
-
-#ifdef USE_BENT_NORMAL
- /* Finding Bent normal */
- float b_angle = (h1 + h2) / 2.0;
- /* The 0.5 factor below is here to equilibrate the accumulated vectors.
- * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5).
- * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */
- /* Weight sample by vd */
- bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd;
-#endif
+ for (float i = 0.0; i < MAX_PHI_STEP; i++) {
+ if (i >= aoSamples) break;
+ integrate_slice(i, x, normal, x_, noise, max_dist, pixel_ratio, pixel_len, visibility, bent_normal);
}
visibility = clamp(visibility / aoSamples, 1e-8, 1.0);
diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
index 6ce4b2f7501..d4f1781ae6c 100644
--- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
@@ -146,6 +146,22 @@ float distance_squared(vec3 a, vec3 b) { a -= b; return dot(a, a); }
float inverse_distance(vec3 V) { return max( 1 / length(V), 1e-8); }
+/* ------- Fast Math ------- */
+
+/* [Drobot2014a] Low Level Optimizations for GCN */
+float fast_sqrt(float x)
+{
+ return intBitsToFloat(0x1fbd1df5 + (floatBitsToInt(x) >> 1));
+}
+
+/* [Eberly2014] GPGPU Programming for Games and Science */
+float fast_acos(float x)
+{
+ float res = -0.156583 * abs(x) + M_PI_2;
+ res *= fast_sqrt(1.0 - abs(x));
+ return (x >= 0) ? res : M_PI - res;
+}
+
float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal)
{
return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection);