From 3a243ad83fb5f485ecc6fdc2bcc65a93a9f5ea53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Fri, 23 Jun 2017 02:52:15 +0200 Subject: Eevee: Attempt to optimize GTAO shader. Unroll horizon search loop. Use fast version of acos. On nvidia linux, unrolling the 2nd loop is giving very high compilation time. --- .../eevee/shaders/ambient_occlusion_lib.glsl | 197 ++++++++++++++------- .../engines/eevee/shaders/bsdf_common_lib.glsl | 16 ++ 2 files changed, 145 insertions(+), 68 deletions(-) (limited to 'source') diff --git a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl index 65ff09c4eae..b044cacf1b1 100644 --- a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl @@ -5,28 +5,21 @@ #define MAX_PHI_STEP 32 /* NOTICE : this is multiplied by 2 */ -#define MAX_THETA_STEP 6.0 +#define MAX_THETA_STEP 12 uniform sampler2D minMaxDepthTex; uniform float aoDistance; uniform float aoSamples; uniform float aoFactor; -float sample_depth(vec2 co, int level) +float get_max_horizon(vec2 co, vec3 x, float h, float lod) { - return textureLod(minMaxDepthTex, co, float(level)).g; -} - -float get_max_horizon(vec2 co, vec3 x, float h, float step) -{ - if (co.x > 1.0 || co.x < 0.0 || co.y > 1.0 || co.y < 0.0) - return h; - - float depth = sample_depth(co, int(step)); + float depth = textureLod(minMaxDepthTex, co, floor(lod)).g; /* Background case */ - if (depth == 1.0) - return h; + /* this is really slow and is only a problem + * if the far clip plane is near enough to notice */ + // depth += step(1.0, depth) * 1e20; vec3 s = get_view_space_from_depth(co, depth); /* s View coordinate */ vec3 omega_s = s - x; @@ -39,6 +32,124 @@ float get_max_horizon(vec2 co, vec3 x, float h, float step) return mix(h, max_h, blend); } +void search_step( + vec2 t_phi, vec3 x, vec2 x_, float rand, vec2 
pixel_ratio, + inout float j, inout float ofs, inout float h1, inout float h2) +{ + ofs += ofs; /* Step size is doubled each iteration */ + + vec2 s_ = t_phi * ofs * rand * pixel_ratio; /* s^ Screen coordinate */ + vec2 co; + + co = x_ + s_; + h1 = get_max_horizon(co, x, h1, j); + + co = x_ - s_; + h2 = get_max_horizon(co, x, h2, j); + + j += 0.5; +} + +void search_horizon( + vec2 t_phi, vec3 x, vec2 x_, float rand, + float max_dist, vec2 pixel_ratio, float pixel_len, + inout float h1, inout float h2) +{ + float ofs = 1.5 * pixel_len; + float j = 0.0; + +#if 0 /* manually unrolled below */ + for (int i = 0; i < MAX_THETA_STEP; i++) { + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) + return; + } +#endif + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); + if (ofs > max_dist) return; + + search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2); +} + +void integrate_slice( + float iter, vec3 x, vec3 normal, vec2 x_, vec2 noise, + float max_dist, vec2 pixel_ratio, float 
pixel_len, + inout float visibility, inout vec3 bent_normal) +{ + float phi = M_PI * ((noise.r + iter) / aoSamples); + + /* Rotate with random direction to get jittered result. */ + vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */ + + /* Search maximum horizon angles h1 and h2 */ + float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */ + search_horizon(t_phi, x, x_, noise.g, max_dist, pixel_ratio, pixel_len, h1, h2); + + /* (Slide 54) */ + h1 = -fast_acos(h1); + h2 = fast_acos(h2); + + /* Projecting Normal to Plane P defined by t_phi and omega_o */ + vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */ + vec3 t = vec3(-t_phi, 0.0); + vec3 n_proj = normal - h * dot(h, normal); + float n_proj_len = max(1e-16, length(n_proj)); + + /* Clamping thetas (slide 58) */ + float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0); + float n = sign(dot(n_proj, t)) * fast_acos(cos_n); /* Angle between view vec and normal */ + h1 = n + max(h1 - n, -M_PI_2); + h2 = n + min(h2 - n, M_PI_2); + + /* Solving inner integral */ + float sin_n = sin(n); + float h1_2 = 2.0 * h1; + float h2_2 = 2.0 * h2; + float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n); + vd *= 0.25 * n_proj_len; + visibility += vd; + +#ifdef USE_BENT_NORMAL + /* Finding Bent normal */ + float b_angle = (h1 + h2) / 2.0; + /* The 0.5 factor below is here to equilibrate the accumulated vectors. + * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5). + * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). 
*/ + /* Weight sample by vd */ + bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd; +#endif +} + void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility #ifdef USE_BENT_NORMAL , out vec3 bent_normal @@ -66,62 +177,12 @@ void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility visibility = 0.0; #ifdef USE_BENT_NORMAL bent_normal = vec3(0.0); +#else + vec3 bent_normal = vec3(0.0); #endif - for (float i = 0.0; i < aoSamples && i < MAX_PHI_STEP; i++) { - float phi = M_PI * ((noise.r + i) / aoSamples); - - /* Rotate with random direction to get jittered result. */ - vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */ - - /* Search maximum horizon angles h1 and h2 */ - float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */ - float ofs = 1.5 * pixel_len; - for (float j = 0.0; ofs < max_dist && j < MAX_THETA_STEP; j += 0.5) { - ofs += ofs; /* Step size is doubled each iteration */ - - vec2 s_ = t_phi * ofs * noise.g * pixel_ratio; /* s^ Screen coordinate */ - vec2 co; - - co = x_ + s_; - h1 = get_max_horizon(co, x, h1, j); - - co = x_ - s_; - h2 = get_max_horizon(co, x, h2, j); - } - - /* (Slide 54) */ - h1 = -acos(h1); - h2 = acos(h2); - - /* Projecting Normal to Plane P defined by t_phi and omega_o */ - vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */ - vec3 t = vec3(-t_phi, 0.0); - vec3 n_proj = normal - h * dot(h, normal); - float n_proj_len = max(1e-16, length(n_proj)); - - /* Clamping thetas (slide 58) */ - float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0); - float n = sign(dot(n_proj, t)) * acos(cos_n); /* Angle between view vec and normal */ - h1 = n + max(h1 - n, -M_PI_2); - h2 = n + min(h2 - n, M_PI_2); - - /* Solving inner integral */ - float sin_n = sin(n); - float h1_2 = 2.0 * h1; - float h2_2 = 2.0 * h2; - float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n); - vd *= 0.25 * n_proj_len; - visibility += vd; - -#ifdef 
USE_BENT_NORMAL - /* Finding Bent normal */ - float b_angle = (h1 + h2) / 2.0; - /* The 0.5 factor below is here to equilibrate the accumulated vectors. - * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5). - * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */ - /* Weight sample by vd */ - bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd; -#endif + for (float i = 0.0; i < MAX_PHI_STEP; i++) { + if (i >= aoSamples) break; + integrate_slice(i, x, normal, x_, noise, max_dist, pixel_ratio, pixel_len, visibility, bent_normal); } visibility = clamp(visibility / aoSamples, 1e-8, 1.0); diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl index 6ce4b2f7501..d4f1781ae6c 100644 --- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl @@ -146,6 +146,22 @@ float distance_squared(vec3 a, vec3 b) { a -= b; return dot(a, a); } float inverse_distance(vec3 V) { return max( 1 / length(V), 1e-8); } +/* ------- Fast Math ------- */ + +/* [Drobot2014a] Low Level Optimizations for GCN */ +float fast_sqrt(float x) +{ + return intBitsToFloat(0x1fbd1df5 + (floatBitsToInt(x) >> 1)); +} + +/* [Eberly2014] GPGPU Programming for Games and Science */ +float fast_acos(float x) +{ + float res = -0.156583 * abs(x) + M_PI_2; + res *= fast_sqrt(1.0 - abs(x)); + return (x >= 0) ? res : M_PI - res; +} + float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal) { return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection); -- cgit v1.2.3