diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2018-01-16 15:13:30 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2018-01-16 20:22:24 +0300 |
commit | 2221cdb5179467e5b654a41b4c24796bcd66cb93 (patch) | |
tree | 07f0d1e57607a7eefd775b60add19023305e4d56 /source/blender/draw/engines | |
parent | 3cb2b2956b140b840ba8a481ad15df1b567d9c07 (diff) |
Eevee: SSR: Optimise Texture fetches and solve noise issue.
There was still a remaining noise issue caused by neighbor re-use. Randomizing the neighbors every _prime_number_ of iterations fixes this.
Diffstat (limited to 'source/blender/draw/engines')
4 files changed, 172 insertions, 75 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h index 6f276a891ba..5fb25229902 100644 --- a/source/blender/draw/engines/eevee/eevee_private.h +++ b/source/blender/draw/engines/eevee/eevee_private.h @@ -497,6 +497,7 @@ typedef struct EEVEE_EffectsInfo { bool use_ssr; bool reflection_trace_full; bool ssr_use_normalization; + int ssr_neighbor_ofs; float ssr_firefly_fac; float ssr_border_fac; float ssr_max_roughness; diff --git a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c index 27e72b9e8d6..568b34db088 100644 --- a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c +++ b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c @@ -249,6 +249,7 @@ void EEVEE_screen_raytrace_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *v DRW_shgroup_uniform_buffer(grp, "planarDepth", &vedata->txl->planar_depth); DRW_shgroup_uniform_buffer(grp, "hitBuffer", &vedata->txl->ssr_hit_output); DRW_shgroup_uniform_buffer(grp, "pdfBuffer", &stl->g_data->ssr_pdf_output); + DRW_shgroup_uniform_int(grp, "neighborOffset", &effects->ssr_neighbor_ofs, 1); DRW_shgroup_uniform_vec4(grp, "aoParameters[0]", &effects->ao_dist, 2); if (effects->use_ao) { @@ -305,6 +306,10 @@ void EEVEE_reflection_compute(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *v EEVEE_downsample_buffer(vedata, fbl->downsample_fb, txl->color_double_buffer, 9); /* Resolve at fullres */ + int sample = (DRW_state_is_image_render()) ? effects->taa_render_sample : effects->taa_current_sample; + /* Doing a neighbor shift only after a few iteration. We wait for a prime number of cycles to avoid + * noise correlation. This reduces variance faster. 
*/ + effects->ssr_neighbor_ofs = ((sample / 5) % 8) * 4; DRW_framebuffer_texture_detach(dtxl->depth); DRW_framebuffer_texture_detach(txl->ssr_normal_input); DRW_framebuffer_texture_detach(txl->ssr_specrough_input); diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl index a9350dbc632..ff3e1717ca8 100644 --- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl @@ -125,6 +125,10 @@ float min_v3(vec3 v) { return min(v.x, min(v.y, v.z)); } float max_v2(vec2 v) { return max(v.x, v.y); } float max_v3(vec3 v) { return max(v.x, max(v.y, v.z)); } +float sum(vec2 v) { return dot(vec2(1.0), v); } +float sum(vec3 v) { return dot(vec3(1.0), v); } +float sum(vec4 v) { return dot(vec4(1.0), v); } + float saturate(float a) { return clamp(a, 0.0, 1.0); } vec2 saturate(vec2 a) { return clamp(a, 0.0, 1.0); } vec3 saturate(vec3 a) { return clamp(a, 0.0, 1.0); } diff --git a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl index ac73f9ea26b..6c770fa029f 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl @@ -129,7 +129,7 @@ void main() float a2 = roughnessSquared * roughnessSquared; if (roughness > maxRoughness + 0.2) { - hitData = ivec2(0); + hitData = encode_hit_data(vec2(0.5), false, false); pdfData = 0.0; return; } @@ -138,7 +138,7 @@ void main() /* Gives *perfect* reflection for very small roughness */ if (roughness < 0.04) { - rand *= vec4(0.0, 1.0, 0.0, 0.0); + rand.xzw *= 0.0; } vec3 worldPosition = transform_point(ViewMatrixInverse, viewPosition); @@ -180,6 +180,20 @@ uniform sampler2D pdfBuffer; uniform int probe_count; uniform int planar_count; +uniform int neighborOffset; + +const ivec2 neighbors[32] = ivec2[32]( + ivec2( 0, 0), ivec2( 1, 
1), ivec2(-2, 0), ivec2( 0, -2), + ivec2( 0, 0), ivec2( 1, -1), ivec2(-2, 0), ivec2( 0, 2), + ivec2( 0, 0), ivec2(-1, -1), ivec2( 2, 0), ivec2( 0, 2), + ivec2( 0, 0), ivec2(-1, 1), ivec2( 2, 0), ivec2( 0, -2), + + ivec2( 0, 0), ivec2( 2, 2), ivec2(-2, 2), ivec2( 0, -1), + ivec2( 0, 0), ivec2( 2, -2), ivec2(-2, -2), ivec2( 0, 1), + ivec2( 0, 0), ivec2(-2, -2), ivec2(-2, 2), ivec2( 1, 0), + ivec2( 0, 0), ivec2( 2, 2), ivec2( 2, -2), ivec2(-1, 0) +); + uniform mat4 PastViewProjectionMatrix; out vec4 fragColor; @@ -250,96 +264,175 @@ vec2 get_reprojected_reflection(vec3 hit, vec3 pos, vec3 N) return project_point(PastViewProjectionMatrix, hit).xy * 0.5 + 0.5; } -vec4 get_ssr_sample( - PlanarData pd, float planar_index, vec3 worldPosition, vec3 N, vec3 V, - float roughnessSquared, float cone_tan, vec2 source_uvs, vec2 texture_size, ivec2 target_texel, - inout float weight_acc) +float get_sample_depth(vec2 hit_co, bool is_planar, float planar_index) { - float hit_pdf = texelFetch(pdfBuffer, target_texel, 0).r; - ivec2 hit_data = texelFetch(hitBuffer, target_texel, 0).rg; - - bool is_planar, has_hit; - vec2 hit_co = decode_hit_data(hit_data, has_hit, is_planar); - - /* Get precise depth of the hit. */ - float hit_depth; if (is_planar) { - hit_depth = textureLod(planarDepth, vec3(hit_co, planar_index), 0.0).r; + return textureLod(planarDepth, vec3(hit_co, planar_index), 0.0).r; } else { - hit_depth = textureLod(depthBuffer, hit_co, 0.0).r; + return textureLod(depthBuffer, hit_co, 0.0).r; } +} - /* Hit position in view space. */ - vec3 hit_view = get_view_space_from_depth(hit_co, hit_depth); - float homcoord = ProjectionMatrix[2][3] * hit_view.z + ProjectionMatrix[3][3]; - - /* Hit position in world space. 
*/ - vec3 hit_pos = transform_point(ViewMatrixInverse, hit_view.xyz); - - vec2 ref_uvs; +vec3 get_hit_vector( + vec3 hit_pos, PlanarData pd, vec3 worldPosition, vec3 N, vec3 V, bool is_planar, + inout vec2 hit_co, inout float mask) +{ vec3 hit_vec; - float mask = 1.0; + if (is_planar) { /* Reflect back the hit position to have it in non-reflected world space */ vec3 trace_pos = line_plane_intersect(worldPosition, V, pd.pl_plane_eq); hit_vec = hit_pos - trace_pos; hit_vec = reflect(hit_vec, pd.pl_normal); - ref_uvs = hit_co; } else { /* Find hit position in previous frame. */ - ref_uvs = get_reprojected_reflection(hit_pos, worldPosition, N); + mask = screen_border_mask(gl_FragCoord.xy / vec2(textureSize(depthBuffer, 0))); + hit_co = get_reprojected_reflection(hit_pos, worldPosition, N); hit_vec = hit_pos - worldPosition; - mask = screen_border_mask(gl_FragCoord.xy / texture_size); } - mask = min(mask, screen_border_mask(ref_uvs)); - float hit_dist = max(1e-8, length(hit_vec)); - vec3 L = hit_vec / hit_dist; + mask = min(mask, screen_border_mask(hit_co)); + return hit_vec; +} + +vec3 get_scene_color(vec2 ref_uvs, float mip, float planar_index, bool is_planar) +{ + if (is_planar) { + return textureLod(probePlanars, vec3(ref_uvs, planar_index), min(mip, lodPlanarMax)).rgb; + } + else { + return textureLod(prevColorBuffer, ref_uvs, mip).rgb; + } +} + +vec4 get_ssr_samples( + vec4 hit_pdf, ivec4 hit_data[2], + PlanarData pd, float planar_index, vec3 worldPosition, vec3 N, vec3 V, + float roughnessSquared, float cone_tan, vec2 source_uvs, + inout float weight_acc) +{ + bvec4 is_planar, has_hit; + vec4 hit_co[2]; + hit_co[0].xy = decode_hit_data(hit_data[0].xy, has_hit.x, is_planar.x); + hit_co[0].zw = decode_hit_data(hit_data[0].zw, has_hit.y, is_planar.y); + hit_co[1].xy = decode_hit_data(hit_data[1].xy, has_hit.z, is_planar.z); + hit_co[1].zw = decode_hit_data(hit_data[1].zw, has_hit.w, is_planar.w); + + vec4 hit_depth; + hit_depth.x = get_sample_depth(hit_co[0].xy, 
is_planar.x, planar_index); + hit_depth.y = get_sample_depth(hit_co[0].zw, is_planar.y, planar_index); + hit_depth.z = get_sample_depth(hit_co[1].xy, is_planar.z, planar_index); + hit_depth.w = get_sample_depth(hit_co[1].zw, is_planar.w, planar_index); + + /* Hit position in view space. */ + vec3 hit_view[4]; + hit_view[0] = get_view_space_from_depth(hit_co[0].xy, hit_depth.x); + hit_view[1] = get_view_space_from_depth(hit_co[0].zw, hit_depth.y); + hit_view[2] = get_view_space_from_depth(hit_co[1].xy, hit_depth.z); + hit_view[3] = get_view_space_from_depth(hit_co[1].zw, hit_depth.w); + + vec4 homcoord = vec4(hit_view[0].z, hit_view[1].z, hit_view[2].z, hit_view[3].z); + homcoord = ProjectionMatrix[2][3] * homcoord + ProjectionMatrix[3][3]; - float cone_footprint = hit_dist * cone_tan; + /* Hit position in world space. */ + vec3 hit_pos[4]; + hit_pos[0] = transform_point(ViewMatrixInverse, hit_view[0]); + hit_pos[1] = transform_point(ViewMatrixInverse, hit_view[1]); + hit_pos[2] = transform_point(ViewMatrixInverse, hit_view[2]); + hit_pos[3] = transform_point(ViewMatrixInverse, hit_view[3]); + + /* Get actual hit vector and hit coordinate (from last frame). */ + vec4 mask = vec4(1.0); + hit_pos[0] = get_hit_vector(hit_pos[0], pd, worldPosition, N, V, is_planar.x, hit_co[0].xy, mask.x); + hit_pos[1] = get_hit_vector(hit_pos[1], pd, worldPosition, N, V, is_planar.y, hit_co[0].zw, mask.y); + hit_pos[2] = get_hit_vector(hit_pos[2], pd, worldPosition, N, V, is_planar.z, hit_co[1].xy, mask.z); + hit_pos[3] = get_hit_vector(hit_pos[3], pd, worldPosition, N, V, is_planar.w, hit_co[1].zw, mask.w); + + vec4 hit_dist; + hit_dist.x = length(hit_pos[0]); + hit_dist.y = length(hit_pos[1]); + hit_dist.z = length(hit_pos[2]); + hit_dist.w = length(hit_pos[3]); + hit_dist = max(vec4(1e-8), hit_dist); + + /* Normalize */ + hit_pos[0] /= hit_dist.x; + hit_pos[1] /= hit_dist.y; + hit_pos[2] /= hit_dist.z; + hit_pos[3] /= hit_dist.w; /* Compute cone footprint in screen space. 
*/ + vec4 cone_footprint = hit_dist * cone_tan; cone_footprint = BRDF_BIAS * 0.5 * cone_footprint * max(ProjectionMatrix[0][0], ProjectionMatrix[1][1]) / homcoord; /* Estimate a cone footprint to sample a corresponding mipmap level. */ - float mip = clamp(log2(cone_footprint * max(texture_size.x, texture_size.y)), 0.0, MAX_MIP); + vec4 mip = log2(cone_footprint * max_v2(vec2(textureSize(depthBuffer, 0)))); + mip = clamp(mip, 0.0, MAX_MIP); /* Correct UVs for mipmaping mis-alignment */ - ref_uvs *= mip_ratio_interp(mip); + hit_co[0].xy *= mip_ratio_interp(mip.x); + hit_co[0].zw *= mip_ratio_interp(mip.y); + hit_co[1].xy *= mip_ratio_interp(mip.z); + hit_co[1].zw *= mip_ratio_interp(mip.w); /* Slide 54 */ - float bsdf = bsdf_ggx(N, L, V, roughnessSquared); - float weight = step(1e-8, hit_pdf) * bsdf / max(1e-8, hit_pdf); - weight_acc += weight; + vec4 bsdf; + bsdf.x = bsdf_ggx(N, hit_pos[0], V, roughnessSquared); + bsdf.y = bsdf_ggx(N, hit_pos[1], V, roughnessSquared); + bsdf.z = bsdf_ggx(N, hit_pos[2], V, roughnessSquared); + bsdf.w = bsdf_ggx(N, hit_pos[3], V, roughnessSquared); - vec3 sample; - if (is_planar) { - sample = textureLod(probePlanars, vec3(ref_uvs, planar_index), min(mip, lodPlanarMax)).rgb; - } - else { - sample = textureLod(prevColorBuffer, ref_uvs, mip).rgb; - } + vec4 weight = step(1e-8, hit_pdf) * bsdf / max(vec4(1e-8), hit_pdf); + + vec3 sample[4]; + sample[0] = get_scene_color(hit_co[0].xy, mip.x, planar_index, is_planar.x); + sample[1] = get_scene_color(hit_co[0].zw, mip.y, planar_index, is_planar.y); + sample[2] = get_scene_color(hit_co[1].xy, mip.z, planar_index, is_planar.z); + sample[3] = get_scene_color(hit_co[1].zw, mip.w, planar_index, is_planar.w); /* Clamped brightness. 
*/ - float luma = max(1e-8, brightness(sample)); - sample *= 1.0 - max(0.0, luma - fireflyFactor) / luma; + vec4 luma; + luma.x = brightness(sample[0]); + luma.y = brightness(sample[1]); + luma.z = brightness(sample[2]); + luma.w = brightness(sample[3]); + luma = max(vec4(1e-8), luma); + luma = 1.0 - max(vec4(0.0), luma - fireflyFactor) / luma; + + sample[0] *= luma.x; + sample[1] *= luma.y; + sample[2] *= luma.z; + sample[3] *= luma.w; /* Protection against NaNs in the history buffer. * This could be removed if some previous pass has already * sanitized the input. */ - if (any(isnan(sample))) { - sample = vec3(0.0); - weight = 0.0; + if (any(isnan(sample[0]))) { + sample[0] = vec3(0.0); weight.x = 0.0; + } + if (any(isnan(sample[1]))) { + sample[1] = vec3(0.0); weight.y = 0.0; } + if (any(isnan(sample[2]))) { + sample[2] = vec3(0.0); weight.z = 0.0; + } + if (any(isnan(sample[3]))) { + sample[3] = vec3(0.0); weight.w = 0.0; + } + + weight_acc += sum(weight); /* Do not add light if ray has failed. */ - return vec4(sample, mask) * weight * float(has_hit); + vec4 accum; + accum = vec4(sample[0], mask.x) * weight.x * float(has_hit.x); + accum += vec4(sample[1], mask.y) * weight.y * float(has_hit.y); + accum += vec4(sample[2], mask.z) * weight.z * float(has_hit.z); + accum += vec4(sample[3], mask.w) * weight.w * float(has_hit.w); + return accum; } -#define NUM_NEIGHBORS 4 - void main() { ivec2 fullres_texel = ivec2(gl_FragCoord.xy); @@ -348,8 +441,7 @@ void main() #else ivec2 halfres_texel = ivec2(gl_FragCoord.xy / 2.0); #endif - vec2 texture_size = vec2(textureSize(depthBuffer, 0)); - vec2 uvs = gl_FragCoord.xy / texture_size; + vec2 uvs = gl_FragCoord.xy / vec2(textureSize(depthBuffer, 0)); float depth = textureLod(depthBuffer, uvs, 0.0).r; @@ -369,6 +461,20 @@ void main() if (dot(speccol_roughness.rgb, vec3(1.0)) == 0.0) discard; + /* TODO optimize with textureGather */ + /* Doing these fetches early to hide latency. 
*/ + vec4 hit_pdf; + hit_pdf.x = texelFetch(pdfBuffer, halfres_texel + neighbors[0 + neighborOffset], 0).r; + hit_pdf.y = texelFetch(pdfBuffer, halfres_texel + neighbors[1 + neighborOffset], 0).r; + hit_pdf.z = texelFetch(pdfBuffer, halfres_texel + neighbors[2 + neighborOffset], 0).r; + hit_pdf.w = texelFetch(pdfBuffer, halfres_texel + neighbors[3 + neighborOffset], 0).r; + + ivec4 hit_data[2]; + hit_data[0].xy = texelFetch(hitBuffer, halfres_texel + neighbors[0 + neighborOffset], 0).rg; + hit_data[0].zw = texelFetch(hitBuffer, halfres_texel + neighbors[1 + neighborOffset], 0).rg; + hit_data[1].xy = texelFetch(hitBuffer, halfres_texel + neighbors[2 + neighborOffset], 0).rg; + hit_data[1].zw = texelFetch(hitBuffer, halfres_texel + neighbors[3 + neighborOffset], 0).rg; + /* Find Planar Reflections affecting this pixel */ PlanarData pd; float planar_index; @@ -397,29 +503,10 @@ void main() vec4 ssr_accum = vec4(0.0); float weight_acc = 0.0; - const ivec2 neighbors[9] = ivec2[9]( - ivec2(0, 0), - - ivec2(0, 1), - ivec2(-1, -1), ivec2(1, -1), - - ivec2(-1, 1), ivec2(1, 1), - ivec2(0, -1), - - ivec2(-1, 0), ivec2(1, 0) - ); - ivec2 invert_neighbor; - invert_neighbor.x = ((fullres_texel.x & 0x1) == 0) ? 1 : -1; - invert_neighbor.y = ((fullres_texel.y & 0x1) == 0) ? 1 : -1; if (roughness < maxRoughness + 0.2) { - for (int i = 0; i < NUM_NEIGHBORS; i++) { - ivec2 target_texel = halfres_texel + neighbors[i] * invert_neighbor; - - ssr_accum += get_ssr_sample(pd, planar_index, worldPosition, N, V, - roughnessSquared, cone_tan, source_uvs, - texture_size, target_texel, weight_acc); - } + ssr_accum += get_ssr_samples(hit_pdf, hit_data, pd, planar_index, worldPosition, N, V, + roughnessSquared, cone_tan, source_uvs, weight_acc); } /* Compute SSR contribution */ |