From 6d8e308eae4c980a093582c29084ae20ee814972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Mon, 23 Apr 2018 23:07:58 +0200 Subject: GPUShader: Optimize Multisample resolve shader. Group all fetches together without interleaved ALU to let the compiler optimize. Also do the color samples only if needed. Went from 3.86ms to [1.11-2.22]ms [min-max] for the 16-sample resolve pass on my nvidia card. --- .../gpu_shader_image_multisample_resolve_frag.glsl | 129 +++++++++++++++------ 1 file changed, 93 insertions(+), 36 deletions(-) diff --git a/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl b/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl index de1fd8b6b58..57362c88320 100644 --- a/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl +++ b/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl @@ -8,60 +8,117 @@ out vec4 fragColor; #error "Too many samples" #endif +// #define USE_DEPTH_WEIGHTING + void main() { ivec2 texel = ivec2(gl_FragCoord.xy); - float depth = 1.0; - depth = min(depth, texelFetch(depthMulti, texel, 0).r); - depth = min(depth, texelFetch(depthMulti, texel, 1).r); + bvec4 b1, b2, b3, b4; + vec4 w1, w2, w3, w4; + vec4 d1, d2, d3, d4; + vec4 c1, c2, c3, c4, c5, c6, c7, c8; + vec4 c9, c10, c11, c12, c13, c14, c15, c16; + d1 = d2 = d3 = d4 = vec4(1.0); + w1 = w2 = w3 = w4 = vec4(0.0); + c1 = c2 = c3 = c4 = c5 = c6 = c7 = c8 = vec4(0.0); + c9 = c10 = c11 = c12 = c13 = c14 = c15 = c16 = vec4(0.0); + + /* Depth */ + + d1.x = texelFetch(depthMulti, texel, 0).r; + d1.y = texelFetch(depthMulti, texel, 1).r; +#if SAMPLES > 2 + d1.z = texelFetch(depthMulti, texel, 2).r; + d1.w = texelFetch(depthMulti, texel, 3).r; +#endif +#if SAMPLES > 4 + d2.x = texelFetch(depthMulti, texel, 4).r; + d2.y = texelFetch(depthMulti, texel, 5).r; + d2.z = texelFetch(depthMulti, texel, 6).r; + d2.w = texelFetch(depthMulti, texel, 7).r; +#endif +#if SAMPLES > 8 + d3.x = 
texelFetch(depthMulti, texel, 8).r; + d3.y = texelFetch(depthMulti, texel, 9).r; + d3.z = texelFetch(depthMulti, texel, 10).r; + d3.w = texelFetch(depthMulti, texel, 11).r; + d4.x = texelFetch(depthMulti, texel, 12).r; + d4.y = texelFetch(depthMulti, texel, 13).r; + d4.z = texelFetch(depthMulti, texel, 14).r; + d4.w = texelFetch(depthMulti, texel, 15).r; +#endif + + /* COLOR */ + b1 = notEqual(d1, vec4(1.0)); + if (any(b1)) { + c1 = texelFetch(colorMulti, texel, 0); + c2 = texelFetch(colorMulti, texel, 1); #if SAMPLES > 2 - depth = min(depth, texelFetch(depthMulti, texel, 2).r); - depth = min(depth, texelFetch(depthMulti, texel, 3).r); + c3 = texelFetch(colorMulti, texel, 2); + c4 = texelFetch(colorMulti, texel, 3); #endif + w1 = vec4(b1); + } #if SAMPLES > 4 - depth = min(depth, texelFetch(depthMulti, texel, 4).r); - depth = min(depth, texelFetch(depthMulti, texel, 5).r); - depth = min(depth, texelFetch(depthMulti, texel, 6).r); - depth = min(depth, texelFetch(depthMulti, texel, 7).r); + b2 = notEqual(d2, vec4(1.0)); + if (any(b2)) { + c5 = texelFetch(colorMulti, texel, 4); + c6 = texelFetch(colorMulti, texel, 5); + c7 = texelFetch(colorMulti, texel, 6); + c8 = texelFetch(colorMulti, texel, 7); + w2 = vec4(b2); + } #endif #if SAMPLES > 8 - depth = min(depth, texelFetch(depthMulti, texel, 8).r); - depth = min(depth, texelFetch(depthMulti, texel, 9).r); - depth = min(depth, texelFetch(depthMulti, texel, 10).r); - depth = min(depth, texelFetch(depthMulti, texel, 11).r); - depth = min(depth, texelFetch(depthMulti, texel, 12).r); - depth = min(depth, texelFetch(depthMulti, texel, 13).r); - depth = min(depth, texelFetch(depthMulti, texel, 14).r); - depth = min(depth, texelFetch(depthMulti, texel, 15).r); + b3 = notEqual(d3, vec4(1.0)); + if (any(b3)) { + c9 = texelFetch(colorMulti, texel, 8); + c10 = texelFetch(colorMulti, texel, 9); + c11 = texelFetch(colorMulti, texel, 10); + c12 = texelFetch(colorMulti, texel, 11); + w3 = vec4(b3); + } + b4 = notEqual(d4, vec4(1.0)); 
+ if (any(b4)) { + c13 = texelFetch(colorMulti, texel, 12); + c14 = texelFetch(colorMulti, texel, 13); + c15 = texelFetch(colorMulti, texel, 14); + c16 = texelFetch(colorMulti, texel, 15); + w4 = vec4(b4); + } +#endif + +#if SAMPLES > 8 + d1 = min(d1, min(d3, d4)); +#endif +#if SAMPLES > 4 + d1 = min(d1, d2); +#endif +#if SAMPLES > 2 + d1.xy = min(d1.xy, d1.zw); +#endif + gl_FragDepth = min(d1.x, d1.y); + +#ifdef USE_DEPTH_WEIGHTING + c1 *= w1.x; c2 *= w1.y; c3 *= w1.z; c4 *= w1.w; + c5 *= w2.x; c6 *= w2.y; c7 *= w2.z; c8 *= w2.w; + c9 *= w3.x; c10 *= w3.y; c11 *= w3.z; c12 *= w3.w; + c13 *= w4.x; c14 *= w4.y; c15 *= w4.z; c16 *= w4.w; #endif - vec4 color = vec4(0.0); - color += texelFetch(colorMulti, texel, 0); - color += texelFetch(colorMulti, texel, 1); + c1 = c1 + c2; #if SAMPLES > 2 - color += texelFetch(colorMulti, texel, 2); - color += texelFetch(colorMulti, texel, 3); + c1 += c3 + c4; #endif #if SAMPLES > 4 - color += texelFetch(colorMulti, texel, 4); - color += texelFetch(colorMulti, texel, 5); - color += texelFetch(colorMulti, texel, 6); - color += texelFetch(colorMulti, texel, 7); + c1 += c5 + c6 + c7 + c8; #endif #if SAMPLES > 8 - color += texelFetch(colorMulti, texel, 8); - color += texelFetch(colorMulti, texel, 9); - color += texelFetch(colorMulti, texel, 10); - color += texelFetch(colorMulti, texel, 11); - color += texelFetch(colorMulti, texel, 12); - color += texelFetch(colorMulti, texel, 13); - color += texelFetch(colorMulti, texel, 14); - color += texelFetch(colorMulti, texel, 15); + c1 += c9 + c10 + c11 + c12 + c13 + c14 + c15 + c16; #endif const float inv_samples = 1.0 / float(SAMPLES); - fragColor = color * inv_samples; - gl_FragDepth = depth; + fragColor = c1 * inv_samples; } -- cgit v1.2.3