From 12570c737356a06e21052cdc767b26ed7584a948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Mon, 23 Apr 2018 21:08:11 +0200 Subject: GPUShader: Add GPU_SHADER_2D_IMAGE_MULTISAMPLE_2/4/8/16 This shader is used instead of blitting back and forth to a single-sample buffer. This means it resolves the color and depth samples and outputs a fragment which can be depth tested and blended on top of an existing framebuffer. We use static shader variations with manual loop unrolling for performance reasons. In my tests I get 25% more performance with an Intel integrated GPU and a 75% performance gain with a dedicated NVIDIA card compared to a single shader with a uniform for the sample count. --- source/blender/gpu/CMakeLists.txt | 1 + source/blender/gpu/GPU_shader.h | 4 ++ source/blender/gpu/intern/gpu_shader.c | 17 ++++++ .../gpu_shader_image_multisample_resolve_frag.glsl | 67 ++++++++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 575dff1e811..f733e37ff51 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -158,6 +158,7 @@ data_to_c_simple(shaders/gpu_shader_image_varying_color_frag.glsl SRC) data_to_c_simple(shaders/gpu_shader_image_depth_linear_frag.glsl SRC) data_to_c_simple(shaders/gpu_shader_image_depth_copy_frag.glsl SRC) data_to_c_simple(shaders/gpu_shader_image_interlace_frag.glsl SRC) +data_to_c_simple(shaders/gpu_shader_image_multisample_resolve_frag.glsl SRC) data_to_c_simple(shaders/gpu_shader_3D_image_vert.glsl SRC) data_to_c_simple(shaders/gpu_shader_3D_vert.glsl SRC) data_to_c_simple(shaders/gpu_shader_3D_normal_vert.glsl SRC) diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h index 59bf92aa672..6d75447c8a3 100644 --- a/source/blender/gpu/GPU_shader.h +++ b/source/blender/gpu/GPU_shader.h @@ -119,6 +119,10 @@ 
typedef enum GPUBuiltinShader { GPU_SHADER_2D_IMAGE_ALPHA, GPU_SHADER_2D_IMAGE_RECT_COLOR, GPU_SHADER_2D_IMAGE_MULTI_RECT_COLOR, + GPU_SHADER_2D_IMAGE_MULTISAMPLE_2, + GPU_SHADER_2D_IMAGE_MULTISAMPLE_4, + GPU_SHADER_2D_IMAGE_MULTISAMPLE_8, + GPU_SHADER_2D_IMAGE_MULTISAMPLE_16, GPU_SHADER_2D_CHECKER, GPU_SHADER_2D_DIAG_STRIPES, /* for simple 3D drawing */ diff --git a/source/blender/gpu/intern/gpu_shader.c b/source/blender/gpu/intern/gpu_shader.c index 34dee37001e..26267b95f59 100644 --- a/source/blender/gpu/intern/gpu_shader.c +++ b/source/blender/gpu/intern/gpu_shader.c @@ -90,6 +90,7 @@ extern char datatoc_gpu_shader_image_mask_uniform_color_frag_glsl[]; extern char datatoc_gpu_shader_image_modulate_alpha_frag_glsl[]; extern char datatoc_gpu_shader_image_depth_linear_frag_glsl[]; extern char datatoc_gpu_shader_image_depth_copy_frag_glsl[]; +extern char datatoc_gpu_shader_image_multisample_resolve_frag_glsl[]; extern char datatoc_gpu_shader_3D_vert_glsl[]; extern char datatoc_gpu_shader_3D_normal_vert_glsl[]; extern char datatoc_gpu_shader_3D_flat_color_vert_glsl[]; @@ -690,6 +691,10 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader) datatoc_gpu_shader_image_depth_linear_frag_glsl }, [GPU_SHADER_3D_IMAGE_DEPTH_COPY] = { datatoc_gpu_shader_3D_image_vert_glsl, datatoc_gpu_shader_image_depth_copy_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_2] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_4] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_8] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_16] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, [GPU_SHADER_2D_IMAGE_INTERLACE] = { datatoc_gpu_shader_2D_image_vert_glsl, datatoc_gpu_shader_image_interlace_frag_glsl 
}, @@ -830,6 +835,18 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader) /* just a few special cases */ const char *defines = NULL; switch (shader) { + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_2: /* each multisample variant picks its SAMPLES value through `defines` */ + defines = "#define SAMPLES 2\n"; + break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_4: + defines = "#define SAMPLES 4\n"; + break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_8: + defines = "#define SAMPLES 8\n"; + break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_16: + defines = "#define SAMPLES 16\n"; + break; case GPU_SHADER_2D_WIDGET_BASE_INST: case GPU_SHADER_2D_NODELINK_INST: defines = "#define USE_INSTANCE\n"; diff --git a/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl b/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl new file mode 100644 index 00000000000..de1fd8b6b58 --- /dev/null +++ b/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl @@ -0,0 +1,67 @@ +/* Multisample resolve: per fragment, writes the average of the fetched colorMulti samples to fragColor and the minimum of the fetched depthMulti samples (seeded with 1.0) to gl_FragDepth. SAMPLES must be #defined ahead of this source. NOTE(review): the #if tiers below fetch 2, 4, 8 or 16 samples while the average divides by SAMPLES, so this presumably expects SAMPLES to be exactly 2, 4, 8 or 16 - confirm. */ +uniform sampler2DMS depthMulti; +uniform sampler2DMS colorMulti; + +out vec4 fragColor; + +#if SAMPLES > 16 /* the unrolled fetches below stop at sample index 15 */ +#error "Too many samples" +#endif + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); /* integer texel coordinate taken from this fragment's window position */ + + float depth = 1.0; /* seed for the min() reduction, so the result never exceeds 1.0 */ + depth = min(depth, texelFetch(depthMulti, texel, 0).r); /* samples 0 and 1 are fetched unconditionally */ + depth = min(depth, texelFetch(depthMulti, texel, 1).r); +#if SAMPLES > 2 /* samples 2-3 */ + depth = min(depth, texelFetch(depthMulti, texel, 2).r); + depth = min(depth, texelFetch(depthMulti, texel, 3).r); +#endif +#if SAMPLES > 4 /* samples 4-7 */ + depth = min(depth, texelFetch(depthMulti, texel, 4).r); + depth = min(depth, texelFetch(depthMulti, texel, 5).r); + depth = min(depth, texelFetch(depthMulti, texel, 6).r); + depth = min(depth, texelFetch(depthMulti, texel, 7).r); +#endif +#if SAMPLES > 8 /* samples 8-15 */ + depth = min(depth, texelFetch(depthMulti, texel, 8).r); + depth = min(depth, texelFetch(depthMulti, texel, 9).r); + depth = min(depth, texelFetch(depthMulti, texel, 10).r); + depth = min(depth, texelFetch(depthMulti, texel, 11).r); + depth = min(depth, texelFetch(depthMulti, texel, 12).r); + 
depth = min(depth, texelFetch(depthMulti, texel, 13).r); + depth = min(depth, texelFetch(depthMulti, texel, 14).r); + depth = min(depth, texelFetch(depthMulti, texel, 15).r); +#endif + + vec4 color = vec4(0.0); /* running sum of the color samples, normalized at the end */ + color += texelFetch(colorMulti, texel, 0); /* same unconditional pair and #if tiers as the depth reduction */ + color += texelFetch(colorMulti, texel, 1); +#if SAMPLES > 2 /* samples 2-3 */ + color += texelFetch(colorMulti, texel, 2); + color += texelFetch(colorMulti, texel, 3); +#endif +#if SAMPLES > 4 /* samples 4-7 */ + color += texelFetch(colorMulti, texel, 4); + color += texelFetch(colorMulti, texel, 5); + color += texelFetch(colorMulti, texel, 6); + color += texelFetch(colorMulti, texel, 7); +#endif +#if SAMPLES > 8 /* samples 8-15 */ + color += texelFetch(colorMulti, texel, 8); + color += texelFetch(colorMulti, texel, 9); + color += texelFetch(colorMulti, texel, 10); + color += texelFetch(colorMulti, texel, 11); + color += texelFetch(colorMulti, texel, 12); + color += texelFetch(colorMulti, texel, 13); + color += texelFetch(colorMulti, texel, 14); + color += texelFetch(colorMulti, texel, 15); +#endif + + const float inv_samples = 1.0 / float(SAMPLES); /* divisor comes from the SAMPLES macro, not from the number of fetches above */ + + fragColor = color * inv_samples; /* summed color scaled to an average */ + gl_FragDepth = depth; /* minimum of the fetched depth samples */ +} -- cgit v1.2.3