diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2019-01-25 00:01:03 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2019-01-25 17:04:18 +0300 |
commit | 71e30028240798d4ec34ee380191058b9313902b (patch) | |
tree | f97cb7b10138a3468d6a790b945ccc3e0375ce59 /source/blender/draw | |
parent | 3f6e14e667c4442bcc1f4d1fc795633cc706150f (diff) |
Workbench: Depth Of Field: Optimisation
- Compute sample positions on the CPU.
- Use 3x3 Box blur instead of 2x2.
- Implement bokeh parameters.
With this commit, the performance cost of DoF is almost negligible.
The quality is a bit lower than before but can be improved. Also, large
circles of confusion are now supported (up to 200px).
Cost is ~1.25ms on AMD Vega with a 2560p viewport (larger than full HD) and
a pretty shallow depth of field.
CoC downsampling and dilation are not used anymore for now (commented out).
Diffstat (limited to 'source/blender/draw')
5 files changed, 196 insertions, 42 deletions
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl index 08eb22d01c3..fb1a3ae120e 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl @@ -20,23 +20,12 @@ uniform sampler2D blurTex; #define dof_distance dofParams.y #define dof_invsensorsize dofParams.z -#define NUM_SAMPLES 25 - -#define THRESHOLD 1.0 #define M_PI 3.1415926535897932 /* pi */ -const float GOLDEN_ANGLE = 2.39996323; -const float MAX_BLUR_SIZE = 20.0; -const float RAD_SCALE = 2.0; // Smaller = nicer blur, larger = faster -const float MAX_COC_SIZE = 40.0; - float max_v4(vec4 v) { return max(max(v.x, v.y), max(v.z, v.w)); } #define weighted_sum(a, b, c, d, e, e_sum) ((a) * e.x + (b) * e.y + (c) * e.z + (d) * e.w) / max(1e-6, e_sum); -#define encode_signed_coc(coc) (((coc) / MAX_COC_SIZE) * 0.5 + 0.5); -#define decode_signed_coc(coc) (((coc) * 2.0 - 1.0) * MAX_COC_SIZE); - /* divide by sensor size to get the normalized size */ #define calculate_coc(zdepth) (dof_aperturesize * (dof_distance / zdepth - 1.0) * dof_invsensorsize) @@ -44,8 +33,11 @@ float max_v4(vec4 v) { return max(max(v.x, v.y), max(v.z, v.w)); } ? (nearFar.x * nearFar.y) / (z * (nearFar.x - nearFar.y) + nearFar.y) \ : (z * 2.0 - 1.0) * nearFar.y) + +const float MAX_COC_SIZE = 100.0; vec2 encode_coc(float near, float far) { return vec2(near, far) / MAX_COC_SIZE; } float decode_coc(vec2 cocs) { return max(cocs.x, cocs.y) * MAX_COC_SIZE; } +float decode_signed_coc(vec2 cocs) { return ((cocs.x > cocs.y) ? 
cocs.x : -cocs.y) * MAX_COC_SIZE; } /** * ----------------- STEP 0 ------------------ @@ -59,7 +51,7 @@ layout(location = 1) out vec2 normalizedCoc; void main() { /* Half Res pass */ - vec2 uv = (floor(gl_FragCoord.xy) * 2.0 + 0.5) * invertedViewportSize; + vec2 uv = (floor(gl_FragCoord.xy) * 2.0 + 0.5) * invertedViewportSize; ivec4 texel = ivec4(gl_FragCoord.xyxy) * 2 + ivec4(0, 0, 1, 1); @@ -150,24 +142,24 @@ void main() vec2 texel_size = 1.0 / vec2(textureSize(inputCocTex, 0)); vec2 uv = gl_FragCoord.xy * texel_size; #ifdef DILATE_VERTICAL - // vec2 cocs1 = texture(inputCocTex, uv + texel_size * vec2(-3, 0)).rg; + vec2 cocs1 = texture(inputCocTex, uv + texel_size * vec2(-3, 0)).rg; vec2 cocs2 = texture(inputCocTex, uv + texel_size * vec2(-2, 0)).rg; vec2 cocs3 = texture(inputCocTex, uv + texel_size * vec2(-1, 0)).rg; vec2 cocs4 = texture(inputCocTex, uv + texel_size * vec2( 0, 0)).rg; vec2 cocs5 = texture(inputCocTex, uv + texel_size * vec2( 1, 0)).rg; vec2 cocs6 = texture(inputCocTex, uv + texel_size * vec2( 2, 0)).rg; - // vec2 cocs7 = texture(inputCocTex, uv + texel_size * vec2( 3, 0)).rg; + vec2 cocs7 = texture(inputCocTex, uv + texel_size * vec2( 3, 0)).rg; #else /* DILATE_HORIZONTAL */ - // vec2 cocs1 = texture(inputCocTex, uv + texel_size * vec2(0, -3)).rg; + vec2 cocs1 = texture(inputCocTex, uv + texel_size * vec2(0, -3)).rg; vec2 cocs2 = texture(inputCocTex, uv + texel_size * vec2(0, -2)).rg; vec2 cocs3 = texture(inputCocTex, uv + texel_size * vec2(0, -1)).rg; vec2 cocs4 = texture(inputCocTex, uv + texel_size * vec2(0, 0)).rg; vec2 cocs5 = texture(inputCocTex, uv + texel_size * vec2(0, 1)).rg; vec2 cocs6 = texture(inputCocTex, uv + texel_size * vec2(0, 2)).rg; - // vec2 cocs7 = texture(inputCocTex, uv + texel_size * vec2(0, 3)).rg; + vec2 cocs7 = texture(inputCocTex, uv + texel_size * vec2(0, 3)).rg; #endif - dilatedCoc = max(max(cocs3, cocs4), max(max(cocs5, cocs6), cocs2)); - // dilatedCoc = max(max(max(cocs1, cocs2), max(cocs3, cocs4)), 
max(max(cocs5, cocs6), cocs7)); + // dilatedCoc = max(max(cocs3, cocs4), max(max(cocs5, cocs6), cocs2)); + dilatedCoc = max(max(max(cocs1, cocs2), max(cocs3, cocs4)), max(max(cocs5, cocs6), cocs7)); } #endif @@ -179,16 +171,24 @@ void main() #ifdef BLUR1 layout(location = 0) out vec4 blurColor; +#define NUM_SAMPLES 49 + +/* keep in sync with GlobalsUboStorage */ +layout(std140) uniform dofSamplesBlock { + vec4 samples[NUM_SAMPLES]; +}; + +#if 0 /* Spilar sampling. Better but slower */ void main() { /* Half Res pass */ - vec2 uv = gl_FragCoord.xy * invertedViewportSize * 2.0; + vec2 uv = gl_FragCoord.xy * invertedViewportSize * 2.0; vec2 size = vec2(textureSize(halfResColorTex, 0).xy); ivec2 texel = ivec2(uv * size); - vec3 color = texelFetch(halfResColorTex, texel, 0).rgb; - float coc = decode_coc(texelFetch(inputCocTex, texel, 0).rg); + vec4 color = texelFetch(halfResColorTex, texel, 0); + float coc = decode_signed_coc(texelFetch(inputCocTex, texel, 0).rg); /* TODO Ensure alignement */ vec2 max_radii = texture(maxCocTilesTex, (0.5 + floor(gl_FragCoord.xy / 8.0)) / vec2(textureSize(maxCocTilesTex, 0))).rg; @@ -196,28 +196,56 @@ void main() float center_coc = coc; float tot = 1.0; - float radius = RAD_SCALE; - for (float ang = 0.0; radius < MAX_BLUR_SIZE && radius < max_radius; ang += GOLDEN_ANGLE) { - vec2 tc = uv + vec2(cos(ang), sin(ang)) * invertedViewportSize * radius; + for (int i = 0; i < NUM_SAMPLES; ++i) { + vec2 tc = uv + samples[i].xy * invertedViewportSize * max_radius; - vec3 samp = texture(halfResColorTex, tc).rgb; - - coc = decode_coc(texture(inputCocTex, tc).rg); + vec4 samp = texture(halfResColorTex, tc); + coc = decode_signed_coc(texture(inputCocTex, tc).rg); if (coc > center_coc) { coc = clamp(abs(coc), 0.0, abs(center_coc) * 2.0); } - + float radius = max_radius * float(i + 1) / float(NUM_SAMPLES); float m = smoothstep(radius - 0.5, radius + 0.5, abs(coc)); color += mix(color / tot, samp, m); tot += 1.0; - radius += RAD_SCALE / radius; } - 
blurColor.rgb = color / tot; - blurColor.a = 1.0; + blurColor = color / tot; +} +#else +void main() +{ + /* Half Res pass */ + vec2 uv = gl_FragCoord.xy * invertedViewportSize * 2.0; + + vec2 size = vec2(textureSize(halfResColorTex, 0).xy); + ivec2 texel = ivec2(uv * size); + + float coc = decode_coc(texelFetch(inputCocTex, texel, 0).rg); + float tot = max(0.5, coc); + + vec4 color = texelFetch(halfResColorTex, texel, 0); + color *= tot; + + float max_radius = coc; + for (int i = 0; i < NUM_SAMPLES; ++i) { + vec2 tc = uv + samples[i].xy * invertedViewportSize * max_radius; + + vec4 samp = texture(halfResColorTex, tc); + + coc = decode_coc(texture(inputCocTex, tc).rg); + + float radius = samples[i].z * max_radius; + coc *= smoothstep(radius - 0.5, radius + 0.5, coc); + color += samp * coc; + tot += coc; + } + + blurColor = color / tot; } #endif +#endif /** * ----------------- STEP 3 ------------------ @@ -229,16 +257,24 @@ out vec4 finalColor; void main() { /* Half Res pass */ - vec2 pixel_size = vec2(1.0, 1.0) / vec2(textureSize(blurTex, 0).xy); + vec2 pixel_size = 1.0 / vec2(textureSize(blurTex, 0).xy); vec2 uv = gl_FragCoord.xy * pixel_size.xy; - vec2 max_radii = texture(inputCocTex, uv).rg; - /* Scale filter */ - float rad = min(max(max_radii.x, max_radii.y) * MAX_COC_SIZE, 4.0) * 0.25; - finalColor = texture(blurTex, uv + pixel_size * vec2(-0.5, -0.5) * rad); - finalColor += texture(blurTex, uv + pixel_size * vec2(-0.5, 1.5) * rad); - finalColor += texture(blurTex, uv + pixel_size * vec2( 1.5, -0.5) * rad); - finalColor += texture(blurTex, uv + pixel_size * vec2( 1.5, 1.5) * rad); - finalColor *= 0.25; + float coc = decode_coc(texture(inputCocTex, uv).rg); + /* Only use this filter if coc is > 9.0 + * since this filter is not weighted by CoC + * and can bleed a bit. */ + float rad = clamp(coc - 9.0, 0.0, 1.0); + rad *= 1.5; /* If not, it's a gaussian filter. 
*/ + finalColor = texture(blurTex, uv + pixel_size * vec2(-1.0, -1.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2(-1.0, 0.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2(-1.0, 1.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2( 0.0, -1.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2( 0.0, 0.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2( 0.0, 1.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2( 1.0, -1.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2( 1.0, 0.0) * rad); + finalColor += texture(blurTex, uv + pixel_size * vec2( 1.0, 1.0) * rad); + finalColor *= 1.0 / 9.0; } #endif @@ -252,7 +288,7 @@ void main() { /* Fullscreen pass */ vec2 pixel_size = 0.5 / vec2(textureSize(halfResColorTex, 0).xy); - vec2 uv = gl_FragCoord.xy * pixel_size; + vec2 uv = gl_FragCoord.xy * pixel_size; /* TODO MAKE SURE TO ALIGN SAMPLE POSITION TO AVOID OFFSET IN THE BOKEH */ float depth = texelFetch(sceneDepthTex, ivec2(gl_FragCoord.xy), 0).r; diff --git a/source/blender/draw/engines/workbench/workbench_data.c b/source/blender/draw/engines/workbench/workbench_data.c index 2fdec4cf8fc..22a8f51c23c 100644 --- a/source/blender/draw/engines/workbench/workbench_data.c +++ b/source/blender/draw/engines/workbench/workbench_data.c @@ -217,5 +217,6 @@ void workbench_private_data_free(WORKBENCH_PrivateData *wpd) { BLI_ghash_free(wpd->material_hash, NULL, MEM_freeN); DRW_UBO_FREE_SAFE(wpd->world_ubo); + DRW_UBO_FREE_SAFE(wpd->dof_ubo); GPU_BATCH_DISCARD_SAFE(wpd->world_clip_planes_batch); } diff --git a/source/blender/draw/engines/workbench/workbench_deferred.c b/source/blender/draw/engines/workbench/workbench_deferred.c index d1e32ea42d9..0a17907a12d 100644 --- a/source/blender/draw/engines/workbench/workbench_deferred.c +++ b/source/blender/draw/engines/workbench/workbench_deferred.c @@ -1112,6 +1112,7 @@ void workbench_deferred_draw_finish(WORKBENCH_Data *vedata) 
WORKBENCH_StorageList *stl = vedata->stl; WORKBENCH_PrivateData *wpd = stl->g_data; + /* XXX TODO(fclem) do not discard UBOS after drawing! Store them per viewport. */ workbench_private_data_free(wpd); workbench_volume_smoke_textures_free(wpd); } diff --git a/source/blender/draw/engines/workbench/workbench_effect_dof.c b/source/blender/draw/engines/workbench/workbench_effect_dof.c index 85213a7460c..b74a77bf891 100644 --- a/source/blender/draw/engines/workbench/workbench_effect_dof.c +++ b/source/blender/draw/engines/workbench/workbench_effect_dof.c @@ -46,6 +46,88 @@ static struct { extern char datatoc_workbench_effect_dof_frag_glsl[]; /* *********** Functions *********** */ + +/** + * Transform [-1..1] square to unit circle. + **/ +static void square_to_circle(float x, float y, float *r, float *T) +{ + if (x > -y) { + if (x > y) { + *r = x; + *T = (M_PI / 4.0f) * (y / x); + } + else { + *r = y; + *T = (M_PI / 4.0f) * (2 - (x / y)); + } + } + else { + if (x < y) { + *r = -x; + *T = (M_PI / 4.0f) * (4 + (y / x)); + } + else { + *r = -y; + if (y != 0) { + *T = (M_PI / 4.0f) * (6 - (x / y)); + } + else { + *T = 0.0f; + } + } + } +} + +#define KERNEL_RAD 3 +#define SAMP_LEN SQUARE(KERNEL_RAD * 2 + 1) + +static void workbench_dof_setup_samples( + struct GPUUniformBuffer **ubo, float **data, + float bokeh_sides, float bokeh_rotation, float bokeh_ratio) +{ + if (*data == NULL) { + *data = MEM_callocN(sizeof(float) * 4 * SAMP_LEN, "workbench dof samples"); + } + if (*ubo == NULL) { + *ubo = DRW_uniformbuffer_create(sizeof(float) * 4 * SAMP_LEN, NULL); + } + + float *samp = *data; + for (int i = 0; i <= KERNEL_RAD; ++i) { + for (int j = -KERNEL_RAD; j <= KERNEL_RAD; ++j) { + for (int k = -KERNEL_RAD; k <= KERNEL_RAD; ++k) { + if (abs(j) > i || abs(k) > i) { + continue; + } + if (abs(j) < i && abs(k) < i) { + continue; + } + float x = ((float)j) / KERNEL_RAD; + float y = ((float)k) / KERNEL_RAD; + + float r, T; + square_to_circle(x, y, &r, &T); + samp[2] = r; + + /* Bokeh 
shape parametrisation */ + if (bokeh_sides > 1.0f) { + float denom = T - (2.0 * M_PI / bokeh_sides) * floorf((bokeh_sides * T + M_PI) / (2.0 * M_PI)); + r *= cosf(M_PI / bokeh_sides) / cosf(denom); + } + + T += bokeh_rotation; + + samp[0] = r * cosf(T) * bokeh_ratio; + samp[1] = r * sinf(T); + samp += 4; + } + } + } + + DRW_uniformbuffer_update(*ubo, *data); +} + void workbench_dof_engine_init(WORKBENCH_Data *vedata, Object *camera) { WORKBENCH_StorageList *stl = vedata->stl; @@ -93,22 +175,27 @@ void workbench_dof_engine_init(WORKBENCH_Data *vedata, Object *camera) const float *full_size = DRW_viewport_size_get(); int size[2] = {full_size[0] / 2, full_size[1] / 2}; +#if 0 /* NOTE: We Ceil here in order to not miss any edge texel if using a NPO2 texture. */ int shrink_h_size[2] = {ceilf(size[0] / 8.0f), size[1]}; int shrink_w_size[2] = {shrink_h_size[0], ceilf(size[1] / 8.0f)}; +#endif wpd->half_res_col_tx = DRW_texture_pool_query_2D(size[0], size[1], GPU_R11F_G11F_B10F, &draw_engine_workbench_solid); wpd->dof_blur_tx = DRW_texture_pool_query_2D(size[0], size[1], GPU_R11F_G11F_B10F, &draw_engine_workbench_solid); wpd->coc_halfres_tx = DRW_texture_pool_query_2D(size[0], size[1], GPU_RG8, &draw_engine_workbench_solid); +#if 0 wpd->coc_temp_tx = DRW_texture_pool_query_2D(shrink_h_size[0], shrink_h_size[1], GPU_RG8, &draw_engine_workbench_solid); wpd->coc_tiles_tx[0] = DRW_texture_pool_query_2D(shrink_w_size[0], shrink_w_size[1], GPU_RG8, &draw_engine_workbench_solid); wpd->coc_tiles_tx[1] = DRW_texture_pool_query_2D(shrink_w_size[0], shrink_w_size[1], GPU_RG8, &draw_engine_workbench_solid); +#endif GPU_framebuffer_ensure_config(&fbl->dof_downsample_fb, { GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(wpd->half_res_col_tx), GPU_ATTACHMENT_TEXTURE(wpd->coc_halfres_tx), }); +#if 0 GPU_framebuffer_ensure_config(&fbl->dof_coc_tile_h_fb, { GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(wpd->coc_temp_tx), @@ -121,6 +208,7 @@ void workbench_dof_engine_init(WORKBENCH_Data *vedata, 
Object *camera) GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(wpd->coc_tiles_tx[1]), }); +#endif GPU_framebuffer_ensure_config(&fbl->dof_blur1_fb, { GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(wpd->dof_blur_tx), @@ -166,6 +254,21 @@ void workbench_dof_engine_init(WORKBENCH_Data *vedata, Object *camera) wpd->dof_near_far[0] = -cam->clipsta; wpd->dof_near_far[1] = -cam->clipend; + + float blades = cam->gpu_dof.num_blades; + float rotation = cam->gpu_dof.rotation; + float ratio = 1.0f / cam->gpu_dof.ratio; + + if (wpd->dof_ubo == NULL || + blades != wpd->dof_blades || + rotation != wpd->dof_rotation || + ratio != wpd->dof_ratio) + { + wpd->dof_blades = blades; + wpd->dof_rotation = rotation; + wpd->dof_ratio = ratio; + workbench_dof_setup_samples(&wpd->dof_ubo, &stl->dof_ubo_data, blades, rotation, ratio); + } } wpd->dof_enabled = true; @@ -202,6 +305,7 @@ void workbench_dof_create_pass(WORKBENCH_Data *vedata, GPUTexture **dof_input) DRW_shgroup_uniform_vec2(grp, "nearFar", wpd->dof_near_far, 1); DRW_shgroup_call_add(grp, quad, NULL); } +#if 0 { DRWShadingGroup *grp = DRW_shgroup_create(e_data.effect_dof_flatten_h_sh, psl->dof_flatten_h_ps); DRW_shgroup_uniform_texture(grp, "inputCocTex", wpd->coc_halfres_tx); @@ -222,10 +326,11 @@ void workbench_dof_create_pass(WORKBENCH_Data *vedata, GPUTexture **dof_input) DRW_shgroup_uniform_texture(grp, "inputCocTex", wpd->coc_tiles_tx[1]); DRW_shgroup_call_add(grp, quad, NULL); } +#endif { DRWShadingGroup *grp = DRW_shgroup_create(e_data.effect_dof_blur1_sh, psl->dof_blur1_ps); + DRW_shgroup_uniform_block(grp, "dofSamplesBlock", wpd->dof_ubo); DRW_shgroup_uniform_texture(grp, "inputCocTex", wpd->coc_halfres_tx); - DRW_shgroup_uniform_texture(grp, "maxCocTilesTex", wpd->coc_tiles_tx[0]); DRW_shgroup_uniform_texture(grp, "halfResColorTex", wpd->half_res_col_tx); DRW_shgroup_uniform_vec2(grp, "invertedViewportSize", DRW_viewport_invert_size_get(), 1); DRW_shgroup_call_add(grp, quad, NULL); @@ -271,9 +376,12 @@ void 
workbench_dof_draw_pass(WORKBENCH_Data *vedata) return; } + DRW_stats_group_start("Depth Of Field"); + GPU_framebuffer_bind(fbl->dof_downsample_fb); DRW_draw_pass(psl->dof_down_ps); +#if 0 GPU_framebuffer_bind(fbl->dof_coc_tile_h_fb); DRW_draw_pass(psl->dof_flatten_h_ps); @@ -285,6 +393,7 @@ void workbench_dof_draw_pass(WORKBENCH_Data *vedata) GPU_framebuffer_bind(fbl->dof_coc_tile_v_fb); DRW_draw_pass(psl->dof_dilate_h_ps); +#endif GPU_framebuffer_bind(fbl->dof_blur1_fb); DRW_draw_pass(psl->dof_blur1_ps); @@ -294,4 +403,6 @@ void workbench_dof_draw_pass(WORKBENCH_Data *vedata) GPU_framebuffer_bind(fbl->color_only_fb); DRW_draw_pass(psl->dof_resolve_ps); + + DRW_stats_group_end(); } diff --git a/source/blender/draw/engines/workbench/workbench_private.h b/source/blender/draw/engines/workbench/workbench_private.h index 62622b56805..ef114370587 100644 --- a/source/blender/draw/engines/workbench/workbench_private.h +++ b/source/blender/draw/engines/workbench/workbench_private.h @@ -115,6 +115,7 @@ typedef struct WORKBENCH_TextureList { typedef struct WORKBENCH_StorageList { struct WORKBENCH_PrivateData *g_data; struct WORKBENCH_EffectInfo *effects; + float *dof_ubo_data; } WORKBENCH_StorageList; typedef struct WORKBENCH_PassList { @@ -239,10 +240,14 @@ typedef struct WORKBENCH_PrivateData { struct GPUTexture *coc_halfres_tx; struct GPUTexture *coc_temp_tx; struct GPUTexture *coc_tiles_tx[2]; + struct GPUUniformBuffer *dof_ubo; float dof_aperturesize; float dof_distance; float dof_invsensorsize; float dof_near_far[2]; + float dof_blades; + float dof_rotation; + float dof_ratio; bool dof_enabled; /* Color Management */ |