diff options
Diffstat (limited to 'source/blender/draw/engines/eevee')
22 files changed, 3328 insertions, 555 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_depth_of_field.c b/source/blender/draw/engines/eevee/eevee_depth_of_field.c index 92ba526c67c..8c0a44b2c9b 100644 --- a/source/blender/draw/engines/eevee/eevee_depth_of_field.c +++ b/source/blender/draw/engines/eevee/eevee_depth_of_field.c @@ -20,6 +20,13 @@ * \ingroup draw_engine * * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give reference quality + * depth of field. But this needs many samples to hide the undersampling. + * - The second one is a post-processing based one. It follows the implementation described in + * the presentation "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. There are some + * difference with our actual implementation that prioritize quality. */ #include "DRW_render.h" @@ -40,10 +47,158 @@ #include "GPU_texture.h" #include "eevee_private.h" +#define CAMERA_JITTER_RING_DENSITY 6 + +static float coc_radius_from_camera_depth(bool is_ortho, EEVEE_EffectsInfo *fx, float camera_depth) +{ + float multiplier = fx->dof_coc_params[0]; + float bias = fx->dof_coc_params[1]; + if (multiplier == 0.0f || bias == 0.0f) { + return 0.0f; + } + else if (is_ortho) { + return (camera_depth + multiplier / bias) * multiplier; + } + else { + return multiplier / camera_depth - bias; + } +} + +static float polygon_sides_length(float sides_count) +{ + return 2.0 * sin(M_PI / sides_count); +} + +/* Returns intersection ratio between the radius edge at theta and the polygon edge. + * Start first corners at theta == 0. */ +static float circle_to_polygon_radius(float sides_count, float theta) +{ + /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide 36). 
*/ + float side_angle = (2.0f * M_PI) / sides_count; + return cosf(side_angle * 0.5f) / + cosf(theta - side_angle * floorf((sides_count * theta + M_PI) / (2.0f * M_PI))); +} + +/* Remap input angle to have homogenous spacing of points along a polygon edge. + * Expect theta to be in [0..2pi] range. */ +static float circle_to_polygon_angle(float sides_count, float theta) +{ + float side_angle = (2.0f * M_PI) / sides_count; + float halfside_angle = side_angle * 0.5f; + float side = floorf(theta / side_angle); + /* Length of segment from center to the middle of polygon side. */ + float adjacent = circle_to_polygon_radius(sides_count, 0.0f); + + /* This is the relative position of the sample on the polygon half side. */ + float local_theta = theta - side * side_angle; + float ratio = (local_theta - halfside_angle) / halfside_angle; + + float halfside_len = polygon_sides_length(sides_count) * 0.5f; + float opposite = ratio * halfside_len; + + /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. 
*/ + float final_local_theta = atanf(opposite / adjacent); + + return side * side_angle + final_local_theta; +} + +static int dof_jitter_total_sample_count(int ring_density, int ring_count) +{ + return ((ring_count * ring_count + ring_count) / 2) * ring_density + 1; +} + +bool EEVEE_depth_of_field_jitter_get(EEVEE_EffectsInfo *fx, + float r_jitter[2], + float *r_focus_distance) +{ + if (fx->dof_jitter_radius == 0.0f) { + return false; + } + + int ring_density = CAMERA_JITTER_RING_DENSITY; + int ring_count = fx->dof_jitter_ring_count; + int sample_count = dof_jitter_total_sample_count(ring_density, ring_count); + + int s = fx->taa_current_sample - 1; + + int ring = 0; + int ring_sample_count = 1; + int ring_sample = 1; + + s = s * (ring_density - 1); + s = s % sample_count; + + int samples_passed = 1; + while (s >= samples_passed) { + ring++; + ring_sample_count = ring * ring_density; + ring_sample = s - samples_passed; + ring_sample = (ring_sample + 1) % ring_sample_count; + samples_passed += ring_sample_count; + } + + r_jitter[0] = (float)ring / ring_count; + r_jitter[1] = (float)ring_sample / ring_sample_count; + + { + /* Bokeh shape parametrisation */ + float r = r_jitter[0]; + float T = r_jitter[1] * 2.0f * M_PI; + + if (fx->dof_jitter_blades >= 3.0f) { + T = circle_to_polygon_angle(fx->dof_jitter_blades, T); + r *= circle_to_polygon_radius(fx->dof_jitter_blades, T); + } + + T += fx->dof_bokeh_rotation; + + r_jitter[0] = r * cosf(T); + r_jitter[1] = r * sinf(T); + + mul_v2_v2(r_jitter, fx->dof_bokeh_aniso); + } + + mul_v2_fl(r_jitter, fx->dof_jitter_radius); + + *r_focus_distance = fx->dof_jitter_focus; + return true; +} + +int EEVEE_depth_of_field_sample_count_get(EEVEE_EffectsInfo *fx, + int sample_count, + int *r_ring_count) +{ + if (fx->dof_jitter_radius == 0.0f) { + if (r_ring_count != NULL) { + *r_ring_count = 0; + } + return 1; + } + + if (sample_count == TAA_MAX_SAMPLE) { + /* Special case for viewport continuous rendering. 
We clamp to a max sample to avoid the + * jittered dof never converging. */ + sample_count = 1024; + } + /* Inversion of dof_jitter_total_sample_count. */ + float x = 2.0f * (sample_count - 1.0f) / CAMERA_JITTER_RING_DENSITY; + /* Solving polynomial. We only search positive solution. */ + float discriminant = 1.0f + 4.0f * x; + int ring_count = ceilf(0.5f * (sqrt(discriminant) - 1.0f)); + + sample_count = dof_jitter_total_sample_count(CAMERA_JITTER_RING_DENSITY, ring_count); + + if (r_ring_count != NULL) { + *r_ring_count = ring_count; + } + return sample_count; +} + int EEVEE_depth_of_field_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata, Object *camera) { + EEVEE_TextureList *txl = vedata->txl; EEVEE_StorageList *stl = vedata->stl; EEVEE_FramebufferList *fbl = vedata->fbl; EEVEE_EffectsInfo *effects = stl->effects; @@ -57,59 +212,30 @@ int EEVEE_depth_of_field_init(EEVEE_ViewLayerData *UNUSED(sldata), RegionView3D *rv3d = draw_ctx->rv3d; const float *viewport_size = DRW_viewport_size_get(); - /* Retrieve Near and Far distance */ - effects->dof_near_far[0] = -cam->clip_start; - effects->dof_near_far[1] = -cam->clip_end; - - int buffer_size[2] = {(int)viewport_size[0] / 2, (int)viewport_size[1] / 2}; + effects->dof_hq_slight_focus = (scene_eval->eevee.flag & SCE_EEVEE_DOF_HQ_SLIGHT_FOCUS) != 0; - buffer_size[0] = max_ii(1, buffer_size[0]); - buffer_size[1] = max_ii(1, buffer_size[1]); - - eGPUTextureFormat down_format = DRW_state_draw_background() ? 
GPU_R11F_G11F_B10F : GPU_RGBA16F; - - effects->dof_down_near = DRW_texture_pool_query_2d( - buffer_size[0], buffer_size[1], down_format, &draw_engine_eevee_type); - effects->dof_down_far = DRW_texture_pool_query_2d( - buffer_size[0], buffer_size[1], down_format, &draw_engine_eevee_type); - effects->dof_coc = DRW_texture_pool_query_2d( - buffer_size[0], buffer_size[1], GPU_RG16F, &draw_engine_eevee_type); - - GPU_framebuffer_ensure_config(&fbl->dof_down_fb, - {GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(effects->dof_down_near), - GPU_ATTACHMENT_TEXTURE(effects->dof_down_far), - GPU_ATTACHMENT_TEXTURE(effects->dof_coc)}); - - /* Go full 32bits for rendering and reduce the color artifacts. */ - eGPUTextureFormat fb_format = DRW_state_is_image_render() ? GPU_RGBA32F : GPU_RGBA16F; - - effects->dof_blur = DRW_texture_pool_query_2d( - buffer_size[0] * 2, buffer_size[1], fb_format, &draw_engine_eevee_type); - - GPU_framebuffer_ensure_config(&fbl->dof_scatter_fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(effects->dof_blur), - }); - - if (!DRW_state_draw_background()) { - effects->dof_blur_alpha = DRW_texture_pool_query_2d( - buffer_size[0] * 2, buffer_size[1], GPU_R32F, &draw_engine_eevee_type); - GPU_framebuffer_texture_attach(fbl->dof_scatter_fb, effects->dof_blur_alpha, 1, 0); - } + /* Retrieve Near and Far distance */ + effects->dof_coc_near_dist = -cam->clip_start; + effects->dof_coc_far_dist = -cam->clip_end; /* Parameters */ - /* TODO UI Options */ + bool is_ortho = cam->type == CAM_ORTHO; float fstop = cam->dof.aperture_fstop; float blades = cam->dof.aperture_blades; float rotation = cam->dof.aperture_rotation; - float ratio = 1.0f / cam->dof.aperture_ratio; + float ratio = 1.0f / max_ff(cam->dof.aperture_ratio, 0.00001f); float sensor = BKE_camera_sensor_size(cam->sensor_fit, cam->sensor_x, cam->sensor_y); float focus_dist = BKE_camera_object_dof_distance(camera); float focal_len = cam->lens; - const float scale_camera = 0.001f; + if (is_ortho) { + /* 
(fclem) A bit of black magic here. I don't know if this is correct. */ + fstop *= 1.3f; + focal_len = 1.0f; + sensor = cam->ortho_scale; + } + + const float scale_camera = (is_ortho) ? 1.0 : 0.001f; /* we want radius here for the aperture number */ float aperture = 0.5f * scale_camera * focal_len / fstop; float focal_len_scaled = scale_camera * focal_len; @@ -119,93 +245,724 @@ int EEVEE_depth_of_field_init(EEVEE_ViewLayerData *UNUSED(sldata), sensor_scaled *= rv3d->viewcamtexcofac[0]; } - effects->dof_params[1] = aperture * fabsf(focal_len_scaled / (focus_dist - focal_len_scaled)); - effects->dof_params[1] *= viewport_size[0] / sensor_scaled; - effects->dof_params[0] = -focus_dist * effects->dof_params[1]; + if (ratio > 1.0) { + /* If ratio is scaling the bokeh outwards, we scale the aperture so that the gather + * kernel size will encompass the maximum axis. */ + aperture *= ratio; + } + + effects->dof_coc_params[1] = -aperture * + fabsf(focal_len_scaled / (focus_dist - focal_len_scaled)); + /* FIXME(fclem) This is broken for vertically fit sensor. */ + effects->dof_coc_params[1] *= viewport_size[0] / sensor_scaled; + + if ((scene_eval->eevee.flag & SCE_EEVEE_DOF_JITTER) != 0) { + effects->dof_jitter_radius = effects->dof_coc_params[1]; + effects->dof_jitter_focus = focus_dist; + effects->dof_jitter_blades = blades; + + int sample_count = EEVEE_temporal_sampling_sample_count_get(scene_eval, stl); + sample_count = EEVEE_depth_of_field_sample_count_get( + effects, sample_count, &effects->dof_jitter_ring_count); + + if (effects->dof_jitter_ring_count == 0) { + effects->dof_jitter_radius = 0.0f; + } + else { + /* Compute a minimal overblur radius to fill the gaps between the samples. + * This is just the simplified form of dividing the area of the bokeh + * by the number of samples. 
*/ + float minimal_overblur = 1.0f / sqrtf(sample_count); + float user_overblur = scene_eval->eevee.bokeh_overblur / 100.0f; + + effects->dof_coc_params[1] *= minimal_overblur + user_overblur; + /* Avoid dilating the shape. Overblur only soften. */ + effects->dof_jitter_radius -= effects->dof_coc_params[1]; + } + } + else { + effects->dof_jitter_radius = 0.0f; + } + + if (is_ortho) { + /* (fclem) A bit of black magic here. Needed to match cycles. */ + effects->dof_coc_params[1] *= 0.225; + } + + effects->dof_coc_params[0] = -focus_dist * effects->dof_coc_params[1]; + + effects->dof_bokeh_blades = blades; + effects->dof_bokeh_rotation = rotation; + effects->dof_bokeh_aniso[0] = min_ff(ratio, 1.0f); + effects->dof_bokeh_aniso[1] = min_ff(1.0f / ratio, 1.0f); + effects->dof_bokeh_max_size = scene_eval->eevee.bokeh_max_size; + + copy_v2_v2(effects->dof_bokeh_aniso_inv, effects->dof_bokeh_aniso); + invert_v2(effects->dof_bokeh_aniso_inv); + + effects->dof_scatter_color_threshold = scene_eval->eevee.bokeh_threshold; + effects->dof_scatter_neighbor_max_color = scene_eval->eevee.bokeh_neighbor_max; + effects->dof_denoise_factor = clamp_f(scene_eval->eevee.bokeh_denoise_fac, 0.0f, 1.0f); - effects->dof_bokeh[0] = rotation; - effects->dof_bokeh[1] = ratio; - effects->dof_bokeh[2] = scene_eval->eevee.bokeh_max_size; + float max_abs_fg_coc, max_abs_bg_coc; + if (is_ortho) { + max_abs_fg_coc = fabsf(coc_radius_from_camera_depth(true, effects, -cam->clip_start)); + max_abs_bg_coc = fabsf(coc_radius_from_camera_depth(true, effects, -cam->clip_end)); + } + else { + max_abs_fg_coc = fabsf(coc_radius_from_camera_depth(false, effects, -cam->clip_start)); + /* Background is at infinity so maximum CoC is the limit of the function at -inf. */ + max_abs_bg_coc = fabsf(effects->dof_coc_params[1]); + } - /* Precompute values to save instructions in fragment shader. */ - effects->dof_bokeh_sides[0] = blades; - effects->dof_bokeh_sides[1] = blades > 0.0f ? 
2.0f * M_PI / blades : 0.0f; - effects->dof_bokeh_sides[2] = blades / (2.0f * M_PI); - effects->dof_bokeh_sides[3] = blades > 0.0f ? cosf(M_PI / blades) : 0.0f; + float max_coc = max_ff(max_abs_bg_coc, max_abs_fg_coc); + /* Clamp with user defined max. */ + effects->dof_fx_max_coc = min_ff(scene_eval->eevee.bokeh_max_size, max_coc); + + if (effects->dof_fx_max_coc < 0.5f) { + return 0; + } return EFFECT_DOF | EFFECT_POST_BUFFER; } + effects->dof_jitter_radius = 0.0f; + /* Cleanup to release memory */ - GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_down_fb); - GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_scatter_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_setup_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_flatten_tiles_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_dilate_tiles_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_reduce_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_reduce_copy_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_gather_fg_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_gather_bg_fb); + GPU_FRAMEBUFFER_FREE_SAFE(fbl->dof_scatter_bg_fb); + DRW_TEXTURE_FREE_SAFE(txl->dof_reduced_color); + DRW_TEXTURE_FREE_SAFE(txl->dof_reduced_coc); return 0; } -void EEVEE_depth_of_field_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata) +#define WITH_FILTERING (GPU_SAMPLER_MIPMAP | GPU_SAMPLER_FILTER) +#define NO_FILTERING GPU_SAMPLER_MIPMAP +#define COLOR_FORMAT fx->dof_color_format +#define FG_TILE_FORMAT GPU_RGBA16F +#define BG_TILE_FORMAT GPU_R11F_G11F_B10F + +/** + * Create bokeh texture. 
+ * + * When both anisotropy factors are 1.0 and the blade count is 0, no pass is created and the + * three bokeh LUT texture pointers are set to NULL. Otherwise the `dof_bokeh` pass is created and + * the gather, scatter and resolve LUTs (DOF_BOKEH_LUT_SIZE squared, queried from the texture + * pool) are attached to `fbl->dof_bokeh_fb`. + **/ +static void dof_bokeh_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) +{ + if ((fx->dof_bokeh_aniso[0] == 1.0f) && (fx->dof_bokeh_aniso[1] == 1.0f) && + (fx->dof_bokeh_blades == 0.0)) { + fx->dof_bokeh_gather_lut_tx = NULL; + fx->dof_bokeh_scatter_lut_tx = NULL; + fx->dof_bokeh_resolve_lut_tx = NULL; + return; + } + + void *owner = (void *)&EEVEE_depth_of_field_init; + int res[2] = {DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE}; + + DRW_PASS_CREATE(psl->dof_bokeh, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_bokeh_get(); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_bokeh); + DRW_shgroup_uniform_float_copy(grp, "bokehSides", fx->dof_bokeh_blades); + DRW_shgroup_uniform_float_copy(grp, "bokehRotation", fx->dof_bokeh_rotation); + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropyInv", fx->dof_bokeh_aniso_inv); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + fx->dof_bokeh_gather_lut_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_RG16F, owner); + fx->dof_bokeh_scatter_lut_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner); + fx->dof_bokeh_resolve_lut_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner); + + GPU_framebuffer_ensure_config(&fbl->dof_bokeh_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_bokeh_gather_lut_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_bokeh_scatter_lut_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_bokeh_resolve_lut_tx), + }); +} + +/** + * Outputs halfResColorBuffer and halfResCocBuffer. 
+ **/ +static void dof_setup_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) { - EEVEE_PassList *psl = vedata->psl; - EEVEE_StorageList *stl = vedata->stl; - EEVEE_EffectsInfo *effects = stl->effects; DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); - if ((effects->enabled_effects & EFFECT_DOF) != 0) { - /** Depth of Field algorithm - * - * Overview : - * - Down-sample the color buffer into 2 buffers weighted with - * CoC values. Also output CoC into a texture. - * - Shoot quads for every pixel and expand it depending on the CoC. - * Do one pass for near Dof and one pass for far Dof. - * - Finally composite the 2 blurred buffers with the original render. - */ - DRWShadingGroup *grp; - struct GPUBatch *quad = DRW_cache_fullscreen_quad_get(); - const bool use_alpha = !DRW_state_draw_background(); - - DRW_PASS_CREATE(psl->dof_down, DRW_STATE_WRITE_COLOR); - - grp = DRW_shgroup_create(EEVEE_shaders_depth_of_field_downsample_get(use_alpha), - psl->dof_down); - DRW_shgroup_uniform_texture_ref(grp, "colorBuffer", &effects->source_buffer); - DRW_shgroup_uniform_texture_ref(grp, "depthBuffer", &dtxl->depth); - DRW_shgroup_uniform_vec2(grp, "nearFar", effects->dof_near_far, 1); - DRW_shgroup_uniform_vec2(grp, "dofParams", effects->dof_params, 1); - DRW_shgroup_call(grp, quad, NULL); - - DRW_PASS_CREATE(psl->dof_scatter, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL); - - /* This create an empty batch of N triangles to be positioned - * by the vertex shader 0.4ms against 6ms with instancing */ - const float *viewport_size = DRW_viewport_size_get(); - const int sprite_len = ((int)viewport_size[0] / 2) * - ((int)viewport_size[1] / 2); /* brackets matters */ - grp = DRW_shgroup_create(EEVEE_shaders_depth_of_field_scatter_get(use_alpha), - psl->dof_scatter); - DRW_shgroup_uniform_texture_ref(grp, "nearBuffer", &effects->dof_down_near); - DRW_shgroup_uniform_texture_ref(grp, "farBuffer", &effects->dof_down_far); - 
DRW_shgroup_uniform_texture_ref(grp, "cocBuffer", &effects->dof_coc); - DRW_shgroup_uniform_vec4(grp, "bokehParams", effects->dof_bokeh, 2); + void *owner = (void *)&EEVEE_depth_of_field_init; + const float *fullres = DRW_viewport_size_get(); + int res[2] = {divide_ceil_u(fullres[0], 2), divide_ceil_u(fullres[1], 2)}; + + DRW_PASS_CREATE(psl->dof_setup, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_setup_get(); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_setup); + DRW_shgroup_uniform_texture_ref_ex(grp, "colorBuffer", &fx->source_buffer, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "depthBuffer", &dtxl->depth, NO_FILTERING); + DRW_shgroup_uniform_vec4_copy(grp, "cocParams", fx->dof_coc_params); + DRW_shgroup_uniform_float_copy(grp, "bokehMaxSize", fx->dof_bokeh_max_size); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + fx->dof_half_res_color_tx = DRW_texture_pool_query_2d(UNPACK2(res), COLOR_FORMAT, owner); + fx->dof_half_res_coc_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_RG16F, owner); + + GPU_framebuffer_ensure_config(&fbl->dof_setup_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_half_res_color_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_half_res_coc_tx), + }); +} + +/** + * Ouputs min & max coc in each 8x8 half res pixel tiles (so 1/16th of fullres). 
+ **/ +static void dof_flatten_tiles_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) +{ + void *owner = (void *)&EEVEE_depth_of_field_init; + const float *fullres = DRW_viewport_size_get(); + int res[2] = {divide_ceil_u(fullres[0], DOF_TILE_DIVISOR), + divide_ceil_u(fullres[1], DOF_TILE_DIVISOR)}; + + DRW_PASS_CREATE(psl->dof_flatten_tiles, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_flatten_tiles_get(); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_flatten_tiles); + DRW_shgroup_uniform_texture_ref_ex( + grp, "halfResCocBuffer", &fx->dof_half_res_coc_tx, NO_FILTERING); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + fx->dof_coc_tiles_fg_tx = DRW_texture_pool_query_2d(UNPACK2(res), FG_TILE_FORMAT, owner); + fx->dof_coc_tiles_bg_tx = DRW_texture_pool_query_2d(UNPACK2(res), BG_TILE_FORMAT, owner); + + GPU_framebuffer_ensure_config(&fbl->dof_flatten_tiles_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_coc_tiles_fg_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_coc_tiles_bg_tx), + }); +} + +/** + * Dilates the min & max cocs to cover maximum coc values. + * Output format/dimensions should be the same as coc_flatten_pass as they are swapped for + * doing multiple dilation passes. + **/ +static void dof_dilate_tiles_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) +{ + void *owner = (void *)&EEVEE_depth_of_field_init; + const float *fullres = DRW_viewport_size_get(); + int res[2] = {divide_ceil_u(fullres[0], DOF_TILE_DIVISOR), + divide_ceil_u(fullres[1], DOF_TILE_DIVISOR)}; + + DRW_PASS_CREATE(psl->dof_dilate_tiles_minmax, DRW_STATE_WRITE_COLOR); + DRW_PASS_CREATE(psl->dof_dilate_tiles_minabs, DRW_STATE_WRITE_COLOR); + + for (int pass = 0; pass < 2; pass++) { + DRWPass *drw_pass = (pass == 0) ? 
psl->dof_dilate_tiles_minmax : psl->dof_dilate_tiles_minabs; + GPUShader *sh = EEVEE_shaders_depth_of_field_dilate_tiles_get(pass); + DRWShadingGroup *grp = DRW_shgroup_create(sh, drw_pass); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesFgBuffer", &fx->dof_coc_tiles_fg_tx); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesBgBuffer", &fx->dof_coc_tiles_bg_tx); + DRW_shgroup_uniform_bool(grp, "dilateSlightFocus", &fx->dof_dilate_slight_focus, 1); + DRW_shgroup_uniform_int(grp, "ringCount", &fx->dof_dilate_ring_count, 1); + DRW_shgroup_uniform_int(grp, "ringWidthMultiplier", &fx->dof_dilate_ring_width_multiplier, 1); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + } + + fx->dof_coc_dilated_tiles_fg_tx = DRW_texture_pool_query_2d(UNPACK2(res), FG_TILE_FORMAT, owner); + fx->dof_coc_dilated_tiles_bg_tx = DRW_texture_pool_query_2d(UNPACK2(res), BG_TILE_FORMAT, owner); + + GPU_framebuffer_ensure_config(&fbl->dof_dilate_tiles_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_coc_dilated_tiles_fg_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_coc_dilated_tiles_bg_tx), + }); +} + +static void dof_dilate_tiles_pass_draw(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) +{ + for (int pass = 0; pass < 2; pass++) { + DRWPass *drw_pass = (pass == 0) ? psl->dof_dilate_tiles_minmax : psl->dof_dilate_tiles_minabs; + + /* Error introduced by gather center jittering. */ + const float error_multiplier = 1.0f + 1.0f / (DOF_GATHER_RING_COUNT + 0.5f); + int dilation_end_radius = ceilf((fx->dof_fx_max_coc * error_multiplier) / DOF_TILE_DIVISOR); - DRW_shgroup_call_procedural_triangles(grp, NULL, sprite_len); + /* This algorithm produce the exact dilation radius by dividing it in multiple passes. */ + int dilation_radius = 0; + while (dilation_radius < dilation_end_radius) { + /* Dilate slight focus only on first iteration. */ + fx->dof_dilate_slight_focus = (dilation_radius == 0) ? 
1 : 0; - DRW_PASS_CREATE(psl->dof_resolve, DRW_STATE_WRITE_COLOR); + int remainder = dilation_end_radius - dilation_radius; + /* Do not step over any unvisited tile. */ + int max_multiplier = dilation_radius + 1; - grp = DRW_shgroup_create(EEVEE_shaders_depth_of_field_resolve_get(use_alpha), - psl->dof_resolve); - DRW_shgroup_uniform_texture_ref(grp, "scatterBuffer", &effects->dof_blur); - DRW_shgroup_uniform_texture_ref(grp, "colorBuffer", &effects->source_buffer); - DRW_shgroup_uniform_texture_ref(grp, "depthBuffer", &dtxl->depth); - DRW_shgroup_uniform_vec2(grp, "nearFar", effects->dof_near_far, 1); - DRW_shgroup_uniform_vec2(grp, "dofParams", effects->dof_params, 1); - DRW_shgroup_call(grp, quad, NULL); + int ring_count = min_ii(DOF_DILATE_RING_COUNT, ceilf(remainder / (float)max_multiplier)); + int multiplier = min_ii(max_multiplier, floor(remainder / (float)ring_count)); - if (use_alpha) { - DRW_shgroup_uniform_texture_ref(grp, "scatterAlphaBuffer", &effects->dof_blur_alpha); - DRW_shgroup_uniform_bool_copy(grp, "unpremult", DRW_state_is_image_render()); + dilation_radius += ring_count * multiplier; + + fx->dof_dilate_ring_count = ring_count; + fx->dof_dilate_ring_width_multiplier = multiplier; + + GPU_framebuffer_bind(fbl->dof_dilate_tiles_fb); + DRW_draw_pass(drw_pass); + + SWAP(GPUFrameBuffer *, fbl->dof_dilate_tiles_fb, fbl->dof_flatten_tiles_fb); + SWAP(GPUTexture *, fx->dof_coc_dilated_tiles_bg_tx, fx->dof_coc_tiles_bg_tx); + SWAP(GPUTexture *, fx->dof_coc_dilated_tiles_fg_tx, fx->dof_coc_tiles_fg_tx); } } + /* Swap again so that final textures are dof_coc_dilated_tiles_*_tx. */ + SWAP(GPUFrameBuffer *, fbl->dof_dilate_tiles_fb, fbl->dof_flatten_tiles_fb); + SWAP(GPUTexture *, fx->dof_coc_dilated_tiles_bg_tx, fx->dof_coc_tiles_bg_tx); + SWAP(GPUTexture *, fx->dof_coc_dilated_tiles_fg_tx, fx->dof_coc_tiles_fg_tx); +} + +/** + * Create mipmaped color & coc textures for gather passes. 
+ **/ +static void dof_reduce_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_TextureList *txl, + EEVEE_EffectsInfo *fx) +{ + const float *fullres = DRW_viewport_size_get(); + + /* Divide by 2 because dof_fx_max_coc is in fullres CoC radius and the reduce texture begins at + * half resolution. */ + float max_space_between_sample = fx->dof_fx_max_coc * 0.5f / DOF_GATHER_RING_COUNT; + + int mip_count = max_ii(1, log2_ceil_u(max_space_between_sample)); + + fx->dof_reduce_steps = mip_count - 1; + /* This ensure the mipmaps are aligned for the needed 4 mip levels. + * Starts at 2 because already at half resolution. */ + int multiple = 2 << (mip_count - 1); + int res[2] = {(multiple * divide_ceil_u(fullres[0], multiple)) / 2, + (multiple * divide_ceil_u(fullres[1], multiple)) / 2}; + + int quater_res[2] = {divide_ceil_u(fullres[0], 4), divide_ceil_u(fullres[1], 4)}; + + /* TODO(fclem): Make this dependent of the quality of the gather pass. */ + fx->dof_scatter_coc_threshold = 4.0f; + + { + DRW_PASS_CREATE(psl->dof_downsample, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_downsample_get(); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_downsample); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBuffer", &fx->dof_reduce_input_color_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex( + grp, "cocBuffer", &fx->dof_reduce_input_coc_tx, NO_FILTERING); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + void *owner = (void *)&EEVEE_depth_of_field_init; + fx->dof_downsample_tx = DRW_texture_pool_query_2d(UNPACK2(quater_res), COLOR_FORMAT, owner); + + GPU_framebuffer_ensure_config(&fbl->dof_downsample_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_downsample_tx), + }); + } + + { + DRW_PASS_CREATE(psl->dof_reduce_copy, DRW_STATE_WRITE_COLOR); + + const bool is_copy_pass = true; + GPUShader *sh = EEVEE_shaders_depth_of_field_reduce_get(is_copy_pass); + DRWShadingGroup *grp = 
DRW_shgroup_create(sh, psl->dof_reduce_copy); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBuffer", &fx->dof_reduce_input_color_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex( + grp, "cocBuffer", &fx->dof_reduce_input_coc_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex( + grp, "downsampledBuffer", &fx->dof_downsample_tx, NO_FILTERING); + DRW_shgroup_uniform_float_copy(grp, "scatterColorThreshold", fx->dof_scatter_color_threshold); + DRW_shgroup_uniform_float_copy( + grp, "scatterColorNeighborMax", fx->dof_scatter_neighbor_max_color); + DRW_shgroup_uniform_float_copy(grp, "scatterCocThreshold", fx->dof_scatter_coc_threshold); + DRW_shgroup_uniform_float_copy(grp, "colorNeighborClamping", fx->dof_denoise_factor); + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropy", fx->dof_bokeh_aniso); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + void *owner = (void *)&EEVEE_depth_of_field_init; + fx->dof_scatter_src_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R11F_G11F_B10F, owner); + } + + { + DRW_PASS_CREATE(psl->dof_reduce, DRW_STATE_WRITE_COLOR); + + const bool is_copy_pass = false; + GPUShader *sh = EEVEE_shaders_depth_of_field_reduce_get(is_copy_pass); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_reduce); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBuffer", &fx->dof_reduce_input_color_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex( + grp, "cocBuffer", &fx->dof_reduce_input_coc_tx, NO_FILTERING); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + } + + if (txl->dof_reduced_color) { + /* TODO(fclem) In the future, we need to check if mip_count did not change. 
+ * For now it's ok as we always define all mip level.*/ + if (res[0] != GPU_texture_width(txl->dof_reduced_color) || + res[1] != GPU_texture_width(txl->dof_reduced_color)) { + DRW_TEXTURE_FREE_SAFE(txl->dof_reduced_color); + DRW_TEXTURE_FREE_SAFE(txl->dof_reduced_coc); + } + } + + if (txl->dof_reduced_color == NULL) { + /* Color needs to be signed format here. See note in shader for explanation. */ + /* Do not use texture pool because of needs mipmaps. */ + txl->dof_reduced_color = GPU_texture_create_2d( + "dof_reduced_color", UNPACK2(res), mip_count, GPU_RGBA16F, NULL); + txl->dof_reduced_coc = GPU_texture_create_2d( + "dof_reduced_coc", UNPACK2(res), mip_count, GPU_R16F, NULL); + + /* TODO(fclem) Remove once we have immutable storage or when mips are generated on creation. */ + GPU_texture_generate_mipmap(txl->dof_reduced_color); + GPU_texture_generate_mipmap(txl->dof_reduced_coc); + } + + GPU_framebuffer_ensure_config(&fbl->dof_reduce_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(txl->dof_reduced_color), + GPU_ATTACHMENT_TEXTURE(txl->dof_reduced_coc), + }); + + GPU_framebuffer_ensure_config(&fbl->dof_reduce_copy_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(txl->dof_reduced_color), + GPU_ATTACHMENT_TEXTURE(txl->dof_reduced_coc), + GPU_ATTACHMENT_TEXTURE(fx->dof_scatter_src_tx), + }); +} + +/** + * Do the gather convolution. For each pixels we gather multiple pixels in its neighborhood + * depending on the min & max CoC tiles. 
+ * + * Creates three passes that all sample `txl->dof_reduced_color`, `txl->dof_reduced_coc` and the + * dilated CoC tile textures: `dof_gather_fg_holefill`, `dof_gather_fg` and `dof_gather_bg`. + * The hole-fill color target and the foreground occlusion target alias the half-res color and CoC + * textures of the setup pass; the background occlusion target aliases the foreground one. + **/ +static void dof_gather_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_TextureList *txl, + EEVEE_EffectsInfo *fx) +{ + void *owner = (void *)&EEVEE_depth_of_field_init; + const float *fullres = DRW_viewport_size_get(); + int res[2] = {divide_ceil_u(fullres[0], 2), divide_ceil_u(fullres[1], 2)}; + int input_size[2]; + GPU_texture_get_mipmap_size(txl->dof_reduced_color, 0, input_size); + float uv_correction_fac[2] = {res[0] / (float)input_size[0], res[1] / (float)input_size[1]}; + float output_texel_size[2] = {1.0f / res[0], 1.0f / res[1]}; + const bool use_bokeh_tx = (fx->dof_bokeh_gather_lut_tx != NULL); + + { + DRW_PASS_CREATE(psl->dof_gather_fg_holefill, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_gather_get(DOF_GATHER_HOLEFILL, false); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_gather_fg_holefill); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBufferBilinear", &txl->dof_reduced_color, WITH_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "colorBuffer", &txl->dof_reduced_color, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "cocBuffer", &txl->dof_reduced_coc, NO_FILTERING); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesFgBuffer", &fx->dof_coc_dilated_tiles_fg_tx); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesBgBuffer", &fx->dof_coc_dilated_tiles_bg_tx); + DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex()); + DRW_shgroup_uniform_vec2_copy(grp, "gatherInputUvCorrection", uv_correction_fac); + DRW_shgroup_uniform_vec2_copy(grp, "gatherOutputTexelSize", output_texel_size); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + /* Reuse textures from the setup pass. */ + /* NOTE: We could use the texture pool to do that for us but it does not track usage and it might + * backfire (it does in practice). 
*/ + fx->dof_fg_holefill_color_tx = fx->dof_half_res_color_tx; + fx->dof_fg_holefill_weight_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner); + + GPU_framebuffer_ensure_config(&fbl->dof_gather_fg_holefill_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_holefill_color_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_holefill_weight_tx), + }); + } + { + DRW_PASS_CREATE(psl->dof_gather_fg, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_gather_get(DOF_GATHER_FOREGROUND, use_bokeh_tx); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_gather_fg); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBufferBilinear", &txl->dof_reduced_color, WITH_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "colorBuffer", &txl->dof_reduced_color, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "cocBuffer", &txl->dof_reduced_coc, NO_FILTERING); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesFgBuffer", &fx->dof_coc_dilated_tiles_fg_tx); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesBgBuffer", &fx->dof_coc_dilated_tiles_bg_tx); + DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex()); + DRW_shgroup_uniform_vec2_copy(grp, "gatherInputUvCorrection", uv_correction_fac); + DRW_shgroup_uniform_vec2_copy(grp, "gatherOutputTexelSize", output_texel_size); + if (use_bokeh_tx) { + /* Negate to flip bokeh shape. Mimics optical phenomenon. */ + negate_v2(fx->dof_bokeh_aniso); + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropy", fx->dof_bokeh_aniso); + DRW_shgroup_uniform_texture_ref(grp, "bokehLut", &fx->dof_bokeh_gather_lut_tx); + /* Restore. */ + negate_v2(fx->dof_bokeh_aniso); + } + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + fx->dof_fg_color_tx = DRW_texture_pool_query_2d(UNPACK2(res), COLOR_FORMAT, owner); + fx->dof_fg_weight_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner); + /* Reuse textures from the setup pass. 
*/ + /* NOTE: We could use the texture pool to do that for us but it does not track usage and it might + * backfire (it does in practice). */ + fx->dof_fg_occlusion_tx = fx->dof_half_res_coc_tx; + + /* NOTE: First target is holefill texture so we can use the median filter on it. + * See the filter function. */ + GPU_framebuffer_ensure_config(&fbl->dof_gather_fg_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_holefill_color_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_holefill_weight_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_occlusion_tx), + }); + } + { + DRW_PASS_CREATE(psl->dof_gather_bg, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_gather_get(DOF_GATHER_BACKGROUND, use_bokeh_tx); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_gather_bg); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBufferBilinear", &txl->dof_reduced_color, WITH_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "colorBuffer", &txl->dof_reduced_color, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "cocBuffer", &txl->dof_reduced_coc, NO_FILTERING); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesFgBuffer", &fx->dof_coc_dilated_tiles_fg_tx); + DRW_shgroup_uniform_texture_ref(grp, "cocTilesBgBuffer", &fx->dof_coc_dilated_tiles_bg_tx); + DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex()); + DRW_shgroup_uniform_vec2_copy(grp, "gatherInputUvCorrection", uv_correction_fac); + DRW_shgroup_uniform_vec2_copy(grp, "gatherOutputTexelSize", output_texel_size); + if (use_bokeh_tx) { + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropy", fx->dof_bokeh_aniso); + DRW_shgroup_uniform_texture_ref(grp, "bokehLut", &fx->dof_bokeh_gather_lut_tx); + } + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + fx->dof_bg_color_tx = DRW_texture_pool_query_2d(UNPACK2(res), COLOR_FORMAT, owner); + fx->dof_bg_weight_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner); + /* Reuse, since only used for scatter. 
Foreground is processed before background. */ + fx->dof_bg_occlusion_tx = fx->dof_fg_occlusion_tx; + + /* NOTE: First target is holefill texture so we can use the median filter on it. + * See the filter function. */ + GPU_framebuffer_ensure_config(&fbl->dof_gather_bg_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_holefill_color_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_holefill_weight_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_bg_occlusion_tx), + }); + } +} + +/** + * Filter an input buffer using a median filter to reduce noise. + * NOTE: We use the holefill texture as our input to reduce memory usage. + * Thus, the holefill pass cannot be filtered. + **/ +static void dof_filter_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) +{ + DRW_PASS_CREATE(psl->dof_filter, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_filter_get(); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_filter); + DRW_shgroup_uniform_texture_ref_ex( + grp, "colorBuffer", &fx->dof_fg_holefill_color_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex( + grp, "weightBuffer", &fx->dof_fg_holefill_weight_tx, NO_FILTERING); + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + + GPU_framebuffer_ensure_config(&fbl->dof_filter_fg_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_color_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_weight_tx), + }); + + GPU_framebuffer_ensure_config(&fbl->dof_filter_bg_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_bg_color_tx), + GPU_ATTACHMENT_TEXTURE(fx->dof_bg_weight_tx), + }); +} + +/** + * Do the Scatter convolution. A sprite is emitted for every 4 pixels but is only expanded if the + * pixels are bright enough to be scattered.
+ **/ +static void dof_scatter_pass_init(EEVEE_FramebufferList *fbl, + EEVEE_PassList *psl, + EEVEE_TextureList *txl, + EEVEE_EffectsInfo *fx) +{ + int input_size[2], target_size[2]; + GPU_texture_get_mipmap_size(fx->dof_half_res_color_tx, 0, input_size); + GPU_texture_get_mipmap_size(fx->dof_bg_color_tx, 0, target_size); + /* Draw a sprite for every four halfres pixels. */ + int sprite_count = (input_size[0] / 2) * (input_size[1] / 2); + float target_texel_size[2] = {1.0f / target_size[0], 1.0f / target_size[1]}; + const bool use_bokeh_tx = (fx->dof_bokeh_gather_lut_tx != NULL); + + { + DRW_PASS_CREATE(psl->dof_scatter_fg, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL); + + const bool is_foreground = true; + GPUShader *sh = EEVEE_shaders_depth_of_field_scatter_get(is_foreground, use_bokeh_tx); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_scatter_fg); + DRW_shgroup_uniform_texture_ref_ex(grp, "colorBuffer", &fx->dof_scatter_src_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "cocBuffer", &txl->dof_reduced_coc, NO_FILTERING); + DRW_shgroup_uniform_texture_ref(grp, "occlusionBuffer", &fx->dof_fg_occlusion_tx); + DRW_shgroup_uniform_vec2_copy(grp, "targetTexelSize", target_texel_size); + DRW_shgroup_uniform_int_copy(grp, "spritePerRow", input_size[0] / 2); + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropy", fx->dof_bokeh_aniso); + if (use_bokeh_tx) { + /* Negate to flip bokeh shape. Mimics optical phenomenon. */ + negate_v2(fx->dof_bokeh_aniso_inv); + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropyInv", fx->dof_bokeh_aniso_inv); + DRW_shgroup_uniform_texture_ref(grp, "bokehLut", &fx->dof_bokeh_scatter_lut_tx); + /* Restore. 
*/ + negate_v2(fx->dof_bokeh_aniso_inv); + } + DRW_shgroup_call_procedural_triangles(grp, NULL, sprite_count); + + GPU_framebuffer_ensure_config(&fbl->dof_scatter_fg_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_fg_color_tx), + }); + } + { + DRW_PASS_CREATE(psl->dof_scatter_bg, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL); + + const bool is_foreground = false; + GPUShader *sh = EEVEE_shaders_depth_of_field_scatter_get(is_foreground, use_bokeh_tx); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_scatter_bg); + DRW_shgroup_uniform_texture_ref_ex(grp, "colorBuffer", &fx->dof_scatter_src_tx, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "cocBuffer", &txl->dof_reduced_coc, NO_FILTERING); + DRW_shgroup_uniform_texture_ref(grp, "occlusionBuffer", &fx->dof_bg_occlusion_tx); + DRW_shgroup_uniform_vec2_copy(grp, "targetTexelSize", target_texel_size); + DRW_shgroup_uniform_int_copy(grp, "spritePerRow", input_size[0] / 2); + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropy", fx->dof_bokeh_aniso); + if (use_bokeh_tx) { + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropyInv", fx->dof_bokeh_aniso_inv); + DRW_shgroup_uniform_texture_ref(grp, "bokehLut", &fx->dof_bokeh_scatter_lut_tx); + } + DRW_shgroup_call_procedural_triangles(grp, NULL, sprite_count); + + GPU_framebuffer_ensure_config(&fbl->dof_scatter_bg_fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(fx->dof_bg_color_tx), + }); + } +} + +/** + * Recombine the result of the foreground and background processing. Also perform a slight out of + * focus blur to improve geometric continuity. 
+ **/ +static void dof_recombine_pass_init(EEVEE_FramebufferList *UNUSED(fbl), + EEVEE_PassList *psl, + EEVEE_EffectsInfo *fx) +{ + DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); + const bool use_bokeh_tx = (fx->dof_bokeh_gather_lut_tx != NULL); + + DRW_PASS_CREATE(psl->dof_resolve, DRW_STATE_WRITE_COLOR); + + GPUShader *sh = EEVEE_shaders_depth_of_field_resolve_get(use_bokeh_tx, fx->dof_hq_slight_focus); + DRWShadingGroup *grp = DRW_shgroup_create(sh, psl->dof_resolve); + DRW_shgroup_uniform_texture_ref_ex(grp, "fullResColorBuffer", &fx->source_buffer, NO_FILTERING); + DRW_shgroup_uniform_texture_ref_ex(grp, "fullResDepthBuffer", &dtxl->depth, NO_FILTERING); + DRW_shgroup_uniform_texture_ref(grp, "bgColorBuffer", &fx->dof_bg_color_tx); + DRW_shgroup_uniform_texture_ref(grp, "bgWeightBuffer", &fx->dof_bg_weight_tx); + DRW_shgroup_uniform_texture_ref(grp, "bgTileBuffer", &fx->dof_coc_dilated_tiles_bg_tx); + DRW_shgroup_uniform_texture_ref(grp, "fgColorBuffer", &fx->dof_fg_color_tx); + DRW_shgroup_uniform_texture_ref(grp, "fgWeightBuffer", &fx->dof_fg_weight_tx); + DRW_shgroup_uniform_texture_ref(grp, "holefillColorBuffer", &fx->dof_fg_holefill_color_tx); + DRW_shgroup_uniform_texture_ref(grp, "holefillWeightBuffer", &fx->dof_fg_holefill_weight_tx); + DRW_shgroup_uniform_texture_ref(grp, "fgTileBuffer", &fx->dof_coc_dilated_tiles_fg_tx); + DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex()); + DRW_shgroup_uniform_vec4_copy(grp, "cocParams", fx->dof_coc_params); + DRW_shgroup_uniform_float_copy(grp, "bokehMaxSize", fx->dof_bokeh_max_size); + if (use_bokeh_tx) { + DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropyInv", fx->dof_bokeh_aniso_inv); + DRW_shgroup_uniform_texture_ref(grp, "bokehLut", &fx->dof_bokeh_resolve_lut_tx); + } + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); +} + +void EEVEE_depth_of_field_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata) +{ + EEVEE_TextureList *txl = vedata->txl; + 
EEVEE_FramebufferList *fbl = vedata->fbl; + EEVEE_PassList *psl = vedata->psl; + EEVEE_StorageList *stl = vedata->stl; + EEVEE_EffectsInfo *fx = stl->effects; + + if ((fx->enabled_effects & EFFECT_DOF) != 0) { + /* GPU_RGBA16F is sufficient now that all scattered bokeh are premultiplied. + * GPU_R11F_G11F_B10F is not enough when lots of scattered sprites are big and offers + * relatively small benefits. */ + fx->dof_color_format = GPU_RGBA16F; + + dof_bokeh_pass_init(fbl, psl, fx); + dof_setup_pass_init(fbl, psl, fx); + dof_flatten_tiles_pass_init(fbl, psl, fx); + dof_dilate_tiles_pass_init(fbl, psl, fx); + dof_reduce_pass_init(fbl, psl, txl, fx); + dof_gather_pass_init(fbl, psl, txl, fx); + dof_filter_pass_init(fbl, psl, fx); + dof_scatter_pass_init(fbl, psl, txl, fx); + dof_recombine_pass_init(fbl, psl, fx); + } +} + +static void dof_recursive_reduce(void *vedata, int UNUSED(level)) +{ + EEVEE_PassList *psl = ((EEVEE_Data *)vedata)->psl; + EEVEE_TextureList *txl = ((EEVEE_Data *)vedata)->txl; + EEVEE_EffectsInfo *fx = ((EEVEE_Data *)vedata)->stl->effects; + + fx->dof_reduce_input_color_tx = txl->dof_reduced_color; + fx->dof_reduce_input_coc_tx = txl->dof_reduced_coc; + + DRW_draw_pass(psl->dof_reduce); } void EEVEE_depth_of_field_draw(EEVEE_Data *vedata) @@ -214,24 +971,77 @@ void EEVEE_depth_of_field_draw(EEVEE_Data *vedata) EEVEE_TextureList *txl = vedata->txl; EEVEE_FramebufferList *fbl = vedata->fbl; EEVEE_StorageList *stl = vedata->stl; - EEVEE_EffectsInfo *effects = stl->effects; + EEVEE_EffectsInfo *effects = stl->effects; /* TODO(fclem): Because of silly SWAP_BUFFERS. 
*/ + EEVEE_EffectsInfo *fx = effects; /* Depth Of Field */ if ((effects->enabled_effects & EFFECT_DOF) != 0) { - const float clear_col[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + DRW_stats_group_start("Depth of Field"); + + if (fx->dof_bokeh_gather_lut_tx != NULL) { + GPU_framebuffer_bind(fbl->dof_bokeh_fb); + DRW_draw_pass(psl->dof_bokeh); + } + + GPU_framebuffer_bind(fbl->dof_setup_fb); + DRW_draw_pass(psl->dof_setup); + + GPU_framebuffer_bind(fbl->dof_flatten_tiles_fb); + DRW_draw_pass(psl->dof_flatten_tiles); - /* Downsample */ - GPU_framebuffer_bind(fbl->dof_down_fb); - DRW_draw_pass(psl->dof_down); + dof_dilate_tiles_pass_draw(fbl, psl, fx); - /* Scatter */ - GPU_framebuffer_bind(fbl->dof_scatter_fb); - GPU_framebuffer_clear_color(fbl->dof_scatter_fb, clear_col); - DRW_draw_pass(psl->dof_scatter); + fx->dof_reduce_input_color_tx = fx->dof_half_res_color_tx; + fx->dof_reduce_input_coc_tx = fx->dof_half_res_coc_tx; - /* Resolve */ - GPU_framebuffer_bind(effects->target_buffer); + /* First step is just a copy. */ + GPU_framebuffer_bind(fbl->dof_downsample_fb); + DRW_draw_pass(psl->dof_downsample); + + /* First step is just a copy. */ + GPU_framebuffer_bind(fbl->dof_reduce_copy_fb); + DRW_draw_pass(psl->dof_reduce_copy); + + GPU_framebuffer_recursive_downsample( + fbl->dof_reduce_fb, fx->dof_reduce_steps, &dof_recursive_reduce, vedata); + + { + /* Foreground convolution. */ + GPU_framebuffer_bind(fbl->dof_gather_fg_fb); + DRW_draw_pass(psl->dof_gather_fg); + + GPU_framebuffer_bind(fbl->dof_filter_fg_fb); + DRW_draw_pass(psl->dof_filter); + + GPU_framebuffer_bind(fbl->dof_scatter_fg_fb); + DRW_draw_pass(psl->dof_scatter_fg); + } + + { + /* Background convolution. */ + GPU_framebuffer_bind(fbl->dof_gather_bg_fb); + DRW_draw_pass(psl->dof_gather_bg); + + GPU_framebuffer_bind(fbl->dof_filter_bg_fb); + DRW_draw_pass(psl->dof_filter); + + GPU_framebuffer_bind(fbl->dof_scatter_bg_fb); + DRW_draw_pass(psl->dof_scatter_bg); + } + + { + /* Holefill convolution. 
*/ + GPU_framebuffer_bind(fbl->dof_gather_fg_holefill_fb); + DRW_draw_pass(psl->dof_gather_fg_holefill); + + /* NOTE: do not filter the holefill pass as we use it as out filter input buffer. */ + } + + GPU_framebuffer_bind(fx->target_buffer); DRW_draw_pass(psl->dof_resolve); + SWAP_BUFFERS(); + + DRW_stats_group_end(); } } diff --git a/source/blender/draw/engines/eevee/eevee_effects.c b/source/blender/draw/engines/eevee/eevee_effects.c index 698b959f1a9..77586f4b43a 100644 --- a/source/blender/draw/engines/eevee/eevee_effects.c +++ b/source/blender/draw/engines/eevee/eevee_effects.c @@ -102,11 +102,9 @@ void EEVEE_effects_init(EEVEE_ViewLayerData *sldata, effects->enabled_effects |= EEVEE_occlusion_init(sldata, vedata); effects->enabled_effects |= EEVEE_screen_raytrace_init(sldata, vedata); - if ((effects->enabled_effects & EFFECT_TAA) && effects->taa_current_sample > 1) { - /* Update matrices here because EEVEE_screen_raytrace_init can have reset the - * taa_current_sample. (See T66811) */ - EEVEE_temporal_sampling_update_matrices(vedata); - } + /* Update matrices here because EEVEE_screen_raytrace_init can have reset the + * taa_current_sample. (See T66811) */ + EEVEE_temporal_sampling_update_matrices(vedata); EEVEE_volumes_init(sldata, vedata); EEVEE_subsurface_init(sldata, vedata); diff --git a/source/blender/draw/engines/eevee/eevee_engine.c b/source/blender/draw/engines/eevee/eevee_engine.c index 802b47b61a4..d1238d7b82e 100644 --- a/source/blender/draw/engines/eevee/eevee_engine.c +++ b/source/blender/draw/engines/eevee/eevee_engine.c @@ -456,13 +456,17 @@ static void eevee_render_to_image(void *vedata, } EEVEE_PrivateData *g_data = ved->stl->g_data; - EEVEE_render_modules_init(vedata, engine, depsgraph); - int initial_frame = CFRA; float initial_subframe = SUBFRA; float shuttertime = (do_motion_blur) ? scene->eevee.motion_blur_shutter : 0.0f; int time_steps_tot = (do_motion_blur) ? 
max_ii(1, scene->eevee.motion_blur_steps) : 1; - g_data->render_tot_samples = divide_ceil_u(scene->eevee.taa_render_samples, time_steps_tot); + g_data->render_timesteps = time_steps_tot; + + EEVEE_render_modules_init(vedata, engine, depsgraph); + + g_data->render_sample_count_per_timestep = EEVEE_temporal_sampling_sample_count_get(scene, + ved->stl); + /* Compute start time. The motion blur will cover `[time ...time + shuttertime]`. */ float time = initial_frame + initial_subframe; switch (scene->eevee.motion_blur_position) { @@ -553,7 +557,8 @@ static void eevee_render_to_image(void *vedata, /* Actual drawing. */ { - EEVEE_renderpasses_output_init(sldata, vedata, g_data->render_tot_samples * time_steps_tot); + EEVEE_renderpasses_output_init( + sldata, vedata, g_data->render_sample_count_per_timestep * time_steps_tot); EEVEE_temporal_sampling_create_view(vedata); EEVEE_render_draw(vedata, engine, render_layer, rect); diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h index e48f5f9dd32..9761264f03e 100644 --- a/source/blender/draw/engines/eevee/eevee_private.h +++ b/source/blender/draw/engines/eevee/eevee_private.h @@ -55,6 +55,9 @@ extern struct DrawEngineType draw_engine_eevee_type; #define MAX_BLOOM_STEP 16 #define MAX_AOVS 64 +/* Special value chosen to not be altered by depth of field sample count. */ +#define TAA_MAX_SAMPLE 10000926 + // #define DEBUG_SHADOW_DISTRIBUTION /* Only define one of these. 
*/ @@ -206,6 +209,28 @@ typedef enum EEVEE_SSRShaderOptions { SSR_MAX_SHADER = (1 << 4), } EEVEE_SSRShaderOptions; +/* DOF Gather pass shader variations */ +typedef enum EEVEE_DofGatherPass { + DOF_GATHER_FOREGROUND = 0, + DOF_GATHER_BACKGROUND = 1, + DOF_GATHER_HOLEFILL = 2, + + DOF_GATHER_MAX_PASS, +} EEVEE_DofGatherPass; + +#define DOF_TILE_DIVISOR 16 +#define DOF_BOKEH_LUT_SIZE 32 +#define DOF_GATHER_RING_COUNT 5 +#define DOF_DILATE_RING_COUNT 3 +#define DOF_FAST_GATHER_COC_ERROR 0.05 + +#define DOF_SHADER_DEFINES \ + "#define DOF_TILE_DIVISOR " STRINGIFY(DOF_TILE_DIVISOR) "\n" \ + "#define DOF_BOKEH_LUT_SIZE " STRINGIFY(DOF_BOKEH_LUT_SIZE) "\n" \ + "#define DOF_GATHER_RING_COUNT " STRINGIFY(DOF_GATHER_RING_COUNT) "\n" \ + "#define DOF_DILATE_RING_COUNT " STRINGIFY(DOF_DILATE_RING_COUNT) "\n" \ + "#define DOF_FAST_GATHER_COC_ERROR " STRINGIFY(DOF_FAST_GATHER_COC_ERROR) "\n" + /* ************ PROBE UBO ************* */ /* They are the same struct as their Cache siblings. @@ -258,8 +283,20 @@ typedef struct EEVEE_PassList { struct DRWPass *bloom_upsample; struct DRWPass *bloom_resolve; struct DRWPass *bloom_accum_ps; - struct DRWPass *dof_down; - struct DRWPass *dof_scatter; + struct DRWPass *dof_setup; + struct DRWPass *dof_flatten_tiles; + struct DRWPass *dof_dilate_tiles_minmax; + struct DRWPass *dof_dilate_tiles_minabs; + struct DRWPass *dof_reduce_copy; + struct DRWPass *dof_downsample; + struct DRWPass *dof_reduce; + struct DRWPass *dof_bokeh; + struct DRWPass *dof_gather_fg; + struct DRWPass *dof_gather_fg_holefill; + struct DRWPass *dof_gather_bg; + struct DRWPass *dof_scatter_fg; + struct DRWPass *dof_scatter_bg; + struct DRWPass *dof_filter; struct DRWPass *dof_resolve; struct DRWPass *volumetric_world_ps; struct DRWPass *volumetric_objects_ps; @@ -339,8 +376,20 @@ typedef struct EEVEE_FramebufferList { struct GPUFrameBuffer *sss_clear_fb; struct GPUFrameBuffer *sss_translucency_fb; struct GPUFrameBuffer *sss_accum_fb; - struct GPUFrameBuffer 
*dof_down_fb; - struct GPUFrameBuffer *dof_scatter_fb; + struct GPUFrameBuffer *dof_setup_fb; + struct GPUFrameBuffer *dof_flatten_tiles_fb; + struct GPUFrameBuffer *dof_dilate_tiles_fb; + struct GPUFrameBuffer *dof_downsample_fb; + struct GPUFrameBuffer *dof_reduce_fb; + struct GPUFrameBuffer *dof_reduce_copy_fb; + struct GPUFrameBuffer *dof_bokeh_fb; + struct GPUFrameBuffer *dof_gather_fg_fb; + struct GPUFrameBuffer *dof_filter_fg_fb; + struct GPUFrameBuffer *dof_gather_fg_holefill_fb; + struct GPUFrameBuffer *dof_gather_bg_fb; + struct GPUFrameBuffer *dof_filter_bg_fb; + struct GPUFrameBuffer *dof_scatter_fg_fb; + struct GPUFrameBuffer *dof_scatter_bg_fb; struct GPUFrameBuffer *volumetric_fb; struct GPUFrameBuffer *volumetric_scat_fb; struct GPUFrameBuffer *volumetric_integ_fb; @@ -390,6 +439,9 @@ typedef struct EEVEE_TextureList { struct GPUTexture *cryptomatte; struct GPUTexture *refract_color; struct GPUTexture *taa_history; + /* Could not be pool texture because of mipmapping. */ + struct GPUTexture *dof_reduced_color; + struct GPUTexture *dof_reduced_coc; struct GPUTexture *volume_prop_scattering; struct GPUTexture *volume_prop_extinction; @@ -727,16 +779,45 @@ typedef struct EEVEE_EffectsInfo { struct GPUTexture *velocity_tiles_x_tx; struct GPUTexture *velocity_tiles_tx; /* Depth Of Field */ - float dof_near_far[2]; - float dof_params[2]; - float dof_bokeh[4]; - float dof_bokeh_sides[4]; - int dof_target_size[2]; - struct GPUTexture *dof_down_near; /* Textures from pool */ - struct GPUTexture *dof_down_far; - struct GPUTexture *dof_coc; - struct GPUTexture *dof_blur; - struct GPUTexture *dof_blur_alpha; + float dof_jitter_radius; + float dof_jitter_blades; + float dof_jitter_focus; + int dof_jitter_ring_count; + float dof_coc_params[2], dof_coc_near_dist, dof_coc_far_dist; + float dof_bokeh_blades, dof_bokeh_rotation, dof_bokeh_aniso[2], dof_bokeh_max_size; + float dof_bokeh_aniso_inv[2]; + float dof_scatter_color_threshold; + float 
dof_scatter_coc_threshold; + float dof_scatter_neighbor_max_color; + float dof_fx_max_coc; + float dof_denoise_factor; + int dof_dilate_slight_focus; + int dof_dilate_ring_count; + int dof_dilate_ring_width_multiplier; + int dof_reduce_steps; + bool dof_hq_slight_focus; + eGPUTextureFormat dof_color_format; + struct GPUTexture *dof_bg_color_tx; /* All textures from pool... */ + struct GPUTexture *dof_bg_occlusion_tx; + struct GPUTexture *dof_bg_weight_tx; + struct GPUTexture *dof_bokeh_gather_lut_tx; + struct GPUTexture *dof_bokeh_scatter_lut_tx; + struct GPUTexture *dof_bokeh_resolve_lut_tx; + struct GPUTexture *dof_coc_dilated_tiles_bg_tx; + struct GPUTexture *dof_coc_dilated_tiles_fg_tx; + struct GPUTexture *dof_coc_tiles_bg_tx; + struct GPUTexture *dof_coc_tiles_fg_tx; + struct GPUTexture *dof_downsample_tx; + struct GPUTexture *dof_fg_color_tx; + struct GPUTexture *dof_fg_occlusion_tx; + struct GPUTexture *dof_fg_weight_tx; + struct GPUTexture *dof_fg_holefill_color_tx; + struct GPUTexture *dof_fg_holefill_weight_tx; + struct GPUTexture *dof_half_res_coc_tx; + struct GPUTexture *dof_half_res_color_tx; + struct GPUTexture *dof_scatter_src_tx; + struct GPUTexture *dof_reduce_input_coc_tx; /* Just references to actual textures. */ + struct GPUTexture *dof_reduce_input_color_tx; /* Alpha Checker */ float color_checker_dark[4]; float color_checker_light[4]; @@ -1002,7 +1083,8 @@ typedef struct EEVEE_PrivateData { /** For rendering planar reflections. 
*/ struct DRWView *planar_views[MAX_PLANAR]; - int render_tot_samples; + int render_timesteps; + int render_sample_count_per_timestep; } EEVEE_PrivateData; /* Transient data */ /* eevee_data.c */ @@ -1110,9 +1192,16 @@ struct GPUShader *EEVEE_shaders_bloom_blit_get(bool high_quality); struct GPUShader *EEVEE_shaders_bloom_downsample_get(bool high_quality); struct GPUShader *EEVEE_shaders_bloom_upsample_get(bool high_quality); struct GPUShader *EEVEE_shaders_bloom_resolve_get(bool high_quality); -struct GPUShader *EEVEE_shaders_depth_of_field_downsample_get(bool use_alpha); -struct GPUShader *EEVEE_shaders_depth_of_field_scatter_get(bool use_alpha); -struct GPUShader *EEVEE_shaders_depth_of_field_resolve_get(bool use_alpha); +struct GPUShader *EEVEE_shaders_depth_of_field_bokeh_get(void); +struct GPUShader *EEVEE_shaders_depth_of_field_setup_get(void); +struct GPUShader *EEVEE_shaders_depth_of_field_flatten_tiles_get(void); +struct GPUShader *EEVEE_shaders_depth_of_field_dilate_tiles_get(bool pass); +struct GPUShader *EEVEE_shaders_depth_of_field_downsample_get(void); +struct GPUShader *EEVEE_shaders_depth_of_field_reduce_get(bool is_copy_pass); +struct GPUShader *EEVEE_shaders_depth_of_field_gather_get(EEVEE_DofGatherPass pass, bool bokeh_tx); +struct GPUShader *EEVEE_shaders_depth_of_field_filter_get(void); +struct GPUShader *EEVEE_shaders_depth_of_field_scatter_get(bool is_foreground, bool bokeh_tx); +struct GPUShader *EEVEE_shaders_depth_of_field_resolve_get(bool use_bokeh_tx, bool use_hq_gather); struct GPUShader *EEVEE_shaders_effect_downsample_sh_get(void); struct GPUShader *EEVEE_shaders_effect_downsample_cube_sh_get(void); struct GPUShader *EEVEE_shaders_effect_minz_downlevel_sh_get(void); @@ -1232,6 +1321,12 @@ void EEVEE_lightprobes_planar_data_from_object(Object *ob, int EEVEE_depth_of_field_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, Object *camera); void EEVEE_depth_of_field_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata); void 
EEVEE_depth_of_field_draw(EEVEE_Data *vedata); +bool EEVEE_depth_of_field_jitter_get(EEVEE_EffectsInfo *effects, + float r_jitter[2], + float *r_focus_distance); +int EEVEE_depth_of_field_sample_count_get(EEVEE_EffectsInfo *effects, + int sample_count, + int *r_ring_count); /* eevee_bloom.c */ int EEVEE_bloom_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata); @@ -1345,6 +1440,7 @@ int EEVEE_renderpasses_aov_hash(const ViewLayerAOV *aov); /* eevee_temporal_sampling.c */ void EEVEE_temporal_sampling_reset(EEVEE_Data *vedata); void EEVEE_temporal_sampling_create_view(EEVEE_Data *vedata); +int EEVEE_temporal_sampling_sample_count_get(const Scene *scene, const EEVEE_StorageList *stl); int EEVEE_temporal_sampling_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata); void EEVEE_temporal_sampling_offset_calc(const double ht_point[2], const float filter_size, diff --git a/source/blender/draw/engines/eevee/eevee_render.c b/source/blender/draw/engines/eevee/eevee_render.c index 72b12f6daeb..66e3d8ebd22 100644 --- a/source/blender/draw/engines/eevee/eevee_render.c +++ b/source/blender/draw/engines/eevee/eevee_render.c @@ -563,7 +563,7 @@ void EEVEE_render_draw(EEVEE_Data *vedata, RenderEngine *engine, RenderLayer *rl /* Sort transparents before the loop. */ DRW_pass_sort_shgroup_z(psl->transparent_pass); - uint tot_sample = stl->g_data->render_tot_samples; + uint tot_sample = stl->g_data->render_sample_count_per_timestep; uint render_samples = 0; /* SSR needs one iteration to start properly. 
*/ diff --git a/source/blender/draw/engines/eevee/eevee_shaders.c b/source/blender/draw/engines/eevee/eevee_shaders.c index 82b2395cc6e..ec058afd58e 100644 --- a/source/blender/draw/engines/eevee/eevee_shaders.c +++ b/source/blender/draw/engines/eevee/eevee_shaders.c @@ -77,9 +77,16 @@ static struct { struct GPUShader *bloom_resolve_sh[2]; /* Depth Of Field */ - struct GPUShader *dof_downsample_sh[2]; - struct GPUShader *dof_scatter_sh[2]; - struct GPUShader *dof_resolve_sh[2]; + struct GPUShader *dof_bokeh_sh; + struct GPUShader *dof_setup_sh; + struct GPUShader *dof_flatten_tiles_sh; + struct GPUShader *dof_dilate_tiles_sh[2]; + struct GPUShader *dof_downsample_sh; + struct GPUShader *dof_reduce_sh[2]; + struct GPUShader *dof_gather_sh[DOF_GATHER_MAX_PASS][2]; + struct GPUShader *dof_filter_sh; + struct GPUShader *dof_scatter_sh[2][2]; + struct GPUShader *dof_resolve_sh[2][2]; /* General purpose Shaders. */ struct GPUShader *lookdev_background; @@ -192,8 +199,18 @@ extern char datatoc_cubemap_lib_glsl[]; extern char datatoc_default_frag_glsl[]; extern char datatoc_lookdev_world_frag_glsl[]; extern char datatoc_effect_bloom_frag_glsl[]; -extern char datatoc_effect_dof_frag_glsl[]; -extern char datatoc_effect_dof_vert_glsl[]; +extern char datatoc_effect_dof_bokeh_frag_glsl[]; +extern char datatoc_effect_dof_dilate_tiles_frag_glsl[]; +extern char datatoc_effect_dof_downsample_frag_glsl[]; +extern char datatoc_effect_dof_filter_frag_glsl[]; +extern char datatoc_effect_dof_flatten_tiles_frag_glsl[]; +extern char datatoc_effect_dof_gather_frag_glsl[]; +extern char datatoc_effect_dof_lib_glsl[]; +extern char datatoc_effect_dof_reduce_frag_glsl[]; +extern char datatoc_effect_dof_resolve_frag_glsl[]; +extern char datatoc_effect_dof_scatter_frag_glsl[]; +extern char datatoc_effect_dof_scatter_vert_glsl[]; +extern char datatoc_effect_dof_setup_frag_glsl[]; extern char datatoc_effect_downsample_cube_frag_glsl[]; extern char datatoc_effect_downsample_frag_glsl[]; extern char 
datatoc_effect_gtao_frag_glsl[]; @@ -281,6 +298,7 @@ static void eevee_shader_library_ensure(void) DRW_SHADER_LIB_ADD(e_data.lib, volumetric_lib); DRW_SHADER_LIB_ADD(e_data.lib, closure_lib); DRW_SHADER_LIB_ADD(e_data.lib, ssr_lib); + DRW_SHADER_LIB_ADD(e_data.lib, effect_dof_lib); /* Add one for each Closure */ e_data.closure_lit_lib = BLI_string_joinN(datatoc_closure_lit_lib_glsl, @@ -1010,48 +1028,172 @@ GPUShader *EEVEE_shaders_bloom_resolve_get(bool high_quality) /** \name Depth of field * \{ */ -GPUShader *EEVEE_shaders_depth_of_field_downsample_get(bool use_alpha) +GPUShader *EEVEE_shaders_depth_of_field_bokeh_get(void) { - int index = use_alpha ? 1 : 0; - if (e_data.dof_downsample_sh[index] == NULL) { - e_data.dof_downsample_sh[index] = DRW_shader_create_fullscreen_with_shaderlib( - datatoc_effect_dof_frag_glsl, + if (e_data.dof_bokeh_sh == NULL) { + e_data.dof_bokeh_sh = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_bokeh_frag_glsl, e_data.lib, DOF_SHADER_DEFINES); + } + return e_data.dof_bokeh_sh; +} + +GPUShader *EEVEE_shaders_depth_of_field_setup_get(void) +{ + if (e_data.dof_setup_sh == NULL) { + e_data.dof_setup_sh = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_setup_frag_glsl, e_data.lib, DOF_SHADER_DEFINES); + } + return e_data.dof_setup_sh; +} + +GPUShader *EEVEE_shaders_depth_of_field_flatten_tiles_get(void) +{ + if (e_data.dof_flatten_tiles_sh == NULL) { + e_data.dof_flatten_tiles_sh = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_flatten_tiles_frag_glsl, e_data.lib, DOF_SHADER_DEFINES); + } + return e_data.dof_flatten_tiles_sh; +} + +GPUShader *EEVEE_shaders_depth_of_field_dilate_tiles_get(bool b_pass) +{ + int pass = b_pass; + if (e_data.dof_dilate_tiles_sh[pass] == NULL) { + e_data.dof_dilate_tiles_sh[pass] = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_dilate_tiles_frag_glsl, + e_data.lib, + (pass == 0) ? 
DOF_SHADER_DEFINES "#define DILATE_MODE_MIN_MAX\n" : + DOF_SHADER_DEFINES "#define DILATE_MODE_MIN_ABS\n"); + } + return e_data.dof_dilate_tiles_sh[pass]; +} + +GPUShader *EEVEE_shaders_depth_of_field_downsample_get(void) +{ + if (e_data.dof_downsample_sh == NULL) { + e_data.dof_downsample_sh = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_downsample_frag_glsl, e_data.lib, DOF_SHADER_DEFINES); + } + return e_data.dof_downsample_sh; +} + +GPUShader *EEVEE_shaders_depth_of_field_reduce_get(bool b_is_copy_pass) +{ + int is_copy_pass = b_is_copy_pass; + if (e_data.dof_reduce_sh[is_copy_pass] == NULL) { + e_data.dof_reduce_sh[is_copy_pass] = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_reduce_frag_glsl, e_data.lib, - use_alpha ? "#define USE_ALPHA_DOF\n" - "#define STEP_DOWNSAMPLE\n" : - "#define STEP_DOWNSAMPLE\n"); + (is_copy_pass) ? DOF_SHADER_DEFINES "#define COPY_PASS\n" : + DOF_SHADER_DEFINES "#define REDUCE_PASS\n"); + } + return e_data.dof_reduce_sh[is_copy_pass]; +} + +GPUShader *EEVEE_shaders_depth_of_field_gather_get(EEVEE_DofGatherPass pass, bool b_use_bokeh_tx) +{ + int use_bokeh_tx = b_use_bokeh_tx; + if (e_data.dof_gather_sh[pass][use_bokeh_tx] == NULL) { + DynStr *ds = BLI_dynstr_new(); + + BLI_dynstr_append(ds, DOF_SHADER_DEFINES); + + switch (pass) { + case DOF_GATHER_FOREGROUND: + BLI_dynstr_append(ds, "#define DOF_FOREGROUND_PASS\n"); + break; + case DOF_GATHER_BACKGROUND: + BLI_dynstr_append(ds, "#define DOF_BACKGROUND_PASS\n"); + break; + case DOF_GATHER_HOLEFILL: + BLI_dynstr_append(ds, + "#define DOF_BACKGROUND_PASS\n" + "#define DOF_HOLEFILL_PASS\n"); + break; + default: + break; + } + + if (use_bokeh_tx) { + BLI_dynstr_append(ds, "#define DOF_BOKEH_TEXTURE\n"); + } + + char *define = BLI_dynstr_get_cstring(ds); + BLI_dynstr_free(ds); + + e_data.dof_gather_sh[pass][use_bokeh_tx] = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_gather_frag_glsl, e_data.lib, define); + + 
MEM_freeN(define); } - return e_data.dof_downsample_sh[index]; + return e_data.dof_gather_sh[pass][use_bokeh_tx]; } -GPUShader *EEVEE_shaders_depth_of_field_scatter_get(bool use_alpha) +GPUShader *EEVEE_shaders_depth_of_field_filter_get(void) { - int index = use_alpha ? 1 : 0; - if (e_data.dof_scatter_sh[index] == NULL) { - e_data.dof_scatter_sh[index] = DRW_shader_create_with_shaderlib(datatoc_effect_dof_vert_glsl, - NULL, - datatoc_effect_dof_frag_glsl, - e_data.lib, - use_alpha ? - "#define USE_ALPHA_DOF\n" - "#define STEP_SCATTER\n" : - "#define STEP_SCATTER\n"); + if (e_data.dof_filter_sh == NULL) { + e_data.dof_filter_sh = DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_filter_frag_glsl, e_data.lib, DOF_SHADER_DEFINES); } - return e_data.dof_scatter_sh[index]; + return e_data.dof_filter_sh; } -GPUShader *EEVEE_shaders_depth_of_field_resolve_get(bool use_alpha) +GPUShader *EEVEE_shaders_depth_of_field_scatter_get(bool b_is_foreground, bool b_use_bokeh_tx) { - int index = use_alpha ? 1 : 0; - if (e_data.dof_resolve_sh[index] == NULL) { - e_data.dof_resolve_sh[index] = DRW_shader_create_fullscreen_with_shaderlib( - datatoc_effect_dof_frag_glsl, + int is_foreground = b_is_foreground; + int use_bokeh_tx = b_use_bokeh_tx; + if (e_data.dof_scatter_sh[is_foreground][use_bokeh_tx] == NULL) { + DynStr *ds = BLI_dynstr_new(); + + BLI_dynstr_append(ds, DOF_SHADER_DEFINES); + BLI_dynstr_append( + ds, (is_foreground) ? "#define DOF_FOREGROUND_PASS\n" : "#define DOF_BACKGROUND_PASS\n"); + + if (use_bokeh_tx) { + BLI_dynstr_append(ds, "#define DOF_BOKEH_TEXTURE\n"); + } + + char *define = BLI_dynstr_get_cstring(ds); + BLI_dynstr_free(ds); + + e_data.dof_scatter_sh[is_foreground][use_bokeh_tx] = DRW_shader_create_with_shaderlib( + datatoc_effect_dof_scatter_vert_glsl, + NULL, + datatoc_effect_dof_scatter_frag_glsl, e_data.lib, - use_alpha ? 
"#define USE_ALPHA_DOF\n" - "#define STEP_RESOLVE\n" : - "#define STEP_RESOLVE\n"); + define); + + MEM_freeN(define); + } + return e_data.dof_scatter_sh[is_foreground][use_bokeh_tx]; +} + +GPUShader *EEVEE_shaders_depth_of_field_resolve_get(bool b_use_bokeh_tx, bool b_use_hq_gather) +{ + int use_hq_gather = b_use_hq_gather; + int use_bokeh_tx = b_use_bokeh_tx; + if (e_data.dof_resolve_sh[use_bokeh_tx][use_hq_gather] == NULL) { + DynStr *ds = BLI_dynstr_new(); + + BLI_dynstr_append(ds, DOF_SHADER_DEFINES); + BLI_dynstr_append(ds, "#define DOF_RESOLVE_PASS\n"); + + if (use_bokeh_tx) { + BLI_dynstr_append(ds, "#define DOF_BOKEH_TEXTURE\n"); + } + + BLI_dynstr_appendf(ds, "#define DOF_SLIGHT_FOCUS_DENSITY %d\n", use_hq_gather ? 4 : 2); + + char *define = BLI_dynstr_get_cstring(ds); + BLI_dynstr_free(ds); + + e_data.dof_resolve_sh[use_bokeh_tx][use_hq_gather] = + DRW_shader_create_fullscreen_with_shaderlib( + datatoc_effect_dof_resolve_frag_glsl, e_data.lib, define); + + MEM_freeN(define); } - return e_data.dof_resolve_sh[index]; + return e_data.dof_resolve_sh[use_bokeh_tx][use_hq_gather]; } /* \} */ @@ -1458,6 +1600,27 @@ void EEVEE_shaders_free(void) DRW_SHADER_FREE_SAFE(e_data.velocity_resolve_sh); DRW_SHADER_FREE_SAFE(e_data.taa_resolve_sh); DRW_SHADER_FREE_SAFE(e_data.taa_resolve_reproject_sh); + DRW_SHADER_FREE_SAFE(e_data.dof_bokeh_sh); + DRW_SHADER_FREE_SAFE(e_data.dof_setup_sh); + DRW_SHADER_FREE_SAFE(e_data.dof_flatten_tiles_sh); + DRW_SHADER_FREE_SAFE(e_data.dof_dilate_tiles_sh[0]); + DRW_SHADER_FREE_SAFE(e_data.dof_dilate_tiles_sh[1]); + DRW_SHADER_FREE_SAFE(e_data.dof_downsample_sh); + DRW_SHADER_FREE_SAFE(e_data.dof_reduce_sh[0]); + DRW_SHADER_FREE_SAFE(e_data.dof_reduce_sh[1]); + for (int i = 0; i < DOF_GATHER_MAX_PASS; i++) { + DRW_SHADER_FREE_SAFE(e_data.dof_gather_sh[i][0]); + DRW_SHADER_FREE_SAFE(e_data.dof_gather_sh[i][1]); + } + DRW_SHADER_FREE_SAFE(e_data.dof_filter_sh); + DRW_SHADER_FREE_SAFE(e_data.dof_scatter_sh[0][0]); + 
DRW_SHADER_FREE_SAFE(e_data.dof_scatter_sh[0][1]); + DRW_SHADER_FREE_SAFE(e_data.dof_scatter_sh[1][0]); + DRW_SHADER_FREE_SAFE(e_data.dof_scatter_sh[1][1]); + DRW_SHADER_FREE_SAFE(e_data.dof_resolve_sh[0][0]); + DRW_SHADER_FREE_SAFE(e_data.dof_resolve_sh[0][1]); + DRW_SHADER_FREE_SAFE(e_data.dof_resolve_sh[1][0]); + DRW_SHADER_FREE_SAFE(e_data.dof_resolve_sh[1][1]); DRW_SHADER_FREE_SAFE(e_data.cryptomatte_sh[0]); DRW_SHADER_FREE_SAFE(e_data.cryptomatte_sh[1]); for (int i = 0; i < 2; i++) { @@ -1465,9 +1628,6 @@ void EEVEE_shaders_free(void) DRW_SHADER_FREE_SAFE(e_data.bloom_downsample_sh[i]); DRW_SHADER_FREE_SAFE(e_data.bloom_upsample_sh[i]); DRW_SHADER_FREE_SAFE(e_data.bloom_resolve_sh[i]); - DRW_SHADER_FREE_SAFE(e_data.dof_downsample_sh[i]); - DRW_SHADER_FREE_SAFE(e_data.dof_scatter_sh[i]); - DRW_SHADER_FREE_SAFE(e_data.dof_resolve_sh[i]); } for (int i = 0; i < SSR_MAX_SHADER; i++) { DRW_SHADER_FREE_SAFE(e_data.ssr_sh[i]); diff --git a/source/blender/draw/engines/eevee/eevee_temporal_sampling.c b/source/blender/draw/engines/eevee/eevee_temporal_sampling.c index 456efeca0f0..8fa7d899b6b 100644 --- a/source/blender/draw/engines/eevee/eevee_temporal_sampling.c +++ b/source/blender/draw/engines/eevee/eevee_temporal_sampling.c @@ -142,16 +142,52 @@ void EEVEE_temporal_sampling_matrices_calc(EEVEE_EffectsInfo *effects, const dou Scene *scene = draw_ctx->scene; RenderData *rd = &scene->r; - float persmat[4][4], viewmat[4][4], winmat[4][4]; + float persmat[4][4], viewmat[4][4], winmat[4][4], wininv[4][4]; DRW_view_persmat_get(NULL, persmat, false); DRW_view_viewmat_get(NULL, viewmat, false); DRW_view_winmat_get(NULL, winmat, false); + DRW_view_winmat_get(NULL, wininv, true); float ofs[2]; EEVEE_temporal_sampling_offset_calc(ht_point, rd->gauss, ofs); window_translate_m4(winmat, persmat, ofs[0] / viewport_size[0], ofs[1] / viewport_size[1]); + /* Jitter is in pixel space. Focus distance in world space units. 
*/ + float dof_jitter[2], focus_distance; + if (EEVEE_depth_of_field_jitter_get(effects, dof_jitter, &focus_distance)) { + /* Convert to NDC space [-1..1]. */ + dof_jitter[0] /= viewport_size[0] * 0.5f; + dof_jitter[1] /= viewport_size[1] * 0.5f; + + /* Skew the projection matrix in the ray direction and offset it to ray origin. + * Make it focus at focus_distance. */ + if (winmat[2][3] != -1.0f) { + /* Orthographic */ + add_v2_v2(winmat[2], dof_jitter); + + window_translate_m4( + winmat, persmat, dof_jitter[0] * focus_distance, dof_jitter[1] * focus_distance); + } + else { + /* Get focus distance in NDC. */ + float focus_pt[3] = {0.0f, 0.0f, -focus_distance}; + mul_project_m4_v3(winmat, focus_pt); + /* Get pixel footprint in viewspace. */ + float jitter_scaled[3] = {dof_jitter[0], dof_jitter[1], focus_pt[2]}; + float center[3] = {0.0f, 0.0f, focus_pt[2]}; + mul_project_m4_v3(wininv, jitter_scaled); + mul_project_m4_v3(wininv, center); + + /* FIXME(fclem) The offset is noticeably large and the culling might make objects pop out + * of the blurring radius. To fix this, use custom enlarged culling matrix. */ + sub_v2_v2v2(jitter_scaled, jitter_scaled, center); + add_v2_v2(viewmat[3], jitter_scaled); + + window_translate_m4(winmat, persmat, dof_jitter[0], dof_jitter[1]); + } + } + BLI_assert(effects->taa_view != NULL); /* When rendering just update the view. This avoids recomputing the culling. */ @@ -194,6 +230,21 @@ void EEVEE_temporal_sampling_create_view(EEVEE_Data *vedata) DRW_view_clip_planes_set(effects->taa_view, NULL, 0); } +int EEVEE_temporal_sampling_sample_count_get(const Scene *scene, const EEVEE_StorageList *stl) +{ + const bool is_render = DRW_state_is_image_render(); + int sample_count = is_render ? scene->eevee.taa_render_samples : scene->eevee.taa_samples; + int timesteps = is_render ? stl->g_data->render_timesteps : 1; + + sample_count = max_ii(0, sample_count); + sample_count = (sample_count == 0) ? 
TAA_MAX_SAMPLE : sample_count; + sample_count = divide_ceil_u(sample_count, timesteps); + + int dof_sample_count = EEVEE_depth_of_field_sample_count_get(stl->effects, sample_count, NULL); + sample_count = dof_sample_count * divide_ceil_u(sample_count, dof_sample_count); + return sample_count; +} + int EEVEE_temporal_sampling_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata) { EEVEE_StorageList *stl = vedata->stl; @@ -238,10 +289,12 @@ int EEVEE_temporal_sampling_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data view_is_valid = view_is_valid && (ED_screen_animation_no_scrub(wm) == NULL); } - const bool first_sample_only = EEVEE_renderpasses_only_first_sample_pass_active(vedata); - view_is_valid = view_is_valid && !first_sample_only; - effects->taa_total_sample = first_sample_only ? 1 : scene_eval->eevee.taa_samples; - MAX2(effects->taa_total_sample, 0); + effects->taa_total_sample = EEVEE_temporal_sampling_sample_count_get(scene_eval, stl); + + if (EEVEE_renderpasses_only_first_sample_pass_active(vedata)) { + view_is_valid = false; + effects->taa_total_sample = 1; + } /* Motion blur steps could reset the sampling when camera is animated (see T79970). */ if (!DRW_state_is_scene_render()) { diff --git a/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl b/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl index 95a585f0d9c..427657b19b7 100644 --- a/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl @@ -12,6 +12,12 @@ uniform sampler2DArray utilTex; #define LUT_SIZE 64 +/** + * Reminder: The 4 noise values are based on 3 uncorrelated blue noises: + * x : Uniformly distributed value [0..1] (noise 1). + * y : Uniformly distributed value [0..1] (noise 2). + * z,w : Uniformly distributed point on the unit circle [-1..1] (noise 3). 
+ **/ #define texelfetch_noise_tex(coord) texelFetch(utilTex, ivec3(ivec2(coord) % LUT_SIZE, 2.0), 0) /* Return texture coordinates to sample Surface LUT */ diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_bokeh_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_bokeh_frag.glsl new file mode 100644 index 00000000000..40e4c5a313e --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_bokeh_frag.glsl @@ -0,0 +1,101 @@ + +/** + * Bokeh Look Up Table: This outputs a radius multiplier to shape the sampling in gather pass or + * the scatter sprite appearance. This is only used if bokeh shape is either anamorphic or is not + * a perfect circle. + * We correct samples spacing for polygonal bokeh shapes. However, we do not for anamorphic bokeh + * as it is way more complex and expensive to do. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +uniform float bokehSides; +uniform float bokehRotation; +uniform vec2 bokehAnisotropyInv; + +in vec4 uvcoordsvar; + +layout(location = 0) out vec2 outGatherLut; +layout(location = 1) out float outScatterLut; +layout(location = 2) out float outResolveLut; + +float polygon_sides_length(float sides_count) +{ + return 2.0 * sin(M_PI / sides_count); +} + +/* Returns intersection ratio between the radius edge at theta and the polygon edge. + * Start first corners at theta == 0. */ +float circle_to_polygon_radius(float sides_count, float theta) +{ + /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide 36). */ + float side_angle = M_2PI / sides_count; + float halfside_angle = side_angle * 0.5; + return cos(side_angle * 0.5) / + cos(theta - side_angle * floor((sides_count * theta + M_PI) / M_2PI)); +} + +/* Remap input angle to have homogenous spacing of points along a polygon edge. + * Expect theta to be in [0..2pi] range. 
*/ +float circle_to_polygon_angle(float sides_count, float theta) +{ + float side_angle = M_2PI / sides_count; + float halfside_angle = side_angle * 0.5; + float side = floor(theta / side_angle); + /* Length of segment from center to the middle of polygon side. */ + float adjacent = circle_to_polygon_radius(sides_count, 0.0); + + /* This is the relative position of the sample on the polygon half side. */ + float local_theta = theta - side * side_angle; + float ratio = (local_theta - halfside_angle) / halfside_angle; + + float halfside_len = polygon_sides_length(sides_count) * 0.5; + float oposite = ratio * halfside_len; + + /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */ + float final_local_theta = atan(oposite / adjacent); + + return side * side_angle + final_local_theta; +} + +void main() +{ + /* Center uv in range [-1..1]. */ + vec2 uv = uvcoordsvar.xy * 2.0 - 1.0; + + float radius = length(uv); + + vec2 texel = floor(gl_FragCoord.xy) - float(DOF_MAX_SLIGHT_FOCUS_RADIUS); + + if (bokehSides > 0.0) { + /* NOTE: atan(y,x) has output range [-M_PI..M_PI], so add 2pi to avoid negative angles. */ + float theta = atan(uv.y, uv.x) + M_2PI; + float r = length(uv); + + radius /= circle_to_polygon_radius(bokehSides, theta - bokehRotation); + + float theta_new = circle_to_polygon_angle(bokehSides, theta); + float r_new = circle_to_polygon_radius(bokehSides, theta_new); + + theta_new -= bokehRotation; + + uv = r_new * vec2(-cos(theta_new), sin(theta_new)); + + { + /* Slight focus distance */ + texel *= bokehAnisotropyInv; + float theta = atan(texel.y, -texel.x) + M_2PI; + texel /= circle_to_polygon_radius(bokehSides, theta + bokehRotation); + } + } + else { + uv *= safe_rcp(length(uv)); + } + + /* For gather store the normalized UV. */ + outGatherLut = uv; + /* For scatter store distance. */ + outScatterLut = radius; + /* For slight focus gather store pixel perfect distance. 
*/ + outResolveLut = length(texel); +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl new file mode 100644 index 00000000000..d52a9e6757f --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_dilate_tiles_frag.glsl @@ -0,0 +1,117 @@ + +/** + * Tile dilate pass: Takes the 8x8 Tiles buffer and dilates the tiles with large CoC to + * their neighborhood. This pass is repeated multiple times until the maximum CoC can be covered. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +/* 1/16th of fullres. */ +uniform sampler2D cocTilesFgBuffer; +uniform sampler2D cocTilesBgBuffer; + +uniform int ringCount; +uniform int ringWidthMultiplier; +uniform bool dilateSlightFocus; + +/* 1/16th of fullres. Same format as input. */ +layout(location = 0) out vec4 outFgCoc; +layout(location = 1) out vec3 outBgCoc; + +const float tile_to_fullres_factor = float(DOF_TILE_DIVISOR); + +/* Error introduced by the random offset of the gathering kernel's center. */ +const float bluring_radius_error = 1.0 + 1.0 / (gather_ring_count + 0.5); + +void main() +{ + ivec2 center_tile_pos = ivec2(gl_FragCoord.xy); + + CocTile ring_buckets[DOF_DILATE_RING_COUNT]; + + for (int ring = 0; ring < ringCount && ring < DOF_DILATE_RING_COUNT; ring++) { + ring_buckets[ring] = dof_coc_tile_init(); + + int ring_distance = ring + 1; + for (int sample_id = 0; sample_id < 4 * ring_distance; sample_id++) { + ivec2 offset = dof_square_ring_sample_offset(ring_distance, sample_id); + + offset *= ringWidthMultiplier; + + for (int i = 0; i < 2; i++) { + ivec2 adj_tile_pos = center_tile_pos + ((i == 0) ? offset : -offset); + + CocTile adj_tile = dof_coc_tile_load(cocTilesFgBuffer, cocTilesBgBuffer, adj_tile_pos); + +#ifdef DILATE_MODE_MIN_MAX + /* Actually gather the "absolute" biggest coc but keeping the sign. 
*/ + ring_buckets[ring].fg_min_coc = min(ring_buckets[ring].fg_min_coc, adj_tile.fg_min_coc); + ring_buckets[ring].bg_max_coc = max(ring_buckets[ring].bg_max_coc, adj_tile.bg_max_coc); + + if (dilateSlightFocus) { + ring_buckets[ring].fg_slight_focus_max_coc = dof_coc_max_slight_focus( + ring_buckets[ring].fg_slight_focus_max_coc, adj_tile.fg_slight_focus_max_coc); + } + +#else /* DILATE_MODE_MIN_ABS */ + ring_buckets[ring].fg_max_coc = max(ring_buckets[ring].fg_max_coc, adj_tile.fg_max_coc); + ring_buckets[ring].bg_min_coc = min(ring_buckets[ring].bg_min_coc, adj_tile.bg_min_coc); + + /* Should be tight as possible to reduce gather overhead (see slide 61). */ + float closest_neighbor_distance = length(max(abs(vec2(offset)) - 1.0, 0.0)) * + tile_to_fullres_factor; + + ring_buckets[ring].fg_max_intersectable_coc = max( + ring_buckets[ring].fg_max_intersectable_coc, + adj_tile.fg_max_intersectable_coc + closest_neighbor_distance); + ring_buckets[ring].bg_min_intersectable_coc = min( + ring_buckets[ring].bg_min_intersectable_coc, + adj_tile.bg_min_intersectable_coc + closest_neighbor_distance); +#endif + } + } + } + + /* Load center tile. */ + CocTile out_tile = dof_coc_tile_load(cocTilesFgBuffer, cocTilesBgBuffer, center_tile_pos); + + /* Dilate once. */ + if (dilateSlightFocus) { + out_tile.fg_slight_focus_max_coc = dof_coc_max_slight_focus( + out_tile.fg_slight_focus_max_coc, ring_buckets[0].fg_slight_focus_max_coc); + } + + for (int ring = 0; ring < ringCount && ring < DOF_DILATE_RING_COUNT; ring++) { + float ring_distance = float(ring + 1); + + ring_distance = (ring_distance * ringWidthMultiplier - 1) * tile_to_fullres_factor; + + /* NOTE(fclem): Unsure if both sides of the inequalities have the same unit. 
*/ +#ifdef DILATE_MODE_MIN_MAX + if (-ring_buckets[ring].fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_min_coc = min(out_tile.fg_min_coc, ring_buckets[ring].fg_min_coc); + } + + if (ring_buckets[ring].bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_max_coc = max(out_tile.bg_max_coc, ring_buckets[ring].bg_max_coc); + } + +#else /* DILATE_MODE_MIN_ABS */ + /* Find minimum absolute CoC radii that will be intersected for the previously + * computed maximum CoC values. */ + if (-out_tile.fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_max_coc = max(out_tile.fg_max_coc, ring_buckets[ring].fg_max_coc); + out_tile.fg_max_intersectable_coc = max(out_tile.fg_max_intersectable_coc, + ring_buckets[ring].fg_max_intersectable_coc); + } + + if (out_tile.bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_min_coc = min(out_tile.bg_min_coc, ring_buckets[ring].bg_min_coc); + out_tile.bg_min_intersectable_coc = min(out_tile.bg_min_intersectable_coc, + ring_buckets[ring].bg_min_intersectable_coc); + } +#endif + } + + dof_coc_tile_store(out_tile, outFgCoc, outBgCoc); +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl new file mode 100644 index 00000000000..c477e0f6eb8 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_downsample_frag.glsl @@ -0,0 +1,37 @@ + +/** + * Downsample pass: CoC aware downsample to quarter resolution. + * + * Pretty much identical to the setup pass but get CoC from buffer. Also does not + * weight luma for the bilateral weights. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +/* Half resolution. */ +uniform sampler2D colorBuffer; +uniform sampler2D cocBuffer; + +/* Quarter resolution. 
*/ +layout(location = 0) out vec4 outColor; + +void main() +{ + vec2 halfres_texel_size = 1.0 / vec2(textureSize(colorBuffer, 0).xy); + /* Center uv around the 4 halfres pixels. */ + vec2 quad_center = (floor(gl_FragCoord.xy) * 2.0 + 1.0) * halfres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * halfres_texel_size; + colors[i] = textureLod(colorBuffer, sample_uv, 0.0); + cocs[i] = textureLod(cocBuffer, sample_uv, 0.0).r; + } + + vec4 weights = dof_downsample_bilateral_coc_weights(cocs); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + outColor = weighted_sum_array(colors, weights); +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_filter_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_filter_frag.glsl new file mode 100644 index 00000000000..8fd8215da0f --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_filter_frag.glsl @@ -0,0 +1,93 @@ + +/** + * Gather Filter pass: Filter the gather pass result to reduce noise. + * + * This is a simple 3x3 median filter to avoid dilating highlights with a 3x3 max filter even if + * cheaper. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +uniform sampler2D colorBuffer; +uniform sampler2D weightBuffer; + +in vec4 uvcoordsvar; + +layout(location = 0) out vec4 outColor; +layout(location = 1) out float outWeight; + +/* From: + * Implementing Median Filters in XC4000E FPGAs + * JOHN L. SMITH, Univision Technologies Inc., Billerica, MA + * http://users.utcluj.ro/~baruch/resources/Image/xl23_16.pdf + * Figure 1 */ + +/* Outputs low median and high value of a triple. */ +void lmh(vec4 s1, vec4 s2, vec4 s3, out vec4 l, out vec4 m, out vec4 h) +{ + /* From diagram, with nodes numbered from top to bottom. 
*/ + vec4 h1 = max(s2, s3); + vec4 l1 = min(s2, s3); + + vec4 h2 = max(s1, l1); + vec4 l2 = min(s1, l1); + + vec4 h3 = max(h2, h1); + vec4 l3 = min(h2, h1); + + l = l2; + m = l3; + h = h3; +} + +vec4 median_filter(sampler2D tex, vec2 uv) +{ + vec2 texel_size = 1.0 / vec2(textureSize(tex, 0).xy); + vec4 samples[9]; + int s = 0; + + const vec2 ofs[9] = vec2[9](vec2(-1, -1), + vec2(0, -1), + vec2(1, -1), + vec2(-1, 0), + vec2(0, 0), + vec2(1, 0), + vec2(-1, 1), + vec2(0, 1), + vec2(1, 1)); + + for (int s = 0; s < 9; s++) { + samples[s] = textureLod(tex, uv + ofs[s] * texel_size, 0.0); + } + + if (no_gather_filtering) { + return samples[4]; + } + + for (int s = 0; s < 9; s += 3) { + lmh(samples[s], samples[s + 1], samples[s + 2], samples[s], samples[s + 1], samples[s + 2]); + } + /* Some aliases to better understand what's happening. */ + vec4 L123 = samples[0 + 0], L456 = samples[3 + 0], L789 = samples[6 + 0]; + vec4 M123 = samples[0 + 1], M456 = samples[3 + 1], M789 = samples[6 + 1]; + vec4 H123 = samples[0 + 2], H456 = samples[3 + 2], H789 = samples[6 + 2]; + vec4 dummy, l, m, h; + /* Left nodes. */ + h = max(max(L123, L456), L789); + /* Right nodes. */ + l = min(min(H123, H456), H789); + /* Center nodes. */ + lmh(M123, M456, M789, dummy, m, dummy); + /* Last bottom nodes. */ + lmh(l, m, h, dummy, m, dummy); + + return m; +} + +void main() +{ + /* OPTI(fclem) Could early return on some tiles. */ + + outColor = median_filter(colorBuffer, uvcoordsvar.xy); + outWeight = median_filter(weightBuffer, uvcoordsvar.xy).r; +}
\ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl new file mode 100644 index 00000000000..bd81171c759 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_flatten_tiles_frag.glsl @@ -0,0 +1,57 @@ + +/** + * Tile flatten pass: Takes the halfres CoC buffer and converts it to 8x8 tiles. + * + * Output min and max values for each tile and for both foreground & background. + * Also outputs min intersectable CoC for the background, which is the minimum CoC + * that comes from the background pixels. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +/* Half resolution. */ +uniform sampler2D halfResCocBuffer; + +/* 1/8th of halfResCocBuffer resolution. So 1/16th of fullres. */ +layout(location = 0) out vec4 outFgCoc; +layout(location = 1) out vec3 outBgCoc; + +const int halfres_tile_divisor = DOF_TILE_DIVISOR / 2; + +void main() +{ + ivec2 halfres_bounds = textureSize(halfResCocBuffer, 0).xy - 1; + ivec2 tile_co = ivec2(gl_FragCoord.xy); + + CocTile tile = dof_coc_tile_init(); + + for (int x = 0; x < halfres_tile_divisor; x++) { + /* OPTI: Could be done in separate passes. 
*/ + for (int y = 0; y < halfres_tile_divisor; y++) { + ivec2 sample_texel = tile_co * halfres_tile_divisor + ivec2(x, y); + vec2 sample_data = texelFetch(halfResCocBuffer, min(sample_texel, halfres_bounds), 0).rg; + float sample_coc = sample_data.x; + float sample_slight_focus_coc = sample_data.y; + + float fg_coc = min(sample_coc, 0.0); + tile.fg_min_coc = min(tile.fg_min_coc, fg_coc); + tile.fg_max_coc = max(tile.fg_max_coc, fg_coc); + + float bg_coc = max(sample_coc, 0.0); + tile.bg_min_coc = min(tile.bg_min_coc, bg_coc); + tile.bg_max_coc = max(tile.bg_max_coc, bg_coc); + + if (sample_coc > 0.0) { + tile.bg_min_intersectable_coc = min(tile.bg_min_intersectable_coc, bg_coc); + } + if (sample_coc < 0.0) { + tile.fg_max_intersectable_coc = max(tile.fg_max_intersectable_coc, fg_coc); + } + + tile.fg_slight_focus_max_coc = dof_coc_max_slight_focus(tile.fg_slight_focus_max_coc, + sample_slight_focus_coc); + } + } + + dof_coc_tile_store(tile, outFgCoc, outBgCoc); +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_frag.glsl deleted file mode 100644 index 9c1ca17f87c..00000000000 --- a/source/blender/draw/engines/eevee/shaders/effect_dof_frag.glsl +++ /dev/null @@ -1,254 +0,0 @@ - - -#pragma BLENDER_REQUIRE(common_view_lib.glsl) -#pragma BLENDER_REQUIRE(common_math_lib.glsl) - -uniform sampler2D colorBuffer; -uniform sampler2D depthBuffer; - -uniform vec2 dofParams; -uniform bool unpremult; - -#define dof_mul dofParams.x /* distance * aperturesize * invsensorsize */ -#define dof_bias dofParams.y /* aperturesize * invsensorsize */ - -uniform vec4 bokehParams[2]; - -#define bokeh_rotation bokehParams[0].x -#define bokeh_ratio bokehParams[0].y -#define bokeh_maxsize bokehParams[0].z -#define bokeh_sides \ - bokehParams[1] /* Polygon Bokeh shape number of sides (with precomputed vars) */ - -uniform vec2 nearFar; /* Near & far view depths values */ - -/* -------------- Utils ------------- */ 
- -/* divide by sensor size to get the normalized size */ -#define calculate_coc(zdepth) (dof_mul / zdepth - dof_bias) - -#define linear_depth(z) \ - ((ProjectionMatrix[3][3] == 0.0) ? \ - (nearFar.x * nearFar.y) / (z * (nearFar.x - nearFar.y) + nearFar.y) : \ - z * (nearFar.y - nearFar.x) + nearFar.x) /* Only true for camera view! */ - -#define weighted_sum(a, b, c, d, e) \ - (a * e.x + b * e.y + c * e.z + d * e.w) / max(1e-6, dot(e, vec4(1.0))); - -vec4 safe_color(vec4 c) -{ - /* Clamp to avoid black square artifacts if a pixel goes NaN. */ - return clamp(c, vec4(0.0), vec4(1e20)); /* 1e20 arbitrary. */ -} - -#define THRESHOLD 1.0 - -#ifdef STEP_DOWNSAMPLE - -layout(location = 0) out vec4 nearColor; -layout(location = 1) out vec4 farColor; -layout(location = 2) out vec2 cocData; - -/* Downsample the color buffer to half resolution. - * Weight color samples by - * Compute maximum CoC for near and far blur. */ -void main(void) -{ - ivec4 uvs = ivec4(gl_FragCoord.xyxy) * 2 + ivec4(0, 0, 1, 1); - - /* custom downsampling */ - vec4 color1 = safe_color(texelFetch(colorBuffer, uvs.xy, 0)); - vec4 color2 = safe_color(texelFetch(colorBuffer, uvs.zw, 0)); - vec4 color3 = safe_color(texelFetch(colorBuffer, uvs.zy, 0)); - vec4 color4 = safe_color(texelFetch(colorBuffer, uvs.xw, 0)); - - /* Leverage SIMD by combining 4 depth samples into a vec4 */ - vec4 depth; - depth.r = texelFetch(depthBuffer, uvs.xy, 0).r; - depth.g = texelFetch(depthBuffer, uvs.zw, 0).r; - depth.b = texelFetch(depthBuffer, uvs.zy, 0).r; - depth.a = texelFetch(depthBuffer, uvs.xw, 0).r; - - vec4 zdepth = linear_depth(depth); - - /* Compute signed CoC for each depth samples */ - vec4 coc_near = calculate_coc(zdepth); - vec4 coc_far = -coc_near; - - cocData.x = max(max_v4(coc_near), 0.0); - cocData.y = max(max_v4(coc_far), 0.0); - - /* now we need to write the near-far fields premultiplied by the coc - * also use bilateral weighting by each coc values to avoid bleeding. 
*/ - vec4 near_weights = step(THRESHOLD, coc_near) * clamp(1.0 - abs(cocData.x - coc_near), 0.0, 1.0); - vec4 far_weights = step(THRESHOLD, coc_far) * clamp(1.0 - abs(cocData.y - coc_far), 0.0, 1.0); - -# ifdef USE_ALPHA_DOF - /* Premult */ - color1.rgb *= color1.a; - color2.rgb *= color2.a; - color3.rgb *= color3.a; - color4.rgb *= color4.a; -# endif - - /* now write output to weighted buffers. */ - nearColor = weighted_sum(color1, color2, color3, color4, near_weights); - farColor = weighted_sum(color1, color2, color3, color4, far_weights); -} - -#elif defined(STEP_SCATTER) - -flat in vec4 color; -flat in float weight; -flat in float smoothFac; -flat in ivec2 edge; -/* coordinate used for calculating radius */ -in vec2 particlecoord; - -layout(location = 0) out vec4 fragColor; -# ifdef USE_ALPHA_DOF -layout(location = 1) out float fragAlpha; -# endif - -/* accumulate color in the near/far blur buffers */ -void main(void) -{ - /* Discard to avoid bleeding onto the next layer */ - if (int(gl_FragCoord.x) * edge.x + edge.y > 0) { - discard; - } - - /* Circle Dof */ - float dist = length(particlecoord); - - /* Outside of bokeh shape */ - if (dist > 1.0) { - discard; - } - - /* Regular Polygon Dof */ - if (bokeh_sides.x > 0.0) { - /* Circle parametrization */ - float theta = atan(particlecoord.y, particlecoord.x) + bokeh_rotation; - - /* Optimized version of : - * float denom = theta - (M_2PI / bokeh_sides) * floor((bokeh_sides * theta + M_PI) / M_2PI); - * float r = cos(M_PI / bokeh_sides) / cos(denom); */ - float denom = theta - bokeh_sides.y * floor(bokeh_sides.z * theta + 0.5); - float r = bokeh_sides.w / cos(denom); - - /* Divide circle radial coord by the shape radius for angle theta. - * Giving us the new linear radius to the shape edge. */ - dist /= r; - - /* Outside of bokeh shape */ - if (dist > 1.0) { - discard; - } - } - - fragColor = color; - - /* Smooth the edges a bit. 
This effectively reduce the bokeh shape - * but does fade out the undersampling artifacts. */ - float shape = smoothstep(1.0, min(0.999, smoothFac), dist); - - fragColor *= shape; - -# ifdef USE_ALPHA_DOF - fragAlpha = fragColor.a; - fragColor.a = weight * shape; -# endif -} - -#elif defined(STEP_RESOLVE) - -# define MERGE_THRESHOLD 4.0 - -uniform sampler2D scatterBuffer; -uniform sampler2D scatterAlphaBuffer; - -in vec4 uvcoordsvar; -out vec4 fragColor; - -vec4 upsample_filter(sampler2D tex, vec2 uv, vec2 texelSize) -{ - /* TODO FIXME: Clamp the sample position - * depending on the layer to avoid bleeding. - * This is not really noticeable so leaving it as is for now. */ - -# if 1 /* 9-tap bilinear upsampler (tent filter) */ - vec4 d = texelSize.xyxy * vec4(1, 1, -1, 0); - - vec4 s; - s = textureLod(tex, uv - d.xy, 0.0); - s += textureLod(tex, uv - d.wy, 0.0) * 2; - s += textureLod(tex, uv - d.zy, 0.0); - - s += textureLod(tex, uv + d.zw, 0.0) * 2; - s += textureLod(tex, uv, 0.0) * 4; - s += textureLod(tex, uv + d.xw, 0.0) * 2; - - s += textureLod(tex, uv + d.zy, 0.0); - s += textureLod(tex, uv + d.wy, 0.0) * 2; - s += textureLod(tex, uv + d.xy, 0.0); - - return s * (1.0 / 16.0); -# else - /* 4-tap bilinear upsampler */ - vec4 d = texelSize.xyxy * vec4(-1, -1, +1, +1) * 0.5; - - vec4 s; - s = textureLod(tex, uv + d.xy, 0.0); - s += textureLod(tex, uv + d.zy, 0.0); - s += textureLod(tex, uv + d.xw, 0.0); - s += textureLod(tex, uv + d.zw, 0.0); - - return s * (1.0 / 4.0); -# endif -} - -/* Combine the Far and Near color buffers */ -void main(void) -{ - vec2 uv = uvcoordsvar.xy; - /* Recompute Near / Far CoC per pixel */ - float depth = textureLod(depthBuffer, uv, 0.0).r; - float zdepth = linear_depth(depth); - float coc_signed = calculate_coc(zdepth); - float coc_far = max(-coc_signed, 0.0); - float coc_near = max(coc_signed, 0.0); - - vec4 focus_col = textureLod(colorBuffer, uv, 0.0); - - vec2 texelSize = vec2(0.5, 1.0) / vec2(textureSize(scatterBuffer, 0)); - vec2 
near_uv = uv * vec2(0.5, 1.0); - vec2 far_uv = near_uv + vec2(0.5, 0.0); - vec4 near_col = upsample_filter(scatterBuffer, near_uv, texelSize); - vec4 far_col = upsample_filter(scatterBuffer, far_uv, texelSize); - - float far_w = far_col.a; - float near_w = near_col.a; - float focus_w = 1.0 - smoothstep(1.0, MERGE_THRESHOLD, abs(coc_signed)); - float inv_weight_sum = 1.0 / (near_w + focus_w + far_w); - - focus_col *= focus_w; /* Premul */ - -# ifdef USE_ALPHA_DOF - near_col.a = upsample_filter(scatterAlphaBuffer, near_uv, texelSize).r; - far_col.a = upsample_filter(scatterAlphaBuffer, far_uv, texelSize).r; -# endif - - fragColor = (far_col + near_col + focus_col) * inv_weight_sum; - -# ifdef USE_ALPHA_DOF - /* Sigh... viewport expect premult output but - * the final render output needs to be with - * associated alpha. */ - if (unpremult) { - fragColor.rgb /= (fragColor.a > 0.0) ? fragColor.a : 1.0; - } -# endif -} - -#endif diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl new file mode 100644 index 00000000000..79c95b6d433 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_gather_frag.glsl @@ -0,0 +1,293 @@ + +/** + * Gather pass: Convolve foreground and background parts in separate passes. + * + * Using the min&max CoC tile buffer, we select the most appropriate method to blur the scene color. + * A fast gather path is taken if there is not much CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + **/ + +#pragma BLENDER_REQUIRE(common_utiltex_lib.glsl) +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +/* Mipmapped input buffers, halfres but with padding to ensure mipmap alignment. */ +uniform sampler2D colorBuffer; +uniform sampler2D cocBuffer; + +/* Same input buffer but with a bilinear sampler object. 
*/ +uniform sampler2D colorBufferBilinear; + +/* CoC Min&Max tile buffer at 1/16th of fullres. */ +uniform sampler2D cocTilesFgBuffer; +uniform sampler2D cocTilesBgBuffer; + +uniform sampler2D bokehLut; + +/* Used to correct the padding in the color and CoC buffers. */ +uniform vec2 gatherInputUvCorrection; + +uniform vec2 gatherOutputTexelSize; + +uniform vec2 bokehAnisotropy; + +layout(location = 0) out vec4 outColor; +layout(location = 1) out float outWeight; +#ifndef DOF_HOLEFILL_PASS +layout(location = 2) out vec2 outOcclusion; +#else + +/* Dirty global variable that isn't used. So it should get optimized out. */ +vec2 outOcclusion; +#endif + +#ifdef DOF_FOREGROUND_PASS +const bool is_foreground = true; +#else /* DOF_BACKGROUND_PASS */ +const bool is_foreground = false; +#endif + +const float unit_ring_radius = 1.0 / float(gather_ring_count); +const float unit_sample_radius = 1.0 / float(gather_ring_count + 0.5); +const float large_kernel_radius = 0.5 + float(gather_ring_count); +const float smaller_kernel_radius = 0.5 + float(gather_ring_count - gather_density_change_ring); +/* NOTE(fclem) the bias is reducing issues with density change visible transition. */ +const float radius_downscale_factor = smaller_kernel_radius / large_kernel_radius; +const int change_density_at_ring = (gather_ring_count - gather_density_change_ring + 1); +const float coc_radius_error = 2.0; + +/* Radii needs to be halfres CoC sizes. */ +bool dof_do_density_change(float base_radius, float min_intersectable_radius) +{ + /* Reduce artifact for very large blur. 
*/ + min_intersectable_radius *= 0.1; + + bool need_new_density = (base_radius * unit_ring_radius > min_intersectable_radius); + bool larger_than_min_density = (base_radius * radius_downscale_factor > + float(gather_ring_count)); + + return need_new_density && larger_than_min_density; +} + +void dof_gather_init(float base_radius, + vec4 noise, + out vec2 center_co, + out float lod, + out float intersection_multiplier) +{ + /* Jitter center half a ring to reduce undersampling. */ + vec2 jitter_ofs = 0.499 * noise.zw * sqrt(noise.x); +#ifdef DOF_BOKEH_TEXTURE + jitter_ofs *= bokehAnisotropy; +#endif + center_co = gl_FragCoord.xy + jitter_ofs * base_radius * unit_sample_radius; + + /* TODO(fclem) Seems like the default lod selection is too big. Bias to avoid blocky moving + * out of focus shapes. */ + const float lod_bias = -2.0; + lod = max(floor(log2(base_radius * unit_sample_radius) + 0.5) + lod_bias, 0.0); + + if (no_gather_mipmaps) { + lod = 0.0; + } + /* (Slide 64). */ + intersection_multiplier = pow(0.5, lod); +} + +void dof_gather_accumulator(float base_radius, + float min_intersectable_radius, + const bool do_fast_gather, + const bool do_density_change) +{ + vec4 noise = no_gather_random ? vec4(0.0, 0.0, 0.0, 1.0) : texelfetch_noise_tex(gl_FragCoord.xy); + + if (!do_fast_gather) { + /* Jitter the radius to reduce noticeable density changes. */ + base_radius += noise.x * unit_ring_radius * base_radius; + } + else { + /* Jittering the radius more than we need means we are going to feather the bokeh shape half + * a ring. So we need to compensate for fast gather that does not check CoC intersection. */ + base_radius += (0.5 - noise.x) * 1.5 * unit_ring_radius * base_radius; + } + /* TODO(fclem) another seed? For now Cranley-Patterson rotation with golden ratio. 
*/ + noise.x = fract(noise.x + 0.61803398875); + + float lod, isect_mul; + vec2 center_co; + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + + bool first_ring = true; + + DofGatherData accum_data = GATHER_DATA_INIT; + + int density_change = 0; + for (int ring = gather_ring_count; ring > 0; ring--) { + int sample_pair_count = gather_ring_density * ring; + + float step_rot = M_PI / float(sample_pair_count); + mat2 step_rot_mat = rot2_from_angle(step_rot); + + float angle_offset = noise.y * step_rot; + vec2 offset = vec2(cos(angle_offset), sin(angle_offset)); + + float ring_radius = float(ring) * unit_sample_radius * base_radius; + + /* Slide 38. */ + float bordering_radius = ring_radius + + (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + DofGatherData ring_data = GATHER_DATA_INIT; + for (int sample_pair = 0; sample_pair < sample_pair_count; sample_pair++) { + offset = step_rot_mat * offset; + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + vec2 offset_co = ((i == 0) ? offset : -offset); +#ifdef DOF_BOKEH_TEXTURE + /* Scaling to 0.25 for speed. Improves texture cache hit. */ + offset_co = texture(bokehLut, offset_co * 0.25 + 0.5).rg; + offset_co *= bokehAnisotropy; +#endif + vec2 sample_co = center_co + offset_co * ring_radius; + vec2 sample_uv = sample_co * gatherOutputTexelSize * gatherInputUvCorrection; + if (do_fast_gather) { + pair_data[i].color = dof_load_gather_color(colorBufferBilinear, sample_uv, lod); + } + else { + pair_data[i].color = dof_load_gather_color(colorBuffer, sample_uv, lod); + } + pair_data[i].coc = dof_load_gather_coc(cocBuffer, sample_uv, lod); + pair_data[i].dist = ring_radius; + } + + dof_gather_accumulate_sample_pair(pair_data, + bordering_radius, + isect_mul, + first_ring, + do_fast_gather, + is_foreground, + ring_data, + accum_data); + } + +#ifdef DOF_FOREGROUND_PASS /* Reduce issue with closer foreground over distant foreground. 
*/ + /* TODO(fclem) this seems to not be completely correct as the issue remains. */ + float ring_area = (sqr(float(ring) + 0.5 + coc_radius_error) - + sqr(float(ring) - 0.5 + coc_radius_error)) * + sqr(base_radius * unit_sample_radius); + dof_gather_ammend_weight(ring_data, ring_area); +#endif + + dof_gather_accumulate_sample_ring( + ring_data, sample_pair_count * 2, first_ring, do_fast_gather, is_foreground, accum_data); + + first_ring = false; + + if (do_density_change && (ring == change_density_at_ring) && + (density_change < gather_max_density_change)) { + if (dof_do_density_change(base_radius, min_intersectable_radius)) { + base_radius *= radius_downscale_factor; + ring += gather_density_change_ring; + /* We need to account for the density change in the weights (slide 62). + * For that multiply old kernel data by its area divided by the new kernel area. */ + const float outer_rings_weight = 1.0 / (radius_downscale_factor * radius_downscale_factor); +#ifndef DOF_FOREGROUND_PASS /* Samples are already weighted per ring in foreground pass. */ + dof_gather_ammend_weight(accum_data, outer_rings_weight); +#endif + /* Re-init kernel position & sampling parameters. */ + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + density_change++; + } + } + } + + { + /* Center sample. */ + vec2 sample_uv = center_co * gatherOutputTexelSize * gatherInputUvCorrection; + DofGatherData center_data; + if (do_fast_gather) { + center_data.color = dof_load_gather_color(colorBufferBilinear, sample_uv, lod); + } + else { + center_data.color = dof_load_gather_color(colorBuffer, sample_uv, lod); + } + center_data.coc = dof_load_gather_coc(cocBuffer, sample_uv, lod); + center_data.dist = 0.0; + + /* Slide 38. 
*/ + float bordering_radius = (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, do_fast_gather, is_foreground, accum_data); + } + + int total_sample_count = dof_gather_total_sample_count_with_density_change( + gather_ring_count, gather_ring_density, density_change); + dof_gather_accumulate_resolve(total_sample_count, accum_data, outColor, outWeight, outOcclusion); + +#if defined(DOF_DEBUG_GATHER_PERF) + if (density_change > 0) { + float fac = saturate(float(density_change) / float(10.0)); + outColor.rgb = avg(outColor.rgb) * neon_gradient(fac); + } + if (do_fast_gather) { + outColor.rgb = avg(outColor.rgb) * vec3(0.0, 1.0, 0.0); + } +#elif defined(DOF_DEBUG_SCATTER_PERF) + outColor.rgb = avg(outColor.rgb) * vec3(0.0, 1.0, 0.0); +#endif + + /* Output premultiplied color so we can use bilinear sampler in resolve pass. */ + outColor *= outWeight; +} + +void main() +{ + ivec2 tile_co = ivec2(gl_FragCoord.xy / float(DOF_TILE_DIVISOR / 2)); + CocTile coc_tile = dof_coc_tile_load(cocTilesFgBuffer, cocTilesBgBuffer, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + +#if defined(DOF_FOREGROUND_PASS) + float base_radius = -coc_tile.fg_min_coc; + float min_radius = -coc_tile.fg_max_coc; + float min_intersectable_radius = -coc_tile.fg_max_intersectable_coc; + bool can_early_out = !prediction.do_foreground; + +#elif defined(DOF_HOLEFILL_PASS) + float base_radius = -coc_tile.fg_min_coc; + float min_radius = -coc_tile.fg_max_coc; + float min_intersectable_radius = DOF_TILE_LARGE_COC; + bool can_early_out = !prediction.do_holefill; + +#else /* DOF_BACKGROUND_PASS */ + float base_radius = coc_tile.bg_max_coc; + float min_radius = coc_tile.bg_min_coc; + float min_intersectable_radius = coc_tile.bg_min_intersectable_coc; + bool can_early_out = !prediction.do_background; +#endif + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + 
/* Gather at half resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + if (can_early_out) { + /* Early out. */ + outColor = vec4(0.0); + outWeight = 0.0; + outOcclusion = vec2(0.0, 0.0); + } + else if (do_fast_gather) { + dof_gather_accumulator(base_radius, min_intersectable_radius, true, false); + } + else if (do_density_change) { + dof_gather_accumulator(base_radius, min_intersectable_radius, false, true); + } + else { + dof_gather_accumulator(base_radius, min_intersectable_radius, false, false); + } +}
\ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl new file mode 100644 index 00000000000..88d83cd913a --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_lib.glsl @@ -0,0 +1,631 @@ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +uniform vec4 cocParams; + +#define cocMul cocParams[0] /* distance * aperturesize * invsensorsize */ +#define cocBias cocParams[1] /* aperturesize * invsensorsize */ +#define cocNear cocParams[2] /* Near view depths value. */ +#define cocFar cocParams[3] /* Far view depths value. */ + +/* -------------- Debug Defines ------------- */ + +// #define DOF_DEBUG_GATHER_PERF +// #define DOF_DEBUG_SCATTER_PERF + +const bool no_smooth_intersection = false; +const bool no_gather_occlusion = false; +const bool no_gather_mipmaps = false; +const bool no_gather_random = false; +const bool no_gather_filtering = false; +const bool no_scatter_occlusion = false; +const bool no_scatter_pass = false; +const bool no_foreground_pass = false; +const bool no_background_pass = false; +const bool no_slight_focus_pass = false; +const bool no_focus_pass = false; +const bool no_holefill_pass = false; + +/* -------------- Quality Defines ------------- */ + +#ifdef DOF_HOLEFILL_PASS +/* No need for very high density for holefill. */ +const int gather_ring_count = 3; +const int gather_ring_density = 3; +const int gather_max_density_change = 0; +const int gather_density_change_ring = 1; +#else +const int gather_ring_count = DOF_GATHER_RING_COUNT; +const int gather_ring_density = 3; +const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. 
*/ +const int gather_density_change_ring = 1; +#endif + +/* -------------- Utils ------------- */ + +const vec2 quad_offsets[4] = vec2[4]( + vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5)); + +/* Divide by sensor size to get the normalized size. */ +#define calculate_coc_persp(zdepth) (cocMul / zdepth - cocBias) +#define calculate_coc_ortho(zdepth) ((zdepth + cocMul / cocBias) * cocMul) +#define calculate_coc(z) \ + (ProjectionMatrix[3][3] == 0.0) ? calculate_coc_persp(z) : calculate_coc_ortho(z) + +/* Ortho conversion is only true for camera view! */ +#define linear_depth_persp(d) ((cocNear * cocFar) / (d * (cocNear - cocFar) + cocFar)) +#define linear_depth_ortho(d) (d * (cocNear - cocFar) + cocNear) + +#define linear_depth(d) \ + ((ProjectionMatrix[3][3] == 0.0) ? linear_depth_persp(d) : linear_depth_ortho(d)) + +#define dof_coc_from_zdepth(d) calculate_coc(linear_depth(d)) + +vec4 safe_color(vec4 c) +{ + /* Clamp to avoid black square artifacts if a pixel goes NaN. */ + return clamp(c, vec4(0.0), vec4(1e20)); /* 1e20 arbitrary. */ +} + +float dof_hdr_color_weight(vec4 color) +{ + /* From UE4. Very fast "luma" weighting. */ + float luma = (color.g * 2.0) + (color.r + color.b); + /* TODO(fclem) Pass correct exposure. */ + const float exposure = 1.0; + return 1.0 / (luma * exposure + 4.0); +} + +float dof_coc_select(vec4 cocs) +{ + /* Select biggest coc. */ + float selected_coc = cocs.x; + if (abs(cocs.y) > abs(selected_coc)) { + selected_coc = cocs.y; + } + if (abs(cocs.z) > abs(selected_coc)) { + selected_coc = cocs.z; + } + if (abs(cocs.w) > abs(selected_coc)) { + selected_coc = cocs.w; + } + return selected_coc; +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_downsample_bilateral_coc_weights(vec4 cocs) +{ + float chosen_coc = dof_coc_select(cocs); + + const float scale = 4.0; /* TODO(fclem) revisit. 
*/ + /* NOTE: The difference between the cocs should be inside an abs() function, + * but we follow the UE4 implementation to improve how dithered transparency looks (see slide 19). */ + return saturate(1.0 - (chosen_coc - cocs) * scale); +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_downsample_bilateral_color_weights(vec4 colors[4]) +{ + vec4 weights; + for (int i = 0; i < 4; i++) { + weights[i] = dof_hdr_color_weight(colors[i]); + } + return weights; +} + +/* Makes sure the load functions distribute the energy correctly + * to both scatter and gather passes. */ +vec4 dof_load_gather_color(sampler2D gather_input_color_buffer, vec2 uv, float lod) +{ + vec4 color = textureLod(gather_input_color_buffer, uv, lod); + return color; +} + +vec4 dof_load_scatter_color(sampler2D scatter_input_color_buffer, vec2 uv, float lod) +{ + vec4 color = textureLod(scatter_input_color_buffer, uv, lod); + return color; +} + +float dof_load_gather_coc(sampler2D gather_input_coc_buffer, vec2 uv, float lod) +{ + float coc = textureLod(gather_input_coc_buffer, uv, lod).r; + /* We gather at halfres. CoC must be divided by 2 to be compared against radii. */ + return coc * 0.5; +} + +/* Distribute weights between near/slightfocus/far fields (slide 117). */ +const float layer_threshold = 4.0; +/* Make sure it overlaps. */ +const float layer_offset_fg = 0.5 + 1.0; +/* Extra offset for convolution layers to avoid light leaking from background. */ +const float layer_offset = 0.5 + 0.5; + +#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5 + +float dof_layer_weight(float coc, const bool is_foreground) +{ +/* NOTE: These are fullres pixel CoC values. */ +#ifdef DOF_RESOLVE_PASS + return saturate(-abs(coc) + layer_threshold + layer_offset) * + float(is_foreground ? (coc <= 0.5) : (coc > -0.5)); +#else + coc *= 2.0; /* Account for half pixel gather. */ + float threshold = layer_threshold - ((is_foreground) ? layer_offset_fg : layer_offset); + return saturate(((is_foreground) ? 
-coc : coc) - threshold); +#endif +} +vec4 dof_layer_weight(vec4 coc) +{ + /* NOTE: Used for scatter pass which already flipped the sign correctly. */ + coc *= 2.0; /* Account for half pixel gather. */ + return saturate(coc - layer_threshold + layer_offset); +} + +/* NOTE: This is halfres CoC radius. */ +float dof_sample_weight(float coc) +{ + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(min_coc, abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +} +vec4 dof_sample_weight(vec4 coc) +{ + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(vec4(min_coc), abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +} + +/* Intersection with the center of the kernel. */ +float dof_intersection_weight(float coc, float distance_from_center, float intersection_multiplier) +{ + if (no_smooth_intersection) { + return step(0.0, (abs(coc) - distance_from_center)); + } + else { + /* (Slide 64). */ + return saturate((abs(coc) - distance_from_center) * intersection_multiplier + 0.5); + } +} + +/* Returns weight of the sample for the outer bucket (containing previous rings). */ +float dof_gather_accum_weight(float coc, float bordering_radius, bool first_ring) +{ + /* First ring has nothing to be mixed against. */ + if (first_ring) { + return 0.0; + } + return saturate(coc - bordering_radius); +} + +bool dof_do_fast_gather(float max_absolute_coc, float min_absolute_coc, const bool is_foreground) +{ + float min_weight = dof_layer_weight((is_foreground) ? -min_absolute_coc : min_absolute_coc, + is_foreground); + if (min_weight < 1.0) { + return false; + } + /* FIXME(fclem): This is a workaround to fast gather triggering too early. + * Since we use custom opacity mask, the opacity is not given to be 100% even for + * after normal threshold. 
*/ + if (is_foreground && min_absolute_coc < layer_threshold) { + return false; + } + return (max_absolute_coc - min_absolute_coc) < (DOF_FAST_GATHER_COC_ERROR * max_absolute_coc); +} + +/* ------------------- COC TILES UTILS ------------------- */ + +struct CocTile { + float fg_min_coc; + float fg_max_coc; + float fg_max_intersectable_coc; + float fg_slight_focus_max_coc; + float bg_min_coc; + float bg_max_coc; + float bg_min_intersectable_coc; +}; + +struct CocTilePrediction { + bool do_foreground; + bool do_slight_focus; + bool do_focus; + bool do_background; + bool do_holefill; +}; + +/* WATCH: Might have to change depending on the texture format. */ +#define DOF_TILE_DEFOCUS 0.25 +#define DOF_TILE_FOCUS 0.0 +#define DOF_TILE_MIXED 0.75 +#define DOF_TILE_LARGE_COC 1024.0 + +/* Init a CoC tile for reduction algorithms. */ +CocTile dof_coc_tile_init(void) +{ + CocTile tile; + tile.fg_min_coc = 0.0; + tile.fg_max_coc = -DOF_TILE_LARGE_COC; + tile.fg_max_intersectable_coc = DOF_TILE_LARGE_COC; + tile.fg_slight_focus_max_coc = -1.0; + tile.bg_min_coc = DOF_TILE_LARGE_COC; + tile.bg_max_coc = 0.0; + tile.bg_min_intersectable_coc = DOF_TILE_LARGE_COC; + return tile; +} + +CocTile dof_coc_tile_load(sampler2D fg_buffer, sampler2D bg_buffer, ivec2 tile_co) +{ + ivec2 tex_size = textureSize(fg_buffer, 0).xy; + tile_co = clamp(tile_co, ivec2(0), tex_size - 1); + + vec4 fg = texelFetch(fg_buffer, tile_co, 0); + vec3 bg = texelFetch(bg_buffer, tile_co, 0).xyz; + + CocTile tile; + tile.fg_min_coc = -fg.x; + tile.fg_max_coc = -fg.y; + tile.fg_max_intersectable_coc = -fg.z; + tile.fg_slight_focus_max_coc = fg.w; + tile.bg_min_coc = bg.x; + tile.bg_max_coc = bg.y; + tile.bg_min_intersectable_coc = bg.z; + return tile; +} + +void dof_coc_tile_store(CocTile tile, out vec4 out_fg, out vec3 out_bg) +{ + out_fg.x = -tile.fg_min_coc; + out_fg.y = -tile.fg_max_coc; + out_fg.z = -tile.fg_max_intersectable_coc; + out_fg.w = tile.fg_slight_focus_max_coc; + out_bg.x = tile.bg_min_coc; + 
out_bg.y = tile.bg_max_coc; + out_bg.z = tile.bg_min_intersectable_coc; +} + +CocTilePrediction dof_coc_tile_prediction_get(CocTile tile) +{ + /* Based on tile value, predict what pass we need to load. */ + CocTilePrediction predict; + + predict.do_foreground = (-tile.fg_min_coc > layer_threshold - layer_offset_fg); + bool fg_fully_opaque = predict.do_foreground && + dof_do_fast_gather(-tile.fg_min_coc, -tile.fg_max_coc, true); + + predict.do_slight_focus = !fg_fully_opaque && (tile.fg_slight_focus_max_coc >= 0.5); + predict.do_focus = !fg_fully_opaque && (tile.fg_slight_focus_max_coc == DOF_TILE_FOCUS); + + predict.do_background = !predict.do_focus && !fg_fully_opaque && + (tile.bg_max_coc > layer_threshold - layer_offset); + bool bg_fully_opaque = predict.do_background && + dof_do_fast_gather(-tile.bg_max_coc, tile.bg_min_coc, false); + predict.do_holefill = !predict.do_focus && !fg_fully_opaque && -tile.fg_max_coc > 0.0; + +#if 0 /* Debug */ + predict.do_foreground = predict.do_background = predict.do_holefill = true; +#endif + return predict; +} + +/* Special function to return the correct max value of 2 slight focus coc. */ +float dof_coc_max_slight_focus(float coc1, float coc2) +{ + /* Do not consider values below 0.5 for expansion as they are "encoded". + * See setup pass shader for more infos. */ + if ((coc1 == DOF_TILE_DEFOCUS && coc2 == DOF_TILE_FOCUS) || + (coc1 == DOF_TILE_FOCUS && coc2 == DOF_TILE_DEFOCUS)) { + /* Tile where completely out of focus and in focus are both present. + * Consider as very slightly out of focus. */ + return DOF_TILE_MIXED; + } + return max(coc1, coc2); +} + +/* ------------------- GATHER UTILS ------------------- */ + +struct DofGatherData { + vec4 color; + float weight; + float dist; /* TODO remove */ + /* For scatter occlusion. */ + float coc; + float coc_sqr; + /* For ring bucket merging. 
*/ + float transparency; + + float layer_opacity; +}; + +#define GATHER_DATA_INIT DofGatherData(vec4(0.0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) + +void dof_gather_ammend_weight(inout DofGatherData sample_data, float weight) +{ + sample_data.color *= weight; + sample_data.coc *= weight; + sample_data.coc_sqr *= weight; + sample_data.weight *= weight; +} + +void dof_gather_accumulate_sample(DofGatherData sample_data, + float weight, + inout DofGatherData accum_data) +{ + accum_data.color += sample_data.color * weight; + accum_data.coc += sample_data.coc * weight; + accum_data.coc_sqr += sample_data.coc * (sample_data.coc * weight); + accum_data.weight += weight; +} + +void dof_gather_accumulate_sample_pair(DofGatherData pair_data[2], + float bordering_radius, + float intersection_multiplier, + bool first_ring, + const bool do_fast_gather, + const bool is_foreground, + inout DofGatherData ring_data, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + for (int i = 0; i < 2; i++) { + dof_gather_accumulate_sample(pair_data[i], 1.0, accum_data); + accum_data.layer_opacity += 1.0; + } + return; + } + +#if 0 + const float mirroring_threshold = -layer_threshold - layer_offset; + /* TODO(fclem) Promote to parameter? dither with Noise? 
*/ + const float mirroring_min_distance = 15.0; + if (pair_data[0].coc < mirroring_threshold && + (pair_data[1].coc - mirroring_min_distance) > pair_data[0].coc) { + pair_data[1].coc = pair_data[0].coc; + } + else if (pair_data[1].coc < mirroring_threshold && + (pair_data[0].coc - mirroring_min_distance) > pair_data[1].coc) { + pair_data[0].coc = pair_data[1].coc; + } +#endif + + for (int i = 0; i < 2; i++) { + float sample_weight = dof_sample_weight(pair_data[i].coc); + float layer_weight = dof_layer_weight(pair_data[i].coc, is_foreground); + float inter_weight = dof_intersection_weight( + pair_data[i].coc, pair_data[i].dist, intersection_multiplier); + float weight = inter_weight * layer_weight * sample_weight; + + /** + * If a CoC is larger than bordering radius we accumulate it to the general accumulator. + * If not, we accumulate to the ring bucket. This is to have more consistent sample occlusion. + **/ + float accum_weight = dof_gather_accum_weight(pair_data[i].coc, bordering_radius, first_ring); + dof_gather_accumulate_sample(pair_data[i], weight * accum_weight, accum_data); + dof_gather_accumulate_sample(pair_data[i], weight * (1.0 - accum_weight), ring_data); + + accum_data.layer_opacity += layer_weight; + + if (is_foreground) { + ring_data.transparency += 1.0 - inter_weight * layer_weight; + } + else { + float coc = is_foreground ? -pair_data[i].coc : pair_data[i].coc; + ring_data.transparency += saturate(coc - bordering_radius); + } + } +} + +void dof_gather_accumulate_sample_ring(DofGatherData ring_data, + int sample_count, + bool first_ring, + const bool do_fast_gather, + /* accum_data occludes the ring_data if true. */ + const bool reversed_occlusion, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + /* Do nothing as ring_data contains nothing. All samples are already in accum_data. */ + return; + } + + if (first_ring) { + /* Layer opacity is directly accumulated into accum_data data. 
*/ + accum_data.color = ring_data.color; + accum_data.coc = ring_data.coc; + accum_data.coc_sqr = ring_data.coc_sqr; + accum_data.weight = ring_data.weight; + + accum_data.transparency = ring_data.transparency / float(sample_count); + return; + } + + if (ring_data.weight == 0.0) { + return; + } + + float ring_avg_coc = ring_data.coc / ring_data.weight; + float accum_avg_coc = accum_data.coc / accum_data.weight; + + /* Smooth test to set opacity to see if the ring average coc occludes the accumulation. + * Test is reversed to be multiplied against opacity. */ + float ring_occlu = saturate(accum_avg_coc - ring_avg_coc); + /* The bias here is arbitrary. Seems to avoid weird looking foreground in most cases. + * We might need to make it a parameter or find a relative bias. */ + float accum_occlu = saturate((ring_avg_coc - accum_avg_coc) * 0.1 - 1.0); + +#ifdef DOF_RESOLVE_PASS + ring_occlu = accum_occlu = 0.0; +#endif + + if (no_gather_occlusion) { + ring_occlu = 0.0; + accum_occlu = 0.0; + } + + /* (Slide 40) */ + float ring_opacity = saturate(1.0 - ring_data.transparency / float(sample_count)); + float accum_opacity = 1.0 - accum_data.transparency; + + if (reversed_occlusion) { + /* Accum_data occludes the ring. */ + float alpha = (accum_data.weight == 0.0) ? 0.0 : accum_opacity * accum_occlu; + float one_minus_alpha = 1.0 - alpha; + + accum_data.color += ring_data.color * one_minus_alpha; + accum_data.coc += ring_data.coc * one_minus_alpha; + accum_data.coc_sqr += ring_data.coc_sqr * one_minus_alpha; + accum_data.weight += ring_data.weight * one_minus_alpha; + + accum_data.transparency *= 1.0 - ring_opacity; + } + else { + /* Ring occludes the accum_data (Same as reference). */ + float alpha = (accum_data.weight == 0.0) ? 
1.0 : (ring_opacity * ring_occlu); + float one_minus_alpha = 1.0 - alpha; + + accum_data.color = accum_data.color * one_minus_alpha + ring_data.color; + accum_data.coc = accum_data.coc * one_minus_alpha + ring_data.coc; + accum_data.coc_sqr = accum_data.coc_sqr * one_minus_alpha + ring_data.coc_sqr; + accum_data.weight = accum_data.weight * one_minus_alpha + ring_data.weight; + } +} + +/* FIXME(fclem) Seems to be wrong since it needs ringcount+1 as input for slightfocus gather. */ +int dof_gather_total_sample_count(const int ring_count, const int ring_density) +{ + return (ring_count * ring_count - ring_count) * ring_density + 1; +} + +void dof_gather_accumulate_center_sample(DofGatherData center_data, + float bordering_radius, +#ifdef DOF_RESOLVE_PASS + int i_radius, +#endif + const bool do_fast_gather, + const bool is_foreground, + inout DofGatherData accum_data) +{ + float layer_weight = dof_layer_weight(center_data.coc, is_foreground); + float sample_weight = dof_sample_weight(center_data.coc); + float weight = layer_weight * sample_weight; + float accum_weight = dof_gather_accum_weight(center_data.coc, bordering_radius, false); + + if (do_fast_gather) { + /* Hope for the compiler to optimize the above. */ + layer_weight = 1.0; + sample_weight = 1.0; + accum_weight = 1.0; + weight = 1.0; + } + + center_data.transparency = 1.0 - weight; + + dof_gather_accumulate_sample(center_data, weight * accum_weight, accum_data); + + if (!do_fast_gather) { +#ifdef DOF_RESOLVE_PASS + /* NOTE(fclem): Hack to smooth transition to full in-focus opacity. */ + int total_sample_count = dof_gather_total_sample_count(i_radius + 1, DOF_SLIGHT_FOCUS_DENSITY); + float fac = saturate(1.0 - abs(center_data.coc) / float(layer_threshold)); + accum_data.layer_opacity += float(total_sample_count) * fac * fac; +#endif + accum_data.layer_opacity += layer_weight; + + /* Logic of dof_gather_accumulate_sample(). 
*/ + weight *= (1.0 - accum_weight); + center_data.coc_sqr = center_data.coc * (center_data.coc * weight); + center_data.color *= weight; + center_data.coc *= weight; + center_data.weight = weight; + +#ifdef DOF_FOREGROUND_PASS /* Reduce issue with closer foreground over distant foreground. */ + float ring_area = sqr(bordering_radius); + dof_gather_ammend_weight(center_data, ring_area); +#endif + + /* Accumulate center as its own ring. */ + dof_gather_accumulate_sample_ring( + center_data, 1, false, do_fast_gather, is_foreground, accum_data); + } +} + +int dof_gather_total_sample_count_with_density_change(const int ring_count, + const int ring_density, + int density_change) +{ + int sample_count_per_density_change = dof_gather_total_sample_count(ring_count, ring_density) - + dof_gather_total_sample_count( + ring_count - gather_density_change_ring, ring_density); + + return dof_gather_total_sample_count(ring_count, ring_density) + + sample_count_per_density_change * density_change; +} + +void dof_gather_accumulate_resolve(int total_sample_count, + DofGatherData accum_data, + out vec4 out_col, + out float out_weight, + out vec2 out_occlusion) +{ + float weight_inv = safe_rcp(accum_data.weight); + out_col = accum_data.color * weight_inv; + out_occlusion = vec2(abs(accum_data.coc), accum_data.coc_sqr) * weight_inv; + +#ifdef DOF_FOREGROUND_PASS + out_weight = 1.0 - accum_data.transparency; +#else + if (accum_data.weight > 0.0) { + out_weight = accum_data.layer_opacity / float(total_sample_count); + } + else { + out_weight = 0.0; + } +#endif + /* Gathering may not accumulate to 1.0 alpha because of float precision. */ + if (out_weight > 0.99) { + out_weight = 1.0; + } + else if (out_weight < 0.01) { + out_weight = 0.0; + } + /* Same thing for alpha channel. 
*/ + if (out_col.a > 0.99) { + out_col.a = 1.0; + } + else if (out_col.a < 0.01) { + out_col.a = 0.0; + } +} + +ivec2 dof_square_ring_sample_offset(int ring_distance, int sample_id) +{ + /** + * Generate samples in a square pattern with the ring radius. X is the center tile. + * + * Dist1 Dist2 + * 6 5 4 3 2 + * 3 2 1 7 1 + * . X 0 . X 0 + * . . . . . + * . . . . . + * + * Samples are expected to be mirrored to complete the pattern. + **/ + ivec2 offset; + if (sample_id < ring_distance) { + offset.x = ring_distance; + offset.y = sample_id; + } + else if (sample_id < ring_distance * 3) { + offset.x = ring_distance - sample_id + ring_distance; + offset.y = ring_distance; + } + else { + offset.x = -ring_distance; + offset.y = ring_distance - sample_id + 3 * ring_distance; + } + return offset; +}
\ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl new file mode 100644 index 00000000000..51a139ad343 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_reduce_frag.glsl @@ -0,0 +1,179 @@ + +/** + * Reduce pass: Downsample the color buffer to generate mipmaps. + * Also decide if a pixel is to be convolved by scattering or gathering during the first pass. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +/** Inputs: + * COPY_PASS: Is output of setup pass (halfres) and downsample pass (quarter res). + * REDUCE_PASS: Is previous Gather input miplvl (halfres >> miplvl). + **/ +uniform sampler2D colorBuffer; +uniform sampler2D cocBuffer; +uniform sampler2D downsampledBuffer; + +uniform vec2 bokehAnisotropy; +uniform float scatterColorThreshold; +uniform float scatterCocThreshold; +uniform float scatterColorNeighborMax; +uniform float colorNeighborClamping; + +/** Outputs: + * COPY_PASS: Gather input mip0. + * REDUCE_PASS: Is next Gather input miplvl (halfres >> miplvl). + **/ +layout(location = 0) out vec4 outColor; +layout(location = 1) out float outCoc; + +#ifdef COPY_PASS + +layout(location = 2) out vec3 outScatterColor; + +/* Scatter factor from the local neighborhood. For each of the 4 downsampledBuffer taps around + * the fragment, remaps max_v3(abs(ref - color)) through saturate(diff / 0.7 - 1.0) and returns + * the largest result. Both colors are first limited to scatterColorNeighborMax. + * NOTE: Do not compare alpha as it is not scattered by the scatter pass. */ +float dof_scatter_neighborhood_rejection(vec3 color) +{ + color = min(vec3(scatterColorNeighborMax), color); + + float validity = 0.0; + + /* Centered in the middle of 4 quarter res texels.
*/ + vec2 texel_size = 1.0 / vec2(textureSize(downsampledBuffer, 0).xy); + vec2 uv = (gl_FragCoord.xy * 0.5) * texel_size; + + vec3 max_diff = vec3(0.0); + for (int i = 0; i < 4; i++) { + vec2 sample_uv = uv + quad_offsets[i] * texel_size; + vec3 ref = textureLod(downsampledBuffer, sample_uv, 0.0).rgb; + + ref = min(vec3(scatterColorNeighborMax), ref); + float diff = max_v3(max(vec3(0.0), abs(ref - color))); + + const float rejection_threshold = 0.7; + diff = saturate(diff / rejection_threshold - 1.0); + validity = max(validity, diff); + } + + return validity; +} + +/* This avoids sprite popping in and out at the screen border and + * drawing sprites larger than the screen. + * The factor is driven by the distance (in pixels of `screen_size`) to the nearest screen border + * minus the half resolution CoC radius. */ +float dof_scatter_screen_border_rejection(float coc, vec2 uv, vec2 screen_size) +{ + vec2 screen_pos = uv * screen_size; + float min_screen_border_distance = min_v2(min(screen_pos, screen_size - screen_pos)); + /* Fullres to halfres CoC. */ + coc *= 0.5; + /* Allow 10px transition. */ + const float rejection_hardeness = 1.0 / 10.0; + return saturate((min_screen_border_distance - abs(coc)) * rejection_hardeness + 1.0); +} + +float dof_scatter_luminosity_rejection(vec3 color) +{ + const float rejection_hardness = 1.0; + return saturate(max_v3(color - scatterColorThreshold) * rejection_hardness); +} + +float dof_scatter_coc_radius_rejection(float coc) +{ + const float rejection_hardness = 0.3; + return saturate((abs(coc) - scatterCocThreshold) * rejection_hardness); +} + +float fast_luma(vec3 color) +{ /* Unnormalized luma: R + 2G + B (the sum is not divided by 4). */ + return (2.0 * color.g) + color.r + color.b; +} + +/* Lightweight version of neighborhood clamping found in TAA. + * Limits the luma of `color` to an upper bound built from max8() and avg8() of the luma of the + * 8 surrounding colorBuffer texels, blended by colorNeighborClamping. The color is rescaled by + * the ratio between clamped and original luma. */ +vec3 dof_neighborhood_clamping(vec3 color) +{ + vec2 texel_size = 1.0 / vec2(textureSize(colorBuffer, 0)); + vec2 uv = gl_FragCoord.xy * texel_size; + vec4 ofs = vec4(-1, 1, -1, 1) * texel_size.xxyy; + + /* Luma clamping. 3x3 square neighborhood.
*/ + float c00 = fast_luma(textureLod(colorBuffer, uv + ofs.xz, 0.0).rgb); + float c01 = fast_luma(textureLod(colorBuffer, uv + ofs.xz * vec2(1.0, 0.0), 0.0).rgb); + float c02 = fast_luma(textureLod(colorBuffer, uv + ofs.xw, 0.0).rgb); + + float c10 = fast_luma(textureLod(colorBuffer, uv + ofs.xz * vec2(0.0, 1.0), 0.0).rgb); + float c11 = fast_luma(color); + float c12 = fast_luma(textureLod(colorBuffer, uv + ofs.xw * vec2(0.0, 1.0), 0.0).rgb); + + float c20 = fast_luma(textureLod(colorBuffer, uv + ofs.yz, 0.0).rgb); + float c21 = fast_luma(textureLod(colorBuffer, uv + ofs.yz * vec2(1.0, 0.0), 0.0).rgb); + float c22 = fast_luma(textureLod(colorBuffer, uv + ofs.yw, 0.0).rgb); + + float avg_luma = avg8(c00, c01, c02, c10, c12, c20, c21, c22); + float max_luma = max8(c00, c01, c02, c10, c12, c20, c21, c22); + + float upper_bound = mix(max_luma, avg_luma, colorNeighborClamping); + upper_bound = mix(c11, upper_bound, colorNeighborClamping); + + float clamped_luma = min(upper_bound, c11); + + return color * clamped_luma * safe_rcp(c11); +} + +/* Simple copy pass where we select what pixels to scatter. Also the resolution might change. + * NOTE: The texture can end up being too big because of the mipmap padding. We correct for + * that during the convolution phase. */ +void main() +{ + vec2 halfres = vec2(textureSize(colorBuffer, 0).xy); + vec2 uv = gl_FragCoord.xy / halfres; + + outColor = textureLod(colorBuffer, uv, 0.0); + outCoc = textureLod(cocBuffer, uv, 0.0).r; + + outColor.rgb = dof_neighborhood_clamping(outColor.rgb); + + /* Only scatter if luminous enough. */ + float do_scatter = dof_scatter_luminosity_rejection(outColor.rgb); + /* Only scatter if CoC is big enough. */ + do_scatter *= dof_scatter_coc_radius_rejection(outCoc); + /* Only scatter if the CoC does not reach past the nearest screen border (avoids popping and oversized sprites). */ + do_scatter *= dof_scatter_screen_border_rejection(outCoc, uv, halfres); + /* Only scatter if neighborhood is different enough.
*/ + do_scatter *= dof_scatter_neighborhood_rejection(outColor.rgb); + /* For debugging. */ + do_scatter *= float(!no_scatter_pass); + + outScatterColor = mix(vec3(0.0), outColor.rgb, do_scatter); + outColor.rgb = mix(outColor.rgb, vec3(0.0), do_scatter); + + /* Apply energy conservation to anamorphic scattered bokeh. */ + outScatterColor /= min_v2(bokehAnisotropy); +} + +#else /* REDUCE_PASS */ + +/* Downsample pass done for each mip starting from mip1. + * Each output texel is a weighted sum of the 4 texels of the previous mip it covers. */ +void main() +{ + vec2 input_texel_size = 1.0 / vec2(textureSize(colorBuffer, 0).xy); + /* Center uv around the 4 pixels of the previous mip. */ + vec2 quad_center = (floor(gl_FragCoord.xy) * 2.0 + 1.0) * input_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * input_texel_size; + colors[i] = dof_load_gather_color(colorBuffer, sample_uv, 0.0); + cocs[i] = textureLod(cocBuffer, sample_uv, 0.0).r; + } + + vec4 weights = dof_downsample_bilateral_coc_weights(cocs); + weights *= dof_downsample_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + outColor = weighted_sum_array(colors, weights); + outCoc = dot(cocs, weights); +} + +#endif diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl new file mode 100644 index 00000000000..469745206e2 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl @@ -0,0 +1,212 @@ + +/** + * Recombine Pass: Load separate convolution layer and composite with self slight defocus + * convolution and in-focus fields. + * + * The halfres gather methods are fast but lack precision for small CoC areas. To fix this we + * do a bruteforce gather to have a smooth transition between in-focus and defocus regions.
+ */ + +#pragma BLENDER_REQUIRE(common_utiltex_lib.glsl) +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +uniform sampler2D fullResColorBuffer; +uniform sampler2D fullResDepthBuffer; + +uniform sampler2D bgColorBuffer; +uniform sampler2D bgWeightBuffer; +uniform sampler2D bgTileBuffer; + +uniform sampler2D fgColorBuffer; +uniform sampler2D fgWeightBuffer; +uniform sampler2D fgTileBuffer; + +uniform sampler2D holefillColorBuffer; +uniform sampler2D holefillWeightBuffer; + +uniform sampler2D bokehLut; + +uniform float bokehMaxSize; + +in vec4 uvcoordsvar; + +out vec4 fragColor; + +void dof_slight_focus_gather(float radius, out vec4 out_color, out float out_weight) +{ /* Gathers full resolution samples on square rings, from `radius` texels (clamped to + * layer_threshold) down to 1, then a center sample. Foreground and background are accumulated + * separately and composited with alpha over into `out_color` / `out_weight`. */ + /* Offset coord to avoid correlation with sampling pattern. */ + vec4 noise = texelfetch_noise_tex(gl_FragCoord.xy + 7.0); + + DofGatherData fg_accum = GATHER_DATA_INIT; + DofGatherData bg_accum = GATHER_DATA_INIT; + + int i_radius = clamp(int(radius), 0, int(layer_threshold)); + const int resolve_ring_density = DOF_SLIGHT_FOCUS_DENSITY; + ivec2 texel = ivec2(gl_FragCoord.xy); + + bool first_ring = true; + + for (int ring = i_radius; ring > 0; ring--) { + DofGatherData fg_ring = GATHER_DATA_INIT; + DofGatherData bg_ring = GATHER_DATA_INIT; + + int ring_distance = ring; + int ring_sample_count = resolve_ring_density * ring_distance; + for (int sample_id = 0; sample_id < ring_sample_count; sample_id++) { + int s = sample_id * (4 / resolve_ring_density) + + int(noise.y * float((4 - resolve_ring_density) * ring_distance)); + + ivec2 offset = dof_square_ring_sample_offset(ring_distance, s); + float ring_dist = length(vec2(offset)); + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + ivec2 sample_offset = ((i == 0) ? offset : -offset); + ivec2 sample_texel = texel + sample_offset; + /* OPTI: could precompute the factor.
*/ + vec2 sample_uv = (vec2(sample_texel) + 0.5) / vec2(textureSize(fullResDepthBuffer, 0)); + float depth = textureLod(fullResDepthBuffer, sample_uv, 0.0).r; + pair_data[i].color = safe_color(textureLod(fullResColorBuffer, sample_uv, 0.0)); + pair_data[i].coc = dof_coc_from_zdepth(depth); + pair_data[i].dist = ring_dist; +#ifdef DOF_BOKEH_TEXTURE + /* Contains subpixel distance to bokeh shape. */ + pair_data[i].dist = texelFetch(bokehLut, sample_offset + DOF_MAX_SLIGHT_FOCUS_RADIUS, 0).r; +#endif + pair_data[i].coc = clamp(pair_data[i].coc, -bokehMaxSize, bokehMaxSize); + } + + float bordering_radius = ring_dist + 0.5; + const float isect_mul = 1.0; + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, false, bg_ring, bg_accum); + +#ifdef DOF_BOKEH_TEXTURE + /* Swap distances in order to flip bokeh shape for foreground. */ + float tmp = pair_data[0].dist; + pair_data[0].dist = pair_data[1].dist; + pair_data[1].dist = tmp; +#endif + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, true, fg_ring, fg_accum); + } + + dof_gather_accumulate_sample_ring( + bg_ring, ring_sample_count * 2, first_ring, false, false, bg_accum); + dof_gather_accumulate_sample_ring( + fg_ring, ring_sample_count * 2, first_ring, false, true, fg_accum); + + first_ring = false; + } + + /* Center sample. */ + vec2 sample_uv = uvcoordsvar.xy; + float depth = textureLod(fullResDepthBuffer, sample_uv, 0.0).r; + DofGatherData center_data; + center_data.color = safe_color(textureLod(fullResColorBuffer, sample_uv, 0.0)); + center_data.coc = dof_coc_from_zdepth(depth); + center_data.coc = clamp(center_data.coc, -bokehMaxSize, bokehMaxSize); + center_data.dist = 0.0; + + /* Slide 38.
*/ + float bordering_radius = 0.5; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, true, fg_accum); + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, false, bg_accum); + + vec4 bg_col, fg_col; + float bg_weight, fg_weight; + vec2 unused_occlusion; + + int total_sample_count = dof_gather_total_sample_count(i_radius + 1, resolve_ring_density); + dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion); + dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion); + + /* Fix weighting issues in areas transitioning from perfect focus to slight focus: when the + * center sample is in focus (abs CoC below 0.5), use it directly as the background. */ + if (abs(center_data.coc) < 0.5) { + bg_col = center_data.color; + bg_weight = 1.0; + } + + /* Alpha Over */ + float alpha = 1.0 - fg_weight; + out_weight = bg_weight * alpha + fg_weight; + out_color = bg_col * bg_weight * alpha + fg_col * fg_weight; +} + +void dof_resolve_load_layer(sampler2D color_tex, + sampler2D weight_tex, + out vec4 out_color, + out float out_weight) +{ /* Layer textures are addressed at half the fragment coordinate; `weight_tex` is sampled with + * the uv computed from the size of `color_tex`. */ + vec2 pixel_co = gl_FragCoord.xy / 2.0; + vec2 uv = pixel_co / textureSize(color_tex, 0).xy; + out_color = textureLod(color_tex, uv, 0.0); + out_weight = textureLod(weight_tex, uv, 0.0).r; +} + +void main(void) +{ /* Composites the layers back to front: holefill, background, slight focus, in focus, then + * foreground. Each layer is skipped unless the tile prediction requests it. */ + ivec2 tile_co = ivec2(gl_FragCoord.xy / float(DOF_TILE_DIVISOR)); + CocTile coc_tile = dof_coc_tile_load(fgTileBuffer, bgTileBuffer, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + fragColor = vec4(0.0); + float weight = 0.0; + + vec4 layer_color; + float layer_weight; + + if (!no_holefill_pass && prediction.do_holefill) { + dof_resolve_load_layer(holefillColorBuffer, holefillWeightBuffer, layer_color, layer_weight); + fragColor = layer_color * safe_rcp(layer_weight); + weight = float(layer_weight > 0.0); + } + + if (!no_background_pass && prediction.do_background) { + dof_resolve_load_layer(bgColorBuffer, bgWeightBuffer, layer_color, 
layer_weight); + /* Always prefer background to holefill pass. */ + layer_color *= safe_rcp(layer_weight); + layer_weight = float(layer_weight > 0.0); + /* Composite background. */ + fragColor = fragColor * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + /* Fill holes with the composited background. */ + fragColor *= safe_rcp(weight); + weight = float(weight > 0.0); + } + + if (!no_slight_focus_pass && prediction.do_slight_focus) { + dof_slight_focus_gather(coc_tile.fg_slight_focus_max_coc, layer_color, layer_weight); + /* Composite slight defocus. */ + fragColor = fragColor * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + } + + if (!no_focus_pass && prediction.do_focus) { + layer_color = safe_color(textureLod(fullResColorBuffer, uvcoordsvar.xy, 0.0)); + layer_weight = 1.0; + /* Composite in focus. */ + fragColor = fragColor * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + } + + if (!no_foreground_pass && prediction.do_foreground) { + dof_resolve_load_layer(fgColorBuffer, fgWeightBuffer, layer_color, layer_weight); + /* Composite foreground. */ + fragColor = fragColor * (1.0 - layer_weight) + layer_color; + } + + /* Fix float precision issue in alpha compositing. */ + if (fragColor.a > 0.99) { + fragColor.a = 1.0; + } + +#if 0 /* Debug */ + if (coc_tile.fg_slight_focus_max_coc >= 0.5) { + fragColor.rgb *= vec3(1.0, 0.1, 0.1); + } +#endif +}
\ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl new file mode 100644 index 00000000000..704bbf6d999 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl @@ -0,0 +1,85 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixels to have higher quality blur. + * + * We only scatter one triangle per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +uniform sampler2D occlusionBuffer; +uniform sampler2D bokehLut; + +uniform vec2 bokehAnisotropyInv; + +flat in vec4 color1; +flat in vec4 color2; +flat in vec4 color3; +flat in vec4 color4; +flat in vec4 weights; +flat in vec4 cocs; +flat in vec2 spritepos; +flat in float spritesize; /* MaxCoC */ + +layout(location = 0) out vec4 fragColor; + +/* Distance from the current fragment to `center`, in pixels. With DOF_BOKEH_TEXTURE the distance + * is read from bokehLut (scaled by the sprite size) instead of being the euclidean length. */ +float bokeh_shape(vec2 center) +{ + vec2 co = gl_FragCoord.xy - center; + +#ifdef DOF_BOKEH_TEXTURE + co *= bokehAnisotropyInv; + float texture_size = float(textureSize(bokehLut, 0).x); + /* Bias scale to avoid sampling at the texture's border. */ + float scale_fac = spritesize * (float(DOF_BOKEH_LUT_SIZE) / float(DOF_BOKEH_LUT_SIZE - 1)); + float dist = scale_fac * textureLod(bokehLut, (co / scale_fac) * 0.5 + 0.5, 0.0).r; +#else + float dist = length(co); +#endif + + return dist; +} + +#define linearstep(p0, p1, v) (clamp(((v) - (p0)) / abs((p1) - (p0)), 0.0, 1.0)) + +/* Evaluates the 4 bokeh shapes packed in one sprite (one per source pixel of the quad) and sums + * their colors, weighted by shape coverage and optional occlusion. */ +void main(void) +{ + vec4 shapes; + for (int i = 0; i < 4; i++) { + shapes[i] = bokeh_shape(spritepos + quad_offsets[i]); + } + /* Becomes signed distance field in pixel units. */ + shapes -= cocs; + /* Smooth the edges a bit to fade out the undersampling artifacts. */ + shapes = 1.0 - linearstep(-0.8, 0.8, shapes); + /* Outside of bokeh shape. Try to avoid overloading ROPs. 
*/ + if (max_v4(shapes) == 0.0) { + discard; + } + + if (!no_scatter_occlusion) { + /* Works because target is the same size as occlusionBuffer. */ + vec2 uv = gl_FragCoord.xy / vec2(textureSize(occlusionBuffer, 0).xy); + vec2 occlusion_data = texture(occlusionBuffer, uv).rg; + /* Fix tiling artifacts. (Slide 90) */ + const float correction_fac = 1.0 - DOF_FAST_GATHER_COC_ERROR; + /* Occlude the sprite with geometry from the same field + * using a VSM like Chebyshev test (slide 85). + * The variance term comes from the second channel; it was previously read from .x, which + * made it identical to the mean and left the .g channel unused. + * NOTE(review): assumes occlusionBuffer stores the CoC mean in .r and the variance term in + * .g; confirm against the gather pass that writes it. */ + float mean = occlusion_data.x; + float variance = occlusion_data.y; + shapes *= variance * safe_rcp(variance + sqr(max(cocs * correction_fac - mean, 0.0))); + } + + fragColor = color1 * shapes.x; + fragColor += color2 * shapes.y; + fragColor += color3 * shapes.z; + fragColor += color4 * shapes.w; + + /* Do not accumulate alpha. This has already been accumulated by the gather pass. */ + fragColor.a = 0.0; + +#ifdef DOF_DEBUG_SCATTER_PERF + fragColor.rgb = avg(fragColor.rgb) * vec3(1.0, 0.0, 0.0); +#endif +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl new file mode 100644 index 00000000000..276ab119fab --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_vert.glsl @@ -0,0 +1,138 @@ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +uniform vec2 targetTexelSize; +uniform int spritePerRow; +uniform vec2 bokehAnisotropy; + +uniform sampler2D colorBuffer; +uniform sampler2D cocBuffer; + +/* Scatter pass, calculate a triangle covering the CoC. + * We render to a half resolution target with double width so we can + * separate near and far fields. We also generate only one triangle per group of 4 pixels + * to limit overdraw.
*/ + +flat out vec4 color1; +flat out vec4 color2; +flat out vec4 color3; +flat out vec4 color4; +flat out vec4 weights; +flat out vec4 cocs; +flat out vec2 spritepos; +flat out float spritesize; + +/* Load 4 Circle of confusion values. texel_co is centered around the 4 taps. */ +vec4 fetch_cocs(vec2 texel_co) +{ + /* TODO(fclem) The textureGather(sampler, co, comp) variant isn't available on some implementations. */ +#if 0 // GPU_ARB_texture_gather + vec2 uvs = texel_co / vec2(textureSize(cocBuffer, 0)); + /* Reminder: Samples order is CW starting from top left. */ + cocs = textureGather(cocBuffer, uvs, isForegroundPass ? 0 : 1); +#else + ivec2 texel = ivec2(texel_co - 0.5); + vec4 cocs; + cocs.x = texelFetchOffset(cocBuffer, texel, 0, ivec2(0, 1)).r; + cocs.y = texelFetchOffset(cocBuffer, texel, 0, ivec2(1, 1)).r; + cocs.z = texelFetchOffset(cocBuffer, texel, 0, ivec2(1, 0)).r; + cocs.w = texelFetchOffset(cocBuffer, texel, 0, ivec2(0, 0)).r; +#endif + +#ifdef DOF_FOREGROUND_PASS + cocs *= -1.0; +#endif + + cocs = max(vec4(0.0), cocs); + /* We are scattering at half resolution, so divide CoC by 2. */ + return cocs * 0.5; +} + +void vertex_discard() +{ + /* Don't produce any fragments */ + gl_Position = vec4(0.0, 0.0, 0.0, 1.0); +} + +void main() +{ + ivec2 tex_size = textureSize(cocBuffer, 0); + + int t_id = gl_VertexID / 3; /* Triangle Id */ + + /* Some math to get the target pixel. */ + ivec2 texelco = ivec2(t_id % spritePerRow, t_id / spritePerRow) * 2; + + /* Center sprite around the 4 texture taps. */ + spritepos = vec2(texelco) + 1.0; + + cocs = fetch_cocs(spritepos); + + /* Early out from local CoC radius. 
*/ + if (all(lessThan(cocs, vec4(0.5)))) { + vertex_discard(); + return; + } + + vec2 input_texel_size = 1.0 / vec2(tex_size); + vec2 quad_center = spritepos * input_texel_size; + vec4 colors[4]; + bool no_color = true; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * input_texel_size; + + colors[i] = dof_load_scatter_color(colorBuffer, sample_uv, 0.0); + no_color = no_color && all(equal(colors[i].rgb, vec3(0.0))); + } + + /* Early out from no color to scatter. */ + if (no_color) { + vertex_discard(); + return; + } + + weights = dof_layer_weight(cocs) * dof_sample_weight(cocs); + /* Filter NaNs. */ + weights = mix(weights, vec4(0.0), equal(cocs, vec4(0.0))); + + color1 = colors[0] * weights[0]; + color2 = colors[1] * weights[1]; + color3 = colors[2] * weights[2]; + color4 = colors[3] * weights[3]; + + /* Extend to cover at least the unit circle */ + const float extend = (cos(M_PI / 4.0) + 1.0) * 2.0; + /* Crappy diagram + * ex 1 + * | \ + * | \ + * 1 | \ + * | \ + * | \ + * 0 | x \ + * | Circle \ + * | Origin \ + * -1 0 --------------- 2 + * -1 0 1 ex + */ + + /* Generate Triangle : less memory fetches from a VBO */ + int v_id = gl_VertexID % 3; /* Vertex Id */ + gl_Position.x = float(v_id / 2) * extend - 1.0; /* int divisor round down */ + gl_Position.y = float(v_id % 2) * extend - 1.0; + gl_Position.z = 0.0; + gl_Position.w = 1.0; + + spritesize = max_v4(cocs); + + /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin + * and because we smooth the bokeh shape a bit in the pixel shader. */ + gl_Position.xy *= spritesize * bokehAnisotropy + 2.5; + /* Position the sprite. */ + gl_Position.xy += spritepos; + /* NDC range [-1..1]. */ + gl_Position.xy = gl_Position.xy * targetTexelSize * 2.0 - 1.0; + + /* Add 2.5 for the same reason but without the ratio. 
*/ + spritesize += 2.5; +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl new file mode 100644 index 00000000000..d1ecc0fc244 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_setup_frag.glsl @@ -0,0 +1,65 @@ + +/** + * Setup pass: CoC and luma aware downsample to half resolution of the input scene color buffer. + * + * In addition to the downsampled CoC, we output the maximum slight out of focus CoC to be + * sure we don't miss a pixel. + **/ + +#pragma BLENDER_REQUIRE(effect_dof_lib.glsl) + +/* Full resolution. */ +uniform sampler2D colorBuffer; +uniform sampler2D depthBuffer; + +uniform float bokehMaxSize; + +/* Half resolution. */ +layout(location = 0) out vec4 outColor; +layout(location = 1) out vec2 outCoc; /* x: Downsample CoC, y: Max slight focus abs CoC */ + +void main() +{ + vec2 fullres_texel_size = 1.0 / vec2(textureSize(colorBuffer, 0).xy); + /* Center uv around the 4 fullres pixels. */ + vec2 quad_center = (floor(gl_FragCoord.xy) * 2.0 + 1.0) * fullres_texel_size; + + vec4 colors[4]; + vec4 depths; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * fullres_texel_size; + colors[i] = safe_color(textureLod(colorBuffer, sample_uv, 0.0)); + depths[i] = textureLod(depthBuffer, sample_uv, 0.0).r; + } + + vec4 cocs = dof_coc_from_zdepth(depths); + + cocs = clamp(cocs, -bokehMaxSize, bokehMaxSize); + + vec4 weights = dof_downsample_bilateral_coc_weights(cocs); + weights *= dof_downsample_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + outColor = weighted_sum_array(colors, weights); + outCoc.x = dot(cocs, weights); + + /* Max slight focus abs CoC. */ + + /* Clamp to 0.5 if full in defocus to differentiate full focus tiles with coc == 0.0. + * This enables an optimization in the resolve pass. 
*/ + const vec4 threshold = vec4(layer_threshold + layer_offset); + cocs = abs(cocs); + bvec4 defocus = greaterThan(cocs, threshold); + bvec4 focus = lessThanEqual(cocs, vec4(0.5)); + if (any(defocus) && any(focus)) { + /* For the same reason as in the flatten pass. This is a case we cannot optimize for. */ + cocs = mix(cocs, vec4(DOF_TILE_MIXED), focus); + cocs = mix(cocs, vec4(DOF_TILE_MIXED), defocus); + } + else { + cocs = mix(cocs, vec4(DOF_TILE_FOCUS), focus); + cocs = mix(cocs, vec4(DOF_TILE_DEFOCUS), defocus); + } + outCoc.y = max_v4(cocs); +} diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_vert.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_vert.glsl deleted file mode 100644 index 6e35d4a54ae..00000000000 --- a/source/blender/draw/engines/eevee/shaders/effect_dof_vert.glsl +++ /dev/null @@ -1,109 +0,0 @@ - -#pragma BLENDER_REQUIRE(common_math_lib.glsl) - -uniform vec4 bokehParams[2]; - -#define bokeh_rotation bokehParams[0].x -#define bokeh_ratio bokehParams[0].y -#define bokeh_maxsize bokehParams[0].z - -uniform sampler2D nearBuffer; -uniform sampler2D farBuffer; -uniform sampler2D cocBuffer; - -flat out vec4 color; -flat out float weight; -flat out float smoothFac; -flat out ivec2 edge; -out vec2 particlecoord; - -/* Scatter pass, calculate a triangle covering the CoC. */ -void main() -{ - ivec2 tex_size = textureSize(cocBuffer, 0); - /* We render to a double width texture so compute - * the target texel size accordingly */ - vec2 texel_size = vec2(0.5, 1.0) / vec2(tex_size); - - int t_id = gl_VertexID / 3; /* Triangle Id */ - - ivec2 texelco = ivec2(0); - /* some math to get the target pixel */ - texelco.x = t_id % tex_size.x; - texelco.y = t_id / tex_size.x; - - vec2 cocs = texelFetch(cocBuffer, texelco, 0).rg; - - bool is_near = (cocs.x > cocs.y); - float coc = (is_near) ? 
cocs.x : cocs.y; - - /* Clamp to max size for performance */ - coc = min(coc, bokeh_maxsize); - - if (coc >= 1.0) { - if (is_near) { - color = texelFetch(nearBuffer, texelco, 0); - } - else { - color = texelFetch(farBuffer, texelco, 0); - } - /* find the area the pixel will cover and divide the color by it */ - /* HACK: 4.0 out of nowhere (I suppose it's 4 pixels footprint for coc 0?) - * Makes near in focus more closer to 1.0 alpha. */ - weight = 4.0 / (coc * coc * M_PI); - color *= weight; - - /* Compute edge to discard fragment that does not belong to the other layer. */ - edge.x = (is_near) ? 1 : -1; - edge.y = (is_near) ? -tex_size.x + 1 : tex_size.x; - } - else { - /* Don't produce any fragments */ - color = vec4(0.0); - gl_Position = vec4(0.0, 0.0, 0.0, 1.0); - return; - } - - /* Generate Triangle : less memory fetches from a VBO */ - int v_id = gl_VertexID % 3; /* Vertex Id */ - - /* Extend to cover at least the unit circle */ - const float extend = (cos(M_PI / 4.0) + 1.0) * 2.0; - /* Crappy diagram - * ex 1 - * | \ - * | \ - * 1 | \ - * | \ - * | \ - * 0 | x \ - * | Circle \ - * | Origin \ - * -1 0 --------------- 2 - * -1 0 1 ex - */ - gl_Position.x = float(v_id / 2) * extend - 1.0; /* int divisor round down */ - gl_Position.y = float(v_id % 2) * extend - 1.0; - gl_Position.z = 0.0; - gl_Position.w = 1.0; - - /* Generate Triangle */ - particlecoord = gl_Position.xy; - - gl_Position.xy *= coc * texel_size * vec2(bokeh_ratio, 1.0); - gl_Position.xy -= 1.0 - 0.5 * texel_size; /* NDC Bottom left */ - gl_Position.xy += (0.5 + vec2(texelco) * 2.0) * texel_size; - - /* Push far plane to left side. */ - if (!is_near) { - gl_Position.x += 2.0 / 2.0; - } - - /* don't do smoothing for small sprites */ - if (coc > 3.0) { - smoothFac = 1.0 - 1.5 / coc; - } - else { - smoothFac = 1.0; - } -} |