diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2019-06-04 19:26:36 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2019-06-04 19:42:50 +0300 |
commit | 942a748d5d5d3d03c09ae2b0b0767f1b1eb16147 (patch) | |
tree | 1bb83d9578806a72bc8715a1bdd5c845791adc52 /source/blender/draw/engines/eevee/eevee_lights.c | |
parent | ff7401b02c4b9e111c0632f33bf5b51fc63026f6 (diff) |
Eevee: Merge shadow map processing into one drawcall per light
This removes a lot of framebuffer configuration and binding.
Diffstat (limited to 'source/blender/draw/engines/eevee/eevee_lights.c')
-rw-r--r-- | source/blender/draw/engines/eevee/eevee_lights.c | 236 |
1 files changed, 121 insertions, 115 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_lights.c b/source/blender/draw/engines/eevee/eevee_lights.c index 0c2512904f9..c3a9c4bba41 100644 --- a/source/blender/draw/engines/eevee/eevee_lights.c +++ b/source/blender/draw/engines/eevee/eevee_lights.c @@ -49,6 +49,8 @@ static struct { extern char datatoc_shadow_vert_glsl[]; extern char datatoc_shadow_frag_glsl[]; +extern char datatoc_shadow_process_vert_glsl[]; +extern char datatoc_shadow_process_geom_glsl[]; extern char datatoc_shadow_store_frag_glsl[]; extern char datatoc_shadow_copy_frag_glsl[]; extern char datatoc_concentric_samples_lib_glsl[]; @@ -185,26 +187,30 @@ void EEVEE_lights_init(EEVEE_ViewLayerData *sldata) /* only compile the ones needed. reduce startup time. */ if ((sh_method == SHADOW_ESM) && !e_data.shadow_copy_cube_sh[SHADOW_ESM]) { - e_data.shadow_copy_cube_sh[SHADOW_ESM] = DRW_shader_create_fullscreen( - datatoc_shadow_copy_frag_glsl, - "#define ESM\n" - "#define COPY\n"); - e_data.shadow_copy_cascade_sh[SHADOW_ESM] = DRW_shader_create_fullscreen( - datatoc_shadow_copy_frag_glsl, - "#define ESM\n" - "#define COPY\n" - "#define CSM\n"); + e_data.shadow_copy_cube_sh[SHADOW_ESM] = DRW_shader_create(datatoc_shadow_process_vert_glsl, + datatoc_shadow_process_geom_glsl, + datatoc_shadow_copy_frag_glsl, + "#define ESM\n" + "#define COPY\n"); + e_data.shadow_copy_cascade_sh[SHADOW_ESM] = DRW_shader_create(datatoc_shadow_process_vert_glsl, + datatoc_shadow_process_geom_glsl, + datatoc_shadow_copy_frag_glsl, + "#define ESM\n" + "#define COPY\n" + "#define CSM\n"); } else if ((sh_method == SHADOW_VSM) && !e_data.shadow_copy_cube_sh[SHADOW_VSM]) { - e_data.shadow_copy_cube_sh[SHADOW_VSM] = DRW_shader_create_fullscreen( - datatoc_shadow_copy_frag_glsl, - "#define VSM\n" - "#define COPY\n"); - e_data.shadow_copy_cascade_sh[SHADOW_VSM] = DRW_shader_create_fullscreen( - datatoc_shadow_copy_frag_glsl, - "#define VSM\n" - "#define COPY\n" - "#define CSM\n"); + e_data.shadow_copy_cube_sh[SHADOW_VSM] = DRW_shader_create(datatoc_shadow_process_vert_glsl, + datatoc_shadow_process_geom_glsl, + datatoc_shadow_copy_frag_glsl, + "#define VSM\n" + "#define COPY\n"); + e_data.shadow_copy_cascade_sh[SHADOW_VSM] = DRW_shader_create(datatoc_shadow_process_vert_glsl, + datatoc_shadow_process_geom_glsl, + datatoc_shadow_copy_frag_glsl, + "#define VSM\n" + "#define COPY\n" + "#define CSM\n"); } } @@ -239,7 +245,10 @@ static GPUShader *eevee_lights_get_store_sh(int shadow_method, bool high_blur, b char *define_str = BLI_dynstr_get_cstring(ds_frag); BLI_dynstr_free(ds_frag); - *shader = DRW_shader_create_fullscreen(store_shadow_shader_str, define_str); + *shader = DRW_shader_create(datatoc_shadow_process_vert_glsl, + datatoc_shadow_process_geom_glsl, + store_shadow_shader_str, + define_str); MEM_freeN(store_shadow_shader_str); MEM_freeN(define_str); @@ -256,14 +265,13 @@ static DRWPass *eevee_lights_cube_store_pass_get(EEVEE_PassList *psl, bool high_blur = shadow_samples_len > 16; DRWPass **pass = (high_blur) ? &psl->shadow_cube_store_pass : &psl->shadow_cube_store_high_pass; if (*pass == NULL) { - EEVEE_LightsInfo *linfo = sldata->lights; *pass = DRW_pass_create("Shadow Cube Storage Pass", DRW_STATE_WRITE_COLOR); GPUShader *shader = eevee_lights_get_store_sh(shadow_method, high_blur, false); DRWShadingGroup *grp = DRW_shgroup_create(shader, *pass); DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cube_blur); DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo); - DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1); - DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL); + + DRW_shgroup_call_procedural_triangles(grp, NULL, 6); } return *pass; } @@ -277,15 +285,13 @@ static DRWPass *eevee_lights_cascade_store_pass_get(EEVEE_PassList *psl, DRWPass **pass = (high_blur) ? &psl->shadow_cascade_store_pass : &psl->shadow_cascade_store_high_pass; if (*pass == NULL) { - EEVEE_LightsInfo *linfo = sldata->lights; *pass = DRW_pass_create("Shadow Cascade Storage Pass", DRW_STATE_WRITE_COLOR); GPUShader *shader = eevee_lights_get_store_sh(shadow_method, high_blur, true); DRWShadingGroup *grp = DRW_shgroup_create(shader, *pass); DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cascade_blur); DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo); - DRW_shgroup_uniform_int(grp, "cascadeId", &linfo->current_shadow_cascade, 1); - DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1); - DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL); + + DRW_shgroup_call_procedural_triangles(grp, NULL, MAX_CASCADE_NUM); } return *pass; } @@ -325,9 +331,8 @@ void EEVEE_lights_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) psl->shadow_cube_copy_pass); DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cube_target); DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo); - DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1); - DRW_shgroup_uniform_int(grp, "faceId", &linfo->current_shadow_face, 1); - DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL); + + DRW_shgroup_call_procedural_triangles(grp, NULL, 6); } { @@ -337,9 +342,8 @@ void EEVEE_lights_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) psl->shadow_cascade_copy_pass); DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cascade_target); DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo); - DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1); - DRW_shgroup_uniform_int(grp, "cascadeId", &linfo->current_shadow_cascade, 1); - DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL); + + DRW_shgroup_call_procedural_triangles(grp, NULL, MAX_CASCADE_NUM); } { @@ -589,6 +593,10 @@ void EEVEE_lights_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) sldata->shadow_cube_blur = DRW_texture_create_cube( linfo->shadow_cube_size, shadow_pool_format, DRW_TEX_FILTER, NULL); } + GPU_framebuffer_ensure_config( + &sldata->shadow_cube_copy_fb, + {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(sldata->shadow_cube_blur)}); + if (!sldata->shadow_cube_pool) { sldata->shadow_cube_pool = DRW_texture_create_2d_array(linfo->shadow_cube_store_size, linfo->shadow_cube_store_size, @@ -618,6 +626,10 @@ void EEVEE_lights_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) DRW_TEX_FILTER, NULL); } + GPU_framebuffer_ensure_config( + &sldata->shadow_cascade_copy_fb, + {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(sldata->shadow_cascade_blur)}); + if (!sldata->shadow_cascade_pool) { sldata->shadow_cascade_pool = DRW_texture_create_2d_array(linfo->shadow_cascade_size, linfo->shadow_cascade_size, @@ -1382,11 +1394,9 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView srd->clip_far = light_attenuation_radius_get(la, light_threshold); srd->stored_texel_size = 1.0 / (float)linfo->shadow_cube_store_size; srd->exponent = la->bleedexp; - copy_v3_v3(srd->position, cube_data->position); - DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd); - - eevee_ensure_cube_views(srd->clip_near, srd->clip_far, srd->position, g_data->cube_views); + eevee_ensure_cube_views( + srd->clip_near, srd->clip_far, cube_data->position, g_data->cube_views); /* Render shadow cube */ /* Render 6 faces separately: seems to be faster for the general case. @@ -1406,51 +1416,48 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView */ float filter_texture_size = la->soft * 0.001f; float filter_pixel_size = ceil(filter_texture_size / srd->cube_texel_size); - linfo->filter_size = srd->cube_texel_size * ((filter_pixel_size > 1.0f) ? 1.5f : 0.0f); - /* TODO: OPTI: Filter all faces in one/two draw call */ /* TODO: OPTI: Don't do this intermediate step if no filter is needed. */ - for (linfo->current_shadow_face = 0; linfo->current_shadow_face < 6; - linfo->current_shadow_face++) { + { + srd->filter_size[0] = srd->cube_texel_size * ((filter_pixel_size > 1.0f) ? 1.5f : 0.0f); + srd->view_count = 6; + srd->base_id = 0; + DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd); + /* Copy using a small 3x3 box filter */ - GPU_framebuffer_texture_cubeface_attach(sldata->shadow_cube_store_fb, - sldata->shadow_cube_blur, - 0, - linfo->current_shadow_face, - 0); - GPU_framebuffer_bind(sldata->shadow_cube_store_fb); + GPU_framebuffer_bind(sldata->shadow_cube_copy_fb); DRW_draw_pass(psl->shadow_cube_copy_pass); } - /* Push it to shadowmap array */ + { + /* Adjust constants if concentric samples change. */ + const float max_filter_size = 7.5f; + const float magic = 4.5f; /* Dunno why but that works. */ + const int max_sample = 256; - /* Adjust constants if concentric samples change. */ - const float max_filter_size = 7.5f; - const float magic = 4.5f; /* Dunno why but that works. */ - const int max_sample = 256; - - if (filter_pixel_size > 2.0f) { - linfo->filter_size = srd->cube_texel_size * max_filter_size * magic; - filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f); - /* Compute number of concentric samples. Depends directly on filter size. */ - float pix_size_sqr = filter_pixel_size * filter_pixel_size; - srd->shadow_samples_len = min_ii(max_sample, - 4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr)); - } - else { - linfo->filter_size = 0.0f; - srd->shadow_samples_len = 4; - } - srd->shadow_samples_len_inv = 1.0f / (float)srd->shadow_samples_len; - DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd); + if (filter_pixel_size > 2.0f) { + srd->filter_size[0] = srd->cube_texel_size * max_filter_size * magic; + filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f); + /* Compute number of concentric samples. Depends directly on filter size. */ + float pix_size_sqr = filter_pixel_size * filter_pixel_size; + srd->shadow_samples_len[0] = min_ii( + max_sample, 4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr)); + } + else { + srd->filter_size[0] = 0.0f; + srd->shadow_samples_len[0] = 4; + } + srd->view_count = 1; + srd->base_id = evscd->layer_id; + srd->shadow_samples_len_inv[0] = 1.0f / (float)srd->shadow_samples_len[0]; + DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd); - GPU_framebuffer_texture_layer_attach( - sldata->shadow_cube_store_fb, sldata->shadow_cube_pool, 0, evscd->layer_id, 0); - GPU_framebuffer_bind(sldata->shadow_cube_store_fb); + DRWPass *store_pass = eevee_lights_cube_store_pass_get( + psl, sldata, linfo->shadow_method, srd->shadow_samples_len[0]); - DRWPass *store_pass = eevee_lights_cube_store_pass_get( - psl, sldata, linfo->shadow_method, srd->shadow_samples_len); - DRW_draw_pass(store_pass); + GPU_framebuffer_bind(sldata->shadow_cube_store_fb); + DRW_draw_pass(store_pass); + } if (linfo->soft_shadows == false) { led->need_update = false; @@ -1477,6 +1484,7 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView srd->clip_near = la->clipsta; srd->clip_far = la->clipend; + srd->view_count = la->cascade_count; srd->stored_texel_size = 1.0 / (float)linfo->shadow_cascade_size; DRW_uniformbuffer_update(sldata->shadow_render_ubo, &linfo->shadow_render_data); @@ -1500,54 +1508,52 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView GPU_framebuffer_clear_depth(sldata->shadow_cascade_target_fb, 1.0f); DRW_draw_pass(psl->shadow_pass); } - - /* TODO: OPTI: Filter all cascade in one/two draw call */ - for (linfo->current_shadow_cascade = 0; linfo->current_shadow_cascade < la->cascade_count; - ++linfo->current_shadow_cascade) { - /* 0.01f factor to convert to percentage */ - float filter_texture_size = la->soft * 0.01f / evscd->radius[linfo->current_shadow_cascade]; - float filter_pixel_size = ceil(linfo->shadow_cascade_size * filter_texture_size); - - /* Copy using a small 3x3 box filter */ + /* Copy using a small 3x3 box filter */ + { /* NOTE: We always do it in the case of CSM because of artifacts in the farthest cascade. */ - linfo->filter_size = srd->stored_texel_size; - GPU_framebuffer_texture_layer_attach(sldata->shadow_cascade_store_fb, - sldata->shadow_cascade_blur, - 0, - linfo->current_shadow_cascade, - 0); - GPU_framebuffer_bind(sldata->shadow_cascade_store_fb); - DRW_draw_pass(psl->shadow_cascade_copy_pass); + copy_v4_fl(srd->filter_size, srd->stored_texel_size); + srd->base_id = 0; + DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd); - /* Push it to shadowmap array and blur more */ - - /* Adjust constants if concentric samples change. */ - const float max_filter_size = 7.5f; - const float magic = 3.2f; /* Arbitrary: less banding */ - const int max_sample = 256; - - if (filter_pixel_size > 2.0f) { - linfo->filter_size = srd->stored_texel_size * max_filter_size * magic; - filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f); - /* Compute number of concentric samples. Depends directly on filter size. */ - float pix_size_sqr = filter_pixel_size * filter_pixel_size; - srd->shadow_samples_len = min_ii(max_sample, - 4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr)); - } - else { - linfo->filter_size = 0.0f; - srd->shadow_samples_len = 4; + GPU_framebuffer_bind(sldata->shadow_cascade_copy_fb); + DRW_draw_pass(psl->shadow_cascade_copy_pass); + } + /* Push it to shadowmap array and blur more */ + { + int max_pass_sample = 0; + + for (int j = 0; j < la->cascade_count; j++) { + /* 0.01f factor to convert to percentage */ + float filter_texture_size = la->soft * 0.01f / evscd->radius[j]; + float filter_pixel_size = ceil(linfo->shadow_cascade_size * filter_texture_size); + /* Adjust constants if concentric samples change. */ + const float max_filter_size = 7.5f; + const float magic = 3.2f; /* Arbitrary: less banding */ + const int max_sample = 256; + + if (filter_pixel_size > 2.0f) { + srd->filter_size[j] = srd->stored_texel_size * max_filter_size * magic; + filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f); + /* Compute number of concentric samples. Depends directly on filter size. */ + float pix_size_sqr = filter_pixel_size * filter_pixel_size; + srd->shadow_samples_len[j] = min_ii( + max_sample, 4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr)); + } + else { + srd->filter_size[j] = 0.0f; + srd->shadow_samples_len[j] = 4; + } + srd->shadow_samples_len_inv[j] = 1.0f / (float)srd->shadow_samples_len[j]; + max_pass_sample = max_ii(max_pass_sample, srd->shadow_samples_len[j]); } - srd->shadow_samples_len_inv = 1.0f / (float)srd->shadow_samples_len; - DRW_uniformbuffer_update(sldata->shadow_render_ubo, &linfo->shadow_render_data); - - int layer = evscd->layer_id + linfo->current_shadow_cascade; - GPU_framebuffer_texture_layer_attach( - sldata->shadow_cascade_store_fb, sldata->shadow_cascade_pool, 0, layer, 0); - GPU_framebuffer_bind(sldata->shadow_cascade_store_fb); + srd->base_id = evscd->layer_id; + DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd); + /* XXX(fclem) this create drawcalls outside of cache generation. */ DRWPass *store_pass = eevee_lights_cascade_store_pass_get( - psl, sldata, linfo->shadow_method, srd->shadow_samples_len); + psl, sldata, linfo->shadow_method, max_pass_sample); + + GPU_framebuffer_bind(sldata->shadow_cascade_store_fb); DRW_draw_pass(store_pass); } } |