diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2017-09-13 16:29:13 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2017-09-13 16:29:38 +0300 |
commit | 9abacf38fd4f3b0de4b6d9a4644b9cb1155482c9 (patch) | |
tree | a7c4caf1b08d7c5b30e0601281b23b8af4700e16 /source | |
parent | 71c1bd1bd870712e27d0d56ca525cf2dbc848639 (diff) |
Eevee: SSR: Making ray count a define rather than a uniform.
The branching introduced by the uniform caused problems on mesa + AMD in the resolve stage.
This patch creates one shader per sample count, without branching.
This improves performance of the single-ray-per-pixel case (3.0 ms versus 3.6 ms in my testing).
Diffstat (limited to 'source')
-rw-r--r-- | source/blender/draw/engines/eevee/eevee_effects.c | 25 | ||||
-rw-r--r-- | source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl | 58 |
2 files changed, 50 insertions, 33 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_effects.c b/source/blender/draw/engines/eevee/eevee_effects.c index b18d6455893..bcc9986d671 100644 --- a/source/blender/draw/engines/eevee/eevee_effects.c +++ b/source/blender/draw/engines/eevee/eevee_effects.c @@ -60,9 +60,10 @@ typedef struct EEVEE_LightProbeData { /* SSR shader variations */ enum { - SSR_RESOLVE = (1 << 0), - SSR_FULL_TRACE = (1 << 1), - SSR_MAX_SHADER = (1 << 2), + SSR_SAMPLES = (1 << 0) | (1 << 1), + SSR_RESOLVE = (1 << 2), + SSR_FULL_TRACE = (1 << 3), + SSR_MAX_SHADER = (1 << 4), }; static struct { @@ -198,8 +199,11 @@ static struct GPUShader *eevee_effects_ssr_shader_get(int options) char *ssr_shader_str = BLI_dynstr_get_cstring(ds_frag); BLI_dynstr_free(ds_frag); + int samples = (SSR_SAMPLES & options) + 1; + DynStr *ds_defines = BLI_dynstr_new(); BLI_dynstr_appendf(ds_defines, SHADER_DEFINES); + BLI_dynstr_appendf(ds_defines, "#define RAY_COUNT %d\n", samples); if (options & SSR_RESOLVE) { BLI_dynstr_appendf(ds_defines, "#define STEP_RESOLVE\n"); } @@ -856,6 +860,7 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata) if ((effects->enabled_effects & EFFECT_SSR) != 0) { int options = (effects->reflection_trace_full) ? 
SSR_FULL_TRACE : 0; + options |= (effects->ssr_ray_count - 1); struct GPUShader *trace_shader = eevee_effects_ssr_shader_get(options); struct GPUShader *resolve_shader = eevee_effects_ssr_shader_get(SSR_RESOLVE | options); @@ -871,7 +876,6 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata) DRW_shgroup_uniform_vec4(grp, "viewvecs[0]", (float *)stl->g_data->viewvecs, 2); DRW_shgroup_uniform_vec2(grp, "mipRatio[0]", (float *)stl->g_data->mip_ratio, 10); DRW_shgroup_uniform_vec4(grp, "ssrParameters", &effects->ssr_quality, 1); - DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1); DRW_shgroup_uniform_int(grp, "planar_count", &sldata->probes->num_planar, 1); DRW_shgroup_uniform_float(grp, "maxRoughness", &effects->ssr_max_roughness, 1); DRW_shgroup_uniform_buffer(grp, "planarDepth", &vedata->txl->planar_depth); @@ -900,10 +904,15 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata) DRW_shgroup_uniform_buffer(grp, "probeCubes", &sldata->probe_pool); DRW_shgroup_uniform_buffer(grp, "probePlanars", &vedata->txl->planar_pool); DRW_shgroup_uniform_buffer(grp, "hitBuffer0", &stl->g_data->ssr_hit_output[0]); - DRW_shgroup_uniform_buffer(grp, "hitBuffer1", (effects->ssr_ray_count < 2) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[1]); - DRW_shgroup_uniform_buffer(grp, "hitBuffer2", (effects->ssr_ray_count < 3) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[2]); - DRW_shgroup_uniform_buffer(grp, "hitBuffer3", (effects->ssr_ray_count < 4) ? 
&stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[3]); - DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1); + if (effects->ssr_ray_count > 1) { + DRW_shgroup_uniform_buffer(grp, "hitBuffer1", &stl->g_data->ssr_hit_output[1]); + } + if (effects->ssr_ray_count > 2) { + DRW_shgroup_uniform_buffer(grp, "hitBuffer2", &stl->g_data->ssr_hit_output[2]); + } + if (effects->ssr_ray_count > 3) { + DRW_shgroup_uniform_buffer(grp, "hitBuffer3", &stl->g_data->ssr_hit_output[3]); + } DRW_shgroup_call_add(grp, quad, NULL); } diff --git a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl index 0a958404385..1f3c7822124 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl @@ -111,7 +111,6 @@ void main() if (dot(speccol_roughness.rgb, vec3(1.0)) == 0.0) discard; - float roughness = speccol_roughness.a; float roughnessSquared = max(1e-3, roughness * roughness); float a2 = roughnessSquared * roughnessSquared; @@ -129,8 +128,6 @@ void main() vec3 T, B; make_orthonormal_basis(N, T, B); /* Generate tangent space */ - float ray_ofs = 1.0 / float(rayCount); - /* Planar Reflections */ for (int i = 0; i < MAX_PLANAR && i < planar_count; ++i) { PlanarData pd = planars_data[i]; @@ -144,20 +141,31 @@ void main() tracePosition = transform_point(ViewMatrix, tracePosition); vec3 planeNormal = transform_direction(ViewMatrix, pd.pl_normal); - /* TODO : Raytrace together if textureGather is supported. 
*/ hitData0 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand, 0.0); - if (rayCount > 1) hitData1 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 * ray_ofs); - if (rayCount > 2) hitData2 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0), 2.0 * ray_ofs); - if (rayCount > 3) hitData3 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0), 3.0 * ray_ofs); +#if (RAY_COUNT > 1) + hitData1 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 / float(RAY_COUNT)); +#endif +#if (RAY_COUNT > 2) + hitData2 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0), 2.0 / float(RAY_COUNT)); +#endif +#if (RAY_COUNT > 3) + hitData3 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0), 3.0 / float(RAY_COUNT)); +#endif return; } } /* TODO : Raytrace together if textureGather is supported. 
*/ hitData0 = do_ssr(V, N, T, B, viewPosition, a2, rand, 0.0); - if (rayCount > 1) hitData1 = do_ssr(V, N, T, B, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 * ray_ofs); - if (rayCount > 2) hitData2 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0), 2.0 * ray_ofs); - if (rayCount > 3) hitData3 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0), 3.0 * ray_ofs); +#if (RAY_COUNT > 1) + hitData1 = do_ssr(V, N, T, B, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 / float(RAY_COUNT)); +#endif +#if (RAY_COUNT > 2) + hitData2 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0), 2.0 / float(RAY_COUNT)); +#endif +#if (RAY_COUNT > 3) + hitData3 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0), 3.0 / float(RAY_COUNT)); +#endif } #else /* STEP_RESOLVE */ @@ -395,21 +403,21 @@ void main() ssr_accum += get_ssr_sample(hitBuffer0, pd, planar_index, worldPosition, N, V, roughnessSquared, cone_tan, source_uvs, texture_size, target_texel, weight_acc); - if (rayCount > 1) { - ssr_accum += get_ssr_sample(hitBuffer1, pd, planar_index, worldPosition, N, V, - roughnessSquared, cone_tan, source_uvs, - texture_size, target_texel, weight_acc); - } - if (rayCount > 2) { - ssr_accum += get_ssr_sample(hitBuffer2, pd, planar_index, worldPosition, N, V, - roughnessSquared, cone_tan, source_uvs, - texture_size, target_texel, weight_acc); - } - if (rayCount > 3) { - ssr_accum += get_ssr_sample(hitBuffer3, pd, planar_index, worldPosition, N, V, - roughnessSquared, cone_tan, source_uvs, - texture_size, target_texel, weight_acc); - } +#if (RAY_COUNT > 1) + ssr_accum += get_ssr_sample(hitBuffer1, pd, planar_index, worldPosition, N, V, + roughnessSquared, cone_tan, source_uvs, + texture_size, target_texel, weight_acc); +#endif +#if (RAY_COUNT > 2) + ssr_accum += get_ssr_sample(hitBuffer2, pd, planar_index, worldPosition, N, V, + roughnessSquared, cone_tan, source_uvs, + texture_size, target_texel, 
weight_acc); +#endif +#if (RAY_COUNT > 3) + ssr_accum += get_ssr_sample(hitBuffer3, pd, planar_index, worldPosition, N, V, + roughnessSquared, cone_tan, source_uvs, + texture_size, target_texel, weight_acc); +#endif } } |