Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2017-07-30 18:11:05 +0300
committerClément Foucault <foucault.clem@gmail.com>2017-07-31 16:18:38 +0300
commit683e31fd80d36cffe4c65cfc0e973bb191889eee (patch)
tree2caa4f53f1ee8d203114e9636498cd8fabd2ea43 /source/blender/draw
parent39e1518d413a1af35b39a695b6a2e2d69fd2d35b (diff)
Eevee: SSR: Rewrote the raytracing algorithm.
It now uses a quality slider instead of stride. Lower quality takes larger strides between samples and uses lower mips when tracing rough rays. Raytracing is now done entirely in homogeneous coordinate space. This runs much faster. Should be fairly optimized. We are still bandwidth bound. Add a line-line intersection refine. Add a ray jitter between the multiple rays per pixel to fill some undersampling in mirror reflections. The tracing now stops if it goes behind an object. This needs some work to allow it to continue even if behind objects.
Diffstat (limited to 'source/blender/draw')
-rw-r--r--source/blender/draw/engines/eevee/eevee_effects.c32
-rw-r--r--source/blender/draw/engines/eevee/eevee_engine.c4
-rw-r--r--source/blender/draw/engines/eevee/eevee_private.h3
-rw-r--r--source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl1
-rw-r--r--source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl83
-rw-r--r--source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl2
-rw-r--r--source/blender/draw/engines/eevee/shaders/raytrace_lib.glsl318
7 files changed, 223 insertions, 220 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_effects.c b/source/blender/draw/engines/eevee/eevee_effects.c
index 4de887f14fd..6d1dadf06b5 100644
--- a/source/blender/draw/engines/eevee/eevee_effects.c
+++ b/source/blender/draw/engines/eevee/eevee_effects.c
@@ -102,8 +102,6 @@ static struct {
struct GPUTexture *depth_src;
struct GPUTexture *color_src;
int depth_src_layer;
-
- float pixelprojmat[4][4];
} e_data = {NULL}; /* Engine data */
extern char datatoc_bsdf_common_lib_glsl[];
@@ -581,7 +579,7 @@ void EEVEE_effects_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
effects->ssr_ray_count = BKE_collection_engine_property_value_get_int(props, "ssr_ray_count");
effects->reflection_trace_full = !BKE_collection_engine_property_value_get_bool(props, "ssr_halfres");
effects->ssr_use_normalization = BKE_collection_engine_property_value_get_bool(props, "ssr_normalize_weight");
- effects->ssr_stride = (float)BKE_collection_engine_property_value_get_int(props, "ssr_stride");
+ effects->ssr_quality = 1.0f - BKE_collection_engine_property_value_get_float(props, "ssr_quality");
effects->ssr_thickness = BKE_collection_engine_property_value_get_float(props, "ssr_thickness");
effects->ssr_border_fac = BKE_collection_engine_property_value_get_float(props, "ssr_border_fade");
effects->ssr_firefly_fac = BKE_collection_engine_property_value_get_float(props, "ssr_firefly_fac");
@@ -621,23 +619,9 @@ void EEVEE_effects_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
DRW_framebuffer_init(&fbl->screen_tracing_fb, &draw_engine_eevee_type, tracing_res[0], tracing_res[1], tex_output, effects->ssr_ray_count);
- /* Compute pixel projection matrix */
- {
- float uvpix[4][4], ndcuv[4][4], tmp[4][4], winmat[4][4];
- DRW_viewport_matrix_get(winmat, DRW_MAT_WIN);
-
- /* NDC to UVs */
- unit_m4(ndcuv);
- ndcuv[0][0] = ndcuv[1][1] = ndcuv[3][0] = ndcuv[3][1] = 0.5f;
-
- /* UVs to pixels */
- unit_m4(uvpix);
- uvpix[0][0] = viewport_size[0];
- uvpix[1][1] = viewport_size[1];
-
- mul_m4_m4m4(tmp, uvpix, ndcuv);
- mul_m4_m4m4(e_data.pixelprojmat, tmp, winmat);
- }
+ /* Compute pixel size */
+ copy_v2_v2(effects->ssr_pixelsize, viewport_size);
+ invert_v2(effects->ssr_pixelsize);
}
else {
/* Cleanup to release memory */
@@ -762,9 +746,10 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
DRW_shgroup_uniform_buffer(grp, "normalBuffer", &txl->ssr_normal_input);
DRW_shgroup_uniform_buffer(grp, "specroughBuffer", &txl->ssr_specrough_input);
DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex());
+ DRW_shgroup_uniform_buffer(grp, "maxzBuffer", &txl->maxzbuffer);
+ DRW_shgroup_uniform_buffer(grp, "minzBuffer", &stl->g_data->minzbuffer);
DRW_shgroup_uniform_vec4(grp, "viewvecs[0]", (float *)stl->g_data->viewvecs, 2);
- DRW_shgroup_uniform_vec2(grp, "ssrParameters", &effects->ssr_stride, 1);
- DRW_shgroup_uniform_mat4(grp, "PixelProjMatrix", (float *)&e_data.pixelprojmat);
+ DRW_shgroup_uniform_vec4(grp, "ssrParameters", &effects->ssr_quality, 1);
DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1);
DRW_shgroup_uniform_int(grp, "planar_count", &sldata->probes->num_planar, 1);
DRW_shgroup_uniform_buffer(grp, "planarDepth", &vedata->txl->planar_depth);
@@ -1267,7 +1252,8 @@ void EEVEE_draw_effects(EEVEE_Data *vedata)
if (stl->g_data->ssr_hit_output[0]) DRW_transform_to_display(stl->g_data->ssr_hit_output[0]);
break;
case 3:
- if (txl->ssr_normal_input) DRW_transform_to_display(txl->ssr_normal_input);
+ if (stl->g_data->ssr_hit_output[1]) DRW_transform_to_display(stl->g_data->ssr_hit_output[1]);
+ // if (txl->ssr_normal_input) DRW_transform_to_display(txl->ssr_normal_input);
break;
case 4:
if (txl->ssr_specrough_input) DRW_transform_to_display(txl->ssr_specrough_input);
diff --git a/source/blender/draw/engines/eevee/eevee_engine.c b/source/blender/draw/engines/eevee/eevee_engine.c
index c48edcc3a53..4272e344119 100644
--- a/source/blender/draw/engines/eevee/eevee_engine.c
+++ b/source/blender/draw/engines/eevee/eevee_engine.c
@@ -239,10 +239,10 @@ static void EEVEE_scene_layer_settings_create(RenderEngine *UNUSED(engine), IDPr
BKE_collection_engine_property_add_bool(props, "ssr_enable", false);
BKE_collection_engine_property_add_bool(props, "ssr_halfres", true);
BKE_collection_engine_property_add_int(props, "ssr_ray_count", 1);
- BKE_collection_engine_property_add_int(props, "ssr_stride", 16);
+ BKE_collection_engine_property_add_float(props, "ssr_quality", 0.25);
BKE_collection_engine_property_add_float(props, "ssr_thickness", 0.2f);
BKE_collection_engine_property_add_float(props, "ssr_border_fade", 0.075f);
- BKE_collection_engine_property_add_float(props, "ssr_firefly_fac", 0.5f);
+ BKE_collection_engine_property_add_float(props, "ssr_firefly_fac", 0.0f);
BKE_collection_engine_property_add_bool(props, "volumetric_enable", false);
BKE_collection_engine_property_add_float(props, "volumetric_start", 0.1f);
diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h
index 0b1fc2f5dff..115e2a25ea4 100644
--- a/source/blender/draw/engines/eevee/eevee_private.h
+++ b/source/blender/draw/engines/eevee/eevee_private.h
@@ -327,8 +327,9 @@ typedef struct EEVEE_EffectsInfo {
int ssr_ray_count;
float ssr_firefly_fac;
float ssr_border_fac;
- float ssr_stride;
+ float ssr_quality;
float ssr_thickness;
+ float ssr_pixelsize[2];
/* Ambient Occlusion */
bool use_ao, use_bent_normals;
diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
index 17430007502..e80835ee498 100644
--- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
@@ -95,6 +95,7 @@ struct ShadowCascadeData {
vec3 mul(mat3 m, vec3 v) { return m * v; }
mat3 mul(mat3 m1, mat3 m2) { return m1 * m2; }
+vec3 transform_direction(mat4 m, vec3 v) { return mat3(m) * v; }
vec3 transform_point(mat4 m, vec3 v) { return (m * vec4(v, 1.0)).xyz; }
vec3 project_point(mat4 m, vec3 v) {
vec4 tmp = m * vec4(v, 1.0);
diff --git a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl
index 3a8430f14e6..673440c3d54 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl
@@ -47,7 +47,7 @@ bool has_hit_backface(vec3 hit_pos, vec3 R, vec3 V)
return (dot(-R, hit_N) < 0.0);
}
-vec4 do_planar_ssr(int index, vec3 V, vec3 N, vec3 planeNormal, vec3 viewPosition, float a2, vec3 rand)
+vec4 do_planar_ssr(int index, vec3 V, vec3 N, vec3 planeNormal, vec3 viewPosition, float a2, vec3 rand, float ray_nbr)
{
float pdf;
vec3 R = generate_ray(V, N, a2, rand, pdf);
@@ -57,46 +57,34 @@ vec4 do_planar_ssr(int index, vec3 V, vec3 N, vec3 planeNormal, vec3 viewPositio
/* If ray is bad (i.e. going below the plane) do not trace. */
if (dot(R, planeNormal) > 0.0) {
- vec3 R = generate_ray(V, N, a2, rand, pdf);
+ vec3 R = generate_ray(V, N, a2, rand * vec3(1.0, -1.0, -1.0), pdf);
}
- float hit_dist;
+ vec3 hit_pos;
if (abs(dot(-R, V)) < 0.9999) {
- hit_dist = raycast(index, viewPosition, R, rand.x);
+ /* Since viewspace hit position can land behind the camera in this case,
+ * we save the reflected view position (visualize it as the hit position
+ * below the reflection plane). This way it's guaranteed that the hit will
+ * be in front of the camera. That lets us tag the bad rays with a negative
+ * sign in the Z component. */
+ hit_pos = raycast(index, viewPosition, R, fract(rand.x + (ray_nbr / float(rayCount))), a2);
}
else {
- float z = get_view_z_from_depth(texelFetch(planarDepth, ivec3(project_point(PixelProjMatrix, viewPosition).xy, index), 0).r);
- hit_dist = (z - viewPosition.z) / R.z;
- }
-
- /* Since viewspace hit position can land behind the camera in this case,
- * we save the reflected view position (visualize it as the hit position
- * below the reflection plane). This way it's garanted that the hit will
- * be in front of the camera. That let us tag the bad rays with a negative
- * sign in the Z component. */
- vec3 hit_pos = viewPosition + R * abs(hit_dist);
-
- /* Ray did not hit anything. No backface test because it's not possible
- * to hit a backface in this case. */
- if (hit_dist <= 0.0) {
- hit_pos.z *= -1.0;
+ vec2 uvs = project_point(ProjectionMatrix, viewPosition).xy * 0.5 + 0.5;
+ float raw_depth = textureLod(planarDepth, vec3(uvs, float(index)), 0.0).r;
+ hit_pos = get_view_space_from_depth(uvs, raw_depth);
+ hit_pos.z *= (raw_depth < 1.0) ? 1.0 : -1.0;
}
return vec4(hit_pos, pdf);
}
-vec4 do_ssr(vec3 V, vec3 N, vec3 viewPosition, float a2, vec3 rand)
+vec4 do_ssr(vec3 V, vec3 N, vec3 viewPosition, float a2, vec3 rand, float ray_nbr)
{
float pdf;
vec3 R = generate_ray(V, N, a2, rand, pdf);
- float hit_dist = raycast(-1, viewPosition, R, rand.x);
- vec3 hit_pos = viewPosition + R * abs(hit_dist);
-
- /* Ray did not hit anything. Tag it as failled. */
- if (has_hit_backface(hit_pos, R, V) || (hit_dist <= 0.0)) {
- hit_pos.z *= -1.0;
- }
+ vec3 hit_pos = raycast(-1, viewPosition, R, fract(rand.x + (ray_nbr / float(rayCount))), a2);
return vec4(hit_pos, pdf);
}
@@ -141,7 +129,7 @@ void main()
vec3 rand = texelFetch(utilTex, ivec3(halfres_texel % LUT_SIZE, 2), 0).rba;
vec3 worldPosition = transform_point(ViewMatrixInverse, viewPosition);
- vec3 wN = mat3(ViewMatrixInverse) * N;
+ vec3 wN = transform_direction(ViewMatrixInverse, N);
/* Planar Reflections */
for (int i = 0; i < MAX_PLANAR && i < planar_count; ++i) {
@@ -154,22 +142,22 @@ void main()
/* TODO optimize, use view space for all. */
vec3 tracePosition = line_plane_intersect(worldPosition, cameraVec, pd.pl_plane_eq);
tracePosition = transform_point(ViewMatrix, tracePosition);
- vec3 planeNormal = mat3(ViewMatrix) * pd.pl_normal;
+ vec3 planeNormal = transform_direction(ViewMatrix, pd.pl_normal);
/* TODO : Raytrace together if textureGather is supported. */
- hitData0 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand);
- if (rayCount > 1) hitData1 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0));
- if (rayCount > 2) hitData2 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0));
- if (rayCount > 3) hitData3 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0));
+ hitData0 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand, 0.0);
+ if (rayCount > 1) hitData1 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0);
+ if (rayCount > 2) hitData2 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0), 2.0);
+ if (rayCount > 3) hitData3 = do_planar_ssr(i, V, N, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0), 3.0);
return;
}
}
/* TODO : Raytrace together if textureGather is supported. */
- hitData0 = do_ssr(V, N, viewPosition, a2, rand);
- if (rayCount > 1) hitData1 = do_ssr(V, N, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0));
- if (rayCount > 2) hitData2 = do_ssr(V, N, viewPosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0));
- if (rayCount > 3) hitData3 = do_ssr(V, N, viewPosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0));
+ hitData0 = do_ssr(V, N, viewPosition, a2, rand, 0.0);
+ if (rayCount > 1) hitData1 = do_ssr(V, N, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0);
+ if (rayCount > 2) hitData2 = do_ssr(V, N, viewPosition, a2, rand.xzy * vec3(1.0, 1.0, -1.0), 2.0);
+ if (rayCount > 3) hitData3 = do_ssr(V, N, viewPosition, a2, rand.xzy * vec3(1.0, -1.0, 1.0), 3.0);
}
#else /* STEP_RESOLVE */
@@ -258,12 +246,6 @@ float screen_border_mask(vec2 hit_co)
return screenfade;
}
-float view_facing_mask(vec3 V, vec3 R)
-{
- /* Fade on viewing angle (strange deformations happens at R == V) */
- return smoothstep(0.95, 0.80, dot(V, R));
-}
-
vec2 get_reprojected_reflection(vec3 hit, vec3 pos, vec3 N)
{
/* TODO real reprojection with motion vectors, etc... */
@@ -306,13 +288,13 @@ vec4 get_ssr_sample(
/* Find hit position in previous frame. */
ref_uvs = get_reprojected_reflection(hit_pos, worldPosition, N);
L = normalize(hit_pos - worldPosition);
- mask *= view_facing_mask(V, N);
- mask *= screen_border_mask(source_uvs);
+ vec2 uvs = gl_FragCoord.xy / vec2(textureSize(depthBuffer, 0));
+ mask *= screen_border_mask(uvs);
/* Compute cone footprint Using UV distance because we are using screen space filtering. */
cone_footprint = 1.5 * cone_tan * distance(ref_uvs, source_uvs);
}
- mask *= screen_border_mask(ref_uvs);
+ mask = min(mask, screen_border_mask(ref_uvs));
mask *= float(has_hit);
/* Estimate a cone footprint to sample a corresponding mipmap level. */
@@ -320,7 +302,7 @@ vec4 get_ssr_sample(
/* Slide 54 */
float bsdf = bsdf_ggx(N, L, V, roughnessSquared);
- float weight = step(0.001, hit_co_pdf.w) * bsdf / hit_co_pdf.w;
+ float weight = bsdf / max(1e-8, hit_co_pdf.w);
weight_acc += weight;
vec3 sample;
@@ -340,7 +322,7 @@ vec4 get_ssr_sample(
return vec4(sample, mask) * weight;
}
-#define NUM_NEIGHBORS 9
+#define NUM_NEIGHBORS 4
void main()
{
@@ -364,7 +346,8 @@ void main()
vec3 viewPosition = get_view_space_from_depth(uvs, depth); /* Needed for viewCameraVec */
vec3 worldPosition = transform_point(ViewMatrixInverse, viewPosition);
vec3 V = cameraVec;
- vec3 N = mat3(ViewMatrixInverse) * normal_decode(texelFetch(normalBuffer, fullres_texel, 0).rg, viewCameraVec);
+ vec3 vN = normal_decode(texelFetch(normalBuffer, fullres_texel, 0).rg, viewCameraVec);
+ vec3 N = transform_direction(ViewMatrixInverse, vN);
vec4 speccol_roughness = texelFetch(specroughBuffer, fullres_texel, 0).rgba;
/* Early out */
@@ -441,7 +424,7 @@ void main()
if (weight_acc > 0.0) {
ssr_accum /= weight_acc;
/* fade between 0.5 and 1.0 roughness */
- ssr_accum.a *= saturate(2.0 - roughness * 2.0);
+ //ssr_accum.a *= saturate(2.0 - roughness * 2.0);
accumulate_light(ssr_accum.rgb, ssr_accum.a, spec_accum);
}
diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl
index 93adc9483ad..0200b32d969 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl
@@ -132,7 +132,7 @@ vec3 probe_evaluate_cube(float id, CubeData cd, vec3 W, vec3 R, float roughness)
{
/* Correct reflection ray using parallax volume intersection. */
vec3 localpos = transform_point(cd.parallaxmat, W);
- vec3 localray = mat3(cd.parallaxmat) * R;
+ vec3 localray = transform_direction(cd.parallaxmat, R);
float dist;
if (cd.p_parallax_type == PROBE_PARALLAX_BOX) {
diff --git a/source/blender/draw/engines/eevee/shaders/raytrace_lib.glsl b/source/blender/draw/engines/eevee/shaders/raytrace_lib.glsl
index b4ccb1ca052..855755adfe4 100644
--- a/source/blender/draw/engines/eevee/shaders/raytrace_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/raytrace_lib.glsl
@@ -1,196 +1,228 @@
-/* Based on work from Morgan McGuire and Michael Mara at Williams College 2014
- * Released as open source under the BSD 2-Clause License
- * http://opensource.org/licenses/BSD-2-Clause
- * http://casual-effects.blogspot.fr/2014/08/screen-space-ray-tracing.html */
-
#define MAX_STEP 256
#define MAX_REFINE_STEP 32 /* Should be max allowed stride */
-uniform mat4 PixelProjMatrix; /* View > NDC > Texel : maps view coords to texel coord */
-uniform vec2 ssrParameters;
+uniform vec4 ssrParameters;
uniform sampler2D depthBuffer;
+uniform sampler2D maxzBuffer;
+uniform sampler2D minzBuffer;
uniform sampler2DArray planarDepth;
-#define ssrStride ssrParameters.x
+#define ssrQuality ssrParameters.x
#define ssrThickness ssrParameters.y
+#define ssrPixelSize ssrParameters.zw
-float sample_depth(ivec2 hitpixel, int index)
+float sample_depth(vec2 uv, int index, float lod)
{
if (index > -1) {
- return texelFetch(planarDepth, ivec3(hitpixel, index), 0).r;
+ return textureLod(planarDepth, vec3(uv, index), 0.0).r;
}
else {
- return texelFetch(depthBuffer, hitpixel, 0).r;
+ return textureLod(maxzBuffer, uv, lod).r;
}
}
-void swapIfBigger(inout float a, inout float b)
+float sample_minz_depth(vec2 uv, int index)
{
- if (a > b) {
- float temp = a;
- a = b;
- b = temp;
+ if (index > -1) {
+ return textureLod(planarDepth, vec3(uv, index), 0.0).r;
+ }
+ else {
+ return textureLod(minzBuffer, uv, 0.0).r;
}
}
-/* Return the length of the ray if there is a hit, and negate it if not hit occured */
-float raycast(int index, vec3 ray_origin, vec3 ray_dir, float ray_jitter)
+float sample_maxz_depth(vec2 uv, int index)
{
- float near = get_view_z_from_depth(0.0); /* TODO optimize */
- float far = get_view_z_from_depth(1.0); /* TODO optimize */
-
- /* Clip ray to a near/far plane in 3D */
- float ray_length = 1e16;
- if ((ray_origin.z + ray_dir.z * ray_length) > near) {
- ray_length = (near - ray_origin.z) / ray_dir.z;
+ if (index > -1) {
+ return textureLod(planarDepth, vec3(uv, index), 0.0).r;
}
else {
- ray_length = (ray_origin.z - far) / -ray_dir.z;
+ return textureLod(maxzBuffer, uv, 0.0).r;
}
+}
- vec3 ray_end = ray_dir * ray_length + ray_origin;
-
- /* Project into screen space */
- vec4 H0 = PixelProjMatrix * vec4(ray_origin, 1.0);
- vec4 H1 = PixelProjMatrix * vec4(ray_end, 1.0);
-
- /* There are a lot of divisions by w that can be turned into multiplications
- * at some minor precision loss...and we need to interpolate these 1/w values
- * anyway. */
- float k0 = 1.0 / H0.w;
- float k1 = 1.0 / H1.w;
-
- /* Switch the original points to values that interpolate linearly in 2D */
- vec3 Q0 = ray_origin * k0;
- vec3 Q1 = ray_end * k1;
+vec4 sample_depth_grouped(vec4 uv1, vec4 uv2, int index, float lod)
+{
+ vec4 depths;
+ if (index > -1) {
+ depths.x = textureLod(planarDepth, vec3(uv1.xy, index), 0.0).r;
+ depths.y = textureLod(planarDepth, vec3(uv1.zw, index), 0.0).r;
+ depths.z = textureLod(planarDepth, vec3(uv2.xy, index), 0.0).r;
+ depths.w = textureLod(planarDepth, vec3(uv2.zw, index), 0.0).r;
+ }
+ else {
+ depths.x = textureLod(maxzBuffer, uv1.xy, lod).r;
+ depths.y = textureLod(maxzBuffer, uv1.zw, lod).r;
+ depths.z = textureLod(maxzBuffer, uv2.xy, lod).r;
+ depths.w = textureLod(maxzBuffer, uv2.zw, lod).r;
+ }
+ return depths;
+}
- /* Screen-space endpoints */
- vec2 P0 = H0.xy * k0;
- vec2 P1 = H1.xy * k1;
+float refine_isect(float prev_delta, float curr_delta)
+{
+ /**
+ * Simplification of 2D intersection :
+ * r0 = (0.0, prev_ss_ray.z);
+ * r1 = (1.0, curr_ss_ray.z);
+ * d0 = (0.0, prev_hit_depth_sample);
+ * d1 = (1.0, curr_hit_depth_sample);
+ * vec2 r = r1 - r0;
+ * vec2 d = d1 - d0;
+ * vec2 isect = ((d * cross(r1, r0)) - (r * cross(d1, d0))) / cross(r,d);
+ *
+ * We only want isect.x to know how much stride we need. So it simplifies :
+ *
+ * isect_x = (cross(r1, r0) - cross(d1, d0)) / cross(r,d);
+ * isect_x = (prev_ss_ray.z - prev_hit_depth_sample.z) / cross(r,d);
+ */
+ return saturate(prev_delta / (prev_delta - curr_delta));
+}
- /* [Optional clipping to frustum sides here] */
+void prepare_raycast(vec3 ray_origin, vec3 ray_dir, out vec4 ss_step, out vec4 ss_ray, out float max_time)
+{
+ /* Negate the ray direction if it goes towards the camera.
+ * This way we don't need to care if the projected point
+ * is behind the near plane. */
+ float z_sign = -sign(ray_dir.z);
+ vec3 ray_end = z_sign * ray_dir * 1e16 + ray_origin;
+
+ /* Project into screen space. */
+ vec3 ss_start = project_point(ProjectionMatrix, ray_origin);
+ vec3 ss_end = project_point(ProjectionMatrix, ray_end);
+ /* 4th component is current stride */
+ ss_step = vec4(z_sign * normalize(ss_end - ss_start), 1.0);
/* If the line is degenerate, make it cover at least one pixel
* to not have to handle zero-pixel extent as a special case later */
- P1 += vec2((distance_squared(P0, P1) < 0.001) ? 0.01 : 0.0);
-
- vec2 delta = P1 - P0;
-
- /* Permute so that the primary iteration is in x to reduce large branches later.
- * After this, "x" is the primary iteration direction and "y" is the secondary one
- * If it is a more-vertical line, create a permutation that swaps x and y in the output
- * and directly swizzle the inputs. */
- bool permute = false;
- if (abs(delta.x) < abs(delta.y)) {
- permute = true;
- delta = delta.yx;
- P1 = P1.yx;
- P0 = P0.yx;
- }
+ ss_step.xy += vec2((dot(ss_step.xy, ss_step.xy) < 0.00001) ? 0.001 : 0.0);
- /* Track the derivatives */
- float step_sign = sign(delta.x);
- float invdx = step_sign / delta.x;
- vec2 dP = vec2(step_sign, invdx * delta.y);
- vec3 dQ = (Q1 - Q0) * invdx;
- float dk = (k1 - k0) * invdx;
+ /* Make ss_step cover one pixel. */
+ ss_step.xyz /= max(abs(ss_step.x), abs(ss_step.y));
+ ss_step.xyz *= ((abs(ss_step.x) > abs(ss_step.y)) ? ssrPixelSize.x : ssrPixelSize.y);
- /* Slide each value from the start of the ray to the end */
- vec4 pqk = vec4(P0, Q0.z, k0);
+ /* Clipping to frustum sides. */
+ max_time = line_unit_box_intersect_dist(ss_start, ss_step.xyz) - 1.0;
- /* Scale derivatives by the desired pixel stride */
- vec4 dPQK = vec4(dP, dQ.z, dk) * ssrStride;
+ /* Convert to texture coords. Z component included
+ * since this is how it's stored in the depth buffer.
+ * 4th component how far we are on the ray */
+ ss_ray = vec4(ss_start * 0.5 + 0.5, 0.0);
+ ss_step.xyz *= 0.5;
+}
- /* We track the ray depth at +/- 1/2 pixel to treat pixels as clip-space solid
- * voxels. Because the depth at -1/2 for a given pixel will be the same as at
- * +1/2 for the previous iteration, we actually only have to compute one value
- * per iteration. */
- float prev_zmax = ray_origin.z;
- float zmax;
+/* See times_and_deltas. */
+#define curr_time times_and_deltas.x
+#define prev_time times_and_deltas.y
+#define curr_delta times_and_deltas.z
+#define prev_delta times_and_deltas.w
- /* P1.x is never modified after this point, so pre-scale it by
- * the step direction for a signed comparison */
- float end = P1.x * step_sign;
+// #define GROUPED_FETCHES
+/* Return the hit position, and negate the z component (making it positive) if no hit occurred. */
+vec3 raycast(int index, vec3 ray_origin, vec3 ray_dir, float ray_jitter, float roughness)
+{
+ vec4 ss_step, ss_start;
+ float max_time;
+ prepare_raycast(ray_origin, ray_dir, ss_step, ss_start, max_time);
- /* Initial offset */
- if (index > -1) {
- pqk -= dPQK * ray_jitter;
- }
- else {
- pqk += dPQK * (0.01 + ray_jitter);
- }
+#ifdef GROUPED_FETCHES
+ ray_jitter *= 0.25;
+#endif
+ /* x : current_time, y: previous_time, z: current_delta, w: previous_delta */
+ vec4 times_and_deltas = vec4(0.0, 0.0, 0.001, 0.001);
+ float ray_time = 0.0;
+ float depth_sample;
+
+ float lod_fac = saturate(fast_sqrt(roughness) * 2.0 - 0.4);
bool hit = false;
- float raw_depth;
- float thickness = (index == -1) ? ssrThickness : 1e16;
- for (float hitstep = 0.0; hitstep < MAX_STEP && !hit; hitstep++) {
- /* Ray finished & no hit*/
- if ((pqk.x * step_sign) > end) break;
-
- /* step through current cell */
- pqk += dPQK;
-
- ivec2 hitpixel = ivec2(permute ? pqk.yx : pqk.xy);
- raw_depth = sample_depth(hitpixel, index);
-
- float zmin = prev_zmax;
- zmax = (dPQK.z * 0.5 + pqk.z) / (dPQK.w * 0.5 + pqk.w);
- prev_zmax = zmax;
- swapIfBigger(zmin, zmax);
-
- float vmax = get_view_z_from_depth(raw_depth);
- float vmin = vmax - thickness;
-
- /* Check if we are somewhere near the surface. */
- /* Note: we consider hitting the screen borders (raw_depth == 0.0)
- * as valid to check for occluder in the refine pass */
- if (!((zmin > vmax) || (zmax < vmin)) || (raw_depth == 0.0)) {
- /* Below surface, cannot trace further */
- hit = true;
- }
- }
+ float iter;
+ for (iter = 1.0; !hit && (ray_time <= max_time) && (iter < MAX_STEP); iter++) {
+ /* Minimum stride of 2 because we are using half res minmax zbuffer. */
+ float stride = max(1.0, iter * ssrQuality) * 2.0;
+ float lod = log2(stride * 0.5 * ssrQuality) * lod_fac;
+
+ /* Save previous values. */
+ times_and_deltas.xyzw = times_and_deltas.yxwz;
- if (hit) {
- /* Rewind back a step. */
- pqk -= dPQK;
+#ifdef GROUPED_FETCHES
+ stride *= 4.0;
+ vec4 jit_stride = mix(vec4(2.0), vec4(stride), vec4(0.0, 0.25, 0.5, 0.75) + ray_jitter);
- /* And do a finer trace over this segment. */
- dPQK /= ssrStride;
+ vec4 times = vec4(ray_time) + jit_stride;
- prev_zmax = (dPQK.z * -0.5 + pqk.z) / (dPQK.w * -0.5 + pqk.w);
+ vec4 uv1 = ss_start.xyxy + ss_step.xyxy * times.xxyy;
+ vec4 uv2 = ss_start.xyxy + ss_step.xyxy * times.zzww;
- for (float refinestep = 0.0; refinestep < (ssrStride * 2.0) && refinestep < (MAX_REFINE_STEP * 2.0); refinestep++) {
- /* step through current cell */
- pqk += dPQK;
+ vec4 depth_samples = sample_depth_grouped(uv1, uv2, index, lod);
- ivec2 hitpixel = ivec2(permute ? pqk.yx : pqk.xy);
- raw_depth = sample_depth(hitpixel, index);
+ vec4 ray_z = ss_start.zzzz + ss_step.zzzz * times.xyzw;
- float zmin = prev_zmax;
- zmax = (dPQK.z * 0.5 + pqk.z) / (dPQK.w * 0.5 + pqk.w);
- prev_zmax = zmax;
- swapIfBigger(zmin, zmax);
+ vec4 deltas = depth_samples - ray_z;
+ /* Same as component wise (depth_samples <= ray_z) && (ray_time <= max_time). */
+ bvec4 test = equal(step(deltas, vec4(0.0)) * step(times, vec4(max_time)), vec4(1.0));
+ hit = any(test);
+ if (hit) {
+ vec2 m = vec2(1.0, 0.0); /* Mask */
- float vmax = get_view_z_from_depth(raw_depth);
- float vmin = vmax - thickness;
+ vec4 ret_times_and_deltas = times.wzzz * m.xxyy + deltas.wwwz * m.yyxx;
+ ret_times_and_deltas = (test.z) ? times.zyyy * m.xxyy + deltas.zzzy * m.yyxx : ret_times_and_deltas;
+ ret_times_and_deltas = (test.y) ? times.yxxx * m.xxyy + deltas.yyyx * m.yyxx : ret_times_and_deltas;
+ times_and_deltas = (test.x) ? times.xxxx * m.xyyy + deltas.xxxx * m.yyxy + times_and_deltas.yyww * m.yxyx : ret_times_and_deltas;
- /* Check if we are somewhere near the surface. */
- if (!((zmin > vmax) || (zmax < vmin)) || (raw_depth == 0.0)) {
- /* Below surface, cannot trace further */
- break;
- }
+ depth_sample = depth_samples.w;
+ depth_sample = (test.z) ? depth_samples.z : depth_sample;
+ depth_sample = (test.y) ? depth_samples.y : depth_sample;
+ depth_sample = (test.x) ? depth_samples.x : depth_sample;
+ break;
}
+ curr_time = times.w;
+ curr_delta = deltas.w;
+ ray_time += stride;
+#else
+ float jit_stride = mix(2.0, stride, ray_jitter);
+
+ curr_time = ray_time + jit_stride;
+ vec4 ss_ray = ss_start + ss_step * curr_time;
+
+ depth_sample = sample_depth(ss_ray.xy, index, lod);
+
+ curr_delta = depth_sample - ss_ray.z;
+ hit = (curr_delta <= 0.0) && (curr_time <= max_time);
+
+ ray_time += stride;
+#endif
}
- /* If we did hit the background, get exact ray. */
- if (raw_depth == 1.0) {
- zmax = get_view_z_from_depth(1.0); /* TODO optimize */
+ curr_time = (hit) ? mix(prev_time, curr_time, refine_isect(prev_delta, curr_delta)) : curr_time;
+ ray_time = (hit) ? curr_time : ray_time;
+
+#if 0 /* Not needed if using refine_isect() */
+ /* Binary search */
+ for (float time_step = (curr_time - prev_time) * 0.5; time_step > 1.0; time_step /= 2.0) {
+ ray_time -= time_step;
+ vec4 ss_ray = ss_start + ss_step * ray_time;
+ float depth_sample = sample_maxz_depth(ss_ray.xy, index);
+ bool is_hit = (depth_sample - ss_ray.z <= 0.0);
+ ray_time = (is_hit) ? ray_time : ray_time + time_step;
}
+#endif
+
+ /* Clip to frustum. */
+ ray_time = min(ray_time, max_time - 0.5);
- hit = hit && (raw_depth != 0.0);
+ vec4 ss_ray = ss_start + ss_step * ray_time;
+ vec3 hit_pos = get_view_space_from_depth(ss_ray.xy, ss_ray.z);
+
+ /* Reject hit if not within threshold. */
+ /* TODO do this check while tracing. Potentially higher quality */
+ if (hit && (index == -1)) {
+ float z = get_view_z_from_depth(depth_sample);
+ hit = hit && ((z - hit_pos.z - ssrThickness) <= ssrThickness);
+ }
- /* Return length */
- float result = (zmax - ray_origin.z) / ray_dir.z;
- return (hit) ? result : -result;
+ /* Tag Z if ray failed. */
+ hit_pos.z *= (hit) ? 1.0 : -1.0;
+ return hit_pos;
}