EEVEE-Next: Motion Blur new implementation

The new implementation leverages compute shaders to reduce the number of passes and the complexity. The max blur amount is now detected automatically, replacing the property in the render panel with a simple checkbox. The dilation algorithm has also been rewritten from scratch into a single-pass algorithm that performs the dilation more efficiently and more precisely. Some differences with the old implementation can be observed in areas with complex motion.
author: Clément Foucault <foucault.clem@gmail.com> 2022-07-27 18:35:10 +0300
committer: Clément Foucault <foucault.clem@gmail.com> 2022-07-28 18:01:05 +0300
commit: 1e0aa2612c3f62607f1d12fd9d594cba96680f6c (patch)
tree: 086804c2186d8ba7593ea56d050bc3d1abc1f3e8 /source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
parent: 82327ce01de9be65b20c261977c9c3ccb59e0952 (diff)
1 files changed, 103 insertions, 0 deletions
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
new file mode 100644
index 00000000000..cbbeea25d20
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
@@ -0,0 +1,103 @@
+
+/**
+ * Shader that down-samples the velocity buffer into square tiles MB_TILE_DIVISOR pixels wide.
+ * Outputs the largest motion vector in the tile area.
+ * Also performs the velocity resolve to speed up the convolution pass.
+ *
+ * Based on:
+ * A Fast and Stable Feature-Aware Motion Blur Filter
+ * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai
+ *
+ * Adapted from G3D Innovation Engine implementation.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+
+shared uint payload_prev; /* Work-group maximum of the packed payloads built from `motion.xy`. */
+shared uint payload_next; /* Work-group maximum of the packed payloads built from `motion.zw`. */
+shared vec2 max_motion_prev; /* `motion.xy` of the thread whose id is stored in `payload_prev`. */
+shared vec2 max_motion_next; /* `motion.zw` of the thread whose id is stored in `payload_next`. */
+
+/* Payload: motion length (rounded up, capped at 0xFFFF) in the 16 MSB, `thread_id.y << 8 | x` below. */
+uint pack_payload(vec2 motion, uvec2 thread_id)
+{
+  uint magnitude = min(uint(ceil(length(motion))), 0xFFFFu);
+  return (magnitude << 16u) | (thread_id.y << 8) | thread_id.x;
+}
+
+/* Recover the (x, y) thread index from the two lowest bytes of a payload. */
+uvec2 unpack_payload(uint payload)
+{
+  return uvec2(payload, payload >> 8) & uvec2(0xFFu);
+}
+
+void main()
+{
+  /* A single thread resets the shared maxima; the barrier orders it before any atomicMax. */
+  if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+    payload_prev = 0u;
+    payload_next = 0u;
+  }
+  barrier();
+
+  uint local_payload_prev = 0u;
+  uint local_payload_next = 0u;
+  /* Zero-initialized so a thread whose payload stays 0 never broadcasts an undefined vector. */
+  vec2 local_max_motion_prev = vec2(0.0);
+  vec2 local_max_motion_next = vec2(0.0);
+
+  /* Clamp the read coordinate so threads past the image border still load a valid texel. */
+  ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(velocity_img) - 1);
+  vec2 render_size = vec2(imageSize(velocity_img).xy);
+  vec2 uv = (vec2(texel) + 0.5) / render_size;
+  float depth = texelFetch(depth_tx, texel, 0).r;
+  vec4 motion = velocity_resolve(imageLoad(velocity_img, texel), uv, depth);
+#ifdef FLATTEN_VIEWPORT
+  /* imageLoad does not perform the swizzling like sampler does. Do it manually. */
+  motion = motion.xyxy;
+#endif
+  /* Store resolved velocity to speed up the gather pass. Out of bounds writes are ignored.
+   * Unfortunately, we cannot convert to pixel space here since it is also used by TAA and the
+   * motion blur needs to remain optional. */
+  imageStore(velocity_img, ivec2(gl_GlobalInvocationID.xy), velocity_pack(motion));
+  /* Clip velocity to viewport bounds (in NDC space). */
+  vec2 line_clip;
+  line_clip.x = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, motion.xy * 2.0);
+  line_clip.y = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, -motion.zw * 2.0);
+  motion *= min(line_clip, vec2(1.0)).xxyy;
+  /* Convert to pixel space. Note this is only for velocity tiles. */
+  motion *= render_size.xyxy;
+  /* Rescale to shutter relative motion for viewport. */
+  motion *= motion_blur_buf.motion_scale.xxyy;
+
+  uint sample_payload_prev = pack_payload(motion.xy, gl_LocalInvocationID.xy);
+  if (local_payload_prev < sample_payload_prev) {
+    local_payload_prev = sample_payload_prev;
+    local_max_motion_prev = motion.xy;
+  }
+  uint sample_payload_next = pack_payload(motion.zw, gl_LocalInvocationID.xy);
+  if (local_payload_next < sample_payload_next) {
+    local_payload_next = sample_payload_next;
+    local_max_motion_next = motion.zw;
+  }
+
+  /* Work-group reduction: highest magnitude wins, thread id breaks ties (see pack_payload). */
+  atomicMax(payload_prev, local_payload_prev);
+  atomicMax(payload_next, local_payload_next);
+  barrier();
+
+  /* Only the winning thread publishes its motion, so that a single thread can write the tile. */
+  if (all(equal(unpack_payload(payload_prev), gl_LocalInvocationID.xy))) {
+    max_motion_prev = local_max_motion_prev;
+  }
+  if (all(equal(unpack_payload(payload_next), gl_LocalInvocationID.xy))) {
+    max_motion_next = local_max_motion_next;
+  }
+  barrier();
+
+  if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+    ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
+    imageStore(out_tiles_img, tile_co, vec4(max_motion_prev, max_motion_next));
+  }
+}
author	Clément Foucault <foucault.clem@gmail.com>	2022-07-27 18:35:10 +0300
committer	Clément Foucault <foucault.clem@gmail.com>	2022-07-28 18:01:05 +0300
commit	1e0aa2612c3f62607f1d12fd9d594cba96680f6c (patch)
tree	086804c2186d8ba7593ea56d050bc3d1abc1f3e8 /source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
parent	82327ce01de9be65b20c261977c9c3ccb59e0952 (diff)