10 files changed, 668 insertions, 15 deletions
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
index b286836e8df..135507d956c 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
@@ -636,6 +636,8 @@ void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth
       vec4 normal = texelFetch(normal_tx, film_sample.texel, 0);
       float depth = texelFetch(depth_tx, film_sample.texel, 0).x;
       vec4 vector = velocity_resolve(vector_tx, film_sample.texel, depth);
+      /* Transform to pixel space. */
+      vector *= vec4(film_buf.render_extent, -film_buf.render_extent);
 
       film_store_depth(texel_film, depth, out_depth);
       film_store_data(texel_film, film_buf.normal_id, normal, out_color);
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
new file mode 100644
index 00000000000..c59b7d7f4df
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
@@ -0,0 +1,116 @@
+
+/**
+ * Dilate motion vector tiles until we cover the maximum velocity.
+ * Outputs the largest intersecting motion vector in the neighborhood.
+ *
+ */
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl)
+
+#define DEBUG_BYPASS_DILATION 0
+
+struct MotionRect {
+  ivec2 bottom_left; /* Minimum corner of the rectangle, in tile coordinates. */
+  ivec2 extent; /* Number of tiles covered along each axis (both end tiles included). */
+};
+
+MotionRect compute_motion_rect(ivec2 tile, vec2 motion)
+{
+#if DEBUG_BYPASS_DILATION
+  return MotionRect(tile, ivec2(1));
+#endif
+  /* Ceil to the number of tiles touched: `motion` is divided by the tile size. */
+  ivec2 point1 = tile + ivec2(sign(motion) * ceil(abs(motion) / float(MOTION_BLUR_TILE_SIZE)));
+  ivec2 point2 = tile;
+  /* Axis-aligned bounds of the segment going from `tile` to the tile reached by `motion`. */
+  ivec2 max_point = max(point1, point2);
+  ivec2 min_point = min(point1, point2);
+  /* Clamp to the bounds of the tile image. */
+  max_point = min(max_point, imageSize(in_tiles_img) - 1);
+  min_point = max(min_point, ivec2(0));
+
+  MotionRect rect;
+  rect.bottom_left = min_point;
+  rect.extent = 1 + max_point - min_point;
+  return rect;
+}
+
+struct MotionLine {
+  /** Origin of the line (a tile coordinate, see compute_motion_line). */
+  vec2 origin;
+  /** Normal to the line direction (the direction rotated by 90 degrees). */
+  vec2 normal;
+};
+
+MotionLine compute_motion_line(ivec2 tile, vec2 motion)
+{
+  vec2 dir = safe_normalize(motion);
+  /* The line goes through `tile` and follows the direction of `motion`. */
+  MotionLine line;
+  line.origin = vec2(tile);
+  /* Rotate 90° Counter-Clockwise. */
+  line.normal = vec2(-dir.y, dir.x);
+  return line;
+}
+
+bool is_inside_motion_line(ivec2 tile, MotionLine motion_line)
+{
+#if DEBUG_BYPASS_DILATION
+  return true;
+#endif
+  /* NOTE: Everything is in tile units. */
+  float dist = point_line_projection_dist(vec2(tile), motion_line.origin, motion_line.normal);
+  /* In order to be conservative and for simplicity, we use the tiles bounding circles.
+   * Consider that both the tile and the line have a bounding radius of M_SQRT1_2. */
+  return abs(dist) < M_SQRT2;
+}
+
+void main()
+{
+  ivec2 src_tile = ivec2(gl_GlobalInvocationID.xy);
+  if (any(greaterThanEqual(src_tile, imageSize(in_tiles_img)))) {
+    return;
+  }
+  /* Largest motion of this tile: previous step in xy, next step in zw. */
+  vec4 max_motion = imageLoad(in_tiles_img, src_tile);
+  /* Payloads embed the source tile so that the gather pass can fetch its motion vector. */
+  MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile);
+  MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
+  if (true) {
+    /* Rectangular area (in tiles) where the motion vector spreads. */
+    MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
+    MotionLine motion_line = compute_motion_line(src_tile, max_motion.xy);
+    /* Do a conservative rasterization of the line of the motion vector line. */
+    for (int x = 0; x < motion_rect.extent.x; x++) {
+      for (int y = 0; y < motion_rect.extent.y; y++) {
+        ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
+        if (is_inside_motion_line(tile, motion_line)) {
+          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
+          /* FIXME: This is a bit weird, but for some reason, we also need to store into the
+           * motion next table so that weighting in gather pass is better. */
+          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
+        }
+      }
+    }
+  }
+
+  if (true) {
+    /* NOTE: Reuses `payload_prv` / `payload_nxt` packed above; no extra payload is needed. */
+    /* Rectangular area (in tiles) where the motion vector spreads. */
+    MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
+    MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
+    /* Do a conservative rasterization of the line of the motion vector line. */
+    for (int x = 0; x < motion_rect.extent.x; x++) {
+      for (int y = 0; y < motion_rect.extent.y; y++) {
+        ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
+        if (is_inside_motion_line(tile, motion_line)) {
+          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
+          /* FIXME: This is a bit weird, but for some reason, we also need to store into the
+           * motion prev table so that weighting in gather pass is better. */
+          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
+        }
+      }
+    }
+  }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
new file mode 100644
index 00000000000..cbbeea25d20
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
@@ -0,0 +1,103 @@
+
+/**
+ * Shaders that down-sample velocity buffer into squared tile of MB_TILE_DIVISOR pixels wide.
+ * Outputs the largest motion vector in the tile area.
+ * Also perform velocity resolve to speedup the convolution pass.
+ *
+ * Based on:
+ * A Fast and Stable Feature-Aware Motion Blur Filter
+ * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai
+ *
+ * Adapted from G3D Innovation Engine implementation.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+
+shared uint payload_prev; /* Workgroup maximum of the packed payloads (previous motion). */
+shared uint payload_next; /* Workgroup maximum of the packed payloads (next motion). */
+shared vec2 max_motion_prev; /* Motion vector of the thread owning `payload_prev`. */
+shared vec2 max_motion_next; /* Motion vector of the thread owning `payload_next`. */
+
+/* Store velocity magnitude in the MSB and thread id in the LSB. */
+uint pack_payload(vec2 motion, uvec2 thread_id)
+{
+  /* NOTE: The velocity magnitude is clamped to 0xFFFF (65535) pixels to fit in 16 bits. */
+  return (min(uint(ceil(length(motion))), 0xFFFFu) << 16u) | (thread_id.y << 8) | thread_id.x;
+}
+
+/* Return thread index (x from bits 0-7, y from bits 8-15) from the payload. */
+uvec2 unpack_payload(uint payload)
+{
+  return uvec2(payload & 0xFFu, (payload >> 8) & 0xFFu);
+}
+
+void main()
+{
+  if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+    payload_prev = 0u;
+    payload_next = 0u;
+  }
+  barrier();
+  /* Zero-init: thread (0, 0) with no motion packs to 0u and never passes the `<` test below. */
+  uint local_payload_prev = 0u;
+  uint local_payload_next = 0u;
+  vec2 local_max_motion_prev = vec2(0.0);
+  vec2 local_max_motion_next = vec2(0.0);
+
+  ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(velocity_img) - 1);
+
+  vec2 render_size = vec2(imageSize(velocity_img).xy);
+  vec2 uv = (vec2(texel) + 0.5) / render_size;
+  float depth = texelFetch(depth_tx, texel, 0).r;
+  vec4 motion = velocity_resolve(imageLoad(velocity_img, texel), uv, depth);
+#ifdef FLATTEN_VIEWPORT
+  /* imageLoad does not perform the swizzling like sampler does. Do it manually. */
+  motion = motion.xyxy;
+#endif
+
+  /* Store resolved velocity to speedup the gather pass. Out of bounds writes are ignored.
+   * Unfortunately, we cannot convert to pixel space here since it is also used by TAA and the
+   * motion blur needs to remain optional. */
+  imageStore(velocity_img, ivec2(gl_GlobalInvocationID.xy), velocity_pack(motion));
+  /* Clip velocity to viewport bounds (in NDC space). */
+  vec2 line_clip;
+  line_clip.x = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, motion.xy * 2.0);
+  line_clip.y = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, -motion.zw * 2.0);
+  motion *= min(line_clip, vec2(1.0)).xxyy;
+  /* Convert to pixel space. Note this is only for velocity tiles. */
+  motion *= render_size.xyxy;
+  /* Rescale to shutter relative motion for viewport. */
+  motion *= motion_blur_buf.motion_scale.xxyy;
+
+  uint sample_payload_prev = pack_payload(motion.xy, gl_LocalInvocationID.xy);
+  if (local_payload_prev < sample_payload_prev) {
+    local_payload_prev = sample_payload_prev;
+    local_max_motion_prev = motion.xy;
+  }
+
+  uint sample_payload_next = pack_payload(motion.zw, gl_LocalInvocationID.xy);
+  if (local_payload_next < sample_payload_next) {
+    local_payload_next = sample_payload_next;
+    local_max_motion_next = motion.zw;
+  }
+
+  /* Compare the local payload with the other threads. */
+  atomicMax(payload_prev, local_payload_prev);
+  atomicMax(payload_next, local_payload_next);
+  barrier();
+
+  /* Need to broadcast the result to another thread in order to issue a unique write. */
+  if (all(equal(unpack_payload(payload_prev), gl_LocalInvocationID.xy))) {
+    max_motion_prev = local_max_motion_prev;
+  }
+  if (all(equal(unpack_payload(payload_next), gl_LocalInvocationID.xy))) {
+    max_motion_next = local_max_motion_next;
+  }
+  barrier();
+
+  if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+    ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
+    imageStore(out_tiles_img, tile_co, vec4(max_motion_prev, max_motion_next));
+  }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
new file mode 100644
index 00000000000..a7329f77181
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
@@ -0,0 +1,221 @@
+
+/**
+ * Perform two gather blurs, one in each of the 2 motion blur directions.
+ * Based on:
+ * A Fast and Stable Feature-Aware Motion Blur Filter
+ * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai
+ *
+ * With modification from the presentation:
+ * Next Generation Post Processing in Call of Duty Advanced Warfare
+ * by Jorge Jimenez
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl)
+
+const int gather_sample_count = 8;
+
+/* Sample velocity at `uv`, convert it to pixel space and apply the motion scale. Assumes
+ * velocity_tx is the same resolution as the target post-fx framebuffer. */
+vec4 motion_blur_sample_velocity(sampler2D velocity_tx, vec2 uv)
+{
+  /* We can load velocity without velocity_resolve() since we resolved during the flatten pass. */
+  vec4 velocity = velocity_unpack(texture(velocity_tx, uv));
+  return velocity * vec2(textureSize(velocity_tx, 0)).xyxy * motion_blur_buf.motion_scale.xxyy;
+}
+
+vec2 spread_compare(float center_motion_length, float sample_motion_length, float offset_length)
+{ /* x: center, y: sample. Weight is 1 when the motion length covers the offset. */
+  return saturate(vec2(center_motion_length, sample_motion_length) - offset_length + 1.0);
+}
+
+vec2 depth_compare(float center_depth, float sample_depth)
+{ /* Soft classification of the sample: x = background weight, y = foreground weight. */
+  vec2 depth_scale = vec2(-motion_blur_buf.depth_scale, motion_blur_buf.depth_scale);
+  return saturate(0.5 + depth_scale * (sample_depth - center_depth));
+}
+
+/* Kill contribution if not going the same direction as the offset (static samples are kept). */
+float dir_compare(vec2 offset, vec2 sample_motion, float sample_motion_length)
+{
+  if (sample_motion_length < 0.5) {
+    return 1.0;
+  }
+  return (dot(offset, sample_motion) > 0.0) ? 1.0 : 0.0;
+}
+
+/* Return background (x) and foreground (y) weights for a sample `offset_length` away. */
+vec2 sample_weights(float center_depth,
+                    float sample_depth,
+                    float center_motion_length,
+                    float sample_motion_length,
+                    float offset_length)
+{
+  /* Classify foreground/background. */
+  vec2 depth_weight = depth_compare(center_depth, sample_depth);
+  /* Weight if sample is overlapping or under the center pixel. */
+  vec2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length);
+  return depth_weight * spread_weight;
+}
+
+struct Accumulator {
+  vec4 fg; /* Sum of the sample colors scaled by their foreground weight. */
+  vec4 bg; /* Sum of the sample colors scaled by their background weight. */
+  /** x: Background, y: Foreground, z: dir. */
+  vec3 weight;
+};
+
+void gather_sample(vec2 screen_uv,
+                   float center_depth,
+                   float center_motion_len,
+                   vec2 offset,
+                   float offset_len,
+                   const bool next,
+                   inout Accumulator accum)
+{
+  vec2 sample_uv = screen_uv - offset * motion_blur_buf.target_size_inv;
+  vec4 sample_vectors = motion_blur_sample_velocity(velocity_tx, sample_uv);
+  vec2 sample_motion = (next) ? sample_vectors.zw : sample_vectors.xy; /* zw: next, xy: prev. */
+  float sample_motion_len = length(sample_motion);
+  float sample_depth = texture(depth_tx, sample_uv).r;
+  vec4 sample_color = textureLod(in_color_tx, sample_uv, 0.0);
+  /* Same conversion as the one applied to `center_depth` in main(). */
+  sample_depth = get_view_z_from_depth(sample_depth);
+  /* x: background, y: foreground, z: direction weight. */
+  vec3 weights;
+  weights.xy = sample_weights(
+      center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
+  weights.z = dir_compare(offset, sample_motion, sample_motion_len);
+  weights.xy *= weights.z;
+  /* Accumulate colors; `weight.z` counts the samples that passed the direction test. */
+  accum.fg += sample_color * weights.y;
+  accum.bg += sample_color * weights.x;
+  accum.weight += weights;
+}
+
+void gather_blur(vec2 screen_uv,
+                 vec2 center_motion,
+                 float center_depth,
+                 vec2 max_motion,
+                 float ofs,
+                 const bool next,
+                 inout Accumulator accum)
+{
+  float center_motion_len = length(center_motion);
+  float max_motion_len = length(max_motion);
+
+  /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel.
+   * Fix this by overriding the max_motion. */
+  if (max_motion_len < center_motion_len) {
+    max_motion_len = center_motion_len;
+    max_motion = center_motion;
+  }
+  /* Nothing to gather when the largest motion is shorter than 0.5. */
+  if (max_motion_len < 0.5) {
+    return;
+  }
+  /* Sample along `max_motion`; `ofs` shifts every sample by a fraction of one step. */
+  int i;
+  float t, inc = 1.0 / float(gather_sample_count);
+  for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) {
+    gather_sample(screen_uv,
+                  center_depth,
+                  center_motion_len,
+                  max_motion * t,
+                  max_motion_len * t,
+                  next,
+                  accum);
+  }
+  /* Skip the second gather when the center motion is shorter than 0.5. */
+  if (center_motion_len < 0.5) {
+    return;
+  }
+
+  for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) {
+    /* Also sample in center motion direction.
+     * Allow recovering motion where there is conflicting
+     * motion between foreground and background. */
+    gather_sample(screen_uv,
+                  center_depth,
+                  center_motion_len,
+                  center_motion * t,
+                  center_motion_len * t,
+                  next,
+                  accum);
+  }
+}
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+  vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(depth_tx, 0).xy);
+  /* Threads that fall outside of the depth texture have nothing to output. */
+  if (!in_texture_range(texel, depth_tx)) {
+    return;
+  }
+
+  /* Data of the center pixel of the gather (target). */
+  float center_depth = get_view_z_from_depth(texelFetch(depth_tx, texel, 0).r);
+  vec4 center_motion = motion_blur_sample_velocity(velocity_tx, uv);
+
+  vec4 center_color = textureLod(in_color_tx, uv, 0.0);
+
+  float noise_offset = sampling_rng_1D_get(SAMPLING_TIME);
+  /** TODO(fclem) Blue noise. */
+  vec2 rand = vec2(interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 0, noise_offset),
+                   interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 1, noise_offset));
+
+  /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile.
+   * Note that this only randomizes in one direction but in practice it's enough. */
+  rand.x = rand.x * 2.0 - 1.0;
+  ivec2 tile = (texel + ivec2(rand.x * float(MOTION_BLUR_TILE_SIZE) * 0.25)) /
+               MOTION_BLUR_TILE_SIZE;
+  tile = clamp(tile, ivec2(0), imageSize(in_tiles_img) - 1);
+  /* NOTE: Tile velocity is already in pixel space and with correct zw sign. */
+  vec4 max_motion;
+  /* Load dilation result from the indirection table. */
+  ivec2 tile_prev;
+  motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev);
+  max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
+  ivec2 tile_next;
+  motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next);
+  max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;
+
+  Accumulator accum;
+  accum.weight = vec3(0.0, 0.0, 1.0); /* z starts at 1.0: it is used as a divisor below. */
+  accum.bg = vec4(0.0);
+  accum.fg = vec4(0.0);
+  /* First linear gather. time = [T - delta, T] */
+  gather_blur(uv, center_motion.xy, center_depth, max_motion.xy, rand.y, false, accum);
+  /* Second linear gather. time = [T, T + delta] */
+  gather_blur(uv, center_motion.zw, center_depth, max_motion.zw, rand.y, true, accum);
+
+#if 1 /* Own addition. Not present in reference implementation. */
+  /* Avoid division by 0.0. */
+  float w = 1.0 / (50.0 * float(gather_sample_count) * 4.0);
+  accum.bg += center_color * w;
+  accum.weight.x += w;
+  /* NOTE: In Jimenez's presentation, they used center sample.
+   * We use background color as it contains more information for foreground
+   * elements that have not enough weights.
+   * Yield better blur in complex motion. */
+  center_color = accum.bg / accum.weight.x;
+#endif
+  /* Merge background. */
+  accum.fg += accum.bg;
+  accum.weight.y += accum.weight.x;
+  /* Balance accumulation for failed samples.
+   * We replace the missing foreground by the background. */
+  float blend_fac = saturate(1.0 - accum.weight.y / accum.weight.z);
+  vec4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
+
+#if 0 /* For debugging. */
+  out_color.rgb = out_color.ggg;
+  out_color.rg += max_motion.xy;
+#endif
+
+  imageStore(out_color_img, texel, out_color);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl
new file mode 100644
index 00000000000..436fd01795a
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl
@@ -0,0 +1,48 @@
+
+
+/* -------------------------------------------------------------------- */
+/** \name Tile indirection packing
+ * \{ */
+
+#define MotionPayload uint
+
+/* Store velocity magnitude in the MSB to be able to use it with atomicMax operations. */
+MotionPayload motion_blur_tile_indirection_pack_payload(vec2 motion, uvec2 payload)
+{
+  /* NOTE: Clamp to 16383 pixel velocity (14 bits). After that, it is the tile position that
+   * determines the tile to dilate over. */
+  uint velocity = min(uint(ceil(length(motion))), 0x3FFFu);
+  /* 9 bits per `payload` component: designed for 512x512 tiles max. */
+  return (velocity << 18u) | ((payload.x & 0x1FFu) << 9u) | (payload.y & 0x1FFu);
+}
+
+/* Unpack the two 9 bit coordinates (x, y) stored in the 18 lower bits of a payload. */
+ivec2 motion_blur_tile_indirection_pack_payload(uint data)
+{
+  return ivec2((data >> 9u) & 0x1FFu, data & 0x1FFu);
+}
+
+uint motion_blur_tile_indirection_index(uint motion_step, uvec2 tile)
+{
+  uint index = tile.x; /* Rows of tiles are MOTION_BLUR_MAX_TILE entries apart. */
+  index += tile.y * MOTION_BLUR_MAX_TILE;
+  index += motion_step * MOTION_BLUR_MAX_TILE * MOTION_BLUR_MAX_TILE; /* One table per step. */
+  return index;
+}
+
+#define MOTION_PREV 0u
+#define MOTION_NEXT 1u
+
+#define motion_blur_tile_indirection_store(table_, step_, tile, payload_) \
+  if (true) { \
+    uint index = motion_blur_tile_indirection_index(step_, tile); \
+    atomicMax(table_[index], payload_); \
+  }
+
+#define motion_blur_tile_indirection_load(table_, step_, tile_, result_) \
+  if (true) { \
+    uint index = motion_blur_tile_indirection_index(step_, tile_); \
+    result_ = motion_blur_tile_indirection_pack_payload(table_[index]); \
+  }
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl
new file mode 100644
index 00000000000..0c7bbaa9dc2
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl
@@ -0,0 +1,104 @@
+
+/**
+ * Sampling data accessors and random number generators.
+ * Also contains some sample mapping functions.
+ **/
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+/* -------------------------------------------------------------------- */
+/** \name Sampling data.
+ *
+ * Returns random values from a Low Discrepancy Sequence in [0..1) range.
+ * This value is uniform (constant) for the whole scene sample.
+ * You might want to couple it with a noise function.
+ * \{ */
+
+#ifdef EEVEE_SAMPLING_DATA
+
+float sampling_rng_1D_get(const eSamplingDimension dimension)
+{
+  return sampling_buf.dimensions[dimension]; /* Value stored for `dimension`. */
+}
+
+vec2 sampling_rng_2D_get(const eSamplingDimension dimension)
+{ /* Reads `dimension` and the dimension that follows it. */
+  return vec2(sampling_buf.dimensions[dimension], sampling_buf.dimensions[dimension + 1u]);
+}
+
+vec3 sampling_rng_3D_get(const eSamplingDimension dimension)
+{ /* Reads `dimension` and the two dimensions that follow it. */
+  return vec3(sampling_buf.dimensions[dimension],
+              sampling_buf.dimensions[dimension + 1u],
+              sampling_buf.dimensions[dimension + 2u]);
+}
+
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Random Number Generators.
+ * \{ */
+
+/* Interleaved gradient noise by Jorge Jimenez
+ * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
+ * Seeding found by Epic Games. */
+float interlieved_gradient_noise(vec2 pixel, float seed, float offset)
+{
+  pixel += seed * (vec2(47, 17) * 0.695);
+  return fract(offset + 52.9829189 * fract(0.06711056 * pixel.x + 0.00583715 * pixel.y));
+}
+
+/* From: http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html */
+float van_der_corput_radical_inverse(uint bits)
+{
+#if 0 /* Reference */
+  bits = (bits << 16u) | (bits >> 16u);
+  bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
+  bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
+  bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
+  bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
+#else
+  bits = bitfieldReverse(bits);
+#endif
+  /* Same as dividing by 0x100000000 (2^32): the reversed bits become a fraction. */
+  return float(bits) * 2.3283064365386963e-10;
+}
+
+vec2 hammersley_2d(float i, float sample_count)
+{
+  vec2 rand;
+  rand.x = i / sample_count; /* Sample index as a fraction of the sample count. */
+  rand.y = van_der_corput_radical_inverse(uint(i)); /* Radical inverse of the index. */
+  return rand;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Distribution mapping.
+ *
+ * Functions mapping input random numbers to sampling shapes (i.e: hemisphere).
+ * \{ */
+
+/* Given 2 random numbers in [0..1] range, return a random unit disk sample. */
+vec2 sample_disk(vec2 noise)
+{
+  float angle = noise.x * M_2PI;
+  return vec2(cos(angle), sin(angle)) * sqrt(noise.y);
+}
+
+/* This transforms a 2d random sample (in [0..1] range) to a sample located on a cylinder of the
+ * same range. This is because the sampling functions expect such a random sample which is
+ * normally precomputed. */
+vec3 sample_cylinder(vec2 rand)
+{
+  float theta = rand.x;
+  float phi = (rand.y - 0.5) * M_2PI;
+  float cos_phi = cos(phi);
+  float sin_phi = sqrt(1.0 - sqr(cos_phi)) * sign(phi); /* Magnitude from cos, sign from phi. */
+  return vec3(theta, cos_phi, sin_phi);
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
index 34ea288852a..bd32215ddc2 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
@@ -73,7 +73,7 @@ void main()
 
   nodetree_surface();
 
-  // float noise_offset = sampling_rng_1D_get(sampling_buf, SAMPLING_TRANSPARENCY);
+  // float noise_offset = sampling_rng_1D_get(SAMPLING_TRANSPARENCY);
   float noise_offset = 0.5;
   float random_threshold = hashed_alpha_threshold(1.0, noise_offset, g_data.P);
 
@@ -84,7 +84,7 @@ void main()
 #endif
 
 #ifdef MAT_VELOCITY
-  out_velocity = velocity_surface(interp.P + motion.prev, interp.P, interp.P - motion.next);
+  out_velocity = velocity_surface(interp.P + motion.prev, interp.P, interp.P + motion.next);
   out_velocity = velocity_pack(out_velocity);
 #endif
 }
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl
index c21456b7a5c..c0a5b976810 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl
@@ -2,8 +2,6 @@
 #pragma BLENDER_REQUIRE(common_view_lib.glsl)
 #pragma BLENDER_REQUIRE(eevee_camera_lib.glsl)
 
-#ifdef VELOCITY_CAMERA
-
 vec4 velocity_pack(vec4 data)
 {
   return data * 0.01;
@@ -14,6 +12,8 @@ vec4 velocity_unpack(vec4 data)
   return data * 100.0;
 }
 
+#ifdef VELOCITY_CAMERA
+
 /**
  * Given a triple of position, compute the previous and next motion vectors.
  * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy).
@@ -24,7 +24,15 @@ vec4 velocity_surface(vec3 P_prv, vec3 P, vec3 P_nxt)
   vec2 prev_uv = project_point(camera_prev.persmat, P_prv).xy;
   vec2 curr_uv = project_point(camera_curr.persmat, P).xy;
   vec2 next_uv = project_point(camera_next.persmat, P_nxt).xy;
-
+  /* Fix issue with perspective division. */
+  if (any(isnan(prev_uv))) {
+    prev_uv = curr_uv;
+  }
+  if (any(isnan(next_uv))) {
+    next_uv = curr_uv;
+  }
+  /* NOTE: We output both vectors in the same direction so we can reuse the same vector
+   * with rgrg swizzle in viewport. */
   vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv);
   /* Convert NDC velocity to UV velocity */
   motion *= 0.5;
@@ -45,7 +53,8 @@ vec4 velocity_background(vec3 vV)
   vec2 prev_uv = project_point(camera_prev.winmat, V).xy;
   vec2 curr_uv = project_point(camera_curr.winmat, V).xy;
   vec2 next_uv = project_point(camera_next.winmat, V).xy;
-
+  /* NOTE: We output both vectors in the same direction so we can reuse the same vector
+   * with rgrg swizzle in viewport. */
   vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv);
   /* Convert NDC velocity to UV velocity */
   motion *= 0.5;
@@ -53,15 +62,8 @@ vec4 velocity_background(vec3 vV)
   return motion;
 }
 
-/**
- * Load and resolve correct velocity as some pixels might still not have correct
- * motion data for performance reasons.
- */
-vec4 velocity_resolve(sampler2D vector_tx, ivec2 texel, float depth)
+vec4 velocity_resolve(vec4 vector, vec2 uv, float depth)
 {
-  vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(vector_tx, 0).xy);
-  vec4 vector = texelFetch(vector_tx, texel, 0);
-
   if (vector.x == VELOCITY_INVALID) {
     bool is_background = (depth == 1.0);
     if (is_background) {
@@ -78,6 +80,18 @@ vec4 velocity_resolve(sampler2D vector_tx, ivec2 texel, float depth)
   return velocity_unpack(vector);
 }
 
+/**
+ * Load and resolve correct velocity as some pixels might still not have correct
+ * motion data for performance reasons.
+ * Returns motion vector in render UV space.
+ */
+vec4 velocity_resolve(sampler2D vector_tx, ivec2 texel, float depth)
+{
+  vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(vector_tx, 0).xy);
+  vec4 vector = texelFetch(vector_tx, texel, 0);
+  return velocity_resolve(vector, uv, depth);
+}
+
 #endif
 
 #ifdef MAT_VELOCITY
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
index 2368061402c..db3cfc4a7a2 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
@@ -12,8 +12,9 @@ GPU_SHADER_CREATE_INFO(eevee_shared)
     .typedef_source("eevee_shader_shared.hh");
 
 GPU_SHADER_CREATE_INFO(eevee_sampling_data)
+    .define("EEVEE_SAMPLING_DATA")
     .additional_info("eevee_shared")
-    .uniform_buf(14, "SamplingData", "sampling_buf");
+    .storage_buf(14, Qualifier::READ, "SamplingData", "sampling_buf");
 
 /** \} */
 
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
new file mode 100644
index 00000000000..b01d1521c5e
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
@@ -0,0 +1,44 @@
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten)
+    .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
+    .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
+    .uniform_buf(4, "MotionBlurData", "motion_blur_buf")
+    .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+    /* Image slot 0 (`velocity_img`) is declared by the viewport / render variants. */
+    .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_img")
+    .compute_source("eevee_motion_blur_flatten_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_viewport)
+    .do_static_compilation(true)
+    .define("FLATTEN_VIEWPORT") /* The flatten shader swizzles the 2 channel velocity to xyxy. */
+    .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img")
+    .additional_info("eevee_motion_blur_tiles_flatten");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_render)
+    .do_static_compilation(true) /* Four channel velocity; FLATTEN_VIEWPORT is not defined. */
+    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img")
+    .additional_info("eevee_motion_blur_tiles_flatten");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_dilate)
+    .do_static_compilation(true)
+    .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
+    .additional_info("eevee_shared")
+    /* NOTE: See MotionBlurTileIndirection. Updated with atomicMax by the dilate shader. */
+    .storage_buf(0, Qualifier::READ_WRITE, "uint", "tile_indirection_buf[]")
+    .image(1, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img")
+    .compute_source("eevee_motion_blur_dilate_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_gather)
+    .do_static_compilation(true)
+    .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
+    .additional_info("eevee_shared", "draw_view", "eevee_sampling_data")
+    .uniform_buf(4, "MotionBlurData", "motion_blur_buf")
+    .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+    .sampler(1, ImageType::FLOAT_2D, "velocity_tx")
+    .sampler(2, ImageType::FLOAT_2D, "in_color_tx")
+    /* NOTE: See MotionBlurTileIndirection. Only read by the gather shader. */
+    .storage_buf(0, Qualifier::READ, "uint", "tile_indirection_buf[]")
+    .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img")
+    .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+    .compute_source("eevee_motion_blur_gather_comp.glsl");