1 files changed, 405 insertions, 12 deletions
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
index 819f49756d7..f6a96aaaff2 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -12,22 +12,75 @@
 #  include "BLI_memory_utils.hh"
 #  include "DRW_gpu_wrapper.hh"
 
+#  include "draw_manager.hh"
+#  include "draw_pass.hh"
+
 #  include "eevee_defines.hh"
 
 #  include "GPU_shader_shared.h"
 
 namespace blender::eevee {
 
-using draw::Framebuffer;
-using draw::SwapChain;
-using draw::Texture;
-using draw::TextureFromPool;
+using namespace draw;
+
+constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
+constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
 
 #endif
 
 #define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14
 
 /* -------------------------------------------------------------------- */
+/** \name Debug Mode
+ * \{ */
+
+/** These are just to make more sense of G.debug_value's values. Reserved range is 1-30. */
+enum eDebugMode : uint32_t {
+  DEBUG_NONE = 0u,
+  /**
+   * Gradient showing light evaluation hot-spots.
+   */
+  DEBUG_LIGHT_CULLING = 1u,
+  /**
+   * Show incorrectly downsample tiles in red.
+   */
+  DEBUG_HIZ_VALIDATION = 2u,
+  /**
+   * Tile-maps to screen. Is also present in other modes.
+   * - Black pixels, no pages allocated.
+   * - Green pixels, pages cached.
+   * - Red pixels, pages allocated.
+   */
+  DEBUG_SHADOW_TILEMAPS = 10u,
+  /**
+   * Random color per pages. Validates page density allocation and sampling.
+   */
+  DEBUG_SHADOW_PAGES = 11u,
+  /**
+   * Outputs random color per tile-map (or tile-map level). Validates tile-maps coverage.
+   * Black means not covered by any tile-maps LOD of the shadow.
+   */
+  DEBUG_SHADOW_LOD = 12u,
+  /**
+   * Outputs white pixels for pages allocated and black pixels for unused pages.
+   * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work.
+   */
+  DEBUG_SHADOW_PAGE_ALLOCATION = 13u,
+  /**
+   * Outputs the tile-map atlas. Default tile-map is too big for the usual screen resolution.
+   * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
+   */
+  DEBUG_SHADOW_TILE_ALLOCATION = 14u,
+  /**
+   * Visualize linear depth stored in the atlas regions of the active light.
+   * This way, one can check if the rendering, the copying and the shadow sampling functions works.
+   */
+  DEBUG_SHADOW_SHADOW_DEPTH = 15u
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Sampling
  * \{ */
 
@@ -110,7 +163,7 @@ struct CameraData {
   float4x4 viewinv;
   float4x4 winmat;
   float4x4 wininv;
-  /** Camera UV scale and bias. Also known as `viewcamtexcofac`. */
+  /** Camera UV scale and bias. */
   float2 uv_scale;
   float2 uv_bias;
   /** Panorama parameters. */
@@ -124,7 +177,7 @@ struct CameraData {
   float clip_far;
   eCameraType type;
 
-  bool initialized;
+  bool1 initialized;
 
 #ifdef __cplusplus
   /* Small constructor to allow detecting new buffers. */
@@ -141,6 +194,17 @@ BLI_STATIC_ASSERT_ALIGN(CameraData, 16)
 
 #define FILM_PRECOMP_SAMPLE_MAX 16
 
+enum eFilmWeightLayerIndex : uint32_t {
+  FILM_WEIGHT_LAYER_ACCUMULATION = 0u,
+  FILM_WEIGHT_LAYER_DISTANCE = 1u,
+};
+
+enum ePassStorageType : uint32_t {
+  PASS_STORAGE_COLOR = 0u,
+  PASS_STORAGE_VALUE = 1u,
+  PASS_STORAGE_CRYPTOMATTE = 2u,
+};
+
 struct FilmSample {
   int2 texel;
   float weight;
@@ -154,6 +218,8 @@ struct FilmData {
   int2 extent;
   /** Offset of the film in the full-res frame, in pixels. */
   int2 offset;
+  /** Extent used by the render buffers when rendering the main views. */
+  int2 render_extent;
   /** Sub-pixel offset applied to the window matrix.
    * NOTE: In final film pixel unit.
    * NOTE: Positive values makes the view translate in the negative axes direction.
@@ -172,6 +238,9 @@ struct FilmData {
   /** Is true if accumulation of filtered passes is needed. */
   bool1 any_render_pass_1;
   bool1 any_render_pass_2;
+  /** Controlled by user in lookdev mode or by render settings. */
+  float background_opacity;
+  float _pad0;
   /** Output counts per type. */
   int color_len, value_len;
   /** Index in color_accum_img or value_accum_img of each pass. -1 if pass is not enabled. */
@@ -192,13 +261,19 @@ struct FilmData {
   int combined_id;
   /** Id of the render-pass to be displayed. -1 for combined. */
   int display_id;
-  /** True if the render-pass to be displayed is from the value accum buffer. */
-  bool1 display_is_value;
+  /** Storage type of the render-pass to be displayed. */
+  ePassStorageType display_storage_type;
   /** True if we bypass the accumulation and directly output the accumulation buffer. */
   bool1 display_only;
   /** Start of AOVs and number of aov. */
   int aov_color_id, aov_color_len;
   int aov_value_id, aov_value_len;
+  /** Start of cryptomatte per layer (-1 if pass is not enabled). */
+  int cryptomatte_object_id;
+  int cryptomatte_asset_id;
+  int cryptomatte_material_id;
+  /** Max number of samples stored per layer (is even number). */
+  int cryptomatte_samples_len;
   /** Settings to render mist pass */
   float mist_scale, mist_bias, mist_exponent;
   /** Scene exposure used for better noise reduction. */
@@ -206,7 +281,7 @@ struct FilmData {
   /** Scaling factor for scaled resolution rendering. */
   int scaling_factor;
   /** Film pixel filter radius. */
-  float filter_size;
+  float filter_radius;
   /** Precomputed samples. First in the table is the closest one. The rest is unordered. */
   int samples_len;
   /** Sum of the weights of all samples in the sample table. */
@@ -215,17 +290,17 @@ struct FilmData {
 };
 BLI_STATIC_ASSERT_ALIGN(FilmData, 16)
 
-static inline float film_filter_weight(float filter_size, float sample_distance_sqr)
+static inline float film_filter_weight(float filter_radius, float sample_distance_sqr)
 {
 #if 1 /* Faster */
   /* Gaussian fitted to Blackman-Harris. */
-  float r = sample_distance_sqr / (filter_size * filter_size);
+  float r = sample_distance_sqr / (filter_radius * filter_radius);
   const float sigma = 0.284;
   const float fac = -0.5 / (sigma * sigma);
   float weight = expf(fac * r);
 #else
   /* Blackman-Harris filter. */
-  float r = M_2PI * saturate(0.5 + sqrtf(sample_distance_sqr) / (2.0 * filter_size));
+  float r = M_2PI * saturate(0.5 + sqrtf(sample_distance_sqr) / (2.0 * filter_radius));
   float weight = 0.35875 - 0.48829 * cosf(r) + 0.14128 * cosf(2.0 * r) - 0.01168 * cosf(3.0 * r);
 #endif
   return weight;
@@ -234,6 +309,17 @@ static inline float film_filter_weight(float filter_size, float sample_distance_
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name Render passes
+ * \{ */
+
+enum eRenderPassLayerIndex : uint32_t {
+  RENDER_PASS_LAYER_DIFFUSE_LIGHT = 0u,
+  RENDER_PASS_LAYER_SPECULAR_LIGHT = 1u,
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Arbitrary Output Variables
  * \{ */
 
@@ -307,6 +393,272 @@ BLI_STATIC_ASSERT_ALIGN(VelocityGeometryIndex, 16)
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name Motion Blur
+ * \{ */
+
+#define MOTION_BLUR_TILE_SIZE 32
+#define MOTION_BLUR_MAX_TILE 512 /* 16384 / MOTION_BLUR_TILE_SIZE */
+struct MotionBlurData {
+  /** As the name suggests. Used to avoid a division in the sampling. */
+  float2 target_size_inv;
+  /** Viewport motion scaling factor. Make blur relative to frame time not render time. */
+  float2 motion_scale;
+  /** Depth scaling factor. Avoid blurring background behind moving objects. */
+  float depth_scale;
+
+  float _pad0, _pad1, _pad2;
+};
+BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16)
+
+/* For some reasons some GLSL compilers do not like this struct.
+ * So we declare it as a uint array instead and do indexing ourselves. */
+#ifdef __cplusplus
+struct MotionBlurTileIndirection {
+  /**
+   * Stores indirection to the tile with the highest velocity covering each tile.
+   * This is stored using velocity in the MSB to be able to use atomicMax operations.
+   */
+  uint prev[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE];
+  uint next[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE];
+};
+BLI_STATIC_ASSERT_ALIGN(MotionBlurTileIndirection, 16)
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Depth of field
+ * \{ */
+
+/* 5% error threshold. */
+#define DOF_FAST_GATHER_COC_ERROR 0.05
+#define DOF_GATHER_RING_COUNT 5
+#define DOF_DILATE_RING_COUNT 3
+
+struct DepthOfFieldData {
+  /** Size of the render targets for gather & scatter passes. */
+  int2 extent;
+  /** Size of a pixel in uv space (1.0 / extent). */
+  float2 texel_size;
+  /** Scale factor for anisotropic bokeh. */
+  float2 bokeh_anisotropic_scale;
+  float2 bokeh_anisotropic_scale_inv;
+  /* Correction factor to align main target pixels with the filtered mipmap chain texture. */
+  float2 gather_uv_fac;
+  /** Scatter parameters. */
+  float scatter_coc_threshold;
+  float scatter_color_threshold;
+  float scatter_neighbor_max_color;
+  int scatter_sprite_per_row;
+  /** Number of side the bokeh shape has. */
+  float bokeh_blades;
+  /** Rotation of the bokeh shape. */
+  float bokeh_rotation;
+  /** Multiplier and bias to apply to linear depth to Circle of confusion (CoC). */
+  float coc_mul, coc_bias;
+  /** Maximum absolute allowed Circle of confusion (CoC). Min of computed max and user max. */
+  float coc_abs_max;
+  /** Copy of camera type. */
+  eCameraType camera_type;
+  /** Weights of spatial filtering in stabilize pass. Not array to avoid alignment restriction. */
+  float4 filter_samples_weight;
+  float filter_center_weight;
+  /** Max number of sprite in the scatter pass for each ground. */
+  int scatter_max_rect;
+
+  int _pad0, _pad1;
+};
+BLI_STATIC_ASSERT_ALIGN(DepthOfFieldData, 16)
+
+struct ScatterRect {
+  /** Color and CoC of the 4 pixels the scatter sprite represents. */
+  float4 color_and_coc[4];
+  /** Rect center position in half pixel space. */
+  float2 offset;
+  /** Rect half extent in half pixel space. */
+  float2 half_extent;
+};
+BLI_STATIC_ASSERT_ALIGN(ScatterRect, 16)
+
+/** WORKAROUND(@fclem): This is because this file is included before common_math_lib.glsl. */
+#ifndef M_PI
+#  define EEVEE_PI
+#  define M_PI 3.14159265358979323846 /* pi */
+#endif
+
+static inline float coc_radius_from_camera_depth(DepthOfFieldData dof, float depth)
+{
+  depth = (dof.camera_type != CAMERA_ORTHO) ? 1.0f / depth : depth;
+  return dof.coc_mul * depth + dof.coc_bias;
+}
+
+static inline float regular_polygon_side_length(float sides_count)
+{
+  return 2.0f * sinf(M_PI / sides_count);
+}
+
+/* Returns intersection ratio between the radius edge at theta and the regular polygon edge.
+ * Start first corners at theta == 0. */
+static inline float circle_to_polygon_radius(float sides_count, float theta)
+{
+  /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide
+   * 36). */
+  float side_angle = (2.0f * M_PI) / sides_count;
+  return cosf(side_angle * 0.5f) /
+         cosf(theta - side_angle * floorf((sides_count * theta + M_PI) / (2.0f * M_PI)));
+}
+
+/* Remap input angle to have homogenous spacing of points along a polygon edge.
+ * Expects theta to be in [0..2pi] range. */
+static inline float circle_to_polygon_angle(float sides_count, float theta)
+{
+  float side_angle = (2.0f * M_PI) / sides_count;
+  float halfside_angle = side_angle * 0.5f;
+  float side = floorf(theta / side_angle);
+  /* Length of segment from center to the middle of polygon side. */
+  float adjacent = circle_to_polygon_radius(sides_count, 0.0f);
+
+  /* This is the relative position of the sample on the polygon half side. */
+  float local_theta = theta - side * side_angle;
+  float ratio = (local_theta - halfside_angle) / halfside_angle;
+
+  float halfside_len = regular_polygon_side_length(sides_count) * 0.5f;
+  float opposite = ratio * halfside_len;
+
+  /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */
+  float final_local_theta = atanf(opposite / adjacent);
+
+  return side * side_angle + final_local_theta;
+}
+
+#ifdef EEVEE_PI
+#  undef M_PI
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Light Culling
+ * \{ */
+
+/* Number of items we can cull. Limited by how we store CullingZBin. */
+#define CULLING_MAX_ITEM 65536
+/* Fine grained subdivision in the Z direction. Limited by the LDS in z-binning compute shader. */
+#define CULLING_ZBIN_COUNT 4096
+/* Max tile map resolution per axes. */
+#define CULLING_TILE_RES 16
+
+struct LightCullingData {
+  /** Scale applied to tile pixel coordinates to get target UV coordinate. */
+  float2 tile_to_uv_fac;
+  /** Scale and bias applied to linear Z to get zbin. */
+  float zbin_scale;
+  float zbin_bias;
+  /** Valid item count in the source data array. */
+  uint items_count;
+  /** Items that are processed by the 2.5D culling. */
+  uint local_lights_len;
+  /** Items that are **NOT** processed by the 2.5D culling (i.e: Sun Lights). */
+  uint sun_lights_len;
+  /** Number of items that passes the first culling test. (local lights only) */
+  uint visible_count;
+  /** Extent of one square tile in pixels. */
+  float tile_size;
+  /** Number of tiles on the X/Y axis. */
+  uint tile_x_len;
+  uint tile_y_len;
+  /** Number of word per tile. Depends on the maximum number of lights. */
+  uint tile_word_len;
+};
+BLI_STATIC_ASSERT_ALIGN(LightCullingData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Lights
+ * \{ */
+
+#define LIGHT_NO_SHADOW -1
+
+enum eLightType : uint32_t {
+  LIGHT_SUN = 0u,
+  LIGHT_POINT = 1u,
+  LIGHT_SPOT = 2u,
+  LIGHT_RECT = 3u,
+  LIGHT_ELLIPSE = 4u
+};
+
+static inline bool is_area_light(eLightType type)
+{
+  return type >= LIGHT_RECT;
+}
+
+struct LightData {
+  /** Normalized object matrix. Last column contains data accessible using the following macros. */
+  float4x4 object_mat;
+  /** Packed data in the last column of the object_mat. */
+#define _area_size_x object_mat[0][3]
+#define _area_size_y object_mat[1][3]
+#define _radius _area_size_x
+#define _spot_mul object_mat[2][3]
+#define _spot_bias object_mat[3][3]
+  /** Aliases for axes. */
+#ifndef USE_GPU_SHADER_CREATE_INFO
+#  define _right object_mat[0]
+#  define _up object_mat[1]
+#  define _back object_mat[2]
+#  define _position object_mat[3]
+#else
+#  define _right object_mat[0].xyz
+#  define _up object_mat[1].xyz
+#  define _back object_mat[2].xyz
+#  define _position object_mat[3].xyz
+#endif
+  /** Influence radius (inverted and squared) adjusted for Surface / Volume power. */
+  float influence_radius_invsqr_surface;
+  float influence_radius_invsqr_volume;
+  /** Maximum influence radius. Used for culling. */
+  float influence_radius_max;
+  /** Index of the shadow struct on CPU. -1 means no shadow. */
+  int shadow_id;
+  /** NOTE: It is ok to use float3 here. A float is declared right after it.
+   * float3 is also aligned to 16 bytes. */
+  float3 color;
+  /** Power depending on shader type. */
+  float diffuse_power;
+  float specular_power;
+  float volume_power;
+  float transmit_power;
+  /** Special radius factor for point lighting. */
+  float radius_squared;
+  /** Light Type. */
+  eLightType type;
+  /** Spot angle tangent. */
+  float spot_tan;
+  /** Spot size. Aligned to size of float2. */
+  float2 spot_size_inv;
+  /** Associated shadow data. Only valid if shadow_id is not LIGHT_NO_SHADOW. */
+  // ShadowData shadow_data;
+};
+BLI_STATIC_ASSERT_ALIGN(LightData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z Buffer
+ * \{ */
+
+struct HiZData {
+  /** Scale factor to remove HiZBuffer padding. */
+  float2 uv_scale;
+
+  float2 _pad0;
+};
+BLI_STATIC_ASSERT_ALIGN(HiZData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Ray-Tracing
  * \{ */
 
@@ -327,6 +679,34 @@ enum eClosureBits : uint32_t {
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name Subsurface
+ * \{ */
+
+#define SSS_SAMPLE_MAX 64
+#define SSS_BURLEY_TRUNCATE 16.0
+#define SSS_BURLEY_TRUNCATE_CDF 0.9963790093708328
+#define SSS_TRANSMIT_LUT_SIZE 64.0
+#define SSS_TRANSMIT_LUT_RADIUS 1.218
+#define SSS_TRANSMIT_LUT_SCALE ((SSS_TRANSMIT_LUT_SIZE - 1.0) / float(SSS_TRANSMIT_LUT_SIZE))
+#define SSS_TRANSMIT_LUT_BIAS (0.5 / float(SSS_TRANSMIT_LUT_SIZE))
+#define SSS_TRANSMIT_LUT_STEP_RES 64.0
+
+struct SubsurfaceData {
+  /** xy: 2D sample position [-1..1], zw: sample_bounds. */
+  /* NOTE(fclem) Using float4 for alignment. */
+  float4 samples[SSS_SAMPLE_MAX];
+  /** Sample index after which samples are not randomly rotated anymore. */
+  int jitter_threshold;
+  /** Number of samples precomputed in the set. */
+  int sample_len;
+  int _pad0;
+  int _pad1;
+};
+BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Utility Texture
  * \{ */
 
@@ -365,11 +745,24 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer)
 
 using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
 using CameraDataBuf = draw::UniformBuffer<CameraData>;
+using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
+using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
+using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
 using FilmDataBuf = draw::UniformBuffer<FilmData>;
+using HiZDataBuf = draw::UniformBuffer<HiZData>;
+using LightCullingDataBuf = draw::StorageBuffer<LightCullingData>;
+using LightCullingKeyBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
+using LightCullingTileBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
+using LightCullingZbinBuf = draw::StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>;
+using LightCullingZdistBuf = draw::StorageArrayBuffer<float, LIGHT_CHUNK, true>;
+using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>;
+using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>;
+using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>;
 using SamplingDataBuf = draw::StorageBuffer<SamplingData>;
 using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>;
 using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>;
 using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>;
+using CryptomatteObjectBuf = draw::StorageArrayBuffer<float2, 16>;
 
 }  // namespace blender::eevee
 #endif