/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright 2021 Blender Foundation. */

/** \file
 * \ingroup eevee
 *
 * The shadow module manages shadow update tagging & shadow rendering.
 */

#pragma once

#include "BLI_vector.hh"

#include "GPU_batch.h"

#include "eevee_allocator.hh"
#include "eevee_id_map.hh"
#include "eevee_material.hh"
#include "eevee_shader.hh"
#include "eevee_shader_shared.hh"

namespace blender::eevee {

/**
 * TODO(fclem): Future plans. The implementation was started on CPU with UBO limits and the
 * absence of compute capabilities in mind. Now that this limit is gone, the door is open for a
 * fully GPU-driven pipeline of shadow and light management, where the CPU would only push object
 * updates and manage buffer grow/shrink behaviors. The GPU would then do what the
 * ShadowTileAllocator, ShadowPunctual and ShadowDirectional classes are doing.
 * We still need to find a way to issue all shadow render passes at once and cull objects per
 * view on the GPU.
 */

class Instance;
class ShadowModule;

/** World space axis aligned bounding box. */
struct AABB {
  /**
   * TODO(fclem): The padding exists to match the std430 layout requirement inside shader storage
   * buffers. The goal would be to send the Oriented Bounding Box instead, for better culling.
   */
  float3 min;
  float _pad0;
  float3 max;
  float _pad1;

  AABB() = default;
  AABB(float val) : min(-val), max(val){};
  AABB(float3 min_, float3 max_) : min(min_), max(max_){};
  AABB(Object *ob)
  {
    init_min_max();
    BoundBox *bb = BKE_object_boundbox_get(ob);
    for (int i = 0; i < 8; i++) {
      float vec[3];
      copy_v3_v3(vec, bb->vec[i]);
      mul_m4_v3(ob->obmat, vec);
      minmax_v3v3_v3(min, max, vec);
    }
  }

  void debug_draw(void)
  {
    BoundBox bb = *this;
    float4 color = {1, 0, 0, 1};
    DRW_debug_bbox(&bb, color);
  }

  float3 center(void) const
  {
    return math::midpoint(min, max);
  }

  void init_min_max(void)
  {
    INIT_MINMAX(min, max);
  }

  void merge(const AABB &a)
  {
    DO_MIN(a.min, min);
    DO_MAX(a.max, max);
  }

  void merge(const float3 &a)
  {
    DO_MIN(a, min);
    DO_MAX(a, max);
  }

  void merge(const BoundBox &bbox)
  {
    for (auto i : IndexRange(ARRAY_SIZE(bbox.vec))) {
      merge(*reinterpret_cast<const float3 *>(bbox.vec[i]));
    }
  }

  /* Transform an AABB into another space.
   * Returns the AABB in the new space (so equal or bigger). */
  friend AABB operator*(const float4x4 &m, const AABB &aabb)
  {
    BoundBox bbox = aabb;
    AABB result;
    result.init_min_max();
    for (auto i : IndexRange(ARRAY_SIZE(bbox.vec))) {
      result.merge(m * float3(bbox.vec[i]));
    }
    return result;
  }

  static AABB intersect(const AABB &a, const AABB &b)
  {
    AABB result;
    result.min = math::max(a.min, b.min);
    result.max = math::min(a.max, b.max);
    return result;
  }

  float radius(void) const
  {
    return math::length(max - min) / 2.0f;
  }

  float3 extent() const
  {
    return math::max(float3(0.0f), max - min);
  }

  bool is_empty() const
  {
    return (min.x >= max.x) || (min.y >= max.y) || (min.z >= max.z);
  }

  operator BoundBox() const
  {
    float3 middle = center();
    float3 halfdim = max - middle;
    BoundBox bb;
    *reinterpret_cast<float3 *>(bb.vec[0]) = middle + halfdim * float3(1, 1, 1);
    *reinterpret_cast<float3 *>(bb.vec[1]) = middle + halfdim * float3(-1, 1, 1);
    *reinterpret_cast<float3 *>(bb.vec[2]) = middle + halfdim * float3(-1, -1, 1);
    *reinterpret_cast<float3 *>(bb.vec[3]) = middle + halfdim * float3(1, -1, 1);
    *reinterpret_cast<float3 *>(bb.vec[4]) = middle + halfdim * float3(1, 1, -1);
    *reinterpret_cast<float3 *>(bb.vec[5]) = middle + halfdim * float3(-1, 1, -1);
    *reinterpret_cast<float3 *>(bb.vec[6]) = middle + halfdim * float3(-1, -1, -1);
    *reinterpret_cast<float3 *>(bb.vec[7]) = middle + halfdim * float3(1, -1, -1);
    return bb;
  }
};
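
/* Illustrative sketch (not part of the module): `operator*` re-fits an axis-aligned box around
 * the 8 transformed corners, so the result is conservative (equal or bigger). For example,
 * rotating a unit box by 45 degrees around Z grows the X/Y extents to ~sqrt(2):
 *
 *   AABB box(1.0f);           // [-1..+1] on every axis.
 *   float4x4 rot = ...;       // Hypothetical 45 degree rotation around Z.
 *   AABB fitted = rot * box;  // X/Y extents become ~[-1.414..+1.414].
 */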

/* -------------------------------------------------------------------- */
/** \name Shadow
 *
 * \{ */

/* To be applied after the view matrix. */
constexpr static const float shadow_face_mat[6][4][4] = {
    {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}},   /* Z_NEG */
    {{0, 0, -1, 0}, {-1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 0, 1}}, /* X_POS */
    {{0, 0, 1, 0}, {1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 0, 1}},   /* X_NEG */
    {{1, 0, 0, 0}, {0, 0, -1, 0}, {0, 1, 0, 0}, {0, 0, 0, 1}},  /* Y_POS */
    {{-1, 0, 0, 0}, {0, 0, 1, 0}, {0, 1, 0, 0}, {0, 0, 0, 1}},  /* Y_NEG */
    {{1, 0, 0, 0}, {0, -1, 0, 0}, {0, 0, -1, 0}, {0, 0, 0, 1}}, /* Z_POS */
};

/* Converts to [-SHADOW_TILEMAP_RES / 2..SHADOW_TILEMAP_RES / 2] for XY and [0..1] for Z. */
constexpr static const float shadow_clipmap_scale_mat[4][4] = {{SHADOW_TILEMAP_RES / 2, 0, 0, 0},
                                                               {0, SHADOW_TILEMAP_RES / 2, 0, 0},
                                                               {0, 0, 0.5, 0},
                                                               {0, 0, 0.5, 1}};

constexpr static const float tilemat_scale_bias_mat[4][4] = {
    {SHADOW_TILEMAP_RES / 2, 0, 0, 0},
    {0, SHADOW_TILEMAP_RES / 2, 0, 0},
    {0, 0, 1, 0},
    {SHADOW_TILEMAP_RES / 2, SHADOW_TILEMAP_RES / 2, 0, 1}};

enum eCubeFace {
  /* Ordered by culling order. If the cone aperture is shallow, we cull the later views. */
  Z_NEG = 0,
  X_POS,
  X_NEG,
  Y_POS,
  Y_NEG,
  Z_POS,
};

/**
 * Stores the indirection table and the state of each tile of a virtual shadow-map clipmap level.
 * One tilemap has an effective resolution of `pagesize * tile_map_resolution`.
 * A tilemap's overhead is quite small if it does not have any pages allocated.
 */
struct ShadowTileMap : public ShadowTileMapData {
  static constexpr int64_t tile_map_resolution = SHADOW_TILEMAP_RES;
  static constexpr int64_t tiles_count = tile_map_resolution * tile_map_resolution;

  /**
   * Maximum "bounding" angle of a tile inside a cubemap.
   * Half the diagonal of a tile, since we test using the tile center.
   */
  static float tile_cone_half_angle;

  /** Level of detail for clipmap. */
  int level = INT_MAX;
  /** Integer offset of the center of the 16x16 tiles from the origin of the tile space. */
  int2 grid_offset = int2(16);
  /** Cube face index. */
  eCubeFace cubeface = Z_NEG;
  /** Cached, used for rendering. */
  float4x4 viewmat, winmat;
  /** Cached, used for detecting updates. */
  float4x4 object_mat;
  /** Near and far clip distances. For clipmaps, they are updated after sync. */
  float near, far;

 public:
  ShadowTileMap(int64_t _index)
  {
    index = _index;
  };

  void sync_clipmap(const float3 &camera_position,
                    const float4x4 &object_mat_,
                    float near_,
                    float far_,
                    int2 origin_offset,
                    int clipmap_level);
  void sync_cubeface(
      const float4x4 &object_mat, float near, float far, float cone_aperture, eCubeFace face);

  float tilemap_coverage_get(void) const
  {
    /* This function should be kept in sync with shadow_directional_clipmap_level(). */
    /* NOTE(fclem): If we were to introduce a global scaling option, it would be here. */
    BLI_assert(!is_cubeface);
    return powf(2.0f, level);
  }

  float tile_size_get(void) const
  {
    return tilemap_coverage_get() / tile_map_resolution;
  }

  float4x4 winmat_get(const rcti *tile_minmax) const;
  void setup_view(const rcti &rect, DRWView *&view) const;
  void debug_draw(void) const;

  /* For external callers. Use these in order to not miss an update. */
  void set_level(int clipmap_level)
  {
    if (level != clipmap_level) {
      level = clipmap_level;
      set_dirty();
    }
  }
  void set_is_cubemap(bool is_cubemap_)
  {
    if (is_cubeface != is_cubemap_) {
      is_cubeface = is_cubemap_;
      set_dirty();
    }
  }
  void set_dirty()
  {
    grid_shift = int2(SHADOW_TILEMAP_RES);
  }
  void set_updated()
  {
    grid_shift = int2(0);
  }
};
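
/* Worked example (illustrative): with `tilemap_coverage_get()` returning 2^level, a clipmap
 * level 3 tilemap covers an 8m x 8m region of world space. With a tilemap resolution of 16,
 * each tile then covers 8m / 16 = 0.5m, and (assuming the default 256px page size declared in
 * ShadowModule) one such tilemap offers an effective resolution of 256 * 16 = 4096px over
 * those 8 meters. */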

struct ShadowCommon {
  /** Tilemap for each needed cubeface (in eCubeFace order) or for each clipmap level. */
  Vector<ShadowTileMap *> tilemaps;
  /** To have access to the tilemap allocator. */
  ShadowModule *shadows_;

  ShadowCommon(ShadowModule *shadows) : shadows_(shadows){};

  void free_resources();
};

class ShadowPunctual : public ShadowCommon {
 private:
  /** Area light size. */
  float size_x_, size_y_;
  /** Shape type. */
  eLightType light_type_;
  /** Random position on the light. In world space. */
  float3 random_offset_;
  /** Light position. */
  float3 position_;
  /** Near and far clip distances. */
  float far_, near_;
  /** View space offset to apply to the shadow. */
  float bias_;

 public:
  ShadowPunctual(ShadowModule *shadows) : ShadowCommon(shadows){};

  void sync(eLightType light_type,
            const float4x4 &object_mat,
            float cone_aperture,
            float near_clip,
            float far_clip,
            float bias);

  operator ShadowData();
};

class ShadowDirectional : public ShadowCommon {
 private:
  /** User minimum resolution. */
  float min_resolution_;
  /** View space offset to apply to the shadow. */
  float bias_;
  /** Near and far clip distances. For clipmaps, they are updated after sync. */
  float near_, far_;
  /** Offset of the lowest clipmap relative to the highest one. */
  int2 base_offset_;
  /** Copy of the object matrix. Normalized. */
  float4x4 object_mat_;

 public:
  ShadowDirectional(ShadowModule *shadows) : ShadowCommon(shadows){};

  void sync(const float4x4 &object_mat, float bias, float min_resolution);
  void end_sync(int min_level,
                int max_level,
                const float3 &camera_position,
                const AABB &casters_bounds,
                const BoundBox &casters_visible,
                const Camera &camera);

  operator ShadowData();
};

/** \} */

/* -------------------------------------------------------------------- */
/** \name Shadow Casters & Receivers
 *
 * \{ */

/* Can be either a shadow caster or a shadow receiver. */
struct ShadowObject {
  AABB aabb;
  bool initialized = false;
  bool used;
  bool updated;

  void sync(Object *ob)
  {
    aabb = AABB(ob);
    initialized = true;
    updated = true;
  }
};

/** \} */
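
/* Illustrative sketch (assumed flow, simplified): `ShadowModule::sync_object()` plausibly
 * maintains the `ShadowObject` map and the global caster bounds along these lines. The exact
 * bookkeeping lives in the implementation file; the names mirror members declared below.
 *
 *   ShadowObject &shadow_ob = objects_.lookup_or_add_default(handle.object_key);
 *   shadow_ob.used = true;
 *   if (handle.recalc != 0 || !shadow_ob.initialized) {
 *     shadow_ob.sync(ob);                      // Refresh the world space AABB.
 *     if (is_shadow_caster) {
 *       casters_bounds_.merge(shadow_ob.aabb); // Grow the global caster bounds.
 *     }
 *   }
 */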

/* -------------------------------------------------------------------- */
/** \name ShadowModule
 *
 * Manages the shadow atlas and shadow region data.
 * \{ */

/**
 * Manages the tilemaps and allocates contiguous regions to each shadow object.
 * This way indexing is simple and fast inside the shaders.
 * The tilemap atlas has a fixed size of 64x64 tilemaps, so it can contain 4096 tilemaps of
 * 16x16 pixels each. We allocate many tilemaps up-front because we don't want to reallocate the
 * buffer, as that would mean trashing the whole cache.
 * In the future we could resize and copy the old tilemap infos. But for now we KISS.
 */
struct ShadowTileAllocator {
  static constexpr int64_t size = SHADOW_MAX_TILEMAP;
  /** Limit the width of the texture. */
  static constexpr int64_t maps_per_row = SHADOW_TILEMAP_PER_ROW;
  /* TODO(fclem): Do it for real... Use a real bitmap. */
  Vector<bool> usage_bitmap_ = Vector<bool>(size);
  /** Circular buffer allocation scheme. This is the last allocated index. */
  int64_t next_index = 0;
  /** Vector containing the actual maps. Unordered. */
  Vector<ShadowTileMap *> maps;
  /** Deleted maps go here to be freed after the next sync. */
  Vector<ShadowTileMap *> maps_deleted;
  /**
   * Tilemap atlas containing the mapping to shadow pages inside the atlas texture.
   * All shadow tilemaps are packed into one texture.
   * Contains every clipmap level of all directional lights and each cubeface with mipmaps.
   */
  Texture tilemap_tx = {"tilemap_tx"};
  /** Very small texture containing the result of the update pass. */
  /* FIXME(fclem): It would be nice to avoid the GPU > CPU readback. */
  Texture tilemap_rects_tx = {"tilemap_rects_tx"};
  /** UBO containing the description of every allocated tilemap. */
  ShadowTileMapDataBuf tilemaps_data;
  /** Number of maps inside tilemaps_data. */
  int64_t active_maps_len = 0;
  /** Number of maps at the end of tilemaps_data that are being deleted and need clearing. */
  int64_t deleted_maps_len = 0;

  ShadowTileAllocator();
  ~ShadowTileAllocator();

  /** Returns an empty span on failure. */
  Span<ShadowTileMap *> alloc(int64_t count);

  void free(Vector<ShadowTileMap *> &free_list);

  void end_sync();
};

/**
 * Simple struct to group all things page related.
 */
struct ShadowVirtualPageManager {
  ShadowVirtualPageManager();
  ~ShadowVirtualPageManager();

  void end_sync();
};
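
/* Illustrative sketch (assumed behavior, not the actual implementation): the "circular buffer
 * allocation scheme" above can be pictured as scanning `usage_bitmap_` starting right after
 * `next_index`, wrapping around, and claiming the first free slot found:
 *
 *   int64_t candidate = next_index;
 *   for (int64_t i = 0; i < ShadowTileAllocator::size; i++) {
 *     candidate = (candidate + 1) % ShadowTileAllocator::size;
 *     if (!usage_bitmap_[candidate]) {
 *       usage_bitmap_[candidate] = true; // Claim the slot.
 *       next_index = candidate;
 *       break;
 *     }
 *   }
 */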

class ShadowModule {
  friend ShadowPunctual;
  friend ShadowDirectional;

  template<typename T> class ShadowAllocator : public IndexedAllocator<T> {
   private:
    ShadowModule &shadows_;

   public:
    ShadowAllocator(ShadowModule &shadows) : shadows_(shadows){};

    int64_t alloc(void)
    {
      return IndexedAllocator<T>::alloc(T(&shadows_));
    }
  };

 public:
  /** Needs to be first because of destructor order. */
  ShadowTileAllocator tilemap_allocator;

  ShadowAllocator<ShadowPunctual> punctuals;
  ShadowAllocator<ShadowDirectional> directionals;

 private:
  Instance &inst_;

  /** Map of shadow casters to track deletion & update of intersected shadows. */
  Map<ObjectKey, ShadowObject> objects_;

  /** Used to detect sample changes for soft shadows. */
  uint64_t last_sample_ = 0;

  /**
   * TODO(fclem): These should be stored inside the Shadow objects instead.
   * The issue is that only 32 DRWView can have effective culling data with the current
   * implementation. So we try to reduce the number of DRWView allocated to avoid the slow path.
   */
  DRWView *views_[6] = {nullptr};

  /**
   * Separate render buffer. This is meant to be replaced by rendering directly inside the atlas.
   */
  Texture render_tx_ = {"shadow_target_tx_"};
  Framebuffer render_fb_ = {"shadow_fb"};

  /* -------------------------------------------------------------------- */
  /** \name Tilemap Management
   * \{ */

  /**
   * Clears the visibility, usage and request bits.
   * Also shifts the whole tilemap for directional shadow clipmaps.
   */
  DRWPass *tilemap_setup_ps_;
  /** Update pass that marks all shadow pages of a light as to-update or as unused. */
  DRWPass *tilemap_visibility_ps_;
  /** Update pass that marks all shadow pages touching an updated shadow caster. */
  DRWPass *tilemap_update_tag_ps_;
  /** Tags each tile intersecting a shadow receiver. */
  /* NOTE(fclem): Until we implement depth buffer scanning, we rely solely on this to tag the
   * needed tiles. */
  DRWPass *tilemap_usage_tag_ps_;
  /** Uses the depth buffer to tag the needed shadow pages. */
  DRWPass *tilemap_depth_scan_ps_;
  /** Discards pages that are redundant in the mipmap chain. */
  DRWPass *tilemap_lod_mask_ps_;
  /** Lists of AABBs for the tagging passes. */
  DRWCallBuffer *casters_updated_;
  DRWCallBuffer *receivers_non_opaque_;

  bool do_tilemap_setup_ = true;
  const DRWView *last_processed_view = nullptr;
  float tilemap_pixel_radius_;
  float screen_pixel_radius_inv_;

  /** \} */

  /* -------------------------------------------------------------------- */
  /** \name Page Management
   * \{ */

  Texture atlas_tx_ = {"shadow_atlas_tx_"};
  /** Pool of unallocated pages waiting to be assigned to specific tiles in the tilemap atlas. */
  ShadowPageHeapBuf pages_free_data_ = {"pages_free_buf"};
  /** Pool of cached tiles waiting to be reused. */
  ShadowPageCacheBuf pages_cached_data_ = {"pages_cached_buf"};
  /** List of tiles that are to be rendered. */
  ShadowPageHeapBuf pages_list_data_ = {"pages_list_buf"};
  /** Info for bookkeeping and debugging. */
  ShadowPagesInfoDataBuf pages_infos_data_ = {"pages_infos_buf"};

  /** Page buffer clear. This is only done if the shadow atlas is reallocated. */
  DRWPass *page_init_ps_;
  /** Defragments the page free array. */
  DRWPass *page_defrag_ps_;
  /** Frees the pages of deleted tiles. Think of it as garbage collection. */
  DRWPass *page_free_ps_;
  /** Allocates pages for new tiles. */
  DRWPass *page_alloc_ps_;
  /** Creates the render page list. */
  DRWPass *page_list_ps_;
  /** Clears the depth of tiles to render to 1.0, and to 0.0 for the others. */
  DRWPass *page_mark_ps_;
  /** Copies the pages in the copy list. */
  DRWPass *page_copy_ps_;

  bool do_page_init_ = true;
  int3 copy_dispatch_size_;
  int3 scan_dispatch_size_;
  int rendering_tilemap_;
  int rendering_lod_;

  /** \} */

  /* -------------------------------------------------------------------- */
  /** \name Debugging
   * \{ */

  /** Displays information about the virtual shadows. */
  DRWPass *debug_draw_ps_;
  /** Depth input for debug drawing. Reference only. */
  GPUTexture *input_depth_tx_;
  /** Object key used to retrieve the last active light. The debug info shown is for this light. */
  ObjectKey debug_light_key;
  /** View used for the whole virtual shadow mapping setup. Used to debug culling. */
  DRWView *debug_view_;
  /** Debug data sent to the GPU. */
  ShadowDebugDataBuf debug_data_;
  /** Debug texture to check page status. */
  Texture debug_page_tx_ = {"debug_page_tx_"};

  /** \} */

  /** Scene immutable parameter. */
  int shadow_page_size_ = 256;
  bool soft_shadows_enabled_ = false;
  /** Default to invalid texture type. */
  eGPUTextureFormat shadow_format_ = GPU_RGBA8;
  /** Used for the caster & receiver AABB lists. */
  GPUVertFormat aabb_format_;
  /** Global bounds that contain all shadow casters. Used by directional lights for best fit. */
  AABB casters_bounds_;

 public:
  ShadowModule(Instance &inst) : punctuals(*this), directionals(*this), inst_(inst)
  {
    GPU_vertformat_clear(&aabb_format_);
    /* Must match the C++ AABB layout. */
    BLI_assert(sizeof(AABB) == sizeof(float) * 8);
    GPU_vertformat_attr_add(&aabb_format_, "aabb_min", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
    GPU_vertformat_attr_add(&aabb_format_, "aabb_max", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
  }
  ~ShadowModule(){};

  void init(void);

  void begin_sync(void);
  void sync_object(Object *ob,
                   const ObjectHandle &handle,
                   bool is_shadow_caster,
                   bool is_alpha_blend);
  void end_sync(void);

  void set_view(const DRWView *view, GPUTexture *depth_tx);

  void debug_end_sync(void);
  void debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz);

  GPUTexture *atlas_tx_get(void)
  {
    return atlas_tx_;
  }
  GPUTexture *tilemap_tx_get(void)
  {
    return tilemap_allocator.tilemap_tx;
  }

 private:
  void remove_unused(void);
  void debug_page_map_call(DRWPass *pass);
};

/** \} */
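
/* Illustrative usage sketch (assumed call sequence, based only on the public API above): the
 * engine instance is expected to drive the module roughly like this each redraw. `module`,
 * `scene_objects`, `handle`, `view` and `depth_tx` are placeholder names.
 *
 *   module.init();                     // Scene immutable parameters.
 *   module.begin_sync();
 *   for (Object *ob : scene_objects) { // Hypothetical iteration.
 *     module.sync_object(ob, handle, is_shadow_caster, is_alpha_blend);
 *   }
 *   module.end_sync();                 // Tilemap & page allocation.
 *   module.set_view(view, depth_tx);   // Tag & render the needed pages.
 */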

/* -------------------------------------------------------------------- */
/** \name ShadowPass
 *
 * A simple depth pass to which all shadow casters subscribe.
 * \{ */

class ShadowPass {
 private:
  Instance &inst_;

  DRWPass *surface_ps_ = nullptr;

 public:
  ShadowPass(Instance &inst) : inst_(inst){};

  void sync(void);

  DRWShadingGroup *material_add(::Material *blender_mat, GPUMaterial *gpumat);

  void render(void);
};

/** \} */

}  // namespace blender::eevee