/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#pragma once

#include "kernel/camera/camera.h"

#include "kernel/film/adaptive_sampling.h"
#include "kernel/film/light_passes.h"

#include "kernel/integrator/path_state.h"

#include "kernel/sample/pattern.h"

#include "kernel/geom/geom.h"

CCL_NAMESPACE_BEGIN

/* This helps with AA but it's not the real solution as it does not AA the geometry
 * but it's better than nothing, thus committed. */
ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
{
  /* use mirror repeat (like opengl texture) so that if the barycentric
   * coordinate goes past the end of the triangle it is not always clamped
   * to the same value, gives ugly patterns */
  u /= max;
  float fu = floorf(u);
  u = u - fu;

  return ((((int)fu) & 1) ? 1.0f - u : u) * max;
}

/* Nudge a barycentric coordinate towards the center of the triangle, to avoid ray-tracing
 * precision issues.
 *
 * `u` and `v` are the weights of the first and second vertex of triangle `prim`, the third
 * vertex implicitly has weight `1 - u - v`.
 *
 * Returns the nudged (u, v) pair in the same convention, or (0, 0) when the edge vectors of
 * the triangle give a zero determinant. */
ccl_device const float2 bake_offset_towards_center(KernelGlobals kg,
                                                   const int prim,
                                                   const float u,
                                                   const float v)
{
  /* Empirically determined values, by no means perfect. */
  const float position_offset = 1e-4f;
  const float uv_offset = 1e-5f;

  float3 verts[3];
  triangle_vertices(kg, prim, verts);

  /* Position on the triangle, and the vector from there to the centroid. */
  const float3 P = u * verts[0] + v * verts[1] + (1.0f - u - v) * verts[2];
  const float3 centroid = (verts[0] + verts[1] + verts[2]) / 3.0f;
  const float3 to_centroid = centroid - P;

  /* The step is relative to the absolute size of the position coordinates, and never
   * goes further than the centroid itself. */
  const float relative_step = max(reduce_max(fabs(P)), 1.0f) * position_offset;
  const float step = min(len(to_centroid), relative_step);
  const float3 moved_P = P + normalize(to_centroid) * step;

  /* Express the moved position in terms of the two edges leaving the first vertex. */
  const float3 edge1 = verts[1] - verts[0];
  const float3 edge2 = verts[2] - verts[0];
  const float3 rel_P = moved_P - verts[0];

  const float e11 = dot(edge1, edge1);
  const float e12 = dot(edge1, edge2);
  const float e22 = dot(edge2, edge2);
  const float p1 = dot(rel_P, edge1);
  const float p2 = dot(rel_P, edge2);

  const float det = e11 * e22 - e12 * e12;
  if (det == 0.0f) {
    /* No unique solution for this triangle. */
    return make_float2(0.0f, 0.0f);
  }

  /* Keep every weight a small distance away from both 0 and 1. */
  const float weight_min = uv_offset;
  const float weight_max = 1.0f - uv_offset;

  const float new_v = clamp((e22 * p1 - e12 * p2) / det, weight_min, weight_max);
  const float new_w = clamp((e11 * p2 - e12 * p1) / det, weight_min, weight_max);
  const float new_u = clamp(1.0f - new_v - new_w, weight_min, weight_max);

  return make_float2(new_u, new_v);
}

/* Set up the integrator state for one bake sample of pixel (x, y) of the given tile.
 *
 * Returns false to indicate that this pixel is finished, true otherwise.
 * The CPU implementation uses this to not attempt to sample the pixel for further samples
 * once it is known that the pixel did converge. */
ccl_device bool integrator_init_from_bake(KernelGlobals kg,
                                          IntegratorState state,
                                          ccl_global const KernelWorkTile *ccl_restrict tile,
                                          ccl_global float *render_buffer,
                                          const int x,
                                          const int y,
                                          const int scheduled_sample)
{
  PROFILING_INIT(kg, PROFILING_RAY_SETUP);

  /* Basic path state comes first: it gives buffer access and allows early outputs. */
  path_state_init(state, tile, x, y);

  /* Pixels which have converged are not sampled anymore. */
  if (!film_need_sample_pixel(kg, state, render_buffer)) {
    return false;
  }

  /* The sample is always counted, no matter which of the paths below is taken. */
  const int sample = film_write_sample(
      kg, state, render_buffer, scheduled_sample, tile->sample_offset);

  /* Locate the pixel in the render buffer, and the bake passes within the pixel. */
  const int pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
  const int pass_stride = kernel_data.film.pass_stride;
  ccl_global float *buffer = render_buffer + (uint64_t)pixel_index * pass_stride;

  ccl_global float *bake_primitive = buffer + kernel_data.film.pass_bake_primitive;
  ccl_global float *bake_differential = buffer + kernel_data.film.pass_bake_differential;

  /* Seed and primitive index are read from float slots through `__float_as_uint`. */
  const int seed = __float_as_uint(bake_primitive[0]);
  const int local_prim = __float_as_uint(bake_primitive[1]);
  if (local_prim == -1) {
    /* Empty pixel: only accumulate transparency. */
    film_write_transparent(kg, state, 0, 1.0f, buffer);
    return true;
  }

  const int prim = local_prim + kernel_data.bake.tri_offset;

  /* Random number generator. */
  const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;

  /* The first sample uses the middle of the filter, later samples a random position. */
  float2 rand_filter = make_float2(0.5f, 0.5f);
  if (sample != 0) {
    rand_filter = path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
  }

  /* Path state for path integration. */
  path_state_init_integrator(kg, state, sample, rng_hash);

  /* Barycentric UV and its differentials, as stored in the bake passes. */
  float u = bake_primitive[2];
  float v = bake_primitive[3];

  float dudx = bake_differential[0];
  float dudy = bake_differential[1];
  float dvdx = bake_differential[2];
  float dvdy = bake_differential[3];

  /* Both coordinates exactly 0 or 1 means the sample sits on a vertex.
   * Nudge inwards to avoid self-intersection. */
  const bool u_at_limit = (u == 0.0f || u == 1.0f);
  const bool v_at_limit = (v == 0.0f || v == 1.0f);
  if (u_at_limit && v_at_limit) {
    const float2 nudged_uv = bake_offset_towards_center(kg, prim, u, v);
    u = nudged_uv.x;
    v = nudged_uv.y;
  }

  /* Sub-pixel offset, folded back so that the coordinates stay on the triangle. */
  if (sample > 0) {
    const float filter_x = rand_filter.x - 0.5f;
    const float filter_y = rand_filter.y - 0.5f;

    u = bake_clamp_mirror_repeat(u + dudx * filter_x + dudy * filter_y, 1.0f);
    /* The range left for `v` depends on the already offset `u`. */
    v = bake_clamp_mirror_repeat(v + dvdx * filter_x + dvdy * filter_y, 1.0f - u);
  }

  /* Convert from Blender to Cycles/Embree/OptiX barycentric convention. */
  const float blender_u = u;
  const float blender_v = v;
  u = blender_v;
  v = 1.0f - blender_u - blender_v;

  /* The differentials follow the same conversion. */
  const float blender_dudx = dudx;
  const float blender_dudy = dudy;
  dudx = dvdx;
  dudy = dvdy;
  dvdx = -blender_dudx - dvdx;
  dvdy = -blender_dudy - dvdy;

  /* Position and normal on triangle. */
  const int object = kernel_data.bake.object_index;
  const int object_flag = kernel_data_fetch(object_flag, object);
  const bool transform_applied = (object_flag & SD_OBJECT_TRANSFORM_APPLIED) != 0;

  float3 P, Ng;
  int shader;
  triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);

  if (!transform_applied) {
    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
    P = transform_point_auto(&tfm, P);
  }

  /* Environment baking. */
  if (kernel_data.film.pass_background != PASS_UNUSED) {
    /* The ray starts at the origin and points towards the position on the triangle. */
    Ray ray ccl_optional_struct_init;
    ray.P = zero_float3();
    ray.D = normalize(P);
    ray.tmin = 0.0f;
    ray.tmax = FLT_MAX;
    ray.time = 0.5f;
    ray.dP = differential_zero_compact();
    ray.dD = differential_zero_compact();
    integrator_state_write_ray(kg, state, &ray);

    /* Next kernel to execute. */
    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
    return true;
  }

  /* Surface baking. */
  float3 N = Ng;
  if (shader & SHADER_SMOOTH_NORMAL) {
    N = triangle_smooth_normal(kg, Ng, prim, u, v);
  }

  if (!transform_applied) {
    Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
    N = normalize(transform_direction_transposed(&itfm, N));
    Ng = normalize(transform_direction_transposed(&itfm, Ng));
  }

  const int shader_index = shader & SHADER_MASK;
  const int shader_flags = kernel_data_fetch(shaders, shader_index).flags;

  /* Fast path for position and normal passes not affected by shaders. */
  if (kernel_data.film.pass_position != PASS_UNUSED) {
    film_write_pass_float3(buffer + kernel_data.film.pass_position, P);
    return true;
  }
  if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) {
    film_write_pass_float3(buffer + kernel_data.film.pass_normal, N);
    return true;
  }

  /* Direction from the surface towards the ray origin. Without camera this is the shading
   * normal, with camera it is derived from the camera direction. */
  float3 D = N;

  if (kernel_data.bake.use_camera) {
    D = camera_direction_from_point(kg, P);

    const float DN = dot(D, N);

    /* Nudge camera direction, so that the faces facing away from the camera still have
     * somewhat usable shading. (Otherwise, glossy faces would be simply black.)
     *
     * The surface normal offset affects smooth surfaces. Lower values will make
     * smooth surfaces more faceted, but higher values may show up from the camera
     * at grazing angles.
     *
     * This value can actually be pretty high before it's noticeably wrong. */
    const float surface_normal_offset = 0.2f;

    /* Keep the ray direction at least `surface_normal_offset` "above" the smooth normal. */
    if (DN <= surface_normal_offset) {
      D -= N * (DN - surface_normal_offset);
      D = normalize(D);
    }

    /* On the backside, just lerp towards the surface normal for the ray direction,
     * as DN goes from 0.0 to -1.0. */
    if (DN <= 0.0f) {
      D = normalize(mix(D, N, -DN));
    }

    /* We don't want to bake the back face, so make sure the ray direction never
     * goes behind the geometry (flat) normal. This is a fail-safe, and should rarely happen. */
    const float true_normal_epsilon = 0.00001f;
    const float DNg = dot(D, Ng);

    if (DNg <= true_normal_epsilon) {
      D -= Ng * (DNg - true_normal_epsilon);
      D = normalize(D);
    }
  }

  /* The ray starts offset from the surface by `D` and points back at it. */
  Ray ray ccl_optional_struct_init;
  ray.P = P + D;
  ray.D = -D;
  ray.tmin = 0.0f;
  ray.tmax = FLT_MAX;
  ray.time = 0.5f;

  /* Position differentials from the triangle derivatives and the UV differentials. */
  float3 dPdu, dPdv;
  triangle_dPdudv(kg, prim, &dPdu, &dPdv);
  if (!transform_applied) {
    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
    dPdu = transform_direction(&tfm, dPdu);
    dPdv = transform_direction(&tfm, dPdv);
  }

  differential3 dP;
  dP.dx = dPdu * dudx + dPdv * dvdx;
  dP.dy = dPdu * dudy + dPdv * dvdy;
  ray.dP = differential_make_compact(dP);
  ray.dD = differential_zero_compact();

  /* Write ray. */
  integrator_state_write_ray(kg, state, &ray);

  /* Intersection with the baked triangle at the sampled barycentric coordinates. */
  Intersection isect ccl_optional_struct_init;
  isect.object = object;
  isect.prim = prim;
  isect.u = u;
  isect.v = v;
  isect.t = 1.0f;
  isect.type = PRIMITIVE_TRIANGLE;
  integrator_state_write_isect(kg, state, &isect);

  /* Next kernel to execute: caustics take precedence over the ray-tracing variant, and the
   * regular surface shading kernel is used when neither applies. */
  const bool use_caustics = kernel_data.integrator.use_caustics &&
                            (object_flag & SD_OBJECT_CAUSTICS);
  const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);

  if (use_caustics) {
    integrator_path_init_sorted(
        kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
  }
  else if (use_raytrace_kernel) {
    integrator_path_init_sorted(
        kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
  }
  else {
    integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
  }

  return true;
}

CCL_NAMESPACE_END