/*
 * Copyright 2011-2016 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __KERNEL_SPLIT_DATA_TYPES_H__
#define __KERNEL_SPLIT_DATA_TYPES_H__

CCL_NAMESPACE_BEGIN

/* parameters used by the split kernels, we use a single struct to avoid passing these to each
 * kernel */

typedef struct SplitParams {
  WorkTile tile;
  uint total_work_size;

  ccl_global unsigned int *work_pools;

  ccl_global int *queue_index;
  int queue_size;
  ccl_global char *use_queues_flag;

  /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
  int dummy_sd_flag;
} SplitParams;

/* Global memory variables [porting]; These memory is used for
 * co-operation between different kernels; Data written by one
 * kernel will be available to another kernel via this global
 * memory.
 */

/* SPLIT_DATA_ENTRY(type, name, num) */

#ifdef __BRANCHED_PATH__

typedef ccl_global struct SplitBranchedState {
  /* various state that must be kept and restored after an indirect loop */
  PathState path_state;
  float3 throughput;
  Ray ray;

  Intersection isect;

  char ray_state;

  /* indirect loop state */
  int next_closure;
  int next_sample;

#  ifdef __SUBSURFACE__
  int ss_next_closure;
  int ss_next_sample;
  int next_hit;
  int num_hits;

  uint lcg_state;
  LocalIntersection ss_isect;
#  endif /* __SUBSURFACE__ */

  int shared_sample_count; /* number of branched samples shared with other threads */
  int original_ray;        /* index of original ray when sharing branched samples */
  bool waiting_on_shared_samples;
} SplitBranchedState;

#  define SPLIT_DATA_BRANCHED_ENTRIES \
    SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \
    SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
#else
#  define SPLIT_DATA_BRANCHED_ENTRIES
#endif /* __BRANCHED_PATH__ */

#ifdef __SUBSURFACE__
#  define SPLIT_DATA_SUBSURFACE_ENTRIES \
    SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
#else
#  define SPLIT_DATA_SUBSURFACE_ENTRIES
#endif /* __SUBSURFACE__ */

#ifdef __VOLUME__
#  define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
#else
#  define SPLIT_DATA_VOLUME_ENTRIES
#endif /* __VOLUME__ */

#define SPLIT_DATA_ENTRIES \
  SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
  SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
  SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
  SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
  SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
  SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
  SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
  SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
  SPLIT_DATA_ENTRY( \
      ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \
  SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
  SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
  SPLIT_DATA_SUBSURFACE_ENTRIES \
  SPLIT_DATA_VOLUME_ENTRIES \
  SPLIT_DATA_BRANCHED_ENTRIES \
  SPLIT_DATA_ENTRY(ShaderData, _sd, 0)

/* Entries to be copied to inactive rays when sharing branched samples
 * (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
  SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
  SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
  SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
  SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
  SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
  SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
  SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
  SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
  SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
  SPLIT_DATA_SUBSURFACE_ENTRIES \
  SPLIT_DATA_VOLUME_ENTRIES \
  SPLIT_DATA_BRANCHED_ENTRIES \
  SPLIT_DATA_ENTRY(ShaderData, _sd, 0)

/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
  SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY

  /* this is actually in a separate buffer from the rest of the split state data (so it can be read
   * back from the host easily) but is still used the same as the other data so we have it here in
   * this struct as well
   */
  ccl_global char *ray_state;
} SplitData;

#ifndef __KERNEL_CUDA__
#  define kernel_split_state (kg->split_data)
#  define kernel_split_params (kg->split_param_data)
#else
__device__ SplitData __split_data;
#  define kernel_split_state (__split_data)
__device__ SplitParams __split_param_data;
#  define kernel_split_params (__split_param_data)
#endif /* __KERNEL_CUDA__ */

#define kernel_split_sd(sd, ray_index) \
  ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \
                  (sizeof(ShaderData) + \
                   sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \
                      (ray_index)))

/* Local storage for queue_enqueue kernel. */
typedef struct QueueEnqueueLocals {
  uint queue_atomics[2];
} QueueEnqueueLocals;

/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
typedef struct BackgroundAOLocals {
  uint queue_atomics_bg;
  uint queue_atomics_ao;
} BackgroundAOLocals;

typedef struct ShaderSortLocals {
  uint local_value[SHADER_SORT_BLOCK_SIZE];
  ushort local_index[SHADER_SORT_BLOCK_SIZE];
} ShaderSortLocals;

CCL_NAMESPACE_END

#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */