Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernel_types.h')
-rw-r--r--intern/cycles/kernel/kernel_types.h883
1 files changed, 605 insertions, 278 deletions
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 8250eaf6073..72fbf7be557 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -17,9 +17,9 @@
#ifndef __KERNEL_TYPES_H__
#define __KERNEL_TYPES_H__
-#include "kernel_math.h"
-#include "svm/svm_types.h"
-#include "util_static_assert.h"
+#include "kernel/kernel_math.h"
+#include "kernel/svm/svm_types.h"
+#include "util/util_static_assert.h"
#ifndef __KERNEL_GPU__
# define __KERNEL_CPU__
@@ -34,18 +34,18 @@
CCL_NAMESPACE_BEGIN
-/* constants */
-#define OBJECT_SIZE 12
-#define OBJECT_VECTOR_SIZE 6
-#define LIGHT_SIZE 11
-#define FILTER_TABLE_SIZE 1024
-#define RAMP_TABLE_SIZE 256
-#define SHUTTER_TABLE_SIZE 256
-#define PARTICLE_SIZE 5
-#define SHADER_SIZE 5
+/* Constants */
+#define OBJECT_MOTION_PASS_SIZE 2
+#define FILTER_TABLE_SIZE 1024
+#define RAMP_TABLE_SIZE 256
+#define SHUTTER_TABLE_SIZE 256
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
+#define BSSRDF_MAX_BOUNCES 256
+#define LOCAL_MAX_HITS 4
+
+#define VOLUME_BOUNDS_MAX 1024
#define BECKMANN_TABLE_SIZE 256
@@ -56,7 +56,28 @@ CCL_NAMESPACE_BEGIN
#define VOLUME_STACK_SIZE 16
-/* device capabilities */
+/* Split kernel constants */
+#define WORK_POOL_SIZE_GPU 64
+#define WORK_POOL_SIZE_CPU 1
+#ifdef __KERNEL_GPU__
+# define WORK_POOL_SIZE WORK_POOL_SIZE_GPU
+#else
+# define WORK_POOL_SIZE WORK_POOL_SIZE_CPU
+#endif
+
+
+#define SHADER_SORT_BLOCK_SIZE 2048
+
+#ifdef __KERNEL_OPENCL__
+# define SHADER_SORT_LOCAL_SIZE 64
+#elif defined(__KERNEL_CUDA__)
+# define SHADER_SORT_LOCAL_SIZE 32
+#else
+# define SHADER_SORT_LOCAL_SIZE 1
+#endif
+
+
+/* Device capabilities */
#ifdef __KERNEL_CPU__
# ifdef __KERNEL_SSE2__
# define __QBVH__
@@ -67,24 +88,28 @@ CCL_NAMESPACE_BEGIN
# ifdef WITH_OSL
# define __OSL__
# endif
+# define __PRINCIPLED__
# define __SUBSURFACE__
# define __CMJ__
# define __VOLUME__
-# define __VOLUME_DECOUPLED__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
+# define __VOLUME_DECOUPLED__
# define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# define __BRANCHED_PATH__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SUBSURFACE__
-# define __CMJ__
+# define __PRINCIPLED__
# define __SHADOW_RECORD_ALL__
+# define __CMJ__
+# ifndef __SPLIT_KERNEL__
+# define __BRANCHED_PATH__
+# endif
#endif /* __KERNEL_CUDA__ */
#ifdef __KERNEL_OPENCL__
@@ -94,41 +119,50 @@ CCL_NAMESPACE_BEGIN
# ifdef __KERNEL_OPENCL_NVIDIA__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# ifdef __KERNEL_EXPERIMENTAL__
-# define __CMJ__
-# endif
+# define __SUBSURFACE__
+# define __PRINCIPLED__
+# define __VOLUME__
+# define __VOLUME_SCATTER__
+# define __SHADOW_RECORD_ALL__
+# define __CMJ__
+# define __BRANCHED_PATH__
# endif /* __KERNEL_OPENCL_NVIDIA__ */
# ifdef __KERNEL_OPENCL_APPLE__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
+# define __PRINCIPLED__
+# define __CMJ__
/* TODO(sergey): Currently experimental section is ignored here,
* this is because megakernel in device_opencl does not support
* custom cflags depending on the scene features.
*/
-# ifdef __KERNEL_EXPERIMENTAL__
-# define __CMJ__
-# endif
-# endif /* __KERNEL_OPENCL_NVIDIA__ */
+# endif /* __KERNEL_OPENCL_APPLE__ */
# ifdef __KERNEL_OPENCL_AMD__
# define __CL_USE_NATIVE__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
+# define __SUBSURFACE__
+# define __PRINCIPLED__
+# define __VOLUME__
+# define __VOLUME_SCATTER__
+# define __SHADOW_RECORD_ALL__
+# define __CMJ__
+# define __BRANCHED_PATH__
# endif /* __KERNEL_OPENCL_AMD__ */
# ifdef __KERNEL_OPENCL_INTEL_CPU__
# define __CL_USE_NATIVE__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# ifdef __KERNEL_EXPERIMENTAL__
-# define __CMJ__
-# endif
+# define __PRINCIPLED__
+# define __CMJ__
# endif /* __KERNEL_OPENCL_INTEL_CPU__ */
#endif /* __KERNEL_OPENCL__ */
-/* kernel features */
+/* Kernel features */
#define __SOBOL__
#define __INSTANCING__
#define __DPDU__
@@ -141,6 +175,9 @@ CCL_NAMESPACE_BEGIN
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__
#define __PATCH_EVAL__
+#define __SHADOW_TRICKS__
+#define __DENOISING_FEATURES__
+#define __SHADER_RAYTRACE__
#ifdef __KERNEL_SHADING__
# define __SVM__
@@ -163,10 +200,6 @@ CCL_NAMESPACE_BEGIN
# define __BAKING__
#endif
-#ifdef WITH_CYCLES_DEBUG
-# define __KERNEL_DEBUG__
-#endif
-
/* Scene-based selective features compilation. */
#ifdef __NO_CAMERA_MOTION__
# undef __CAMERA_MOTION__
@@ -196,10 +229,27 @@ CCL_NAMESPACE_BEGIN
#ifdef __NO_TRANSPARENT__
# undef __TRANSPARENT_SHADOWS__
#endif
+#ifdef __NO_SHADOW_TRICKS__
+# undef __SHADOW_TRICKS__
+#endif
+#ifdef __NO_PRINCIPLED__
+# undef __PRINCIPLED__
+#endif
+#ifdef __NO_DENOISING__
+# undef __DENOISING_FEATURES__
+#endif
+#ifdef __NO_SHADER_RAYTRACE__
+# undef __SHADER_RAYTRACE__
+#endif
-/* Random Numbers */
+/* Features that enable others */
+#ifdef WITH_CYCLES_DEBUG
+# define __KERNEL_DEBUG__
+#endif
-typedef uint RNG;
+#if defined(__SUBSURFACE__) || defined(__SHADER_RAYTRACE__)
+# define __BVH_LOCAL__
+#endif
/* Shader Evaluation */
@@ -213,6 +263,7 @@ typedef enum ShaderEvalType {
/* data passes */
SHADER_EVAL_NORMAL,
SHADER_EVAL_UV,
+ SHADER_EVAL_ROUGHNESS,
SHADER_EVAL_DIFFUSE_COLOR,
SHADER_EVAL_GLOSSY_COLOR,
SHADER_EVAL_TRANSMISSION_COLOR,
@@ -240,31 +291,24 @@ enum PathTraceDimension {
PRNG_FILTER_V = 1,
PRNG_LENS_U = 2,
PRNG_LENS_V = 3,
-#ifdef __CAMERA_MOTION__
PRNG_TIME = 4,
PRNG_UNUSED_0 = 5,
PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
-#endif
- PRNG_BASE_NUM = 8,
+ PRNG_BASE_NUM = 10,
PRNG_BSDF_U = 0,
PRNG_BSDF_V = 1,
- PRNG_BSDF = 2,
- PRNG_LIGHT = 3,
- PRNG_LIGHT_U = 4,
- PRNG_LIGHT_V = 5,
- PRNG_LIGHT_TERMINATE = 6,
- PRNG_TERMINATE = 7,
-
-#ifdef __VOLUME__
- PRNG_PHASE_U = 8,
- PRNG_PHASE_V = 9,
- PRNG_PHASE = 10,
- PRNG_SCATTER_DISTANCE = 11,
-#endif
-
- PRNG_BOUNCE_NUM = 12,
+ PRNG_LIGHT_U = 2,
+ PRNG_LIGHT_V = 3,
+ PRNG_LIGHT_TERMINATE = 4,
+ PRNG_TERMINATE = 5,
+ PRNG_PHASE_CHANNEL = 6,
+ PRNG_SCATTER_DISTANCE = 7,
+ PRNG_BOUNCE_NUM = 8,
+
+ PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
+ PRNG_BEVEL_V = 7,
};
enum SamplingPattern {
@@ -277,29 +321,56 @@ enum SamplingPattern {
/* these flags values correspond to raytypes in osl.cpp, so keep them in sync! */
enum PathRayFlag {
- PATH_RAY_CAMERA = 1,
- PATH_RAY_REFLECT = 2,
- PATH_RAY_TRANSMIT = 4,
- PATH_RAY_DIFFUSE = 8,
- PATH_RAY_GLOSSY = 16,
- PATH_RAY_SINGULAR = 32,
- PATH_RAY_TRANSPARENT = 64,
-
- PATH_RAY_SHADOW_OPAQUE = 128,
- PATH_RAY_SHADOW_TRANSPARENT = 256,
- PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
-
- PATH_RAY_CURVE = 512, /* visibility flag to define curve segments */
- PATH_RAY_VOLUME_SCATTER = 1024, /* volume scattering */
+ PATH_RAY_CAMERA = (1 << 0),
+ PATH_RAY_REFLECT = (1 << 1),
+ PATH_RAY_TRANSMIT = (1 << 2),
+ PATH_RAY_DIFFUSE = (1 << 3),
+ PATH_RAY_GLOSSY = (1 << 4),
+ PATH_RAY_SINGULAR = (1 << 5),
+ PATH_RAY_TRANSPARENT = (1 << 6),
+
+ PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7),
+ PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8),
+ PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_OPAQUE_CATCHER),
+ PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = (1 << 9),
+ PATH_RAY_SHADOW_TRANSPARENT_CATCHER = (1 << 10),
+ PATH_RAY_SHADOW_TRANSPARENT = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_CATCHER),
+ PATH_RAY_SHADOW_NON_CATCHER = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER),
+ PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
+
+ PATH_RAY_CURVE = (1 << 11), /* visibility flag to define curve segments */
+ PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */
/* Special flag to tag unaligned BVH nodes. */
- PATH_RAY_NODE_UNALIGNED = 2048,
-
- PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024|2048),
-
- PATH_RAY_MIS_SKIP = 4096,
- PATH_RAY_DIFFUSE_ANCESTOR = 8192,
- PATH_RAY_SINGLE_PASS_DONE = 16384,
+ PATH_RAY_NODE_UNALIGNED = (1 << 13),
+
+ PATH_RAY_ALL_VISIBILITY = ((1 << 14)-1),
+
+ /* Don't apply multiple importance sampling weights to emission from
+ * lamp or surface hits, because they were not direct light sampled. */
+ PATH_RAY_MIS_SKIP = (1 << 14),
+ /* Diffuse bounce earlier in the path, skip SSS to improve performance
+ * and avoid branching twice with disk sampling SSS. */
+ PATH_RAY_DIFFUSE_ANCESTOR = (1 << 15),
+ /* Single pass has been written. */
+ PATH_RAY_SINGLE_PASS_DONE = (1 << 16),
+ /* Ray is behind a shadow catcher .*/
+ PATH_RAY_SHADOW_CATCHER = (1 << 17),
+ /* Store shadow data for shadow catcher or denoising. */
+ PATH_RAY_STORE_SHADOW_INFO = (1 << 18),
+ /* Zero background alpha, for camera or transparent glass rays. */
+ PATH_RAY_TRANSPARENT_BACKGROUND = (1 << 19),
+ /* Terminate ray immediately at next bounce. */
+ PATH_RAY_TERMINATE_IMMEDIATE = (1 << 20),
+ /* Ray is to be terminated, but continue with transparent bounces and
+ * emission as long as we encounter them. This is required to make the
+ * MIS between direct and indirect light rays match, as shadow rays go
+ * through transparent surfaces to reach emisison too. */
+ PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21),
+ /* Ray is to be terminated. */
+ PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE|PATH_RAY_TERMINATE_AFTER_TRANSPARENT),
+ /* Path and shader is being evaluated for direct lighting emission. */
+ PATH_RAY_EMISSION = (1 << 22)
};
/* Closure Label */
@@ -313,49 +384,82 @@ typedef enum ClosureLabel {
LABEL_SINGULAR = 16,
LABEL_TRANSPARENT = 32,
LABEL_VOLUME_SCATTER = 64,
+ LABEL_TRANSMIT_TRANSPARENT = 128,
} ClosureLabel;
/* Render Passes */
+#define PASS_NAME_JOIN(a, b) a ## _ ## b
+#define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32))
+
+#define PASSMASK_COMPONENT(comp) (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | \
+ PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \
+ PASSMASK(PASS_NAME_JOIN(comp, COLOR)))
+
typedef enum PassType {
PASS_NONE = 0,
- PASS_COMBINED = (1 << 0),
- PASS_DEPTH = (1 << 1),
- PASS_NORMAL = (1 << 2),
- PASS_UV = (1 << 3),
- PASS_OBJECT_ID = (1 << 4),
- PASS_MATERIAL_ID = (1 << 5),
- PASS_DIFFUSE_COLOR = (1 << 6),
- PASS_GLOSSY_COLOR = (1 << 7),
- PASS_TRANSMISSION_COLOR = (1 << 8),
- PASS_DIFFUSE_INDIRECT = (1 << 9),
- PASS_GLOSSY_INDIRECT = (1 << 10),
- PASS_TRANSMISSION_INDIRECT = (1 << 11),
- PASS_DIFFUSE_DIRECT = (1 << 12),
- PASS_GLOSSY_DIRECT = (1 << 13),
- PASS_TRANSMISSION_DIRECT = (1 << 14),
- PASS_EMISSION = (1 << 15),
- PASS_BACKGROUND = (1 << 16),
- PASS_AO = (1 << 17),
- PASS_SHADOW = (1 << 18),
- PASS_MOTION = (1 << 19),
- PASS_MOTION_WEIGHT = (1 << 20),
- PASS_MIST = (1 << 21),
- PASS_SUBSURFACE_DIRECT = (1 << 22),
- PASS_SUBSURFACE_INDIRECT = (1 << 23),
- PASS_SUBSURFACE_COLOR = (1 << 24),
- PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */
+
+ /* Main passes */
+ PASS_COMBINED = 1,
+ PASS_DEPTH,
+ PASS_NORMAL,
+ PASS_UV,
+ PASS_OBJECT_ID,
+ PASS_MATERIAL_ID,
+ PASS_MOTION,
+ PASS_MOTION_WEIGHT,
#ifdef __KERNEL_DEBUG__
- PASS_BVH_TRAVERSED_NODES = (1 << 26),
- PASS_BVH_TRAVERSED_INSTANCES = (1 << 27),
- PASS_BVH_INTERSECTIONS = (1 << 28),
- PASS_RAY_BOUNCES = (1 << 29),
+ PASS_BVH_TRAVERSED_NODES,
+ PASS_BVH_TRAVERSED_INSTANCES,
+ PASS_BVH_INTERSECTIONS,
+ PASS_RAY_BOUNCES,
#endif
+ PASS_RENDER_TIME,
+ PASS_CATEGORY_MAIN_END = 31,
+
+ PASS_MIST = 32,
+ PASS_EMISSION,
+ PASS_BACKGROUND,
+ PASS_AO,
+ PASS_SHADOW,
+ PASS_LIGHT, /* no real pass, used to force use_light_pass */
+ PASS_DIFFUSE_DIRECT,
+ PASS_DIFFUSE_INDIRECT,
+ PASS_DIFFUSE_COLOR,
+ PASS_GLOSSY_DIRECT,
+ PASS_GLOSSY_INDIRECT,
+ PASS_GLOSSY_COLOR,
+ PASS_TRANSMISSION_DIRECT,
+ PASS_TRANSMISSION_INDIRECT,
+ PASS_TRANSMISSION_COLOR,
+ PASS_SUBSURFACE_DIRECT,
+ PASS_SUBSURFACE_INDIRECT,
+ PASS_SUBSURFACE_COLOR,
+ PASS_VOLUME_DIRECT,
+ PASS_VOLUME_INDIRECT,
+ /* No Scatter color since it's tricky to define what it would even mean. */
+ PASS_CATEGORY_LIGHT_END = 63,
} PassType;
-#define PASS_ALL (~0)
-
-typedef enum BakePassFilter {
+#define PASS_ANY (~0)
+
+typedef enum DenoisingPassOffsets {
+ DENOISING_PASS_NORMAL = 0,
+ DENOISING_PASS_NORMAL_VAR = 3,
+ DENOISING_PASS_ALBEDO = 6,
+ DENOISING_PASS_ALBEDO_VAR = 9,
+ DENOISING_PASS_DEPTH = 12,
+ DENOISING_PASS_DEPTH_VAR = 13,
+ DENOISING_PASS_SHADOW_A = 14,
+ DENOISING_PASS_SHADOW_B = 17,
+ DENOISING_PASS_COLOR = 20,
+ DENOISING_PASS_COLOR_VAR = 23,
+
+ DENOISING_PASS_SIZE_BASE = 26,
+ DENOISING_PASS_SIZE_CLEAN = 3,
+} DenoisingPassOffsets;
+
+typedef enum eBakePassFilter {
BAKE_FILTER_NONE = 0,
BAKE_FILTER_DIRECT = (1 << 0),
BAKE_FILTER_INDIRECT = (1 << 1),
@@ -366,7 +470,7 @@ typedef enum BakePassFilter {
BAKE_FILTER_SUBSURFACE = (1 << 6),
BAKE_FILTER_EMISSION = (1 << 7),
BAKE_FILTER_AO = (1 << 8),
-} BakePassFilter;
+} eBakePassFilter;
typedef enum BakePassFilterCombos {
BAKE_FILTER_COMBINED = (
@@ -388,25 +492,60 @@ typedef enum BakePassFilterCombos {
BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE),
} BakePassFilterCombos;
+typedef enum DenoiseFlag {
+ DENOISING_CLEAN_DIFFUSE_DIR = (1 << 0),
+ DENOISING_CLEAN_DIFFUSE_IND = (1 << 1),
+ DENOISING_CLEAN_GLOSSY_DIR = (1 << 2),
+ DENOISING_CLEAN_GLOSSY_IND = (1 << 3),
+ DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4),
+ DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5),
+ DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6),
+ DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7),
+ DENOISING_CLEAN_ALL_PASSES = (1 << 8)-1,
+} DenoiseFlag;
+
+#ifdef __KERNEL_DEBUG__
+/* NOTE: This is a runtime-only struct, alignment is not
+ * really important here.
+ */
+typedef struct DebugData {
+ int num_bvh_traversed_nodes;
+ int num_bvh_traversed_instances;
+ int num_bvh_intersections;
+ int num_ray_bounces;
+} DebugData;
+#endif
+
+typedef ccl_addr_space struct PathRadianceState {
+#ifdef __PASSES__
+ float3 diffuse;
+ float3 glossy;
+ float3 transmission;
+ float3 subsurface;
+ float3 scatter;
+
+ float3 direct;
+#endif
+} PathRadianceState;
+
typedef ccl_addr_space struct PathRadiance {
#ifdef __PASSES__
int use_light_pass;
#endif
+ float transparent;
float3 emission;
#ifdef __PASSES__
float3 background;
float3 ao;
float3 indirect;
- float3 direct_throughput;
float3 direct_emission;
float3 color_diffuse;
float3 color_glossy;
float3 color_transmission;
float3 color_subsurface;
- float3 color_scatter;
float3 direct_diffuse;
float3 direct_glossy;
@@ -420,15 +559,46 @@ typedef ccl_addr_space struct PathRadiance {
float3 indirect_subsurface;
float3 indirect_scatter;
- float3 path_diffuse;
- float3 path_glossy;
- float3 path_transmission;
- float3 path_subsurface;
- float3 path_scatter;
-
float4 shadow;
float mist;
#endif
+
+ struct PathRadianceState state;
+
+#ifdef __SHADOW_TRICKS__
+ /* Total light reachable across the path, ignoring shadow blocked queries. */
+ float3 path_total;
+ /* Total light reachable across the path with shadow blocked queries
+ * applied here.
+ *
+ * Dividing this figure by path_total will give estimate of shadow pass.
+ */
+ float3 path_total_shaded;
+
+ /* Color of the background on which shadow is alpha-overed. */
+ float3 shadow_background_color;
+
+ /* Path radiance sum and throughput at the moment when ray hits shadow
+ * catcher object.
+ */
+ float shadow_throughput;
+
+ /* Accumulated transparency along the path after shadow catcher bounce. */
+ float shadow_transparency;
+
+ /* Indicate if any shadow catcher data is set. */
+ int has_shadow_catcher;
+#endif
+
+#ifdef __DENOISING_FEATURES__
+ float3 denoising_normal;
+ float3 denoising_albedo;
+ float denoising_depth;
+#endif /* __DENOISING_FEATURES__ */
+
+#ifdef __KERNEL_DEBUG__
+ DebugData debug_data;
+#endif /* __KERNEL_DEBUG__ */
} PathRadiance;
typedef struct BsdfEval {
@@ -444,6 +614,9 @@ typedef struct BsdfEval {
float3 subsurface;
float3 scatter;
#endif
+#ifdef __SHADOW_TRICKS__
+ float3 sum_no_mis;
+#endif
} BsdfEval;
/* Shader Flag */
@@ -537,7 +710,7 @@ typedef struct Ray {
/* Intersection */
-typedef ccl_addr_space struct Intersection {
+typedef struct Intersection {
float t, u, v;
int prim;
int object;
@@ -617,12 +790,14 @@ typedef enum AttributeStandard {
ATTR_STD_MOTION_VERTEX_NORMAL,
ATTR_STD_PARTICLE,
ATTR_STD_CURVE_INTERCEPT,
+ ATTR_STD_CURVE_RANDOM,
ATTR_STD_PTEX_FACE_ID,
ATTR_STD_PTEX_UV,
ATTR_STD_VOLUME_DENSITY,
ATTR_STD_VOLUME_COLOR,
ATTR_STD_VOLUME_FLAME,
ATTR_STD_VOLUME_HEAT,
+ ATTR_STD_VOLUME_TEMPERATURE,
ATTR_STD_VOLUME_VELOCITY,
ATTR_STD_POINTINESS,
ATTR_STD_NUM,
@@ -645,10 +820,14 @@ typedef struct AttributeDescriptor {
/* Closure data */
#ifdef __MULTI_CLOSURE__
-# ifndef __MAX_CLOSURE__
-# define MAX_CLOSURE 64
+# ifdef __SPLIT_KERNEL__
+# define MAX_CLOSURE 1
# else
-# define MAX_CLOSURE __MAX_CLOSURE__
+# ifndef __MAX_CLOSURE__
+# define MAX_CLOSURE 64
+# else
+# define MAX_CLOSURE __MAX_CLOSURE__
+# endif
# endif
#else
# define MAX_CLOSURE 1
@@ -668,28 +847,15 @@ typedef struct AttributeDescriptor {
#define SHADER_CLOSURE_BASE \
float3 weight; \
ClosureType type; \
- float sample_weight \
+ float sample_weight; \
+ float3 N
typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
SHADER_CLOSURE_BASE;
- float data[14]; /* pad to 80 bytes */
+ float data[10]; /* pad to 80 bytes */
} ShaderClosure;
-/* Shader Context
- *
- * For OSL we recycle a fixed number of contexts for speed */
-
-typedef enum ShaderContext {
- SHADER_CONTEXT_MAIN = 0,
- SHADER_CONTEXT_INDIRECT = 1,
- SHADER_CONTEXT_EMISSION = 2,
- SHADER_CONTEXT_SHADOW = 3,
- SHADER_CONTEXT_SSS = 4,
- SHADER_CONTEXT_VOLUME = 5,
- SHADER_CONTEXT_NUM = 6
-} ShaderContext;
-
/* Shader Data
*
* Main shader state at a point on the surface or in a volume. All coordinates
@@ -701,7 +867,7 @@ enum ShaderDataFlag {
/* Set when ray hits backside of surface. */
SD_BACKFACING = (1 << 0),
- /* Shader has emissive closure. */
+ /* Shader has non-zero emission. */
SD_EMISSION = (1 << 1),
/* Shader has BSDF closure. */
SD_BSDF = (1 << 2),
@@ -711,8 +877,8 @@ enum ShaderDataFlag {
SD_BSSRDF = (1 << 4),
/* Shader has holdout closure. */
SD_HOLDOUT = (1 << 5),
- /* Shader has volume absorption closure. */
- SD_ABSORPTION = (1 << 6),
+ /* Shader has non-zero volume extinction. */
+ SD_EXTINCTION = (1 << 6),
/* Shader has have volume phase (scatter) closure. */
SD_SCATTER = (1 << 7),
/* Shader has AO closure. */
@@ -727,7 +893,7 @@ enum ShaderDataFlag {
SD_BSDF_HAS_EVAL |
SD_BSSRDF |
SD_HOLDOUT |
- SD_ABSORPTION |
+ SD_EXTINCTION |
SD_SCATTER |
SD_AO |
SD_BSDF_NEEDS_LCG),
@@ -752,25 +918,28 @@ enum ShaderDataFlag {
SD_VOLUME_MIS = (1 << 23),
/* Use cubic interpolation for voxels. */
SD_VOLUME_CUBIC = (1 << 24),
- /* Has data connected to the displacement input. */
+ /* Has data connected to the displacement input or uses bump map. */
SD_HAS_BUMP = (1 << 25),
/* Has true displacement. */
SD_HAS_DISPLACEMENT = (1 << 26),
- /* Has constant emission (value stored in __shader_flag) */
+ /* Has constant emission (value stored in __shaders) */
SD_HAS_CONSTANT_EMISSION = (1 << 27),
+ /* Needs to access attributes */
+ SD_NEED_ATTRIBUTES = (1 << 28),
SD_SHADER_FLAGS = (SD_USE_MIS |
SD_HAS_TRANSPARENT_SHADOW |
SD_HAS_VOLUME |
SD_HAS_ONLY_VOLUME |
- SD_HETEROGENEOUS_VOLUME|
+ SD_HETEROGENEOUS_VOLUME |
SD_HAS_BSSRDF_BUMP |
SD_VOLUME_EQUIANGULAR |
SD_VOLUME_MIS |
SD_VOLUME_CUBIC |
SD_HAS_BUMP |
SD_HAS_DISPLACEMENT |
- SD_HAS_CONSTANT_EMISSION)
+ SD_HAS_CONSTANT_EMISSION |
+ SD_NEED_ATTRIBUTES)
};
/* Object flags. */
@@ -789,115 +958,113 @@ enum ShaderDataObjectFlag {
SD_OBJECT_INTERSECTS_VOLUME = (1 << 5),
/* Has position for motion vertices. */
SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6),
+ /* object is used to catch shadows */
+ SD_OBJECT_SHADOW_CATCHER = (1 << 7),
+ /* object has volume attributes */
+ SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8),
SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK |
SD_OBJECT_MOTION |
SD_OBJECT_TRANSFORM_APPLIED |
SD_OBJECT_NEGATIVE_SCALE_APPLIED |
SD_OBJECT_HAS_VOLUME |
- SD_OBJECT_INTERSECTS_VOLUME)
+ SD_OBJECT_INTERSECTS_VOLUME |
+ SD_OBJECT_SHADOW_CATCHER |
+ SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
};
-#ifdef __SPLIT_KERNEL__
-# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
-# if !defined(__SPLIT_KERNEL_SOA__)
- /* ShaderData is stored as an Array-of-Structures */
-# define ccl_soa_member(type, name) type soa_##name
-# define ccl_fetch(s, t) (s[SD_THREAD].soa_##t)
-# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index])
-# else
- /* ShaderData is stored as an Structure-of-Arrays */
-# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
-# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
-# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
-# define ccl_soa_member(type, name) type soa_##name
-# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(soa_##t) + SD_FIELD_SIZE(soa_##t) * SD_THREAD - SD_OFFSETOF(soa_##t)))->soa_##t)
-# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index])
-# endif
-#else
-# define ccl_soa_member(type, name) type name
-# define ccl_fetch(s, t) (s->t)
-# define ccl_fetch_array(s, t, index) (&s->t[index])
-#endif
-
typedef ccl_addr_space struct ShaderData {
/* position */
- ccl_soa_member(float3, P);
+ float3 P;
/* smooth normal for shading */
- ccl_soa_member(float3, N);
+ float3 N;
/* true geometric normal */
- ccl_soa_member(float3, Ng);
+ float3 Ng;
/* view/incoming direction */
- ccl_soa_member(float3, I);
+ float3 I;
/* shader id */
- ccl_soa_member(int, shader);
+ int shader;
/* booleans describing shader, see ShaderDataFlag */
- ccl_soa_member(int, flag);
+ int flag;
/* booleans describing object of the shader, see ShaderDataObjectFlag */
- ccl_soa_member(int, object_flag);
+ int object_flag;
/* primitive id if there is one, ~0 otherwise */
- ccl_soa_member(int, prim);
+ int prim;
/* combined type and curve segment for hair */
- ccl_soa_member(int, type);
+ int type;
/* parametric coordinates
* - barycentric weights for triangles */
- ccl_soa_member(float, u);
- ccl_soa_member(float, v);
+ float u;
+ float v;
/* object id if there is one, ~0 otherwise */
- ccl_soa_member(int, object);
+ int object;
+ /* lamp id if there is one, ~0 otherwise */
+ int lamp;
/* motion blur sample time */
- ccl_soa_member(float, time);
+ float time;
/* length of the ray being shaded */
- ccl_soa_member(float, ray_length);
+ float ray_length;
#ifdef __RAY_DIFFERENTIALS__
/* differential of P. these are orthogonal to Ng, not N */
- ccl_soa_member(differential3, dP);
+ differential3 dP;
/* differential of I */
- ccl_soa_member(differential3, dI);
+ differential3 dI;
/* differential of u, v */
- ccl_soa_member(differential, du);
- ccl_soa_member(differential, dv);
+ differential du;
+ differential dv;
#endif
#ifdef __DPDU__
/* differential of P w.r.t. parametric coordinates. note that dPdu is
* not readily suitable as a tangent for shading on triangles. */
- ccl_soa_member(float3, dPdu);
- ccl_soa_member(float3, dPdv);
+ float3 dPdu;
+ float3 dPdv;
#endif
#ifdef __OBJECT_MOTION__
/* object <-> world space transformations, cached to avoid
* re-interpolating them constantly for shading */
- ccl_soa_member(Transform, ob_tfm);
- ccl_soa_member(Transform, ob_itfm);
+ Transform ob_tfm;
+ Transform ob_itfm;
#endif
- /* Closure data, we store a fixed array of closures */
- ccl_soa_member(struct ShaderClosure, closure[MAX_CLOSURE]);
- ccl_soa_member(int, num_closure);
- ccl_soa_member(int, num_closure_extra);
- ccl_soa_member(float, randb_closure);
- ccl_soa_member(float3, svm_closure_weight);
-
- /* LCG state for closures that require additional random numbers. */
- ccl_soa_member(uint, lcg_state);
-
/* ray start position, only set for backgrounds */
- ccl_soa_member(float3, ray_P);
- ccl_soa_member(differential3, ray_dP);
+ float3 ray_P;
+ differential3 ray_dP;
#ifdef __OSL__
struct KernelGlobals *osl_globals;
struct PathState *osl_path_state;
#endif
+
+ /* LCG state for closures that require additional random numbers. */
+ uint lcg_state;
+
+ /* Closure data, we store a fixed array of closures */
+ int num_closure;
+ int num_closure_left;
+ float randb_closure;
+ float3 svm_closure_weight;
+
+ /* Closure weights summed directly, so we can evaluate
+ * emission and shadow transparency with MAX_CLOSURE 0. */
+ float3 closure_emission_background;
+ float3 closure_transparent_extinction;
+
+ /* At the end so we can adjust size in ShaderDataTinyStorage. */
+ struct ShaderClosure closure[MAX_CLOSURE];
} ShaderData;
+typedef ccl_addr_space struct ShaderDataTinyStorage {
+ char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
+} ShaderDataTinyStorage;
+#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData*)shader_data_tiny_storage)
+
/* Path State */
#ifdef __VOLUME__
@@ -912,9 +1079,11 @@ typedef struct PathState {
int flag;
/* random number generator state */
- int rng_offset; /* dimension offset */
- int sample; /* path sample number */
- int num_samples; /* total number of times this path will be sampled */
+ uint rng_hash; /* per pixel hash */
+ int rng_offset; /* dimension offset */
+ int sample; /* path sample number */
+ int num_samples; /* total number of times this path will be sampled */
+ float branch_factor; /* number of branches in indirect paths */
/* bounce counting */
int bounce;
@@ -923,6 +1092,10 @@ typedef struct PathState {
int transmission_bounce;
int transparent_bounce;
+#ifdef __DENOISING_FEATURES__
+ float denoising_feature_weight;
+#endif /* __DENOISING_FEATURES__ */
+
/* multiple importance sampling */
float min_ray_pdf; /* smallest bounce pdf over entire path up to now */
float ray_pdf; /* last bounce pdf */
@@ -933,37 +1106,34 @@ typedef struct PathState {
/* volume rendering */
#ifdef __VOLUME__
int volume_bounce;
- RNG rng_congruential;
+ int volume_bounds_bounce;
VolumeStack volume_stack[VOLUME_STACK_SIZE];
#endif
} PathState;
-/* Subsurface */
-
-/* Struct to gather multiple SSS hits. */
-struct SubsurfaceIntersection
-{
+/* Struct to gather multiple nearby intersections. */
+typedef struct LocalIntersection {
Ray ray;
- float3 weight[BSSRDF_MAX_HITS];
+ float3 weight[LOCAL_MAX_HITS];
int num_hits;
- struct Intersection hits[BSSRDF_MAX_HITS];
- float3 Ng[BSSRDF_MAX_HITS];
-};
+ struct Intersection hits[LOCAL_MAX_HITS];
+ float3 Ng[LOCAL_MAX_HITS];
+} LocalIntersection;
+
+/* Subsurface */
/* Struct to gather SSS indirect rays and delay tracing them. */
-struct SubsurfaceIndirectRays
-{
- bool need_update_volume_stack;
- bool tracing;
+typedef struct SubsurfaceIndirectRays {
PathState state[BSSRDF_MAX_HITS];
- struct PathRadiance direct_L;
int num_rays;
+
struct Ray rays[BSSRDF_MAX_HITS];
float3 throughputs[BSSRDF_MAX_HITS];
- struct PathRadiance L[BSSRDF_MAX_HITS];
-};
+ struct PathRadianceState L_state[BSSRDF_MAX_HITS];
+} SubsurfaceIndirectRays;
+static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high.");
/* Constant Kernel Data
*
@@ -989,7 +1159,7 @@ typedef struct KernelCamera {
/* matrices */
Transform cameratoworld;
- Transform rastertocamera;
+ ProjectionTransform rastertocamera;
/* differentials */
float4 dx;
@@ -1003,7 +1173,7 @@ typedef struct KernelCamera {
/* motion blur */
float shuttertime;
- int have_motion, have_perspective_motion;
+ int num_motion_steps, have_perspective_motion;
/* clipping */
float nearclip;
@@ -1023,22 +1193,22 @@ typedef struct KernelCamera {
int is_inside_volume;
/* more matrices */
- Transform screentoworld;
- Transform rastertoworld;
- /* work around cuda sm 2.0 crash, this seems to
- * cross some limit in combination with motion
- * Transform ndctoworld; */
- Transform worldtoscreen;
- Transform worldtoraster;
- Transform worldtondc;
+ ProjectionTransform screentoworld;
+ ProjectionTransform rastertoworld;
+ ProjectionTransform ndctoworld;
+ ProjectionTransform worldtoscreen;
+ ProjectionTransform worldtoraster;
+ ProjectionTransform worldtondc;
Transform worldtocamera;
- MotionTransform motion;
+ /* Stores changes in the projeciton matrix. Use for camera zoom motion
+ * blur and motion pass output for perspective camera. */
+ ProjectionTransform perspective_pre;
+ ProjectionTransform perspective_post;
- /* Denotes changes in the projective matrix, namely in rastertocamera.
- * Used for camera zoom motion blur,
- */
- PerspectiveMotionTransform perspective_motion;
+ /* Transforms for motion pass. */
+ Transform motion_pass_pre;
+ Transform motion_pass_post;
int shutter_table_offset;
@@ -1053,6 +1223,7 @@ static_assert_align(KernelCamera, 16);
typedef struct KernelFilm {
float exposure;
int pass_flag;
+ int light_pass_flag;
int pass_stride;
int use_light_pass;
@@ -1075,11 +1246,13 @@ typedef struct KernelFilm {
int pass_glossy_indirect;
int pass_transmission_indirect;
int pass_subsurface_indirect;
+ int pass_volume_indirect;
int pass_diffuse_direct;
int pass_glossy_direct;
int pass_transmission_direct;
int pass_subsurface_direct;
+ int pass_volume_direct;
int pass_emission;
int pass_background;
@@ -1089,13 +1262,18 @@ typedef struct KernelFilm {
int pass_shadow;
float pass_shadow_scale;
int filter_table_offset;
- int pass_pad2;
int pass_mist;
float mist_start;
float mist_inv_depth;
float mist_falloff;
+ int pass_denoising_data;
+ int pass_denoising_clean;
+ int denoising_flags;
+
+ int pad1, pad2, pad3;
+
#ifdef __KERNEL_DEBUG__
int pass_bvh_traversed_nodes;
int pass_bvh_traversed_instances;
@@ -1110,12 +1288,13 @@ typedef struct KernelBackground {
int surface_shader;
int volume_shader;
int transparent;
- int pad;
+ float transparent_roughness_squared_threshold;
/* ambient occlusion */
float ao_factor;
float ao_distance;
- float ao_pad1, ao_pad2;
+ float ao_bounces_factor;
+ float ao_pad;
} KernelBackground;
static_assert_align(KernelBackground, 16);
@@ -1127,8 +1306,8 @@ typedef struct KernelIntegrator {
int num_all_lights;
float pdf_triangles;
float pdf_lights;
- float inv_pdf_lights;
int pdf_background_res;
+ float light_inv_rr_threshold;
/* light portals */
float portal_pdf;
@@ -1136,7 +1315,6 @@ typedef struct KernelIntegrator {
int portal_offset;
/* bounces */
- int min_bounce;
int max_bounce;
int max_diffuse_bounce;
@@ -1147,7 +1325,6 @@ typedef struct KernelIntegrator {
int ao_bounces;
/* transparent */
- int transparent_min_bounce;
int transparent_max_bounce;
int transparent_shadows;
@@ -1165,6 +1342,7 @@ typedef struct KernelIntegrator {
/* branched path */
int branched;
+ int volume_decoupled;
int diffuse_samples;
int glossy_samples;
int transmission_samples;
@@ -1187,23 +1365,31 @@ typedef struct KernelIntegrator {
float volume_step_size;
int volume_samples;
- float light_inv_rr_threshold;
-
int start_sample;
- int pad1, pad2, pad3;
+
+ int max_closures;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
+typedef enum KernelBVHLayout {
+ BVH_LAYOUT_NONE = 0,
+
+ BVH_LAYOUT_BVH2 = (1 << 0),
+ BVH_LAYOUT_BVH4 = (1 << 1),
+
+ BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH4,
+ BVH_LAYOUT_ALL = (unsigned int)(-1),
+} KernelBVHLayout;
+
typedef struct KernelBVH {
/* root node */
int root;
- int attributes_map_stride;
int have_motion;
int have_curves;
int have_instancing;
- int use_qbvh;
+ int bvh_layout;
int use_bvh_steps;
- int pad1;
+ int pad1, pad2;
} KernelBVH;
static_assert_align(KernelBVH, 16);
@@ -1244,17 +1430,113 @@ typedef struct KernelData {
} KernelData;
static_assert_align(KernelData, 16);
-#ifdef __KERNEL_DEBUG__
-/* NOTE: This is a runtime-only struct, alignment is not
- * really important here.
- */
-typedef ccl_addr_space struct DebugData {
- int num_bvh_traversed_nodes;
- int num_bvh_traversed_instances;
- int num_bvh_intersections;
- int num_ray_bounces;
-} DebugData;
-#endif
+/* Kernel data structures. */
+
+typedef struct KernelObject {
+ Transform tfm;
+ Transform itfm;
+
+ float surface_area;
+ float pass_id;
+ float random_number;
+ int particle_index;
+
+ float dupli_generated[3];
+ float dupli_uv[2];
+
+ int numkeys;
+ int numsteps;
+ int numverts;
+
+ uint patch_map_offset;
+ uint attribute_map_offset;
+ uint motion_offset;
+ uint pad;
+} KernelObject;
+static_assert_align(KernelObject, 16);
+
+typedef struct KernelSpotLight {
+ float radius;
+ float invarea;
+ float spot_angle;
+ float spot_smooth;
+ float dir[3];
+ float pad;
+} KernelSpotLight;
+
+/* PointLight is SpotLight with only radius and invarea being used. */
+
+typedef struct KernelAreaLight {
+ float axisu[3];
+ float invarea;
+ float axisv[3];
+ float pad1;
+ float dir[3];
+ float pad2;
+} KernelAreaLight;
+
+typedef struct KernelDistantLight {
+ float radius;
+ float cosangle;
+ float invarea;
+ float pad;
+} KernelDistantLight;
+
+typedef struct KernelLight {
+ int type;
+ float co[3];
+ int shader_id;
+ int samples;
+ float max_bounces;
+ float random;
+ Transform tfm;
+ Transform itfm;
+ union {
+ KernelSpotLight spot;
+ KernelAreaLight area;
+ KernelDistantLight distant;
+ };
+} KernelLight;
+static_assert_align(KernelLight, 16);
+
+typedef struct KernelLightDistribution {
+ float totarea;
+ int prim;
+ union {
+ struct {
+ int shader_flag;
+ int object_id;
+ } mesh_light;
+ struct {
+ float pad;
+ float size;
+ } lamp;
+ };
+} KernelLightDistribution;
+static_assert_align(KernelLightDistribution, 16);
+
+typedef struct KernelParticle {
+ int index;
+ float age;
+ float lifetime;
+ float size;
+ float4 rotation;
+ /* Only xyz are used of the following. float4 instead of float3 are used
+ * to ensure consistent padding/alignment across devices. */
+ float4 location;
+ float4 velocity;
+ float4 angular_velocity;
+} KernelParticle;
+static_assert_align(KernelParticle, 16);
+
+typedef struct KernelShader {
+ float constant_emission[3];
+ float pad1;
+ int flags;
+ int pass_id;
+ int pad2, pad3;
+} KernelShader;
+static_assert_align(KernelShader, 16);
/* Declarations required for split kernel */
@@ -1268,7 +1550,6 @@ typedef ccl_addr_space struct DebugData {
* Queue 3 - Shadow ray cast kernel - AO
* Queeu 4 - Shadow ray cast kernel - direct lighting
*/
-#define NUM_QUEUES 4
/* Queue names */
enum QueueNumber {
@@ -1281,45 +1562,77 @@ enum QueueNumber {
* 3. Rays to be regenerated
* are enqueued here.
*/
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contribution for AO are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contributing for direct lighting are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+
+ /* Rays sorted according to shader->id */
+ QUEUE_SHADER_SORTED_RAYS,
+
+#ifdef __BRANCHED_PATH__
+ /* All rays moving to next iteration of the indirect loop for light */
+ QUEUE_LIGHT_INDIRECT_ITER,
+ /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
+ QUEUE_INACTIVE_RAYS,
+# ifdef __VOLUME__
+ /* All rays moving to next iteration of the indirect loop for volumes */
+ QUEUE_VOLUME_INDIRECT_ITER,
+# endif
+# ifdef __SUBSURFACE__
+ /* All rays moving to next iteration of the indirect loop for subsurface */
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+# endif
+#endif /* __BRANCHED_PATH__ */
+
+ NUM_QUEUES
};
-/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
-#define RAY_STATE_MASK 0x007
-#define RAY_FLAG_MASK 0x0F8
+/* We use RAY_STATE_MASK to get ray_state */
+#define RAY_STATE_MASK 0x0F
+#define RAY_FLAG_MASK 0xF0
enum RayState {
+ RAY_INVALID = 0,
/* Denotes ray is actively involved in path-iteration. */
- RAY_ACTIVE = 0,
+ RAY_ACTIVE,
/* Denotes ray has completed processing all samples and is inactive. */
- RAY_INACTIVE = 1,
- /* Denoted ray has exited path-iteration and needs to update output buffer. */
- RAY_UPDATE_BUFFER = 2,
+ RAY_INACTIVE,
+ /* Denotes ray has exited path-iteration and needs to update output buffer. */
+ RAY_UPDATE_BUFFER,
+ /* Denotes ray needs to skip most surface shader work. */
+ RAY_HAS_ONLY_VOLUME,
/* Donotes ray has hit background */
- RAY_HIT_BACKGROUND = 3,
+ RAY_HIT_BACKGROUND,
/* Denotes ray has to be regenerated */
- RAY_TO_REGENERATE = 4,
+ RAY_TO_REGENERATE,
/* Denotes ray has been regenerated */
- RAY_REGENERATED = 5,
- /* Denotes ray should skip direct lighting */
- RAY_SKIP_DL = 6,
- /* Flag's ray has to execute shadow blocked function in AO part */
- RAY_SHADOW_RAY_CAST_AO = 16,
- /* Flag's ray has to execute shadow blocked function in direct lighting part. */
- RAY_SHADOW_RAY_CAST_DL = 32,
+ RAY_REGENERATED,
+ /* Denotes ray is moving to next iteration of the branched indirect loop */
+ RAY_LIGHT_INDIRECT_NEXT_ITER,
+ RAY_VOLUME_INDIRECT_NEXT_ITER,
+ RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+
+ /* Ray flags */
+
+ /* Flags to denote that the ray is currently evaluating the branched indirect loop */
+ RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
+ RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
+ RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
+ RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
+
+ /* Ray is evaluating an iteration of an indirect loop for another thread */
+ RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
-#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
@@ -1334,6 +1647,20 @@ enum RayState {
#define PATCH_MAP_NODE_IS_LEAF (1u << 31)
#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF))
+/* Work Tiles */
+
+typedef struct WorkTile {
+ uint x, y, w, h;
+
+ uint start_sample;
+ uint num_samples;
+
+ uint offset;
+ uint stride;
+
+ ccl_global float *buffer;
+} WorkTile;
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */