Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorge Kyriazis <George.Kyriazis@amd.com>2015-05-09 17:34:30 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2015-05-09 17:52:40 +0300
commit7f4479da425b2d44a585f1b7b63f91d9dfecef02 (patch)
tree96ae5e7d4e091f89beedcd37609b3769783a00af /intern/cycles/kernel/kernel_types.h
parentf680c1b54a28a02fb86271bca649da0660542e9a (diff)
Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely someone else which we're forgetting to mention. Currently only AMD cards are enabled for the new split kernel, but it is possible to force split opencl kernel to be used by setting the following environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1. Not all the features are supported yet, and that being said no motion blur, camera blur, SSS and volumetrics for now. Also transparent shadows are disabled on AMD device because of some compiler bug. This kernel is also only implements regular path tracing and supporting branched one will take a bit. Branched path tracing is exposed to the interface still, which is a bit misleading and will be hidden there soon. More feature will be enabled once they're ported to the split kernel and tested. Neither regular CPU nor CUDA has any difference, they're generating the same exact code, which means no regressions/improvements there. Based on the research paper: https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf Here's the documentation: https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit Design discussion of the patch: https://developer.blender.org/T44197 Differential Revision: https://developer.blender.org/D1200
Diffstat (limited to 'intern/cycles/kernel/kernel_types.h')
-rw-r--r--intern/cycles/kernel/kernel_types.h187
1 files changed, 108 insertions, 79 deletions
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index daa5ec1b9f1..303a78d8ac0 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -24,6 +24,13 @@
#define __KERNEL_CPU__
#endif
+/* TODO(sergey): This is only to make it possible to include this header
+ * from outside of the kernel. but this could be done somewhat cleaner?
+ */
+#ifndef ccl_addr_space
+#define ccl_addr_space
+#endif
+
CCL_NAMESPACE_BEGIN
/* constants */
@@ -90,7 +97,19 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPENCL_NVIDIA__
#define __KERNEL_SHADING__
-#define __KERNEL_ADV_SHADING__
+/* TODO(sergey): Advanced shading code still requires work
+ * for split kernel.
+ */
+# ifndef __SPLIT_KERNEL__
+# define __KERNEL_ADV_SHADING__
+# else
+# define __MULTI_CLOSURE__
+# define __TRANSPARENT_SHADOWS__
+# define __PASSES__
+# define __BACKGROUND_MIS__
+# define __LAMP_MIS__
+# define __AO__
+# endif
#endif
#ifdef __KERNEL_OPENCL_APPLE__
@@ -103,7 +122,7 @@ CCL_NAMESPACE_BEGIN
#define __KERNEL_SHADING__
//__KERNEL_ADV_SHADING__
#define __MULTI_CLOSURE__
-#define __TRANSPARENT_SHADOWS__
+//#define __TRANSPARENT_SHADOWS__
#define __PASSES__
#define __BACKGROUND_MIS__
#define __LAMP_MIS__
@@ -117,10 +136,22 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPENCL_INTEL_CPU__
#define __CL_USE_NATIVE__
#define __KERNEL_SHADING__
-#define __KERNEL_ADV_SHADING__
+/* TODO(sergey): Advanced shading code still requires work
+ * for split kernel.
+ */
+# ifndef __SPLIT_KERNEL__
+# define __KERNEL_ADV_SHADING__
+# else
+# define __MULTI_CLOSURE__
+# define __TRANSPARENT_SHADOWS__
+# define __PASSES__
+# define __BACKGROUND_MIS__
+# define __LAMP_MIS__
+# define __AO__
+# endif
#endif
-#endif
+#endif // __KERNEL_OPENCL__
/* kernel features */
#define __SOBOL__
@@ -322,7 +353,7 @@ typedef enum PassType {
#ifdef __PASSES__
-typedef struct PathRadiance {
+typedef ccl_addr_space struct PathRadiance {
int use_light_pass;
float3 emission;
@@ -374,7 +405,7 @@ typedef struct BsdfEval {
#else
-typedef float3 PathRadiance;
+typedef ccl_addr_space float3 PathRadiance;
typedef float3 BsdfEval;
#endif
@@ -441,9 +472,9 @@ typedef struct differential {
typedef struct Ray {
float3 P; /* origin */
float3 D; /* direction */
+
float t; /* length of the ray */
float time; /* time (for motion blur) */
-
#ifdef __RAY_DIFFERENTIALS__
differential3 dP;
differential3 dD;
@@ -452,7 +483,7 @@ typedef struct Ray {
/* Intersection */
-typedef struct Intersection {
+typedef ccl_addr_space struct Intersection {
float t, u, v;
int prim;
int object;
@@ -537,7 +568,11 @@ typedef enum AttributeStandard {
/* Closure data */
#ifdef __MULTI_CLOSURE__
-#define MAX_CLOSURE 64
+# ifndef __MAX_CLOSURE__
+# define MAX_CLOSURE 64
+# else
+# define MAX_CLOSURE __MAX_CLOSURE__
+# endif
#else
#define MAX_CLOSURE 1
#endif
@@ -547,7 +582,7 @@ typedef enum AttributeStandard {
* does not put own padding trying to align this members.
* - We make sure OSL pointer is also 16 bytes aligned.
*/
-typedef struct ShaderClosure {
+typedef ccl_addr_space struct ShaderClosure {
float3 weight;
float3 N;
float3 T;
@@ -632,78 +667,23 @@ enum ShaderDataFlag {
struct KernelGlobals;
-typedef struct ShaderData {
- /* position */
- float3 P;
- /* smooth normal for shading */
- float3 N;
- /* true geometric normal */
- float3 Ng;
- /* view/incoming direction */
- float3 I;
- /* shader id */
- int shader;
- /* booleans describing shader, see ShaderDataFlag */
- int flag;
-
- /* primitive id if there is one, ~0 otherwise */
- int prim;
-
- /* combined type and curve segment for hair */
- int type;
-
- /* parametric coordinates
- * - barycentric weights for triangles */
- float u, v;
- /* object id if there is one, ~0 otherwise */
- int object;
-
- /* motion blur sample time */
- float time;
-
- /* length of the ray being shaded */
- float ray_length;
-
- /* ray bounce depth */
- int ray_depth;
-
- /* ray transparent depth */
- int transparent_depth;
-
-#ifdef __RAY_DIFFERENTIALS__
- /* differential of P. these are orthogonal to Ng, not N */
- differential3 dP;
- /* differential of I */
- differential3 dI;
- /* differential of u, v */
- differential du;
- differential dv;
-#endif
-#ifdef __DPDU__
- /* differential of P w.r.t. parametric coordinates. note that dPdu is
- * not readily suitable as a tangent for shading on triangles. */
- float3 dPdu, dPdv;
-#endif
-
-#ifdef __OBJECT_MOTION__
- /* object <-> world space transformations, cached to avoid
- * re-interpolating them constantly for shading */
- Transform ob_tfm;
- Transform ob_itfm;
+#ifdef __SPLIT_KERNEL__
+#define SD_VAR(type, what) ccl_global type *what;
+#define SD_CLOSURE_VAR(type, what, max_closure) type *what;
+#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0))
+#define ccl_fetch(s, t) (s->t[TIDX])
+#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index])
+#else
+#define SD_VAR(type, what) type what;
+#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure];
+#define ccl_fetch(s, t) (s->t)
+#define ccl_fetch_array(s, t, index) (&s->t[index])
#endif
- /* Closure data, we store a fixed array of closures */
- ShaderClosure closure[MAX_CLOSURE];
- int num_closure;
- float randb_closure;
+typedef ccl_addr_space struct ShaderData {
- /* ray start position, only set for backgrounds */
- float3 ray_P;
- differential3 ray_dP;
+#include "kernel_shaderdata_vars.h"
-#ifdef __OSL__
- struct KernelGlobals *osl_globals;
-#endif
} ShaderData;
/* Path State */
@@ -996,13 +976,62 @@ typedef struct KernelData {
} KernelData;
#ifdef __KERNEL_DEBUG__
-typedef struct DebugData {
+typedef ccl_addr_space struct DebugData {
// Total number of BVH node traversal steps and primitives intersections
// for the camera rays.
int num_bvh_traversal_steps;
} DebugData;
#endif
+/* Declarations required for split kernel */
+
+/* Macro for queues */
+/* Value marking queue's empty slot */
+#define QUEUE_EMPTY_SLOT -1
+
+/*
+* Queue 1 - Active rays
+* Queue 2 - Background queue
+* Queue 3 - Shadow ray cast kernel - AO
+* Queeu 4 - Shadow ray cast kernel - direct lighting
+*/
+#define NUM_QUEUES 4
+
+/* Queue names */
+enum QueueNumber {
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS, /* All active rays and regenerated rays are enqueued here */
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, /* All
+ * 1.Background-hit rays,
+ * 2.Rays that has exited path-iteration but needs to update output buffer
+ * 3.Rays to be regenerated
+ * are enqueued here */
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS, /* All rays for which a shadow ray should be cast to determine radiance
+ contribution for AO are enqueued here */
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS, /* All rays for which a shadow ray should be cast to determine radiance
+ contributuin for direct lighting are enqueued here */
+};
+
+/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
+#define RAY_STATE_MASK 0x007
+#define RAY_FLAG_MASK 0x0F8
+enum RayState {
+ RAY_ACTIVE = 0, // Denotes ray is actively involved in path-iteration
+ RAY_INACTIVE = 1, // Denotes ray has completed processing all samples and is inactive
+ RAY_UPDATE_BUFFER = 2, // Denoted ray has exited path-iteration and needs to update output buffer
+ RAY_HIT_BACKGROUND = 3, // Donotes ray has hit background
+ RAY_TO_REGENERATE = 4, // Denotes ray has to be regenerated
+ RAY_REGENERATED = 5, // Denotes ray has been regenerated
+ RAY_SKIP_DL = 6, // Denotes ray should skip direct lighting
+ RAY_SHADOW_RAY_CAST_AO = 16, // Flag's ray has to execute shadow blocked function in AO part
+ RAY_SHADOW_RAY_CAST_DL = 32 // Flag's ray has to execute shadow blocked function in direct lighting part
+};
+
+#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
+#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
+#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
+#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */