Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/split/kernel_data_init.h')
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h261
1 files changed, 68 insertions, 193 deletions
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 6e158d53d23..77fb61b80a8 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -14,221 +14,96 @@
* limitations under the License.
*/
-#include "kernel_split_common.h"
+CCL_NAMESPACE_BEGIN
-/* Note on kernel_data_initialization kernel
- * This kernel Initializes structures needed in path-iteration kernels.
- * This is the first kernel in ray-tracing logic.
+/* This kernel Initializes structures needed in path-iteration kernels.
*
- * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
- *
- * Its input and output are as follows,
- *
- * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
- * Un-initialized throughput -------| |--- Initialized throughput
- * Un-initialized L_transparent ----| |--- Initialized L_transparent
- * Un-initialized PathRadiance -----| |--- Initialized PathRadiance
- * Un-initialized Ray --------------| |--- Initialized Ray
- * Un-initialized PathState --------| |--- Initialized PathState
- * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
- * Un-initialized QueueIndex -------| |--- Initialized QueueIndex (to 0)
- * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false)
- * Un-initialized ray_state --------| |--- Initialized ray_state
- * parallel_samples --------------- | |--- Initialized per_sample_output_buffers
- * rng_state -----------------------| |--- Initialized work_array
- * data ----------------------------| |--- Initialized work_pool_wgs
- * start_sample --------------------| |
- * sx ------------------------------| |
- * sy ------------------------------| |
- * sw ------------------------------| |
- * sh ------------------------------| |
- * stride --------------------------| |
- * queuesize -----------------------| |
- * num_samples ---------------------| |
- *
- * Note on Queues :
+ * Note on Queues:
* All slots in queues are initialized to queue empty slot;
* The number of elements in the queues is initialized to 0;
*/
+
+#ifndef __KERNEL_CPU__
ccl_device void kernel_data_init(
+#else
+void KERNEL_FUNCTION_FULL_NAME(data_init)(
+#endif
KernelGlobals *kg,
- ShaderData *sd_DL_shadow,
ccl_constant KernelData *data,
- ccl_global float *per_sample_output_buffers,
- ccl_global uint *rng_state,
- ccl_global uint *rng_coop, /* rng array to store rng values for all rays */
- ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */
- ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */
- PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */
- ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */
- ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */
- Intersection *Intersection_coop_shadow,
- ccl_global char *ray_state, /* Stores information on current state of a ray */
-
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "../kernel_textures.h"
-
- int start_sample, int sx, int sy, int sw, int sh, int offset, int stride,
- int rng_state_offset_x,
- int rng_state_offset_y,
- int rng_state_stride,
- ccl_global int *Queue_data, /* Memory for queues */
+ ccl_global void *split_data_buffer,
+ int num_elements,
+ ccl_global char *ray_state,
+
+#ifdef __KERNEL_OPENCL__
+ KERNEL_BUFFER_PARAMS,
+#endif
+
+ int start_sample,
+ int end_sample,
+ int sx, int sy, int sw, int sh, int offset, int stride,
ccl_global int *Queue_index, /* Tracks the number of elements in queues */
int queuesize, /* size (capacity) of the queue */
ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
- ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */
-#ifdef __WORK_STEALING__
- ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */
- unsigned int num_samples, /* Total number of samples per pixel */
-#endif
-#ifdef __KERNEL_DEBUG__
- DebugData *debugdata_coop,
-#endif
- int parallel_samples) /* Number of samples to be processed in parallel */
+ ccl_global unsigned int *work_pools, /* Work pool for each work group */
+ unsigned int num_samples,
+ ccl_global float *buffer)
{
+#ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, data_init);
+#else
+
+#ifdef __KERNEL_OPENCL__
kg->data = data;
- kg->sd_input = sd_DL_shadow;
- kg->isect_shadow = Intersection_coop_shadow;
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "../kernel_textures.h"
-
- int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
-
-#ifdef __WORK_STEALING__
- int lid = get_local_id(1) * get_local_size(0) + get_local_id(0);
- /* Initialize work_pool_wgs */
- if(lid == 0) {
- int group_index = get_group_id(1) * get_num_groups(0) + get_group_id(0);
- work_pool_wgs[group_index] = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif /* __WORK_STEALING__ */
+#endif
- /* Initialize queue data and queue index. */
- if(thread_index < queuesize) {
- /* Initialize active ray queue. */
- Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize background and buffer update queue. */
- Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of AO queue. */
- Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of direct lighting queue. */
- Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- }
+ kernel_split_params.tile.x = sx;
+ kernel_split_params.tile.y = sy;
+ kernel_split_params.tile.w = sw;
+ kernel_split_params.tile.h = sh;
- if(thread_index == 0) {
- Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
- /* The scene-intersect kernel should not use the queues very first time.
- * since the queue would be empty.
- */
- use_queues_flag[0] = 0;
- }
+ kernel_split_params.tile.start_sample = start_sample;
+ kernel_split_params.tile.num_samples = num_samples;
- int x = get_global_id(0);
- int y = get_global_id(1);
+ kernel_split_params.tile.offset = offset;
+ kernel_split_params.tile.stride = stride;
- if(x < (sw * parallel_samples) && y < sh) {
- int ray_index = x + y * (sw * parallel_samples);
+ kernel_split_params.tile.buffer = buffer;
- /* This is the first assignment to ray_state;
- * So we dont use ASSIGN_RAY_STATE macro.
- */
- ray_state[ray_index] = RAY_ACTIVE;
-
- unsigned int my_sample;
- unsigned int pixel_x;
- unsigned int pixel_y;
- unsigned int tile_x;
- unsigned int tile_y;
- unsigned int my_sample_tile;
-
-#ifdef __WORK_STEALING__
- unsigned int my_work = 0;
- /* Get work. */
- get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
- /* Get the sample associated with the work. */
- my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
-
- my_sample_tile = 0;
-
- /* Get pixel and tile position associated with the work. */
- get_pixel_tile_position(&pixel_x, &pixel_y,
- &tile_x, &tile_y,
- my_work,
- sw, sh, sx, sy,
- parallel_samples,
- ray_index);
- work_array[ray_index] = my_work;
-#else /* __WORK_STEALING__ */
- unsigned int tile_index = ray_index / parallel_samples;
- tile_x = tile_index % sw;
- tile_y = tile_index / sw;
- my_sample_tile = ray_index - (tile_index * parallel_samples);
- my_sample = my_sample_tile + start_sample;
-
- /* Initialize work array. */
- work_array[ray_index] = my_sample ;
-
- /* Calculate pixel position of this ray. */
- pixel_x = sx + tile_x;
- pixel_y = sy + tile_y;
-#endif /* __WORK_STEALING__ */
-
- rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
-
- /* Initialise per_sample_output_buffers to all zeros. */
- per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
- int per_sample_output_buffers_iterator = 0;
- for(per_sample_output_buffers_iterator = 0;
- per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
- per_sample_output_buffers_iterator++)
- {
- per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
- }
+ kernel_split_params.total_work_size = sw * sh * num_samples;
+
+ kernel_split_params.work_pools = work_pools;
- /* Initialize random numbers and ray. */
- kernel_path_trace_setup(kg,
- rng_state,
- my_sample,
- pixel_x, pixel_y,
- &rng_coop[ray_index],
- &Ray_coop[ray_index]);
-
- if(Ray_coop[ray_index].t != 0.0f) {
- /* Initialize throughput, L_transparent, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- L_transparent_coop[ray_index] = 0.0f;
- path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
- path_state_init(kg,
- kg->sd_input,
- &PathState_coop[ray_index],
- &rng_coop[ray_index],
- my_sample,
- &Ray_coop[ray_index]);
-#ifdef __KERNEL_DEBUG__
- debug_data_init(&debugdata_coop[ray_index]);
+ kernel_split_params.queue_index = Queue_index;
+ kernel_split_params.queue_size = queuesize;
+ kernel_split_params.use_queues_flag = use_queues_flag;
+
+ split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
+
+#ifdef __KERNEL_OPENCL__
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
#endif
- }
- else {
- /* These rays do not participate in path-iteration. */
- float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- /* Accumulate result in output buffer. */
- kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
- path_rng_end(kg, rng_state, rng_coop[ray_index]);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ /* Initialize queue data and queue index. */
+ if(thread_index < queuesize) {
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
}
}
- /* Mark rest of the ray-state indices as RAY_INACTIVE. */
- if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) {
- /* First assignment, hence we dont use ASSIGN_RAY_STATE macro */
- ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE;
+ if(thread_index == 0) {
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
+
+ /* The scene-intersect kernel should not use the queues very first time.
+ * since the queue would be empty.
+ */
+ *use_queues_flag = 0;
}
+#endif /* KERENL_STUB */
}
+
+CCL_NAMESPACE_END