diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_queues.h')
-rw-r--r-- | intern/cycles/kernel/kernel_queues.h | 145 |
1 files changed, 72 insertions, 73 deletions
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h index de8cc4a0cef..91a39fc1465 100644 --- a/intern/cycles/kernel/kernel_queues.h +++ b/intern/cycles/kernel/kernel_queues.h @@ -23,24 +23,24 @@ CCL_NAMESPACE_BEGIN * Queue utility functions for split kernel */ #ifdef __KERNEL_OPENCL__ -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +# pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable #endif /* * Enqueue ray index into the queue */ ccl_device void enqueue_ray_index( - int ray_index, /* Ray index to be enqueued. */ - int queue_number, /* Queue in which the ray index should be enqueued. */ - ccl_global int *queues, /* Buffer of all queues. */ - int queue_size, /* Size of each queue. */ - ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ + int ray_index, /* Ray index to be enqueued. */ + int queue_number, /* Queue in which the ray index should be enqueued. */ + ccl_global int *queues, /* Buffer of all queues. */ + int queue_size, /* Size of each queue. */ + ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ { - /* This thread's queue index. */ - int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint*)&queue_index[queue_number]) - + (queue_number * queue_size); - queues[my_queue_index] = ray_index; + /* This thread's queue index. */ + int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint *)&queue_index[queue_number]) + + (queue_number * queue_size); + queues[my_queue_index] = ray_index; } /* @@ -51,96 +51,95 @@ ccl_device void enqueue_ray_index( * is no more ray to allocate to other threads. */ ccl_device int get_ray_index( - KernelGlobals *kg, - int thread_index, /* Global thread index. */ - int queue_number, /* Queue to operate on. */ - ccl_global int *queues, /* Buffer of all queues. */ - int queuesize, /* Size of a queue. */ - int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */ + KernelGlobals *kg, + int thread_index, /* Global thread index. */ + int queue_number, /* Queue to operate on. */ + ccl_global int *queues, /* Buffer of all queues. */ + int queuesize, /* Size of a queue. */ + int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */ { - int ray_index = queues[queue_number * queuesize + thread_index]; - if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) { - queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT; - } - return ray_index; + int ray_index = queues[queue_number * queuesize + thread_index]; + if (empty_queue && ray_index != QUEUE_EMPTY_SLOT) { + queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + } + return ray_index; } /* The following functions are to realize Local memory variant of enqueue ray index function. */ /* All threads should call this function. */ ccl_device void enqueue_ray_index_local( - int ray_index, /* Ray index to enqueue. */ - int queue_number, /* Queue in which to enqueue ray index. */ - char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ - int queuesize, /* queue size. */ - ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */ - ccl_global int *Queue_data, /* Queues. */ - ccl_global int *Queue_index) /* To do global queue atomics. */ + int ray_index, /* Ray index to enqueue. */ + int queue_number, /* Queue in which to enqueue ray index. */ + char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ + int queuesize, /* queue size. */ + ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */ + ccl_global int *Queue_data, /* Queues. */ + ccl_global int *Queue_index) /* To do global queue atomics. */ { - int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); - - /* Get local queue id .*/ - unsigned int lqidx; - if(enqueue_flag) { - lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - /* Get global queue offset. */ - if(lidx == 0) { - *local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number], - *local_queue_atomics); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - /* Get global queue index and enqueue ray. */ - if(enqueue_flag) { - unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx; - Queue_data[my_gqidx] = ray_index; - } + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + + /* Get local queue id .*/ + unsigned int lqidx; + if (enqueue_flag) { + lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + /* Get global queue offset. */ + if (lidx == 0) { + *local_queue_atomics = atomic_fetch_and_add_uint32( + (ccl_global uint *)&Queue_index[queue_number], *local_queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + /* Get global queue index and enqueue ray. */ + if (enqueue_flag) { + unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx; + Queue_data[my_gqidx] = ray_index; + } } ccl_device unsigned int get_local_queue_index( - int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ - ccl_local_param unsigned int *local_queue_atomics) + int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ + ccl_local_param unsigned int *local_queue_atomics) { - int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]); - return my_lqidx; + int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]); + return my_lqidx; } ccl_device unsigned int get_global_per_queue_offset( - int queue_number, - ccl_local_param unsigned int *local_queue_atomics, - ccl_global int* global_queue_atomics) + int queue_number, + ccl_local_param unsigned int *local_queue_atomics, + ccl_global int *global_queue_atomics) { - unsigned int queue_offset = atomic_fetch_and_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number], - local_queue_atomics[queue_number]); - return queue_offset; + unsigned int queue_offset = atomic_fetch_and_add_uint32( + (ccl_global uint *)&global_queue_atomics[queue_number], local_queue_atomics[queue_number]); + return queue_offset; } ccl_device unsigned int get_global_queue_index( int queue_number, int queuesize, unsigned int lqidx, - ccl_local_param unsigned int * global_per_queue_offset) + ccl_local_param unsigned int *global_per_queue_offset) { - int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; - return my_gqidx; + int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; + return my_gqidx; } -ccl_device int dequeue_ray_index( - int queue_number, - ccl_global int *queues, - int queue_size, - ccl_global int *queue_index) +ccl_device int dequeue_ray_index(int queue_number, + ccl_global int *queues, + int queue_size, + ccl_global int *queue_index) { - int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1; + int index = atomic_fetch_and_dec_uint32((ccl_global uint *)&queue_index[queue_number]) - 1; - if(index < 0) { - return QUEUE_EMPTY_SLOT; - } + if (index < 0) { + return QUEUE_EMPTY_SLOT; + } - return queues[index + queue_number * queue_size]; + return queues[index + queue_number * queue_size]; } CCL_NAMESPACE_END |