diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_queues.h')
-rw-r--r-- | intern/cycles/kernel/kernel_queues.h | 51 |
1 files changed, 37 insertions, 14 deletions
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h index cf5614b8a86..e32d4bbbc1b 100644 --- a/intern/cycles/kernel/kernel_queues.h +++ b/intern/cycles/kernel/kernel_queues.h @@ -17,12 +17,15 @@ #ifndef __KERNEL_QUEUE_H__ #define __KERNEL_QUEUE_H__ +CCL_NAMESPACE_BEGIN + /* * Queue utility functions for split kernel */ - +#ifdef __KERNEL_OPENCL__ #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#endif /* * Enqueue ray index into the queue @@ -35,7 +38,8 @@ ccl_device void enqueue_ray_index( ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ { /* This thread's queue index. */ - int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size); + int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + + (queue_number * queue_size); queues[my_queue_index] = ray_index; } @@ -47,6 +51,7 @@ ccl_device void enqueue_ray_index( * is no more ray to allocate to other threads. */ ccl_device int get_ray_index( + KernelGlobals *kg, int thread_index, /* Global thread index. */ int queue_number, /* Queue to operate on. */ ccl_global int *queues, /* Buffer of all queues. */ @@ -68,24 +73,25 @@ ccl_device void enqueue_ray_index_local( int queue_number, /* Queue in which to enqueue ray index. */ char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ int queuesize, /* queue size. */ - ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics. */ + ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */ ccl_global int *Queue_data, /* Queues. */ ccl_global int *Queue_index) /* To do global queue atomics. */ { - int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); /* Get local queue id .*/ unsigned int lqidx; if(enqueue_flag) { - lqidx = atomic_inc(local_queue_atomics); + lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics); } - barrier(CLK_LOCAL_MEM_FENCE); + ccl_barrier(CCL_LOCAL_MEM_FENCE); /* Get global queue offset. */ if(lidx == 0) { - *local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics); + *local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number], + *local_queue_atomics); } - barrier(CLK_LOCAL_MEM_FENCE); + ccl_barrier(CCL_LOCAL_MEM_FENCE); /* Get global queue index and enqueue ray. */ if(enqueue_flag) { @@ -96,19 +102,19 @@ ccl_device void enqueue_ray_index_local( ccl_device unsigned int get_local_queue_index( int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ - ccl_local unsigned int *local_queue_atomics) + ccl_local_param unsigned int *local_queue_atomics) { - int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]); + int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]); return my_lqidx; } ccl_device unsigned int get_global_per_queue_offset( int queue_number, - ccl_local unsigned int *local_queue_atomics, + ccl_local_param unsigned int *local_queue_atomics, ccl_global int* global_queue_atomics) { - unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number], - local_queue_atomics[queue_number]); + unsigned int queue_offset = atomic_fetch_and_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number], + local_queue_atomics[queue_number]); return queue_offset; } @@ -116,10 +122,27 @@ ccl_device unsigned int get_global_queue_index( int queue_number, int queuesize, unsigned int lqidx, - ccl_local unsigned int * global_per_queue_offset) + ccl_local_param unsigned int * global_per_queue_offset) { int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; return my_gqidx; } +ccl_device int dequeue_ray_index( + int queue_number, + ccl_global int *queues, + int queue_size, + ccl_global int *queue_index) +{ + int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1; + + if(index < 0) { + return QUEUE_EMPTY_SLOT; + } + + return queues[index + queue_number * queue_size]; +} + +CCL_NAMESPACE_END + #endif // __KERNEL_QUEUE_H__ |