diff options
Diffstat (limited to 'intern/cycles/kernel/device/hip')
-rw-r--r-- | intern/cycles/kernel/device/hip/compat.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/hip/config.h | 26 |
2 files changed, 26 insertions, 1 deletions
diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h index 282c3eca641..fb07602539b 100644 --- a/intern/cycles/kernel/device/hip/compat.h +++ b/intern/cycles/kernel/device/hip/compat.h @@ -74,6 +74,7 @@ typedef unsigned long long uint64_t; #define ccl_gpu_block_idx_x (blockIdx.x) #define ccl_gpu_grid_dim_x (gridDim.x) #define ccl_gpu_warp_size (warpSize) +#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x) #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x) diff --git a/intern/cycles/kernel/device/hip/config.h b/intern/cycles/kernel/device/hip/config.h index 2fde0d46015..b9dbc2f7fa8 100644 --- a/intern/cycles/kernel/device/hip/config.h +++ b/intern/cycles/kernel/device/hip/config.h @@ -36,11 +36,35 @@ /* Compute number of threads per block and minimum blocks per multiprocessor * given the maximum number of registers per thread. */ -#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \ +#define ccl_gpu_kernel_threads(block_num_threads) \ + extern "C" __global__ void __launch_bounds__(block_num_threads) + +#define ccl_gpu_kernel_threads_registers(block_num_threads, thread_num_registers) \ extern "C" __global__ void __launch_bounds__(block_num_threads, \ GPU_MULTIPRESSOR_MAX_REGISTERS / \ (block_num_threads * thread_num_registers)) +/* allow ccl_gpu_kernel to accept 1 or 2 parameters */ +#define SELECT_MACRO(_1, _2, NAME, ...) NAME +#define ccl_gpu_kernel(...) \ + SELECT_MACRO(__VA_ARGS__, ccl_gpu_kernel_threads_registers, ccl_gpu_kernel_threads)(__VA_ARGS__) + +#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__) + +#define ccl_gpu_kernel_call(x) x + +/* define a function object where "func" is the lambda body, and additional parameters are used to + * specify captured state */ +#define ccl_gpu_kernel_lambda(func, ...) \ + struct KernelLambda { \ + __VA_ARGS__; \ + __device__ int operator()(const int state) \ + { \ + return (func); \ + } \ + } ccl_gpu_kernel_lambda_pass; \ + ccl_gpu_kernel_lambda_pass + /* sanity checks */ #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS |