diff options
Diffstat (limited to 'intern/cycles/kernel/device/optix/compat.h')
-rw-r--r-- | intern/cycles/kernel/device/optix/compat.h | 31 |
1 files changed, 8 insertions, 23 deletions
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index 1a11a533b7e..e13101f57b8 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -33,14 +33,16 @@ typedef unsigned long long uint64_t; #endif #define ccl_device \ - __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything + static __device__ \ + __forceinline__ // Function calls are bad for OptiX performance, so inline everything +#define ccl_device_extern extern "C" __device__ #define ccl_device_inline ccl_device #define ccl_device_forceinline ccl_device -#define ccl_device_inline_method ccl_device -#define ccl_device_noinline __device__ __noinline__ +#define ccl_device_inline_method __device__ __forceinline__ +#define ccl_device_noinline static __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device #define ccl_global -#define ccl_inline_constant __constant__ +#define ccl_inline_constant static __constant__ #define ccl_device_constant __constant__ __device__ #define ccl_constant const #define ccl_gpu_shared __shared__ @@ -57,23 +59,6 @@ typedef unsigned long long uint64_t; #define kernel_assert(cond) -/* GPU thread, block, grid size and index */ - -#define ccl_gpu_thread_idx_x (threadIdx.x) -#define ccl_gpu_block_dim_x (blockDim.x) -#define ccl_gpu_block_idx_x (blockIdx.x) -#define ccl_gpu_grid_dim_x (gridDim.x) -#define ccl_gpu_warp_size (warpSize) -#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) - -#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x) -#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x) - -/* GPU warp synchronization. */ - -#define ccl_gpu_syncthreads() __syncthreads() -#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate) - /* GPU texture objects */ typedef unsigned long long CUtexObject; @@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D typedef unsigned short half; -__device__ half __float2half(const float f) +ccl_device_forceinline half __float2half(const float f) { half val; asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); return val; } -__device__ float __half2float(const half h) +ccl_device_forceinline float __half2float(const half h) { float val; asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h)); |