diff options
author | Weizhen Huang <itsnotrj@gmail.com> | 2022-11-11 18:17:58 +0300 |
---|---|---|
committer | Weizhen Huang <itsnotrj@gmail.com> | 2022-11-11 18:17:58 +0300 |
commit | dad3574a8c0b673efa9d7f7c890d9562e3f6a267 (patch) | |
tree | 84dff4a90c34a44744c329de21a999289f9d209a /intern/cycles/kernel/device/optix/compat.h | |
parent | e9260afa93068d0181de6e3223f65e7bb016aecc (diff) | |
parent | dc8a1d38b73420428f9fb878386b99ebb69dd841 (diff) |
Merge branch 'master' into soc-2022-many-lights-sampling
Diffstat (limited to 'intern/cycles/kernel/device/optix/compat.h')
-rw-r--r-- | intern/cycles/kernel/device/optix/compat.h | 31 |
1 file changed, 8 insertions, 23 deletions
```diff
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index 1a11a533b7e..e13101f57b8 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -33,14 +33,16 @@ typedef unsigned long long uint64_t;
 #endif
 
 #define ccl_device \
-  __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+  static __device__ \
+      __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+#define ccl_device_extern extern "C" __device__
 #define ccl_device_inline ccl_device
 #define ccl_device_forceinline ccl_device
-#define ccl_device_inline_method ccl_device
-#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_inline_method __device__ __forceinline__
+#define ccl_device_noinline static __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_global
-#define ccl_inline_constant __constant__
+#define ccl_inline_constant static __constant__
 #define ccl_device_constant __constant__ __device__
 #define ccl_constant const
 #define ccl_gpu_shared __shared__
@@ -57,23 +59,6 @@ typedef unsigned long long uint64_t;
 
 #define kernel_assert(cond)
 
-/* GPU thread, block, grid size and index */
-
-#define ccl_gpu_thread_idx_x (threadIdx.x)
-#define ccl_gpu_block_dim_x (blockDim.x)
-#define ccl_gpu_block_idx_x (blockIdx.x)
-#define ccl_gpu_grid_dim_x (gridDim.x)
-#define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
-#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
-
-/* GPU warp synchronization. */
-
-#define ccl_gpu_syncthreads() __syncthreads()
-#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
-
 /* GPU texture objects */
 
 typedef unsigned long long CUtexObject;
@@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
 
 typedef unsigned short half;
 
-__device__ half __float2half(const float f)
+ccl_device_forceinline half __float2half(const float f)
 {
   half val;
   asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
   return val;
 }
 
-__device__ float __half2float(const half h)
+ccl_device_forceinline float __half2float(const half h)
 {
   float val;
   asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
```