diff options
Diffstat (limited to 'intern/cycles/kernel/device/optix')
-rw-r--r-- | intern/cycles/kernel/device/optix/compat.h | 31 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/globals.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/kernel_osl.cu | 83 |
3 files changed, 97 insertions, 24 deletions
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index 1a11a533b7e..e13101f57b8 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -33,14 +33,16 @@ typedef unsigned long long uint64_t; #endif #define ccl_device \ - __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything + static __device__ \ + __forceinline__ // Function calls are bad for OptiX performance, so inline everything +#define ccl_device_extern extern "C" __device__ #define ccl_device_inline ccl_device #define ccl_device_forceinline ccl_device -#define ccl_device_inline_method ccl_device -#define ccl_device_noinline __device__ __noinline__ +#define ccl_device_inline_method __device__ __forceinline__ +#define ccl_device_noinline static __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device #define ccl_global -#define ccl_inline_constant __constant__ +#define ccl_inline_constant static __constant__ #define ccl_device_constant __constant__ __device__ #define ccl_constant const #define ccl_gpu_shared __shared__ @@ -57,23 +59,6 @@ typedef unsigned long long uint64_t; #define kernel_assert(cond) -/* GPU thread, block, grid size and index */ - -#define ccl_gpu_thread_idx_x (threadIdx.x) -#define ccl_gpu_block_dim_x (blockDim.x) -#define ccl_gpu_block_idx_x (blockIdx.x) -#define ccl_gpu_grid_dim_x (gridDim.x) -#define ccl_gpu_warp_size (warpSize) -#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) - -#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x) -#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x) - -/* GPU warp synchronization. */ - -#define ccl_gpu_syncthreads() __syncthreads() -#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate) - /* GPU texture objects */ typedef unsigned long long CUtexObject; @@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D typedef unsigned short half; -__device__ half __float2half(const float f) +ccl_device_forceinline half __float2half(const float f) { half val; asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); return val; } -__device__ float __half2float(const half h) +ccl_device_forceinline float __half2float(const half h) { float val; asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h)); diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h index 7af2e421378..126df74bc8c 100644 --- a/intern/cycles/kernel/device/optix/globals.h +++ b/intern/cycles/kernel/device/optix/globals.h @@ -25,6 +25,7 @@ struct KernelParamsOptiX { /* Kernel arguments */ const int *path_index_array; float *render_buffer; + int offset; /* Global scene data and textures */ KernelData data; @@ -36,7 +37,11 @@ struct KernelParamsOptiX { }; #ifdef __NVCC__ -extern "C" static __constant__ KernelParamsOptiX kernel_params; +extern "C" +# ifndef __CUDACC_RDC__ + static +# endif + __constant__ KernelParamsOptiX kernel_params; #endif /* Abstraction macros */ diff --git a/intern/cycles/kernel/device/optix/kernel_osl.cu b/intern/cycles/kernel/device/optix/kernel_osl.cu new file mode 100644 index 00000000000..0f3f477935b --- /dev/null +++ b/intern/cycles/kernel/device/optix/kernel_osl.cu @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#define WITH_OSL + +/* Copy of the regular OptiX kernels with additional OSL support. */ + +#include "kernel/device/optix/kernel_shader_raytrace.cu" + +#include "kernel/bake/bake.h" +#include "kernel/integrator/shade_background.h" +#include "kernel/integrator/shade_light.h" +#include "kernel/integrator/shade_shadow.h" +#include "kernel/integrator/shade_volume.h" + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_background() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; + integrator_shade_background(nullptr, path_index, kernel_params.render_buffer); +} + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_light() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; + integrator_shade_light(nullptr, path_index, kernel_params.render_buffer); +} + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; + integrator_shade_surface(nullptr, path_index, kernel_params.render_buffer); +} + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_volume() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; + integrator_shade_volume(nullptr, path_index, kernel_params.render_buffer); +} + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_shadow() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; + integrator_shade_shadow(nullptr, path_index, kernel_params.render_buffer); +} + +extern "C" __global__ void __raygen__kernel_optix_shader_eval_displace() +{ + KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array; + float *const output = kernel_params.render_buffer; + const int global_index = kernel_params.offset + optixGetLaunchIndex().x; + kernel_displace_evaluate(nullptr, input, output, global_index); +} + +extern "C" __global__ void __raygen__kernel_optix_shader_eval_background() +{ + KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array; + float *const output = kernel_params.render_buffer; + const int global_index = kernel_params.offset + optixGetLaunchIndex().x; + kernel_background_evaluate(nullptr, input, output, global_index); +} + +extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_transparency() +{ + KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array; + float *const output = kernel_params.render_buffer; + const int global_index = kernel_params.offset + optixGetLaunchIndex().x; + kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index); +} |