Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Mours <pmours@nvidia.com>2022-11-09 16:25:32 +0300
committerPatrick Mours <pmours@nvidia.com>2022-11-09 17:30:21 +0300
commite6b38deb9dbb58118f6ee644409ce52f06eac5e5 (patch)
treeaa1c384db146094482f24c94f704742c6624db00 /intern/cycles/kernel/device
parentefe073f57c34b438d21750795e97458a3d007be7 (diff)
Cycles: Add basic support for using OSL with OptiX
This patch generalizes the OSL support in Cycles to include GPU device types and adds an implementation for that in the OptiX device. There are some caveats still, including simplified texturing due to lack of OIIO on the GPU and a few missing OSL intrinsics. Note that this is incomplete and missing an update to the OSL library before being enabled! The implementation is already committed now to simplify further development. Maniphest Tasks: T101222 Differential Revision: https://developer.blender.org/D15902
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r--intern/cycles/kernel/device/cuda/compat.h5
-rw-r--r--intern/cycles/kernel/device/hip/compat.h1
-rw-r--r--intern/cycles/kernel/device/metal/compat.h1
-rw-r--r--intern/cycles/kernel/device/oneapi/compat.h1
-rw-r--r--intern/cycles/kernel/device/optix/compat.h31
-rw-r--r--intern/cycles/kernel/device/optix/globals.h7
-rw-r--r--intern/cycles/kernel/device/optix/kernel_osl.cu83
7 files changed, 103 insertions, 26 deletions
diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h
index 51e1381d552..3a950779c11 100644
--- a/intern/cycles/kernel/device/cuda/compat.h
+++ b/intern/cycles/kernel/device/cuda/compat.h
@@ -30,6 +30,7 @@ typedef unsigned long long uint64_t;
/* Qualifiers */
#define ccl_device __device__ __inline__
+#define ccl_device_extern extern "C" __device__
#if __CUDA_ARCH__ < 500
# define ccl_device_inline __device__ __forceinline__
# define ccl_device_forceinline __device__ __forceinline__
@@ -109,14 +110,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
typedef unsigned short half;
-__device__ half __float2half(const float f)
+ccl_device_forceinline half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}
-__device__ float __half2float(const half h)
+ccl_device_forceinline float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h
index 648988c31b6..8755395c82c 100644
--- a/intern/cycles/kernel/device/hip/compat.h
+++ b/intern/cycles/kernel/device/hip/compat.h
@@ -28,6 +28,7 @@ typedef unsigned long long uint64_t;
/* Qualifiers */
#define ccl_device __device__ __inline__
+#define ccl_device_extern extern "C" __device__
#define ccl_device_inline __device__ __inline__
#define ccl_device_forceinline __device__ __forceinline__
#define ccl_device_noinline __device__ __noinline__
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index f689e93e5a2..2dd6cc98b59 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -38,6 +38,7 @@ using namespace metal::raytracing;
# define ccl_device_noinline ccl_device __attribute__((noinline))
#endif
+#define ccl_device_extern extern "C"
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index dfaec65130c..b83512180d7 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -28,6 +28,7 @@
/* Qualifier wrappers for different names on different devices */
#define ccl_device
+#define ccl_device_extern extern "C"
#define ccl_global
#define ccl_always_inline __attribute__((always_inline))
#define ccl_device_inline inline
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index 1a11a533b7e..e13101f57b8 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -33,14 +33,16 @@ typedef unsigned long long uint64_t;
#endif
#define ccl_device \
- __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+ static __device__ \
+ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+#define ccl_device_extern extern "C" __device__
#define ccl_device_inline ccl_device
#define ccl_device_forceinline ccl_device
-#define ccl_device_inline_method ccl_device
-#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_inline_method __device__ __forceinline__
+#define ccl_device_noinline static __device__ __noinline__
#define ccl_device_noinline_cpu ccl_device
#define ccl_global
-#define ccl_inline_constant __constant__
+#define ccl_inline_constant static __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_constant const
#define ccl_gpu_shared __shared__
@@ -57,23 +59,6 @@ typedef unsigned long long uint64_t;
#define kernel_assert(cond)
-/* GPU thread, block, grid size and index */
-
-#define ccl_gpu_thread_idx_x (threadIdx.x)
-#define ccl_gpu_block_dim_x (blockDim.x)
-#define ccl_gpu_block_idx_x (blockIdx.x)
-#define ccl_gpu_grid_dim_x (gridDim.x)
-#define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
-#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
-
-/* GPU warp synchronization. */
-
-#define ccl_gpu_syncthreads() __syncthreads()
-#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
-
/* GPU texture objects */
typedef unsigned long long CUtexObject;
@@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
typedef unsigned short half;
-__device__ half __float2half(const float f)
+ccl_device_forceinline half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}
-__device__ float __half2float(const half h)
+ccl_device_forceinline float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h
index 7af2e421378..126df74bc8c 100644
--- a/intern/cycles/kernel/device/optix/globals.h
+++ b/intern/cycles/kernel/device/optix/globals.h
@@ -25,6 +25,7 @@ struct KernelParamsOptiX {
/* Kernel arguments */
const int *path_index_array;
float *render_buffer;
+ int offset;
/* Global scene data and textures */
KernelData data;
@@ -36,7 +37,11 @@ struct KernelParamsOptiX {
};
#ifdef __NVCC__
-extern "C" static __constant__ KernelParamsOptiX kernel_params;
+extern "C"
+# ifndef __CUDACC_RDC__
+ static
+# endif
+ __constant__ KernelParamsOptiX kernel_params;
#endif
/* Abstraction macros */
diff --git a/intern/cycles/kernel/device/optix/kernel_osl.cu b/intern/cycles/kernel/device/optix/kernel_osl.cu
new file mode 100644
index 00000000000..0f3f477935b
--- /dev/null
+++ b/intern/cycles/kernel/device/optix/kernel_osl.cu
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#define WITH_OSL
+
+/* Copy of the regular OptiX kernels with additional OSL support. */
+
+#include "kernel/device/optix/kernel_shader_raytrace.cu"
+
+#include "kernel/bake/bake.h"
+#include "kernel/integrator/shade_background.h"
+#include "kernel/integrator/shade_light.h"
+#include "kernel/integrator/shade_shadow.h"
+#include "kernel/integrator/shade_volume.h"
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_background()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_background(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_light()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_light(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_surface(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_volume()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_volume(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_shadow()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_shadow(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_shader_eval_displace()
+{
+ KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
+ float *const output = kernel_params.render_buffer;
+ const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
+ kernel_displace_evaluate(nullptr, input, output, global_index);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_shader_eval_background()
+{
+ KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
+ float *const output = kernel_params.render_buffer;
+ const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
+ kernel_background_evaluate(nullptr, input, output, global_index);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_transparency()
+{
+ KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
+ float *const output = kernel_params.render_buffer;
+ const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
+ kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index);
+}