Cycles: Add basic support for using OSL with OptiX

This patch generalizes the OSL support in Cycles to include GPU device types and adds an implementation for that in the OptiX device. There are still some caveats, including simplified texturing due to the lack of OIIO on the GPU and a few missing OSL intrinsics. Note that this is incomplete and still requires an update to the OSL library before it can be enabled! The implementation is committed now anyway to simplify further development. Maniphest Tasks: T101222 Differential Revision: https://developer.blender.org/D15902
author: Patrick Mours <pmours@nvidia.com> 2022-11-09 16:25:32 +0300
committer: Patrick Mours <pmours@nvidia.com> 2022-11-09 17:30:21 +0300
commit: e6b38deb9dbb58118f6ee644409ce52f06eac5e5 (patch)
tree: aa1c384db146094482f24c94f704742c6624db00 /intern/cycles/device/optix/queue.cpp
parent: efe073f57c34b438d21750795e97458a3d007be7 (diff)
1 files changed, 74 insertions, 16 deletions
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index 3bc547ed11d..1bfd154d449 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -24,21 +24,33 @@ void OptiXDeviceQueue::init_execution()
   CUDADeviceQueue::init_execution();
 }
 
-static bool is_optix_specific_kernel(DeviceKernel kernel)
+static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl)
 {
-  return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
-          kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
-          kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
-          kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
-          kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
-          kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+#  ifdef WITH_OSL
+  /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
+  if (use_osl && device_kernel_has_shading(kernel)) {
+    return true;
+  }
+#  else
+  (void)use_osl;
+#  endif
+
+  return device_kernel_has_intersection(kernel);
 }
 
 bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
                                const int work_size,
                                DeviceKernelArguments const &args)
 {
-  if (!is_optix_specific_kernel(kernel)) {
+  OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
+
+#  ifdef WITH_OSL
+  const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use;
+#  else
+  const bool use_osl = false;
+#  endif
+
+  if (!is_optix_specific_kernel(kernel, use_osl)) {
     return CUDADeviceQueue::enqueue(kernel, work_size, args);
   }
 
@@ -50,8 +62,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
 
   const CUDAContextScope scope(cuda_device_);
 
-  OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
-
   const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
   const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
 
@@ -62,9 +72,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
                         sizeof(device_ptr),
                         cuda_stream_));
 
-  if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
-      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
-      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
+  if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) {
     cuda_device_assert(
         cuda_device_,
         cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -72,6 +80,15 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
                           sizeof(device_ptr),
                           cuda_stream_));
   }
+  if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
+      kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
+      kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) {
+    cuda_device_assert(cuda_device_,
+                       cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset),
+                                         args.values[2],  // &d_offset
+                                         sizeof(int32_t),
+                                         cuda_stream_));
+  }
 
   cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
 
@@ -79,14 +96,35 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
   OptixShaderBindingTable sbt_params = {};
 
   switch (kernel) {
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
+      break;
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
+      break;
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
+      break;
     case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
-      pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
+      pipeline = optix_device->pipelines[PIP_SHADE];
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
       break;
     case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
-      pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
+      pipeline = optix_device->pipelines[PIP_SHADE];
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
       break;
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
+      break;
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
+      break;
+
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
       pipeline = optix_device->pipelines[PIP_INTERSECT];
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
@@ -104,6 +142,20 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
       break;
 
+    case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
+      break;
+    case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
+      break;
+    case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
+      pipeline = optix_device->pipelines[PIP_SHADE];
+      sbt_params.raygenRecord = sbt_data_ptr +
+                                PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
+      break;
+
     default:
       LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
                  << " is attempted to be enqueued.";
@@ -112,7 +164,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
 
   sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
   sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
-  sbt_params.missRecordCount = NUM_MIS_PROGRAM_GROUPS;
+  sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
   sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
   sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
   sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
@@ -120,6 +172,12 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
   sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
   sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
 
+#  ifdef WITH_OSL
+  if (use_osl) {
+    sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
+  }
+#  endif
+
   /* Launch the ray generation program. */
   optix_device_assert(optix_device,
                       optixLaunch(pipeline,
author	Patrick Mours <pmours@nvidia.com>	2022-11-09 16:25:32 +0300
committer	Patrick Mours <pmours@nvidia.com>	2022-11-09 17:30:21 +0300
commit	e6b38deb9dbb58118f6ee644409ce52f06eac5e5 (patch)
tree	aa1c384db146094482f24c94f704742c6624db00 /intern/cycles/device/optix/queue.cpp
parent	efe073f57c34b438d21750795e97458a3d007be7 (diff)