diff options
Diffstat (limited to 'intern/cycles/device/optix/queue.cpp')
-rw-r--r-- | intern/cycles/device/optix/queue.cpp | 90 |
1 files changed, 74 insertions, 16 deletions
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp index 3bc547ed11d..1bfd154d449 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -24,21 +24,33 @@ void OptiXDeviceQueue::init_execution() CUDADeviceQueue::init_execution(); } -static bool is_optix_specific_kernel(DeviceKernel kernel) +static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl) { - return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE || - kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || - kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); +# ifdef WITH_OSL + /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */ + if (use_osl && device_kernel_has_shading(kernel)) { + return true; + } +# else + (void)use_osl; +# endif + + return device_kernel_has_intersection(kernel); } bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, DeviceKernelArguments const &args) { - if (!is_optix_specific_kernel(kernel)) { + OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_); + +# ifdef WITH_OSL + const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use; +# else + const bool use_osl = false; +# endif + + if (!is_optix_specific_kernel(kernel, use_osl)) { return CUDADeviceQueue::enqueue(kernel, work_size, args); } @@ -50,8 +62,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const CUDAContextScope scope(cuda_device_); - OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_); - const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer; const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer; @@ -62,9 +72,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, sizeof(device_ptr), cuda_stream_)); - if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) { + if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) { cuda_device_assert( cuda_device_, cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), @@ -72,6 +80,15 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, sizeof(device_ptr), cuda_stream_)); } + if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE || + kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND || + kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) { + cuda_device_assert(cuda_device_, + cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset), + args.values[2], // &d_offset + sizeof(int32_t), + cuda_stream_)); + } cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_)); @@ -79,14 +96,35 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, OptixShaderBindingTable sbt_params = {}; switch (kernel) { + case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord); + break; case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: - pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE]; + pipeline = optix_device->pipelines[PIP_SHADE]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: - pipeline = optix_device->pipelines[PIP_SHADE_MNEE]; + pipeline = optix_device->pipelines[PIP_SHADE]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord); break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord); + break; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: pipeline = optix_device->pipelines[PIP_INTERSECT]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord); @@ -104,6 +142,20 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord); break; + case DEVICE_KERNEL_SHADER_EVAL_DISPLACE: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord); + break; + case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord); + break; + case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + + PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord); + break; + default: LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel) << " is attempted to be enqueued."; @@ -112,7 +164,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord); sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); - sbt_params.missRecordCount = NUM_MIS_PROGRAM_GROUPS; + sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS; sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord); sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS; @@ -120,6 +172,12 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS; sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord); +# ifdef WITH_OSL + if (use_osl) { + sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size()); + } +# endif + /* Launch the ray generation program. */ optix_device_assert(optix_device, optixLaunch(pipeline, |