diff options
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 147 |
1 files changed, 99 insertions, 48 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 95234845f98..682540a51fd 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -141,7 +141,8 @@ class OptiXDevice : public CUDADevice { PG_BAKE, // kernel_bake_evaluate PG_DISP, // kernel_displace_evaluate PG_BACK, // kernel_background_evaluate - NUM_PROGRAM_GROUPS + PG_CALL, + NUM_PROGRAM_GROUPS = PG_CALL + 3 }; // List of OptiX pipelines @@ -334,11 +335,6 @@ class OptiXDevice : public CUDADevice { set_error("OptiX backend does not support baking yet"); return false; } - // Disable shader raytracing support for now, since continuation callables are slow - if (requested_features.use_shader_raytrace) { - set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet"); - return false; - } const CUDAContextScope scope(cuContext); @@ -410,7 +406,9 @@ class OptiXDevice : public CUDADevice { } { // Load and compile PTX module with OptiX kernels - string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx"); + string ptx_data, ptx_filename = path_get(requested_features.use_shader_raytrace ? + "lib/kernel_optix_shader_raytrace.ptx" : + "lib/kernel_optix.ptx"); if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { if (!getenv("OPTIX_ROOT_DIR")) { set_error( @@ -525,6 +523,21 @@ class OptiXDevice : public CUDADevice { group_descs[PG_BACK].raygen.entryFunctionName = "__raygen__kernel_optix_background"; } + // Shader raytracing replaces some functions with direct callables + if (requested_features.use_shader_raytrace) { + group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + group_descs[PG_CALL + 0].callables.moduleDC = optix_module; + group_descs[PG_CALL + 0].callables.entryFunctionNameDC = "__direct_callable__svm_eval_nodes"; + group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + group_descs[PG_CALL + 1].callables.moduleDC = optix_module; + group_descs[PG_CALL + 1].callables.entryFunctionNameDC = + "__direct_callable__kernel_volume_shadow"; + group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; + group_descs[PG_CALL + 2].callables.moduleDC = optix_module; + group_descs[PG_CALL + 2].callables.entryFunctionNameDC = + "__direct_callable__subsurface_scatter_multi_setup"; + } + check_result_optix_ret(optixProgramGroupCreate( context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups)); @@ -564,33 +577,51 @@ class OptiXDevice : public CUDADevice { # endif { // Create path tracing pipeline - OptixProgramGroup pipeline_groups[] = { - groups[PG_RGEN], - groups[PG_MISS], - groups[PG_HITD], - groups[PG_HITS], - groups[PG_HITL], + vector<OptixProgramGroup> pipeline_groups; + pipeline_groups.reserve(NUM_PROGRAM_GROUPS); + pipeline_groups.push_back(groups[PG_RGEN]); + pipeline_groups.push_back(groups[PG_MISS]); + pipeline_groups.push_back(groups[PG_HITD]); + pipeline_groups.push_back(groups[PG_HITS]); + pipeline_groups.push_back(groups[PG_HITL]); # if OPTIX_ABI_VERSION >= 36 - groups[PG_HITD_MOTION], - groups[PG_HITS_MOTION], + if (motion_blur) { + pipeline_groups.push_back(groups[PG_HITD_MOTION]); + pipeline_groups.push_back(groups[PG_HITS_MOTION]); + } # endif - }; - check_result_optix_ret( - optixPipelineCreate(context, - &pipeline_options, - &link_options, - pipeline_groups, - (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])), - nullptr, - 0, - &pipelines[PIP_PATH_TRACE])); + if (requested_features.use_shader_raytrace) { + pipeline_groups.push_back(groups[PG_CALL + 0]); + pipeline_groups.push_back(groups[PG_CALL + 1]); + pipeline_groups.push_back(groups[PG_CALL + 2]); + } + + check_result_optix_ret(optixPipelineCreate(context, + &pipeline_options, + &link_options, + pipeline_groups.data(), + pipeline_groups.size(), + nullptr, + 0, + &pipelines[PIP_PATH_TRACE])); // Combine ray generation and trace continuation stack size const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css; + // Max direct callable depth is one of the following, so combine accordingly + // - __raygen__ -> svm_eval_nodes + // - __raygen__ -> kernel_volume_shadow -> svm_eval_nodes + // - __raygen__ -> subsurface_scatter_multi_setup -> svm_eval_nodes + const unsigned int dss = stack_size[PG_CALL + 0].dssDC + + std::max(stack_size[PG_CALL + 1].dssDC, + stack_size[PG_CALL + 2].dssDC); // Set stack size depending on pipeline options check_result_optix_ret( - optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2))); + optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], + 0, + requested_features.use_shader_raytrace ? dss : 0, + css, + motion_blur ? 3 : 2)); } // Only need to create shader evaluation pipeline if one of these features is used: @@ -599,37 +630,51 @@ class OptiXDevice : public CUDADevice { requested_features.use_true_displacement; if (use_shader_eval_pipeline) { // Create shader evaluation pipeline - OptixProgramGroup pipeline_groups[] = { - groups[PG_BAKE], - groups[PG_DISP], - groups[PG_BACK], - groups[PG_MISS], - groups[PG_HITD], - groups[PG_HITS], - groups[PG_HITL], + vector<OptixProgramGroup> pipeline_groups; + pipeline_groups.reserve(NUM_PROGRAM_GROUPS); + pipeline_groups.push_back(groups[PG_BAKE]); + pipeline_groups.push_back(groups[PG_DISP]); + pipeline_groups.push_back(groups[PG_BACK]); + pipeline_groups.push_back(groups[PG_MISS]); + pipeline_groups.push_back(groups[PG_HITD]); + pipeline_groups.push_back(groups[PG_HITS]); + pipeline_groups.push_back(groups[PG_HITL]); # if OPTIX_ABI_VERSION >= 36 - groups[PG_HITD_MOTION], - groups[PG_HITS_MOTION], + if (motion_blur) { + pipeline_groups.push_back(groups[PG_HITD_MOTION]); + pipeline_groups.push_back(groups[PG_HITS_MOTION]); + } # endif - }; - check_result_optix_ret( - optixPipelineCreate(context, - &pipeline_options, - &link_options, - pipeline_groups, - (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])), - nullptr, - 0, - &pipelines[PIP_SHADER_EVAL])); + if (requested_features.use_shader_raytrace) { + pipeline_groups.push_back(groups[PG_CALL + 0]); + pipeline_groups.push_back(groups[PG_CALL + 1]); + pipeline_groups.push_back(groups[PG_CALL + 2]); + } + + check_result_optix_ret(optixPipelineCreate(context, + &pipeline_options, + &link_options, + pipeline_groups.data(), + pipeline_groups.size(), + nullptr, + 0, + &pipelines[PIP_SHADER_EVAL])); // Calculate continuation stack size based on the maximum of all ray generation stack sizes const unsigned int css = std::max(stack_size[PG_BAKE].cssRG, std::max(stack_size[PG_DISP].cssRG, stack_size[PG_BACK].cssRG)) + link_options.maxTraceDepth * trace_css; + const unsigned int dss = stack_size[PG_CALL + 0].dssDC + + std::max(stack_size[PG_CALL + 1].dssDC, + stack_size[PG_CALL + 2].dssDC); - check_result_optix_ret(optixPipelineSetStackSize( - pipelines[PIP_SHADER_EVAL], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2))); + check_result_optix_ret( + optixPipelineSetStackSize(pipelines[PIP_SHADER_EVAL], + 0, + requested_features.use_shader_raytrace ? dss : 0, + css, + motion_blur ? 3 : 2)); } // Clean up program group objects @@ -734,6 +779,9 @@ class OptiXDevice : public CUDADevice { # else sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL # endif + sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord); + sbt_params.callablesRecordCount = 3; + sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord); // Launch the ray generation program check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE], @@ -1061,6 +1109,9 @@ class OptiXDevice : public CUDADevice { # else sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL # endif + sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord); + sbt_params.callablesRecordCount = 3; + sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord); check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL], cuda_stream[thread_index], |