Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r-- intern/cycles/device/device_optix.cpp 147
1 files changed, 99 insertions, 48 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 95234845f98..682540a51fd 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -141,7 +141,8 @@ class OptiXDevice : public CUDADevice {
PG_BAKE, // kernel_bake_evaluate
PG_DISP, // kernel_displace_evaluate
PG_BACK, // kernel_background_evaluate
- NUM_PROGRAM_GROUPS
+ PG_CALL,
+ NUM_PROGRAM_GROUPS = PG_CALL + 3
};
// List of OptiX pipelines
@@ -334,11 +335,6 @@ class OptiXDevice : public CUDADevice {
set_error("OptiX backend does not support baking yet");
return false;
}
- // Disable shader raytracing support for now, since continuation callables are slow
- if (requested_features.use_shader_raytrace) {
- set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet");
- return false;
- }
const CUDAContextScope scope(cuContext);
@@ -410,7 +406,9 @@ class OptiXDevice : public CUDADevice {
}
{ // Load and compile PTX module with OptiX kernels
- string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+ string ptx_data, ptx_filename = path_get(requested_features.use_shader_raytrace ?
+ "lib/kernel_optix_shader_raytrace.ptx" :
+ "lib/kernel_optix.ptx");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
if (!getenv("OPTIX_ROOT_DIR")) {
set_error(
@@ -525,6 +523,21 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_BACK].raygen.entryFunctionName = "__raygen__kernel_optix_background";
}
+ // Shader raytracing replaces some functions with direct callables
+ if (requested_features.use_shader_raytrace) {
+ group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL + 0].callables.moduleDC = optix_module;
+ group_descs[PG_CALL + 0].callables.entryFunctionNameDC = "__direct_callable__svm_eval_nodes";
+ group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL + 1].callables.moduleDC = optix_module;
+ group_descs[PG_CALL + 1].callables.entryFunctionNameDC =
+ "__direct_callable__kernel_volume_shadow";
+ group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL + 2].callables.moduleDC = optix_module;
+ group_descs[PG_CALL + 2].callables.entryFunctionNameDC =
+ "__direct_callable__subsurface_scatter_multi_setup";
+ }
+
check_result_optix_ret(optixProgramGroupCreate(
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
@@ -564,33 +577,51 @@ class OptiXDevice : public CUDADevice {
# endif
{ // Create path tracing pipeline
- OptixProgramGroup pipeline_groups[] = {
- groups[PG_RGEN],
- groups[PG_MISS],
- groups[PG_HITD],
- groups[PG_HITS],
- groups[PG_HITL],
+ vector<OptixProgramGroup> pipeline_groups;
+ pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+ pipeline_groups.push_back(groups[PG_RGEN]);
+ pipeline_groups.push_back(groups[PG_MISS]);
+ pipeline_groups.push_back(groups[PG_HITD]);
+ pipeline_groups.push_back(groups[PG_HITS]);
+ pipeline_groups.push_back(groups[PG_HITL]);
# if OPTIX_ABI_VERSION >= 36
- groups[PG_HITD_MOTION],
- groups[PG_HITS_MOTION],
+ if (motion_blur) {
+ pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+ pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+ }
# endif
- };
- check_result_optix_ret(
- optixPipelineCreate(context,
- &pipeline_options,
- &link_options,
- pipeline_groups,
- (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
- nullptr,
- 0,
- &pipelines[PIP_PATH_TRACE]));
+ if (requested_features.use_shader_raytrace) {
+ pipeline_groups.push_back(groups[PG_CALL + 0]);
+ pipeline_groups.push_back(groups[PG_CALL + 1]);
+ pipeline_groups.push_back(groups[PG_CALL + 2]);
+ }
+
+ check_result_optix_ret(optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups.data(),
+ pipeline_groups.size(),
+ nullptr,
+ 0,
+ &pipelines[PIP_PATH_TRACE]));
// Combine ray generation and trace continuation stack size
const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
+ // Max direct callable depth is one of the following, so combine accordingly
+ // - __raygen__ -> svm_eval_nodes
+ // - __raygen__ -> kernel_volume_shadow -> svm_eval_nodes
+ // - __raygen__ -> subsurface_scatter_multi_setup -> svm_eval_nodes
+ const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
+ std::max(stack_size[PG_CALL + 1].dssDC,
+ stack_size[PG_CALL + 2].dssDC);
// Set stack size depending on pipeline options
check_result_optix_ret(
- optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
+ optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE],
+ 0,
+ requested_features.use_shader_raytrace ? dss : 0,
+ css,
+ motion_blur ? 3 : 2));
}
// Only need to create shader evaluation pipeline if one of these features is used:
@@ -599,37 +630,51 @@ class OptiXDevice : public CUDADevice {
requested_features.use_true_displacement;
if (use_shader_eval_pipeline) { // Create shader evaluation pipeline
- OptixProgramGroup pipeline_groups[] = {
- groups[PG_BAKE],
- groups[PG_DISP],
- groups[PG_BACK],
- groups[PG_MISS],
- groups[PG_HITD],
- groups[PG_HITS],
- groups[PG_HITL],
+ vector<OptixProgramGroup> pipeline_groups;
+ pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+ pipeline_groups.push_back(groups[PG_BAKE]);
+ pipeline_groups.push_back(groups[PG_DISP]);
+ pipeline_groups.push_back(groups[PG_BACK]);
+ pipeline_groups.push_back(groups[PG_MISS]);
+ pipeline_groups.push_back(groups[PG_HITD]);
+ pipeline_groups.push_back(groups[PG_HITS]);
+ pipeline_groups.push_back(groups[PG_HITL]);
# if OPTIX_ABI_VERSION >= 36
- groups[PG_HITD_MOTION],
- groups[PG_HITS_MOTION],
+ if (motion_blur) {
+ pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+ pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+ }
# endif
- };
- check_result_optix_ret(
- optixPipelineCreate(context,
- &pipeline_options,
- &link_options,
- pipeline_groups,
- (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
- nullptr,
- 0,
- &pipelines[PIP_SHADER_EVAL]));
+ if (requested_features.use_shader_raytrace) {
+ pipeline_groups.push_back(groups[PG_CALL + 0]);
+ pipeline_groups.push_back(groups[PG_CALL + 1]);
+ pipeline_groups.push_back(groups[PG_CALL + 2]);
+ }
+
+ check_result_optix_ret(optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups.data(),
+ pipeline_groups.size(),
+ nullptr,
+ 0,
+ &pipelines[PIP_SHADER_EVAL]));
// Calculate continuation stack size based on the maximum of all ray generation stack sizes
const unsigned int css = std::max(stack_size[PG_BAKE].cssRG,
std::max(stack_size[PG_DISP].cssRG,
stack_size[PG_BACK].cssRG)) +
link_options.maxTraceDepth * trace_css;
+ const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
+ std::max(stack_size[PG_CALL + 1].dssDC,
+ stack_size[PG_CALL + 2].dssDC);
- check_result_optix_ret(optixPipelineSetStackSize(
- pipelines[PIP_SHADER_EVAL], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
+ check_result_optix_ret(
+ optixPipelineSetStackSize(pipelines[PIP_SHADER_EVAL],
+ 0,
+ requested_features.use_shader_raytrace ? dss : 0,
+ css,
+ motion_blur ? 3 : 2));
}
// Clean up program group objects
@@ -734,6 +779,9 @@ class OptiXDevice : public CUDADevice {
# else
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
# endif
+ sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord);
+ sbt_params.callablesRecordCount = 3;
+ sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
// Launch the ray generation program
check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
@@ -1061,6 +1109,9 @@ class OptiXDevice : public CUDADevice {
# else
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
# endif
+ sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord);
+ sbt_params.callablesRecordCount = 3;
+ sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
cuda_stream[thread_index],