Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Mours <pmours@nvidia.com>2022-11-09 16:25:32 +0300
committerPatrick Mours <pmours@nvidia.com>2022-11-09 17:30:21 +0300
commite6b38deb9dbb58118f6ee644409ce52f06eac5e5 (patch)
treeaa1c384db146094482f24c94f704742c6624db00 /intern/cycles/device/optix/device_impl.cpp
parentefe073f57c34b438d21750795e97458a3d007be7 (diff)
Cycles: Add basic support for using OSL with OptiX
This patch generalizes the OSL support in Cycles to include GPU device types and adds an implementation for that in the OptiX device. There are some caveats still, including simplified texturing due to lack of OIIO on the GPU and a few missing OSL intrinsics. Note that this is incomplete and missing an update to the OSL library before being enabled! The implementation is already committed now to simplify further development. Maniphest Tasks: T101222 Differential Revision: https://developer.blender.org/D15902
Diffstat (limited to 'intern/cycles/device/optix/device_impl.cpp')
-rw-r--r--intern/cycles/device/optix/device_impl.cpp477
1 files changed, 377 insertions, 100 deletions
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index fabf4d7b69d..02f34bf3bd0 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -312,16 +312,34 @@ OptiXDevice::~OptiXDevice()
if (optix_module != NULL) {
optixModuleDestroy(optix_module);
}
- for (unsigned int i = 0; i < 2; ++i) {
+ for (int i = 0; i < 2; ++i) {
if (builtin_modules[i] != NULL) {
optixModuleDestroy(builtin_modules[i]);
}
}
- for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
+ for (int i = 0; i < NUM_PIPELINES; ++i) {
if (pipelines[i] != NULL) {
optixPipelineDestroy(pipelines[i]);
}
}
+ for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
+ if (groups[i] != NULL) {
+ optixProgramGroupDestroy(groups[i]);
+ }
+ }
+
+# ifdef WITH_OSL
+ for (const OptixModule &module : osl_modules) {
+ if (module != NULL) {
+ optixModuleDestroy(module);
+ }
+ }
+ for (const OptixProgramGroup &group : osl_groups) {
+ if (group != NULL) {
+ optixProgramGroupDestroy(group);
+ }
+ }
+# endif
/* Make sure denoiser is destroyed before device context! */
if (denoiser_.optix_denoiser != nullptr) {
@@ -381,6 +399,12 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
return false;
}
+# ifdef WITH_OSL
+ const bool use_osl = (kernel_features & KERNEL_FEATURE_OSL);
+# else
+ const bool use_osl = false;
+# endif
+
/* Skip creating OptiX module if only doing denoising. */
const bool need_optix_kernels = (kernel_features &
(KERNEL_FEATURE_PATH_TRACING | KERNEL_FEATURE_BAKING));
@@ -388,12 +412,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
/* Detect existence of OptiX kernel and SDK here early. So we can error out
* before compiling the CUDA kernels, to avoid failing right after when
* compiling the OptiX kernel. */
+ string suffix = use_osl ? "_osl" :
+ (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+ "_shader_raytrace" :
+ "";
string ptx_filename;
if (need_optix_kernels) {
- ptx_filename = path_get(
- (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
- "lib/kernel_optix_shader_raytrace.ptx" :
- "lib/kernel_optix.ptx");
+ ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
std::string optix_include_dir = get_optix_include_dir();
if (optix_include_dir.empty()) {
@@ -429,18 +454,41 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
optixModuleDestroy(optix_module);
optix_module = NULL;
}
- for (unsigned int i = 0; i < 2; ++i) {
+ for (int i = 0; i < 2; ++i) {
if (builtin_modules[i] != NULL) {
optixModuleDestroy(builtin_modules[i]);
builtin_modules[i] = NULL;
}
}
- for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
+ for (int i = 0; i < NUM_PIPELINES; ++i) {
if (pipelines[i] != NULL) {
optixPipelineDestroy(pipelines[i]);
pipelines[i] = NULL;
}
}
+ for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
+ if (groups[i] != NULL) {
+ optixProgramGroupDestroy(groups[i]);
+ groups[i] = NULL;
+ }
+ }
+
+# ifdef WITH_OSL
+ /* Recreating base OptiX module invalidates all OSL modules too, since they link against it. */
+ for (const OptixModule &module : osl_modules) {
+ if (module != NULL) {
+ optixModuleDestroy(module);
+ }
+ }
+ osl_modules.clear();
+
+ for (const OptixProgramGroup &group : osl_groups) {
+ if (group != NULL) {
+ optixProgramGroupDestroy(group);
+ }
+ }
+ osl_groups.clear();
+# endif
OptixModuleCompileOptions module_options = {};
module_options.maxRegisterCount = 0; /* Do not set an explicit register limit. */
@@ -461,7 +509,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
module_options.numPayloadTypes = 0;
# endif
- OptixPipelineCompileOptions pipeline_options = {};
/* Default to no motion blur and two-level graph, since it is the fastest option. */
pipeline_options.usesMotionBlur = false;
pipeline_options.traversableGraphFlags =
@@ -490,9 +537,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
* This is necessary since objects may be reported to have motion if the Vector pass is
* active, but may still need to be rendered without motion blur if that isn't active as well. */
- motion_blur = (kernel_features & KERNEL_FEATURE_OBJECT_MOTION) != 0;
-
- if (motion_blur) {
+ if (kernel_features & KERNEL_FEATURE_OBJECT_MOTION) {
pipeline_options.usesMotionBlur = true;
/* Motion blur can insert motion transforms into the traversal graph.
* It is no longer a two-level graph then, so need to set flags to allow any configuration. */
@@ -503,13 +548,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
string ptx_data;
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
string cflags = compile_kernel_get_common_cflags(kernel_features);
- ptx_filename = compile_kernel(
- cflags,
- (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
- "kernel_shader_raytrace" :
- "kernel",
- "optix",
- true);
+ ptx_filename = compile_kernel(cflags, ("kernel" + suffix).c_str(), "optix", true);
}
if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
@@ -551,7 +590,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
/* Create program groups. */
- OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
OptixProgramGroupOptions group_options = {}; /* There are no options currently. */
group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
@@ -609,7 +647,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
- if (motion_blur) {
+ if (pipeline_options.usesMotionBlur) {
builtin_options.usesMotionBlur = true;
optix_assert(optixBuiltinISModuleGet(
@@ -630,7 +668,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
}
- /* Pointclouds */
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
@@ -642,8 +679,8 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
}
+ /* Add hit group for local intersections. */
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
- /* Add hit group for local intersections. */
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
@@ -655,16 +692,19 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName =
"__raygen__kernel_optix_integrator_shade_surface_raytrace";
- group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
- group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
- group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao";
- group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
- group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
- group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
- "__direct_callable__svm_node_bevel";
+
+ /* Kernels with OSL support are built without SVM, so can skip those direct callables there. */
+ if (!use_osl) {
+ group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
+ group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao";
+ group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
+ group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
+ "__direct_callable__svm_node_bevel";
+ }
}
- /* MNEE. */
if (kernel_features & KERNEL_FEATURE_MNEE) {
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
@@ -672,6 +712,42 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
"__raygen__kernel_optix_integrator_shade_surface_mnee";
}
+ /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
+ if (use_osl) {
+ group_descs[PG_RGEN_SHADE_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_background";
+ group_descs[PG_RGEN_SHADE_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_LIGHT].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_LIGHT].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_light";
+ group_descs[PG_RGEN_SHADE_SURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_SURFACE].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_SURFACE].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_surface";
+ group_descs[PG_RGEN_SHADE_VOLUME].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_VOLUME].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_VOLUME].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_volume";
+ group_descs[PG_RGEN_SHADE_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_SHADOW].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_SHADOW].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_shadow";
+ group_descs[PG_RGEN_EVAL_DISPLACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_EVAL_DISPLACE].raygen.module = optix_module;
+ group_descs[PG_RGEN_EVAL_DISPLACE].raygen.entryFunctionName =
+ "__raygen__kernel_optix_shader_eval_displace";
+ group_descs[PG_RGEN_EVAL_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.module = optix_module;
+ group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.entryFunctionName =
+ "__raygen__kernel_optix_shader_eval_background";
+ group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module;
+ group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName =
+ "__raygen__kernel_optix_shader_eval_curve_shadow_transparency";
+ }
+
optix_assert(optixProgramGroupCreate(
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
@@ -680,7 +756,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
/* Set up SBT, which in this case is used only to select between different programs. */
sbt_data.alloc(NUM_PROGRAM_GROUPS);
memset(sbt_data.host_pointer, 0, sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
- for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
+ for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
}
@@ -704,25 +780,26 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
OptixPipelineLinkOptions link_options = {};
link_options.maxTraceDepth = 1;
+ link_options.debugLevel = module_options.debugLevel;
- if (DebugFlags().optix.use_debug) {
- link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
- }
- else {
- link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
- }
-
- if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
- /* Create shader raytracing pipeline. */
+ if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE) && !use_osl) {
+ /* Create shader raytracing and MNEE pipeline. */
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
- pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
+ if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
+ pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
+ pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
+ }
+ if (kernel_features & KERNEL_FEATURE_MNEE) {
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
+ }
pipeline_groups.push_back(groups[PG_MISS]);
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
- if (motion_blur) {
+ if (pipeline_options.usesMotionBlur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
@@ -730,8 +807,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
}
- pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
- pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
optix_assert(optixPipelineCreate(context,
&pipeline_options,
@@ -740,30 +815,33 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.size(),
nullptr,
0,
- &pipelines[PIP_SHADE_RAYTRACE]));
+ &pipelines[PIP_SHADE]));
/* Combine ray generation and trace continuation stack size. */
- const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG +
+ const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
+ stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG) +
link_options.maxTraceDepth * trace_css;
const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
stack_size[PG_CALL_SVM_BEVEL].dssDC);
/* Set stack size depending on pipeline options. */
optix_assert(optixPipelineSetStackSize(
- pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
+ pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
}
- if (kernel_features & KERNEL_FEATURE_MNEE) {
- /* Create MNEE pipeline. */
+ { /* Create intersection-only pipeline. */
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
- pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
+ pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]);
+ pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]);
+ pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]);
+ pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]);
pipeline_groups.push_back(groups[PG_MISS]);
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
- if (motion_blur) {
+ if (pipeline_options.usesMotionBlur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
@@ -771,8 +849,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
}
- pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
- pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
optix_assert(optixPipelineCreate(context,
&pipeline_options,
@@ -781,37 +857,234 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.size(),
nullptr,
0,
- &pipelines[PIP_SHADE_MNEE]));
+ &pipelines[PIP_INTERSECT]));
- /* Combine ray generation and trace continuation stack size. */
- const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
- link_options.maxTraceDepth * trace_css;
- const unsigned int dss = 0;
+ /* Calculate continuation stack size based on the maximum of all ray generation stack sizes. */
+ const unsigned int css =
+ std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG,
+ std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG,
+ std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG,
+ stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) +
+ link_options.maxTraceDepth * trace_css;
- /* Set stack size depending on pipeline options. */
- optix_assert(
- optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
+ optix_assert(optixPipelineSetStackSize(
+ pipelines[PIP_INTERSECT], 0, 0, css, pipeline_options.usesMotionBlur ? 3 : 2));
}
- { /* Create intersection-only pipeline. */
+ return !have_error();
+}
+
+bool OptiXDevice::load_osl_kernels()
+{
+# ifdef WITH_OSL
+ if (have_error()) {
+ return false;
+ }
+
+ struct OSLKernel {
+ string ptx;
+ string init_entry;
+ string exec_entry;
+ };
+
+ /* This has to be in the same order as the ShaderType enum, so that the index calculation in
+ * osl_eval_nodes checks out */
+ vector<OSLKernel> osl_kernels;
+
+ for (ShaderType type = SHADER_TYPE_SURFACE; type <= SHADER_TYPE_BUMP;
+ type = static_cast<ShaderType>(type + 1)) {
+ const vector<OSL::ShaderGroupRef> &groups = (type == SHADER_TYPE_SURFACE ?
+ osl_globals.surface_state :
+ type == SHADER_TYPE_VOLUME ?
+ osl_globals.volume_state :
+ type == SHADER_TYPE_DISPLACEMENT ?
+ osl_globals.displacement_state :
+ osl_globals.bump_state);
+ for (const OSL::ShaderGroupRef &group : groups) {
+ if (group) {
+ string osl_ptx, init_name, entry_name;
+ osl_globals.ss->getattribute(group.get(), "group_init_name", init_name);
+ osl_globals.ss->getattribute(group.get(), "group_entry_name", entry_name);
+ osl_globals.ss->getattribute(
+ group.get(), "ptx_compiled_version", OSL::TypeDesc::PTR, &osl_ptx);
+
+ int groupdata_size = 0;
+ osl_globals.ss->getattribute(group.get(), "groupdata_size", groupdata_size);
+ if (groupdata_size > 2048) { /* See 'group_data' array in kernel/osl/osl.h */
+ set_error(
+ string_printf("Requested OSL group data size (%d) is greater than the maximum "
+ "supported with OptiX (2048)",
+ groupdata_size));
+ return false;
+ }
+
+ osl_kernels.push_back({std::move(osl_ptx), std::move(init_name), std::move(entry_name)});
+ }
+ else {
+ /* Add empty entry for non-existent shader groups, so that the index stays stable. */
+ osl_kernels.emplace_back();
+ }
+ }
+ }
+
+ const CUDAContextScope scope(this);
+
+ if (pipelines[PIP_SHADE]) {
+ optixPipelineDestroy(pipelines[PIP_SHADE]);
+ }
+
+ for (OptixModule &module : osl_modules) {
+ if (module != NULL) {
+ optixModuleDestroy(module);
+ module = NULL;
+ }
+ }
+ for (OptixProgramGroup &group : osl_groups) {
+ if (group != NULL) {
+ optixProgramGroupDestroy(group);
+ group = NULL;
+ }
+ }
+
+ OptixProgramGroupOptions group_options = {}; /* There are no options currently. */
+ OptixModuleCompileOptions module_options = {};
+ module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
+ module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
+
+ osl_groups.resize(osl_kernels.size() * 2 + 1);
+ osl_modules.resize(osl_kernels.size() + 1);
+
+ { /* Load and compile PTX module with OSL services. */
+ string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx");
+ if (!path_read_text(ptx_filename, ptx_data)) {
+ set_error(string_printf("Failed to load OptiX OSL services kernel from '%s'",
+ ptx_filename.c_str()));
+ return false;
+ }
+
+ const OptixResult result = optixModuleCreateFromPTX(context,
+ &module_options,
+ &pipeline_options,
+ ptx_data.data(),
+ ptx_data.size(),
+ nullptr,
+ 0,
+ &osl_modules.back());
+ if (result != OPTIX_SUCCESS) {
+ set_error(string_printf("Failed to load OptiX OSL services kernel from '%s' (%s)",
+ ptx_filename.c_str(),
+ optixGetErrorName(result)));
+ return false;
+ }
+
+ OptixProgramGroupDesc group_desc = {};
+ group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_desc.callables.entryFunctionNameDC = "__direct_callable__dummy_services";
+ group_desc.callables.moduleDC = osl_modules.back();
+
+ optix_assert(optixProgramGroupCreate(
+ context, &group_desc, 1, &group_options, nullptr, 0, &osl_groups.back()));
+ }
+
+ TaskPool pool;
+ vector<OptixResult> results(osl_kernels.size(), OPTIX_SUCCESS);
+
+ for (size_t i = 0; i < osl_kernels.size(); ++i) {
+ if (osl_kernels[i].ptx.empty()) {
+ continue;
+ }
+
+# if OPTIX_ABI_VERSION >= 55
+ OptixTask task = nullptr;
+ results[i] = optixModuleCreateFromPTXWithTasks(context,
+ &module_options,
+ &pipeline_options,
+ osl_kernels[i].ptx.data(),
+ osl_kernels[i].ptx.size(),
+ nullptr,
+ nullptr,
+ &osl_modules[i],
+ &task);
+ if (results[i] == OPTIX_SUCCESS) {
+ execute_optix_task(pool, task, results[i]);
+ }
+# else
+ pool.push([this, &results, i, &module_options, &osl_kernels]() {
+ results[i] = optixModuleCreateFromPTX(context,
+ &module_options,
+ &pipeline_options,
+ osl_kernels[i].ptx.data(),
+ osl_kernels[i].ptx.size(),
+ nullptr,
+ 0,
+ &osl_modules[i]);
+ });
+# endif
+ }
+
+ pool.wait_work();
+
+ for (size_t i = 0; i < osl_kernels.size(); ++i) {
+ if (osl_kernels[i].ptx.empty()) {
+ continue;
+ }
+
+ if (results[i] != OPTIX_SUCCESS) {
+ set_error(string_printf("Failed to load OptiX OSL kernel for %s (%s)",
+ osl_kernels[i].init_entry.c_str(),
+ optixGetErrorName(results[i])));
+ return false;
+ }
+
+ OptixProgramGroupDesc group_descs[2] = {};
+ group_descs[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[0].callables.entryFunctionNameDC = osl_kernels[i].init_entry.c_str();
+ group_descs[0].callables.moduleDC = osl_modules[i];
+ group_descs[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[1].callables.entryFunctionNameDC = osl_kernels[i].exec_entry.c_str();
+ group_descs[1].callables.moduleDC = osl_modules[i];
+
+ optix_assert(optixProgramGroupCreate(
+ context, group_descs, 2, &group_options, nullptr, 0, &osl_groups[i * 2]));
+ }
+
+ vector<OptixStackSizes> osl_stack_size(osl_groups.size());
+
+ /* Update SBT with new entries. */
+ sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
+ for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
+ optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
+ }
+ for (size_t i = 0; i < osl_groups.size(); ++i) {
+ if (osl_groups[i] != NULL) {
+ optix_assert(optixSbtRecordPackHeader(osl_groups[i], &sbt_data[NUM_PROGRAM_GROUPS + i]));
+ optix_assert(optixProgramGroupGetStackSize(osl_groups[i], &osl_stack_size[i]));
+ }
+ }
+ sbt_data.copy_to_device(); /* Upload updated SBT to device. */
+
+ OptixPipelineLinkOptions link_options = {};
+ link_options.maxTraceDepth = 0;
+ link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
+
+ {
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
- pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]);
- pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]);
- pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]);
- pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]);
- pipeline_groups.push_back(groups[PG_MISS]);
- pipeline_groups.push_back(groups[PG_HITD]);
- pipeline_groups.push_back(groups[PG_HITS]);
- pipeline_groups.push_back(groups[PG_HITL]);
- pipeline_groups.push_back(groups[PG_HITV]);
- if (motion_blur) {
- pipeline_groups.push_back(groups[PG_HITD_MOTION]);
- pipeline_groups.push_back(groups[PG_HITS_MOTION]);
- }
- if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
- pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
- pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_BACKGROUND]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_LIGHT]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_VOLUME]);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SHADOW]);
+ pipeline_groups.push_back(groups[PG_RGEN_EVAL_DISPLACE]);
+ pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]);
+ pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]);
+
+ for (const OptixProgramGroup &group : osl_groups) {
+ if (group != NULL) {
+ pipeline_groups.push_back(group);
+ }
}
optix_assert(optixPipelineCreate(context,
@@ -821,26 +1094,30 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.size(),
nullptr,
0,
- &pipelines[PIP_INTERSECT]));
+ &pipelines[PIP_SHADE]));
- /* Calculate continuation stack size based on the maximum of all ray generation stack sizes. */
- const unsigned int css =
- std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG,
- std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG,
- std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG,
- stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) +
- link_options.maxTraceDepth * trace_css;
+ unsigned int dss = 0;
+ for (unsigned int i = 0; i < osl_stack_size.size(); ++i) {
+ dss = std::max(dss, osl_stack_size[i].dssDC);
+ }
- optix_assert(
- optixPipelineSetStackSize(pipelines[PIP_INTERSECT], 0, 0, css, motion_blur ? 3 : 2));
+ optix_assert(optixPipelineSetStackSize(
+ pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 2));
}
- /* Clean up program group objects. */
- for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
- optixProgramGroupDestroy(groups[i]);
- }
+ return !have_error();
+# else
+ return false;
+# endif
+}
- return true;
+void *OptiXDevice::get_cpu_osl_memory()
+{
+# ifdef WITH_OSL
+ return &osl_globals;
+# else
+ return NULL;
+# endif
}
/* --------------------------------------------------------------------
@@ -1567,7 +1844,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
size_t num_motion_steps = 1;
Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
+ if (pipeline_options.usesMotionBlur && hair->get_use_motion_blur() && motion_keys) {
num_motion_steps = hair->get_motion_steps();
}
@@ -1721,7 +1998,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
size_t num_motion_steps = 1;
Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
+ if (pipeline_options.usesMotionBlur && mesh->get_use_motion_blur() && motion_keys) {
num_motion_steps = mesh->get_motion_steps();
}
@@ -1788,7 +2065,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
size_t num_motion_steps = 1;
Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
+ if (pipeline_options.usesMotionBlur && pointcloud->get_use_motion_blur() && motion_points) {
num_motion_steps = pointcloud->get_motion_steps();
}
@@ -1885,7 +2162,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Calculate total motion transform size and allocate memory for them. */
size_t motion_transform_offset = 0;
- if (motion_blur) {
+ if (pipeline_options.usesMotionBlur) {
size_t total_motion_transform_size = 0;
for (Object *const ob : bvh->objects) {
if (ob->is_traceable() && ob->use_motion()) {
@@ -1936,7 +2213,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
- if (motion_blur && ob->get_geometry()->has_motion_blur()) {
+ if (pipeline_options.usesMotionBlur && ob->get_geometry()->has_motion_blur()) {
/* Select between motion blur and non-motion blur built-in intersection module. */
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
@@ -1964,7 +2241,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
/* Insert motion traversable if object has motion. */
- if (motion_blur && ob->use_motion()) {
+ if (pipeline_options.usesMotionBlur && ob->use_motion()) {
size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2;
size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
motion_keys * sizeof(OptixSRTData);