Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_optix.cpp147
-rw-r--r--intern/cycles/kernel/CMakeLists.txt15
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h29
-rw-r--r--intern/cycles/kernel/kernel_types.h2
-rw-r--r--intern/cycles/kernel/kernel_volume.h23
-rw-r--r--intern/cycles/kernel/svm/svm.h26
6 files changed, 168 insertions, 74 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 95234845f98..682540a51fd 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -141,7 +141,8 @@ class OptiXDevice : public CUDADevice {
PG_BAKE, // kernel_bake_evaluate
PG_DISP, // kernel_displace_evaluate
PG_BACK, // kernel_background_evaluate
- NUM_PROGRAM_GROUPS
+ PG_CALL,
+ NUM_PROGRAM_GROUPS = PG_CALL + 3
};
// List of OptiX pipelines
@@ -334,11 +335,6 @@ class OptiXDevice : public CUDADevice {
set_error("OptiX backend does not support baking yet");
return false;
}
- // Disable shader raytracing support for now, since continuation callables are slow
- if (requested_features.use_shader_raytrace) {
- set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet");
- return false;
- }
const CUDAContextScope scope(cuContext);
@@ -410,7 +406,9 @@ class OptiXDevice : public CUDADevice {
}
{ // Load and compile PTX module with OptiX kernels
- string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+ string ptx_data, ptx_filename = path_get(requested_features.use_shader_raytrace ?
+ "lib/kernel_optix_shader_raytrace.ptx" :
+ "lib/kernel_optix.ptx");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
if (!getenv("OPTIX_ROOT_DIR")) {
set_error(
@@ -525,6 +523,21 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_BACK].raygen.entryFunctionName = "__raygen__kernel_optix_background";
}
+ // Shader raytracing replaces some functions with direct callables
+ if (requested_features.use_shader_raytrace) {
+ group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL + 0].callables.moduleDC = optix_module;
+ group_descs[PG_CALL + 0].callables.entryFunctionNameDC = "__direct_callable__svm_eval_nodes";
+ group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL + 1].callables.moduleDC = optix_module;
+ group_descs[PG_CALL + 1].callables.entryFunctionNameDC =
+ "__direct_callable__kernel_volume_shadow";
+ group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+ group_descs[PG_CALL + 2].callables.moduleDC = optix_module;
+ group_descs[PG_CALL + 2].callables.entryFunctionNameDC =
+ "__direct_callable__subsurface_scatter_multi_setup";
+ }
+
check_result_optix_ret(optixProgramGroupCreate(
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
@@ -564,33 +577,51 @@ class OptiXDevice : public CUDADevice {
# endif
{ // Create path tracing pipeline
- OptixProgramGroup pipeline_groups[] = {
- groups[PG_RGEN],
- groups[PG_MISS],
- groups[PG_HITD],
- groups[PG_HITS],
- groups[PG_HITL],
+ vector<OptixProgramGroup> pipeline_groups;
+ pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+ pipeline_groups.push_back(groups[PG_RGEN]);
+ pipeline_groups.push_back(groups[PG_MISS]);
+ pipeline_groups.push_back(groups[PG_HITD]);
+ pipeline_groups.push_back(groups[PG_HITS]);
+ pipeline_groups.push_back(groups[PG_HITL]);
# if OPTIX_ABI_VERSION >= 36
- groups[PG_HITD_MOTION],
- groups[PG_HITS_MOTION],
+ if (motion_blur) {
+ pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+ pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+ }
# endif
- };
- check_result_optix_ret(
- optixPipelineCreate(context,
- &pipeline_options,
- &link_options,
- pipeline_groups,
- (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
- nullptr,
- 0,
- &pipelines[PIP_PATH_TRACE]));
+ if (requested_features.use_shader_raytrace) {
+ pipeline_groups.push_back(groups[PG_CALL + 0]);
+ pipeline_groups.push_back(groups[PG_CALL + 1]);
+ pipeline_groups.push_back(groups[PG_CALL + 2]);
+ }
+
+ check_result_optix_ret(optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups.data(),
+ pipeline_groups.size(),
+ nullptr,
+ 0,
+ &pipelines[PIP_PATH_TRACE]));
// Combine ray generation and trace continuation stack size
const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
+ // Max direct callable depth is one of the following, so combine accordingly
+ // - __raygen__ -> svm_eval_nodes
+ // - __raygen__ -> kernel_volume_shadow -> svm_eval_nodes
+ // - __raygen__ -> subsurface_scatter_multi_setup -> svm_eval_nodes
+ const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
+ std::max(stack_size[PG_CALL + 1].dssDC,
+ stack_size[PG_CALL + 2].dssDC);
// Set stack size depending on pipeline options
check_result_optix_ret(
- optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
+ optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE],
+ 0,
+ requested_features.use_shader_raytrace ? dss : 0,
+ css,
+ motion_blur ? 3 : 2));
}
// Only need to create shader evaluation pipeline if one of these features is used:
@@ -599,37 +630,51 @@ class OptiXDevice : public CUDADevice {
requested_features.use_true_displacement;
if (use_shader_eval_pipeline) { // Create shader evaluation pipeline
- OptixProgramGroup pipeline_groups[] = {
- groups[PG_BAKE],
- groups[PG_DISP],
- groups[PG_BACK],
- groups[PG_MISS],
- groups[PG_HITD],
- groups[PG_HITS],
- groups[PG_HITL],
+ vector<OptixProgramGroup> pipeline_groups;
+ pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+ pipeline_groups.push_back(groups[PG_BAKE]);
+ pipeline_groups.push_back(groups[PG_DISP]);
+ pipeline_groups.push_back(groups[PG_BACK]);
+ pipeline_groups.push_back(groups[PG_MISS]);
+ pipeline_groups.push_back(groups[PG_HITD]);
+ pipeline_groups.push_back(groups[PG_HITS]);
+ pipeline_groups.push_back(groups[PG_HITL]);
# if OPTIX_ABI_VERSION >= 36
- groups[PG_HITD_MOTION],
- groups[PG_HITS_MOTION],
+ if (motion_blur) {
+ pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+ pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+ }
# endif
- };
- check_result_optix_ret(
- optixPipelineCreate(context,
- &pipeline_options,
- &link_options,
- pipeline_groups,
- (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
- nullptr,
- 0,
- &pipelines[PIP_SHADER_EVAL]));
+ if (requested_features.use_shader_raytrace) {
+ pipeline_groups.push_back(groups[PG_CALL + 0]);
+ pipeline_groups.push_back(groups[PG_CALL + 1]);
+ pipeline_groups.push_back(groups[PG_CALL + 2]);
+ }
+
+ check_result_optix_ret(optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups.data(),
+ pipeline_groups.size(),
+ nullptr,
+ 0,
+ &pipelines[PIP_SHADER_EVAL]));
// Calculate continuation stack size based on the maximum of all ray generation stack sizes
const unsigned int css = std::max(stack_size[PG_BAKE].cssRG,
std::max(stack_size[PG_DISP].cssRG,
stack_size[PG_BACK].cssRG)) +
link_options.maxTraceDepth * trace_css;
+ const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
+ std::max(stack_size[PG_CALL + 1].dssDC,
+ stack_size[PG_CALL + 2].dssDC);
- check_result_optix_ret(optixPipelineSetStackSize(
- pipelines[PIP_SHADER_EVAL], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
+ check_result_optix_ret(
+ optixPipelineSetStackSize(pipelines[PIP_SHADER_EVAL],
+ 0,
+ requested_features.use_shader_raytrace ? dss : 0,
+ css,
+ motion_blur ? 3 : 2));
}
// Clean up program group objects
@@ -734,6 +779,9 @@ class OptiXDevice : public CUDADevice {
# else
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
# endif
+ sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord);
+ sbt_params.callablesRecordCount = 3;
+ sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
// Launch the ray generation program
check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
@@ -1061,6 +1109,9 @@ class OptiXDevice : public CUDADevice {
# else
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
# endif
+ sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord);
+ sbt_params.callablesRecordCount = 3;
+ sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
cuda_stream[thread_index],
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index c39c67afb5a..f6b4b963a7a 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -423,7 +423,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_kernel_src "/kernels/cuda/${name}.cu")
- set(cuda_flags
+ set(cuda_flags ${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D NVCC
@@ -545,11 +545,11 @@ endif()
# OptiX PTX modules
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
- foreach(input ${SRC_OPTIX_KERNELS})
- get_filename_component(input_we ${input} NAME_WE)
+ macro(CYCLES_OPTIX_KERNEL_ADD name flags)
+ set(input "kernels/optix/kernel_optix.cu")
+ set(output "${CMAKE_CURRENT_BINARY_DIR}/${name}.ptx")
- set(output "${CMAKE_CURRENT_BINARY_DIR}/${input_we}.ptx")
- set(cuda_flags
+ set(cuda_flags ${flags}
-I "${OPTIX_INCLUDE_DIR}"
-I "${CMAKE_CURRENT_SOURCE_DIR}/.."
-I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda"
@@ -625,7 +625,10 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
list(APPEND optix_ptx ${output})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
- endforeach()
+ endmacro()
+
+ CYCLES_OPTIX_KERNEL_ADD(kernel_optix "-D __NO_SHADER_RAYTRACE__")
+ CYCLES_OPTIX_KERNEL_ADD(kernel_optix_shader_raytrace "--keep-device-functions")
add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
cycles_set_solution_folder(cycles_kernel_optix)
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index ed8572467ea..917f35d37dc 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -281,13 +281,28 @@ ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
return num_eval_hits;
}
-ccl_device_noinline void subsurface_scatter_multi_setup(KernelGlobals *kg,
- LocalIntersection *ss_isect,
- int hit,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- ClosureType type,
- float roughness)
+#if defined(__KERNEL_OPTIX__) && defined(__SHADER_RAYTRACE__)
+ccl_device_inline void subsurface_scatter_multi_setup(KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ int hit,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ClosureType type,
+ float roughness)
+{
+ optixDirectCall<void>(2, kg, ss_isect, hit, sd, state, type, roughness);
+}
+extern "C" __device__ void __direct_callable__subsurface_scatter_multi_setup(
+#else
+ccl_device_noinline void subsurface_scatter_multi_setup(
+#endif
+ KernelGlobals *kg,
+ LocalIntersection *ss_isect,
+ int hit,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ClosureType type,
+ float roughness)
{
#ifdef __SPLIT_KERNEL__
Ray ray_object = ss_isect->ray;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 8e2b0e46a66..6beabebb92f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -139,8 +139,6 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPTIX__
# undef __BAKING__
# undef __BRANCHED_PATH__
-/* TODO(pmours): Cannot use optixTrace in non-inlined functions */
-# undef __SHADER_RAYTRACE__
#endif /* __KERNEL_OPTIX__ */
#ifdef __KERNEL_OPENCL__
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index f5d10c0ca8a..fdf712293e7 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -274,11 +274,24 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
/* get the volume attenuation over line segment defined by ray, with the
* assumption that there are no surfaces blocking light between the endpoints */
-ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
- Ray *ray,
- float3 *throughput)
+# if defined(__KERNEL_OPTIX__) && defined(__SHADER_RAYTRACE__)
+ccl_device_inline void kernel_volume_shadow(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ float3 *throughput)
+{
+ optixDirectCall<void>(1, kg, shadow_sd, state, ray, throughput);
+}
+extern "C" __device__ void __direct_callable__kernel_volume_shadow(
+# else
+ccl_device_noinline void kernel_volume_shadow(
+# endif
+ KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ float3 *throughput)
{
shader_setup_from_volume(kg, shadow_sd, ray);
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 6c849f5b2fc..000da1fa615 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -217,12 +217,26 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
/* Main Interpreter Loop */
-ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
- ShaderData *sd,
- ccl_addr_space PathState *state,
- ccl_global float *buffer,
- ShaderType type,
- int path_flag)
+#if defined(__KERNEL_OPTIX__) && defined(__SHADER_RAYTRACE__)
+ccl_device_inline void svm_eval_nodes(KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ccl_global float *buffer,
+ ShaderType type,
+ int path_flag)
+{
+ optixDirectCall<void>(0, kg, sd, state, buffer, type, path_flag);
+}
+extern "C" __device__ void __direct_callable__svm_eval_nodes(
+#else
+ccl_device_noinline void svm_eval_nodes(
+#endif
+ KernelGlobals *kg,
+ ShaderData *sd,
+ ccl_addr_space PathState *state,
+ ccl_global float *buffer,
+ ShaderType type,
+ int path_flag)
{
float stack[SVM_STACK_SIZE];
int offset = sd->shader & SHADER_MASK;