Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r--intern/cycles/device/device_optix.cpp83
1 files changed, 72 insertions, 11 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 51e1a0033ba..b008dfa376f 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -193,6 +193,9 @@ class OptiXDevice : public CUDADevice {
device_only_memory<unsigned char> denoiser_state;
int denoiser_input_passes = 0;
+ vector<device_only_memory<char>> delayed_free_bvh_memory;
+ thread_mutex delayed_free_bvh_mutex;
+
public:
OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: CUDADevice(info_, stats_, profiler_, background_),
@@ -258,6 +261,8 @@ class OptiXDevice : public CUDADevice {
// Make CUDA context current
const CUDAContextScope scope(cuContext);
+ free_bvh_memory_delayed();
+
sbt_data.free();
texture_info.free();
launch_params.free();
@@ -362,7 +367,7 @@ class OptiXDevice : public CUDADevice {
}
}
- OptixModuleCompileOptions module_options;
+ OptixModuleCompileOptions module_options = {};
module_options.maxRegisterCount = 0; // Do not set an explicit register limit
# ifdef WITH_CYCLES_DEBUG
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
@@ -377,7 +382,7 @@ class OptiXDevice : public CUDADevice {
module_options.numBoundValues = 0;
# endif
- OptixPipelineCompileOptions pipeline_options;
+ OptixPipelineCompileOptions pipeline_options = {};
// Default to no motion blur and two-level graph, since it is the fastest option
pipeline_options.usesMotionBlur = false;
pipeline_options.traversableGraphFlags =
@@ -477,7 +482,7 @@ class OptiXDevice : public CUDADevice {
# if OPTIX_ABI_VERSION >= 36
if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
- OptixBuiltinISOptions builtin_options;
+ OptixBuiltinISOptions builtin_options = {};
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
builtin_options.usesMotionBlur = false;
@@ -571,7 +576,7 @@ class OptiXDevice : public CUDADevice {
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
# endif
- OptixPipelineLinkOptions link_options;
+ OptixPipelineLinkOptions link_options = {};
link_options.maxTraceDepth = 1;
# ifdef WITH_CYCLES_DEBUG
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
@@ -721,7 +726,11 @@ class OptiXDevice : public CUDADevice {
}
}
else if (task.type == DeviceTask::SHADER) {
- launch_shader_eval(task, thread_index);
+ // CUDA kernels are used when doing baking
+ if (optix_module == NULL)
+ CUDADevice::shader(task);
+ else
+ launch_shader_eval(task, thread_index);
}
else if (task.type == DeviceTask::DENOISE_BUFFER) {
// Set up a single tile that covers the whole task and denoise it
@@ -953,16 +962,23 @@ class OptiXDevice : public CUDADevice {
}
// Create OptiX denoiser handle on demand when it is first used
- OptixDenoiserOptions denoiser_options;
+ OptixDenoiserOptions denoiser_options = {};
assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3);
+# if OPTIX_ABI_VERSION >= 47
+ denoiser_options.guideAlbedo = task.denoising.input_passes >= 2;
+ denoiser_options.guideNormal = task.denoising.input_passes >= 3;
+ check_result_optix_ret(optixDenoiserCreate(
+ context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser));
+# else
denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
-# if OPTIX_ABI_VERSION < 28
+# if OPTIX_ABI_VERSION < 28
denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
-# endif
+# endif
check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
check_result_optix_ret(
optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
+# endif
// OptiX denoiser handle was created with the requested number of input passes
denoiser_input_passes = task.denoising.input_passes;
@@ -1032,10 +1048,34 @@ class OptiXDevice : public CUDADevice {
# endif
output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
+# if OPTIX_ABI_VERSION >= 47
+ OptixDenoiserLayer image_layers = {};
+ image_layers.input = input_layers[0];
+ image_layers.output = output_layers[0];
+
+ OptixDenoiserGuideLayer guide_layers = {};
+ guide_layers.albedo = input_layers[1];
+ guide_layers.normal = input_layers[2];
+# endif
+
// Finally run denoising
OptixDenoiserParams params = {}; // All parameters are disabled/zero
+# if OPTIX_ABI_VERSION >= 47
check_result_optix_ret(optixDenoiserInvoke(denoiser,
- 0,
+ NULL,
+ &params,
+ denoiser_state.device_pointer,
+ scratch_offset,
+ &guide_layers,
+ &image_layers,
+ 1,
+ overlap_offset.x,
+ overlap_offset.y,
+ denoiser_state.device_pointer + scratch_offset,
+ scratch_size));
+# else
+ check_result_optix_ret(optixDenoiserInvoke(denoiser,
+ NULL,
&params,
denoiser_state.device_pointer,
scratch_offset,
@@ -1046,6 +1086,7 @@ class OptiXDevice : public CUDADevice {
output_layers,
denoiser_state.device_pointer + scratch_offset,
scratch_size));
+# endif
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
void *output_args[] = {&input_ptr,
@@ -1157,7 +1198,7 @@ class OptiXDevice : public CUDADevice {
// Compute memory usage
OptixAccelBufferSizes sizes = {};
- OptixAccelBuildOptions options;
+ OptixAccelBuildOptions options = {};
options.operation = operation;
if (background) {
// Prefer best performance and lowest memory consumption in background
@@ -1195,7 +1236,7 @@ class OptiXDevice : public CUDADevice {
}
// Finally build the acceleration structure
- OptixAccelEmitDesc compacted_size_prop;
+ OptixAccelEmitDesc compacted_size_prop = {};
compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
// A tiny space was allocated for this property at the end of the temporary buffer above
// Make sure this pointer is 8-byte aligned
@@ -1265,6 +1306,8 @@ class OptiXDevice : public CUDADevice {
return;
}
+ free_bvh_memory_delayed();
+
BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
progress.set_substatus("Building OptiX acceleration structure");
@@ -1735,6 +1778,24 @@ class OptiXDevice : public CUDADevice {
}
}
+ void release_optix_bvh(BVH *bvh) override
+ {
+ /* The geometry that owns this BVH may be deleted while the GPU is still rendering,
+ * so its device buffers are not freed here. They are moved into the delayed-free
+ * queue instead, which free_bvh_memory_delayed() empties later. */
+ BVHOptiX *const optix_bvh = static_cast<BVHOptiX *>(bvh);
+
+ /* The queue is shared state: hold its mutex while appending to it. */
+ thread_scoped_lock queue_guard(delayed_free_bvh_mutex);
+ delayed_free_bvh_memory.emplace_back(std::move(optix_bvh->as_data));
+ delayed_free_bvh_memory.emplace_back(std::move(optix_bvh->motion_transform_data));
+
+ /* The buffers have been handed over, so clear the handle stored on the BVH. */
+ optix_bvh->traversable_handle = 0;
+ }
+
+ /* Flush the delayed-free queue that release_optix_bvh() appends to: calls
+ * free_memory() on the queue while holding the queue's mutex. */
+ void free_bvh_memory_delayed()
+ {
+ thread_scoped_lock queue_guard(delayed_free_bvh_mutex);
+ delayed_free_bvh_memory.free_memory();
+ }
+
void const_copy_to(const char *name, void *host, size_t size) override
{
// Set constant memory for CUDA module