diff options
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 83 |
1 files changed, 72 insertions, 11 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 51e1a0033ba..b008dfa376f 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -193,6 +193,9 @@ class OptiXDevice : public CUDADevice { device_only_memory<unsigned char> denoiser_state; int denoiser_input_passes = 0; + vector<device_only_memory<char>> delayed_free_bvh_memory; + thread_mutex delayed_free_bvh_mutex; + public: OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) : CUDADevice(info_, stats_, profiler_, background_), @@ -258,6 +261,8 @@ class OptiXDevice : public CUDADevice { // Make CUDA context current const CUDAContextScope scope(cuContext); + free_bvh_memory_delayed(); + sbt_data.free(); texture_info.free(); launch_params.free(); @@ -362,7 +367,7 @@ class OptiXDevice : public CUDADevice { } } - OptixModuleCompileOptions module_options; + OptixModuleCompileOptions module_options = {}; module_options.maxRegisterCount = 0; // Do not set an explicit register limit # ifdef WITH_CYCLES_DEBUG module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; @@ -377,7 +382,7 @@ class OptiXDevice : public CUDADevice { module_options.numBoundValues = 0; # endif - OptixPipelineCompileOptions pipeline_options; + OptixPipelineCompileOptions pipeline_options = {}; // Default to no motion blur and two-level graph, since it is the fastest option pipeline_options.usesMotionBlur = false; pipeline_options.traversableGraphFlags = @@ -477,7 +482,7 @@ class OptiXDevice : public CUDADevice { # if OPTIX_ABI_VERSION >= 36 if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { - OptixBuiltinISOptions builtin_options; + OptixBuiltinISOptions builtin_options = {}; builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; builtin_options.usesMotionBlur = false; @@ -571,7 +576,7 @@ class OptiXDevice : public CUDADevice { stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); # endif - OptixPipelineLinkOptions link_options; + OptixPipelineLinkOptions link_options = {}; link_options.maxTraceDepth = 1; # ifdef WITH_CYCLES_DEBUG link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; @@ -721,7 +726,11 @@ class OptiXDevice : public CUDADevice { } } else if (task.type == DeviceTask::SHADER) { - launch_shader_eval(task, thread_index); + // CUDA kernels are used when doing baking + if (optix_module == NULL) + CUDADevice::shader(task); + else + launch_shader_eval(task, thread_index); } else if (task.type == DeviceTask::DENOISE_BUFFER) { // Set up a single tile that covers the whole task and denoise it @@ -953,16 +962,23 @@ class OptiXDevice : public CUDADevice { } // Create OptiX denoiser handle on demand when it is first used - OptixDenoiserOptions denoiser_options; + OptixDenoiserOptions denoiser_options = {}; assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3); +# if OPTIX_ABI_VERSION >= 47 + denoiser_options.guideAlbedo = task.denoising.input_passes >= 2; + denoiser_options.guideNormal = task.denoising.input_passes >= 3; + check_result_optix_ret(optixDenoiserCreate( + context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser)); +# else denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>( OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1)); -# if OPTIX_ABI_VERSION < 28 +# if OPTIX_ABI_VERSION < 28 denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3; -# endif +# endif check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); +# endif // OptiX denoiser handle was created with the requested number of input passes denoiser_input_passes = task.denoising.input_passes; @@ -1032,10 +1048,34 @@ class OptiXDevice : public CUDADevice { # endif output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3; +# if OPTIX_ABI_VERSION >= 47 + OptixDenoiserLayer image_layers = {}; + image_layers.input = input_layers[0]; + image_layers.output = output_layers[0]; + + OptixDenoiserGuideLayer guide_layers = {}; + guide_layers.albedo = input_layers[1]; + guide_layers.normal = input_layers[2]; +# endif + // Finally run denonising OptixDenoiserParams params = {}; // All parameters are disabled/zero +# if OPTIX_ABI_VERSION >= 47 check_result_optix_ret(optixDenoiserInvoke(denoiser, - 0, + NULL, + ¶ms, + denoiser_state.device_pointer, + scratch_offset, + &guide_layers, + &image_layers, + 1, + overlap_offset.x, + overlap_offset.y, + denoiser_state.device_pointer + scratch_offset, + scratch_size)); +# else + check_result_optix_ret(optixDenoiserInvoke(denoiser, + NULL, ¶ms, denoiser_state.device_pointer, scratch_offset, @@ -1046,6 +1086,7 @@ class OptiXDevice : public CUDADevice { output_layers, denoiser_state.device_pointer + scratch_offset, scratch_size)); +# endif # if OPTIX_DENOISER_NO_PIXEL_STRIDE void *output_args[] = {&input_ptr, @@ -1157,7 +1198,7 @@ class OptiXDevice : public CUDADevice { // Compute memory usage OptixAccelBufferSizes sizes = {}; - OptixAccelBuildOptions options; + OptixAccelBuildOptions options = {}; options.operation = operation; if (background) { // Prefer best performance and lowest memory consumption in background @@ -1195,7 +1236,7 @@ class OptiXDevice : public CUDADevice { } // Finally build the acceleration structure - OptixAccelEmitDesc compacted_size_prop; + OptixAccelEmitDesc compacted_size_prop = {}; compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; // A tiny space was allocated for this property at the end of the temporary buffer above // Make sure this pointer is 8-byte aligned @@ -1265,6 +1306,8 @@ class OptiXDevice : public CUDADevice { return; } + free_bvh_memory_delayed(); + BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh); progress.set_substatus("Building OptiX acceleration structure"); @@ -1735,6 +1778,24 @@ class OptiXDevice : public CUDADevice { } } + void release_optix_bvh(BVH *bvh) override + { + thread_scoped_lock lock(delayed_free_bvh_mutex); + /* Do delayed free of BVH memory, since geometry holding BVH might be deleted + * while GPU is still rendering. */ + BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh); + + delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data)); + delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data)); + bvh_optix->traversable_handle = 0; + } + + void free_bvh_memory_delayed() + { + thread_scoped_lock lock(delayed_free_bvh_mutex); + delayed_free_bvh_memory.free_memory(); + } + void const_copy_to(const char *name, void *host, size_t size) override { // Set constant memory for CUDA module |