diff options
author | Patrick Mours <pmours@nvidia.com> | 2020-02-11 18:30:01 +0300 |
---|---|---|
committer | Patrick Mours <pmours@nvidia.com> | 2020-02-11 20:03:43 +0300 |
commit | 38589de10c098cfe32ac7716f4d7844abf959753 (patch) | |
tree | d28b007bdc75e4eefd1d7ded5115655c50a72140 /intern/cycles/device/device_optix.cpp | |
parent | 35490c3ead03d472dbcba36c85d428e81b442520 (diff) |
Cycles: Add support for denoising in the viewport
The OptiX denoiser can be a great help when rendering in the viewport, since it is really fast
and needs few samples to produce convincing results. This patch therefore adds support for
using any Cycles denoiser in the viewport as well (though only the OptiX one is selectable, because
the NLM one is currently too slow to be usable). It also adds support for denoising on a
different device than rendering (so one can, e.g., render with the CPU but denoise with OptiX).
Reviewed By: #cycles, brecht
Differential Revision: https://developer.blender.org/D6554
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 64 |
1 file changed, 48 insertions, 16 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 98469fb37b0..ac119a723e3 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -213,6 +213,7 @@ class OptiXDevice : public Device { OptixDenoiser denoiser = NULL; vector<pair<int2, CUdeviceptr>> denoiser_state; + int denoiser_input_passes = 0; public: OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) @@ -632,7 +633,7 @@ class OptiXDevice : public Device { if (have_error()) return; // Abort early if there was an error previously - if (task.type == DeviceTask::RENDER) { + if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) { RenderTile tile; while (task.acquire_tile(this, tile)) { if (tile.task == RenderTile::PATH_TRACE) @@ -652,6 +653,22 @@ class OptiXDevice : public Device { else if (task.type == DeviceTask::FILM_CONVERT) { launch_film_convert(task, thread_index); } + else if (task.type == DeviceTask::DENOISE_BUFFER) { + // Set up a single tile that covers the whole task and denoise it + RenderTile tile; + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + launch_denoise(task, tile, thread_index); + } } void launch_render(DeviceTask &task, RenderTile &rtile, int thread_index) @@ -740,6 +757,7 @@ class OptiXDevice : public Device { RenderTile rtiles[10]; rtiles[4] = rtile; task.map_neighbor_tiles(rtiles, this); + rtile = rtiles[4]; // Tile may have been modified by mapping code // Calculate size of the tile to denoise (including overlap) int4 rect = make_int4( @@ -846,7 +864,14 @@ class OptiXDevice : public Device { } # endif - if (denoiser == NULL) { + const bool recreate_denoiser = (denoiser == NULL) || + (task.denoising.optix_input_passes != 
denoiser_input_passes); + if (recreate_denoiser) { + // Destroy existing handle before creating new one + if (denoiser != NULL) { + optixDenoiserDestroy(denoiser); + } + // Create OptiX denoiser handle on demand when it is first used OptixDenoiserOptions denoiser_options; assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); @@ -856,6 +881,9 @@ class OptiXDevice : public Device { check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); + + // OptiX denoiser handle was created with the requested number of input passes + denoiser_input_passes = task.denoising.optix_input_passes; } OptixDenoiserSizes sizes = {}; @@ -868,13 +896,16 @@ class OptiXDevice : public Device { const size_t scratch_offset = sizes.stateSizeInBytes; // Allocate denoiser state if tile size has changed since last setup - if (state_size.x != rect_size.x || state_size.y != rect_size.y) { + if (state_size.x != rect_size.x || state_size.y != rect_size.y || recreate_denoiser) { + // Free existing state before allocating new one if (state) { cuMemFree(state); state = 0; } + check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size)); + // Initialize denoiser state for the current tile size check_result_optix_ret(optixDenoiserSetup(denoiser, cuda_stream[thread_index], rect_size.x, @@ -1972,17 +2003,17 @@ class OptiXDevice : public Device { else if (mem.type == MEM_TEXTURE) { assert(!"mem_copy_from not supported for textures."); } - else { + else if (mem.host_pointer) { // Calculate linear memory offset and size const size_t size = elem * w * h; const size_t offset = elem * y * w; - if (mem.host_pointer && mem.device_pointer) { + if (mem.device_pointer) { const CUDAContextScope scope(cuda_context); check_result_cuda(cuMemcpyDtoH( (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size)); } - else if 
(mem.host_pointer) { + else { memset((char *)mem.host_pointer + offset, 0, size); } } @@ -1990,21 +2021,22 @@ class OptiXDevice : public Device { void mem_zero(device_memory &mem) override { - if (mem.host_pointer) - memset(mem.host_pointer, 0, mem.memory_size()); - - if (!mem.device_pointer) + if (!mem.device_pointer) { mem_alloc(mem); // Need to allocate memory first if it does not exist yet + } + if (!mem.device_pointer) { + return; + } - /* If use_mapped_host of mem is false, mem.device_pointer currently - * refers to device memory regardless of mem.host_pointer and - * mem.shared_pointer. */ - - if (mem.device_pointer && - (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) { + /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory + * regardless of mem.host_pointer and mem.shared_pointer. */ + if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { const CUDAContextScope scope(cuda_context); check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); } + else if (mem.host_pointer) { + memset(mem.host_pointer, 0, mem.memory_size()); + } } void mem_free(device_memory &mem) override |