Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Mours <pmours@nvidia.com>2021-12-01 13:54:42 +0300
committerPatrick Mours <pmours@nvidia.com>2021-12-02 14:10:46 +0300
commit17665494186816cebb9e8304199e40f9ee033990 (patch)
treeb42726bfc1b9c86841d4e03d081ad5fd804ca71c
parent7da979c0700fc255569d86ed18d9038470daeb54 (diff)
Fix T92308: OptiX denoising fails with high resolutions
The OptiX denoiser has an upper limit on how many pixels it can denoise at once, so this changes the OptiX denoising process to use tiles for high-resolution images. The OptiX SDK has a utility function for this purpose, so the changes are minor: adjusting the configured tile size and including enough overlap. Maniphest Tasks: T92308. Differential Revision: https://developer.blender.org/D13436
-rw-r--r--intern/cycles/device/optix/device_impl.cpp61
-rw-r--r--intern/cycles/device/optix/device_impl.h3
2 files changed, 32 insertions, 32 deletions
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 7a78504f458..a0c748fb6cd 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -41,6 +41,8 @@
# define __KERNEL_OPTIX__
# include "kernel/device/optix/globals.h"
+# include <optix_denoiser_tiling.h>
+
CCL_NAMESPACE_BEGIN
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
@@ -884,35 +886,33 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
{
- if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width &&
- denoiser_.configured_size.y == context.buffer_params.height)) {
+ /* Limit maximum tile size denoiser can be invoked with. */
+ const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
+ min(context.buffer_params.height, 4096));
+
+ if (denoiser_.is_configured &&
+ (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
return true;
}
- const BufferParams &buffer_params = context.buffer_params;
-
- OptixDenoiserSizes sizes = {};
optix_assert(optixDenoiserComputeMemoryResources(
- denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
-
- /* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
- denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
- denoiser_.scratch_offset = sizes.stateSizeInBytes;
+ denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
/* Allocate denoiser state if tile size has changed since last setup. */
- denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
+ denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
+ denoiser_.sizes.withOverlapScratchSizeInBytes);
/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
denoiser_.optix_denoiser,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
on a stream that is not the default stream */
- buffer_params.width,
- buffer_params.height,
+ tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+ tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
denoiser_.state.device_pointer,
- denoiser_.scratch_offset,
- denoiser_.state.device_pointer + denoiser_.scratch_offset,
- denoiser_.scratch_size);
+ denoiser_.sizes.stateSizeInBytes,
+ denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
+ denoiser_.sizes.withOverlapScratchSizeInBytes);
if (result != OPTIX_SUCCESS) {
set_error("Failed to set up OptiX denoiser");
return false;
@@ -921,8 +921,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
cuda_assert(cuCtxSynchronize());
denoiser_.is_configured = true;
- denoiser_.configured_size.x = buffer_params.width;
- denoiser_.configured_size.y = buffer_params.height;
+ denoiser_.configured_size = tile_size;
return true;
}
@@ -993,18 +992,20 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
- optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
- denoiser_.queue.stream(),
- &params,
- denoiser_.state.device_pointer,
- denoiser_.scratch_offset,
- &guide_layers,
- &image_layers,
- 1,
- 0,
- 0,
- denoiser_.state.device_pointer + denoiser_.scratch_offset,
- denoiser_.scratch_size));
+ optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
+ denoiser_.queue.stream(),
+ &params,
+ denoiser_.state.device_pointer,
+ denoiser_.sizes.stateSizeInBytes,
+ &guide_layers,
+ &image_layers,
+ 1,
+ denoiser_.state.device_pointer +
+ denoiser_.sizes.stateSizeInBytes,
+ denoiser_.sizes.withOverlapScratchSizeInBytes,
+ denoiser_.sizes.overlapWindowSizeInPixels,
+ denoiser_.configured_size.x,
+ denoiser_.configured_size.y));
return true;
}
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 1b43972d99f..cf4afb46280 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -98,8 +98,7 @@ class OptiXDevice : public CUDADevice {
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
* The memory layout goes as following: [denoiser state][scratch buffer]. */
device_only_memory<unsigned char> state;
- size_t scratch_offset = 0;
- size_t scratch_size = 0;
+ OptixDenoiserSizes sizes = {};
bool use_pass_albedo = false;
bool use_pass_normal = false;