Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2020-06-01 01:11:17 +0300
committerBrecht Van Lommel <brecht@blender.org>2020-06-24 16:17:36 +0300
commit669befdfbe487f76c65f54e3da0013d140d56893 (patch)
tree9aa766cca084b9d65fbc694f5fa0a08cbcbcce70 /intern/cycles/device/device_cpu.cpp
parent0a3bde63006c66b8b8531ed5eccca9bdf5e5dc20 (diff)
Cycles: add Intel OpenImageDenoise support for viewport denoising
Compared to Optix denoise, this is usually slower since there is no GPU acceleration. Some optimizations may still be possible, to avoid copies to the GPU and/or denoise less often. The main thing is that this adds viewport denoising support for computers without an NVIDIA GPU (as long as the CPU supports SSE 4.1, which is nearly all of them). Ref T76259
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r--intern/cycles/device/device_cpu.cpp108
1 files changed, 98 insertions, 10 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 1f760a15530..8f68e66a1b4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -51,6 +51,7 @@
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_map.h"
+#include "util/util_openimagedenoise.h"
#include "util/util_opengl.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
@@ -177,6 +178,10 @@ class CPUDevice : public Device {
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
+#ifdef WITH_OPENIMAGEDENOISE
+ oidn::DeviceRef oidn_device;
+ oidn::FilterRef oidn_filter;
+#endif
bool use_split_kernel;
@@ -943,6 +948,70 @@ class CPUDevice : public Device {
}
}
+ void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile)
+ { /* Denoise the pixels of `rtile` in place with Intel OpenImageDenoise (OIDN).
+ * The color, normal and albedo inputs are read from the denoising passes
+ * located via task.pass_denoising_data, and the result is written to float
+ * offset 0 of every pixel of the same buffer. When WITH_OPENIMAGEDENOISE is
+ * not defined the function does nothing. */
+#ifdef WITH_OPENIMAGEDENOISE
+ assert(openimagedenoise_supported());
+
+ /* Only one at a time, since OpenImageDenoise itself is multithreaded.
+ * The mutex is a function-local static, so it serializes every call to this
+ * function rather than only calls made with the same tile or task. */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
+ /* Create device and filter, cached for reuse. Both are members of the
+ * enclosing device class and are only created when still unset. */
+ if (!oidn_device) {
+ oidn_device = oidn::newDevice();
+ oidn_device.commit();
+ }
+ if (!oidn_filter) {
+ oidn_filter = oidn_device.newFilter("RT"); /* Presumably OIDN's generic ray tracing filter; confirm against the OIDN docs. */
+ }
+
+ /* Copy pixels from compute device to CPU (no-op for CPU device). */
+ rtile.buffers->buffer.copy_from_device();
+
+ /* Set images with appropriate stride for our interleaved pass storage.
+ * Each entry pairs an OIDN image name with a float offset inside one pixel:
+ * the inputs are placed relative to task.pass_denoising_data and the output
+ * at offset 0. The entry whose name is NULL terminates the list. */
+ const struct {
+ const char *name;
+ int offset;
+ } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR},
+ {"normal", task.pass_denoising_data + DENOISING_PASS_NORMAL},
+ {"albedo", task.pass_denoising_data + DENOISING_PASS_ALBEDO},
+ {"output", 0},
+ { NULL,
+ 0 }};
+
+ for (int i = 0; passes[i].name; i++) { /* NOTE(review): offset, pixel_stride and row_stride do not depend on i. */
+ const int64_t offset = rtile.offset + rtile.x + rtile.y * rtile.stride; /* Pixel index computed from the tile's offset, x, y and stride. */
+ const int64_t buffer_offset = (offset * task.pass_stride + passes[i].offset) * sizeof(float); /* In bytes: pixel index times floats per pixel, plus the pass offset. */
+ const int64_t pixel_stride = task.pass_stride * sizeof(float); /* Bytes from one pixel to the next. */
+ const int64_t row_stride = rtile.stride * pixel_stride; /* Bytes from one row to the next. */
+
+ oidn_filter.setImage(passes[i].name,
+ (char *)rtile.buffer + buffer_offset, /* Cast to char* so the offset is applied in bytes. */
+ oidn::Format::Float3,
+ rtile.w,
+ rtile.h,
+ 0, /* Presumably an additional byte offset (unused, already folded into the pointer); confirm against setImage docs. */
+ pixel_stride,
+ row_stride);
+ }
+
+ /* Execute filter, with the "hdr" option enabled and the "srgb" option
+ * disabled. Options are committed before execution. */
+ oidn_filter.set("hdr", true);
+ oidn_filter.set("srgb", false);
+ oidn_filter.commit();
+ oidn_filter.execute();
+
+ /* todo: it may be possible to avoid this copy, but we have to ensure that
+ * when other code copies data from the device it doesn't overwrite the
+ * denoiser buffers. */
+ rtile.buffers->buffer.copy_to_device();
+#else
+ (void)task; /* Parameters are unused in this branch; the casts mark them as intentionally ignored. */
+ (void)rtile;
+#endif
+ }
+
void denoise_nlm(DenoisingTask &denoising, RenderTile &tile)
{
ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
@@ -1018,7 +1087,10 @@ class CPUDevice : public Device {
render(task, tile, kg);
}
else if (tile.task == RenderTile::DENOISE) {
- if (task.denoising.type == DENOISER_NLM) {
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else if (task.denoising.type == DENOISER_NLM) {
if (denoising == NULL) {
denoising = new DenoisingTask(this, task);
denoising->profiler = &kg->profiler;
@@ -1060,16 +1132,22 @@ class CPUDevice : public Device {
tile.stride = task.stride;
tile.buffers = task.buffers;
- DenoisingTask denoising(this, task);
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else {
+ DenoisingTask denoising(this, task);
- ProfilingState denoising_profiler_state;
- profiler.add_state(&denoising_profiler_state);
- denoising.profiler = &denoising_profiler_state;
+ ProfilingState denoising_profiler_state;
+ profiler.add_state(&denoising_profiler_state);
+ denoising.profiler = &denoising_profiler_state;
- denoise_nlm(denoising, tile);
- task.update_progress(&tile, tile.w * tile.h);
+ denoise_nlm(denoising, tile);
- profiler.remove_state(&denoising_profiler_state);
+ profiler.remove_state(&denoising_profiler_state);
+ }
+
+ task.update_progress(&tile, tile.w * tile.h);
}
void thread_film_convert(DeviceTask &task)
@@ -1143,10 +1221,17 @@ class CPUDevice : public Device {
/* split task into smaller ones */
list<DeviceTask> tasks;
- if (task.type == DeviceTask::SHADER)
+ if (task.type == DeviceTask::DENOISE_BUFFER &&
+ task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ /* Denoise the entire buffer at once with OIDN, since it has its own threading. */
+ tasks.push_back(task);
+ }
+ else if (task.type == DeviceTask::SHADER) {
task.split(tasks, info.cpu_threads, 256);
- else
+ }
+ else {
task.split(tasks, info.cpu_threads);
+ }
foreach (DeviceTask &task, tasks) {
task_pool.push([=] {
@@ -1351,6 +1436,9 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.has_half_images = true;
info.has_profiling = true;
info.denoisers = DENOISER_NLM;
+ if (openimagedenoise_supported()) {
+ info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+ }
devices.insert(devices.begin(), info);
}