Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r--intern/cycles/device/device_cpu.cpp177
1 files changed, 129 insertions, 48 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index fc6febd8cee..8f68e66a1b4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -51,10 +51,12 @@
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_map.h"
+#include "util/util_openimagedenoise.h"
#include "util/util_opengl.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
#include "util/util_system.h"
+#include "util/util_task.h"
#include "util/util_thread.h"
CCL_NAMESPACE_BEGIN
@@ -161,7 +163,7 @@ class CPUSplitKernel : public DeviceSplitKernel {
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &);
virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task);
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
};
@@ -176,6 +178,10 @@ class CPUDevice : public Device {
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
+#ifdef WITH_OPENIMAGEDENOISE
+ oidn::DeviceRef oidn_device;
+ oidn::FilterRef oidn_filter;
+#endif
bool use_split_kernel;
@@ -332,7 +338,7 @@ class CPUDevice : public Device {
~CPUDevice()
{
- task_pool.stop();
+ task_pool.cancel();
texture_info.free();
}
@@ -344,17 +350,6 @@ class CPUDevice : public Device {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
- if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
- bvh_layout_mask |= BVH_LAYOUT_BVH4;
- }
- /* MSVC does not support the -march=native switch and you always end up */
- /* with an sse2 kernel when you use WITH_KERNEL_NATIVE. We *cannot* feed */
- /* that kernel BVH8 even if the CPU flags would allow for it. */
-#if (defined(__x86_64__) || defined(_M_X64)) && !(defined(_MSC_VER) && defined(WITH_KERNEL_NATIVE))
- if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
- bvh_layout_mask |= BVH_LAYOUT_BVH8;
- }
-#endif
#ifdef WITH_EMBREE
bvh_layout_mask |= BVH_LAYOUT_EMBREE;
#endif /* WITH_EMBREE */
@@ -527,26 +522,18 @@ class CPUDevice : public Device {
#endif
}
- void thread_run(DeviceTask *task)
+ void thread_run(DeviceTask &task)
{
- if (task->type == DeviceTask::RENDER)
- thread_render(*task);
- else if (task->type == DeviceTask::SHADER)
- thread_shader(*task);
- else if (task->type == DeviceTask::FILM_CONVERT)
- thread_film_convert(*task);
- else if (task->type == DeviceTask::DENOISE_BUFFER)
- thread_denoise(*task);
+ if (task.type == DeviceTask::RENDER)
+ thread_render(task);
+ else if (task.type == DeviceTask::SHADER)
+ thread_shader(task);
+ else if (task.type == DeviceTask::FILM_CONVERT)
+ thread_film_convert(task);
+ else if (task.type == DeviceTask::DENOISE_BUFFER)
+ thread_denoise(task);
}
- class CPUDeviceTask : public DeviceTask {
- public:
- CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CPUDevice::thread_run, device, this);
- }
- };
-
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
device_ptr variance_ptr,
@@ -961,7 +948,71 @@ class CPUDevice : public Device {
}
}
- void denoise(DenoisingTask &denoising, RenderTile &tile)
+ void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile)
+ {
+#ifdef WITH_OPENIMAGEDENOISE
+ assert(openimagedenoise_supported());
+
+ /* Denoises the tile described by rtile with OpenImageDenoise. The color,
+  * normal and albedo images are read from the denoising passes located at
+  * task.pass_denoising_data inside each pixel, and the "output" image is
+  * bound to pass offset 0 of the same buffer.
+  *
+  * Only one at a time, since OpenImageDenoise itself is multithreaded.
+  * The mutex is a function-local static, so it is shared by every caller
+  * of this function rather than being per CPUDevice instance. */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
+ /* Create device and filter, cached for reuse. Both are CPUDevice members
+  * and are only created while they still test false, so later calls reuse
+  * the same "RT" filter. */
+ if (!oidn_device) {
+ oidn_device = oidn::newDevice();
+ oidn_device.commit();
+ }
+ if (!oidn_filter) {
+ oidn_filter = oidn_device.newFilter("RT");
+ }
+
+ /* Copy pixels from compute device to CPU (no-op for CPU device). */
+ rtile.buffers->buffer.copy_from_device();
+
+ /* Set images with appropriate stride for our interleaved pass storage.
+  * Each table entry pairs an OIDN image name with the float offset of that
+  * pass within a pixel; the table is terminated by a NULL name.
+  *
+  * For every image the loop below derives byte quantities from float
+  * counts: the start address is (first pixel index * pass_stride + pass
+  * offset) floats into rtile.buffer, consecutive pixels are pass_stride
+  * floats apart, and consecutive rows are rtile.stride pixels apart.
+  * NOTE(review): the literal 0 passed just before the two strides is
+  * presumably OIDN's additional byte offset argument - confirm against the
+  * OIDN API. */
+ const struct {
+ const char *name;
+ int offset;
+ } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR},
+ {"normal", task.pass_denoising_data + DENOISING_PASS_NORMAL},
+ {"albedo", task.pass_denoising_data + DENOISING_PASS_ALBEDO},
+ {"output", 0},
+ { NULL,
+ 0 }};
+
+ for (int i = 0; passes[i].name; i++) {
+ const int64_t offset = rtile.offset + rtile.x + rtile.y * rtile.stride;
+ const int64_t buffer_offset = (offset * task.pass_stride + passes[i].offset) * sizeof(float);
+ const int64_t pixel_stride = task.pass_stride * sizeof(float);
+ const int64_t row_stride = rtile.stride * pixel_stride;
+
+ oidn_filter.setImage(passes[i].name,
+ (char *)rtile.buffer + buffer_offset,
+ oidn::Format::Float3,
+ rtile.w,
+ rtile.h,
+ 0,
+ pixel_stride,
+ row_stride);
+ }
+
+ /* Execute filter. The "hdr" and "srgb" options are set on every call,
+  * before the filter is committed and run. */
+ oidn_filter.set("hdr", true);
+ oidn_filter.set("srgb", false);
+ oidn_filter.commit();
+ oidn_filter.execute();
+
+ /* TODO: it may be possible to avoid this copy, but we have to ensure that
+ * when other code copies data from the device it doesn't overwrite the
+ * denoiser buffers. */
+ rtile.buffers->buffer.copy_to_device();
+#else
+ (void)task; /* Built without WITH_OPENIMAGEDENOISE: this function does nothing. */
+ (void)rtile;
+#endif
+ }
+
+ void denoise_nlm(DenoisingTask &denoising, RenderTile &tile)
{
ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
@@ -1019,15 +1070,14 @@ class CPUDevice : public Device {
}
}
- RenderTile tile;
- DenoisingTask denoising(this, task);
- denoising.profiler = &kg->profiler;
+ DenoisingTask *denoising = NULL;
+ RenderTile tile;
while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel) {
device_only_memory<uchar> void_buffer(this, "void_buffer");
- split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
+ split_kernel->path_trace(task, tile, kgbuffer, void_buffer);
}
else {
render(task, tile, kg);
@@ -1037,7 +1087,16 @@ class CPUDevice : public Device {
render(task, tile, kg);
}
else if (tile.task == RenderTile::DENOISE) {
- denoise(denoising, tile);
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else if (task.denoising.type == DENOISER_NLM) {
+ if (denoising == NULL) {
+ denoising = new DenoisingTask(this, task);
+ denoising->profiler = &kg->profiler;
+ }
+ denoise_nlm(*denoising, tile);
+ }
task.update_progress(&tile, tile.w * tile.h);
}
@@ -1055,6 +1114,7 @@ class CPUDevice : public Device {
kg->~KernelGlobals();
kgbuffer.free();
delete split_kernel;
+ delete denoising;
}
void thread_denoise(DeviceTask &task)
@@ -1072,16 +1132,22 @@ class CPUDevice : public Device {
tile.stride = task.stride;
tile.buffers = task.buffers;
- DenoisingTask denoising(this, task);
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else {
+ DenoisingTask denoising(this, task);
- ProfilingState denoising_profiler_state;
- profiler.add_state(&denoising_profiler_state);
- denoising.profiler = &denoising_profiler_state;
+ ProfilingState denoising_profiler_state;
+ profiler.add_state(&denoising_profiler_state);
+ denoising.profiler = &denoising_profiler_state;
- denoise(denoising, tile);
- task.update_progress(&tile, tile.w * tile.h);
+ denoise_nlm(denoising, tile);
+
+ profiler.remove_state(&denoising_profiler_state);
+ }
- profiler.remove_state(&denoising_profiler_state);
+ task.update_progress(&tile, tile.w * tile.h);
}
void thread_film_convert(DeviceTask &task)
@@ -1155,13 +1221,24 @@ class CPUDevice : public Device {
/* split task into smaller ones */
list<DeviceTask> tasks;
- if (task.type == DeviceTask::SHADER)
+ if (task.type == DeviceTask::DENOISE_BUFFER &&
+ task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ /* Denoise the entire buffer at once with OIDN, which has its own threading. */
+ tasks.push_back(task);
+ }
+ else if (task.type == DeviceTask::SHADER) {
task.split(tasks, info.cpu_threads, 256);
- else
+ }
+ else {
task.split(tasks, info.cpu_threads);
+ }
- foreach (DeviceTask &task, tasks)
- task_pool.push(new CPUDeviceTask(this, task));
+ foreach (DeviceTask &task, tasks) {
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
+ }
}
void task_wait()
@@ -1326,7 +1403,7 @@ int2 CPUSplitKernel::split_kernel_local_size()
int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/,
device_memory & /*data*/,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
return make_int2(1, 1);
}
@@ -1358,6 +1435,10 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.has_osl = true;
info.has_half_images = true;
info.has_profiling = true;
+ info.denoisers = DENOISER_NLM;
+ if (openimagedenoise_supported()) {
+ info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+ }
devices.insert(devices.begin(), info);
}