Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- intern/cycles/blender/addon/properties.py | 44
-rw-r--r-- intern/cycles/blender/addon/ui.py | 32
-rw-r--r-- intern/cycles/blender/blender_camera.cpp | 9
-rw-r--r-- intern/cycles/blender/blender_device.cpp | 53
-rw-r--r-- intern/cycles/blender/blender_session.cpp | 35
-rw-r--r-- intern/cycles/blender/blender_sync.cpp | 18
-rw-r--r-- intern/cycles/blender/blender_sync.h | 3
-rw-r--r-- intern/cycles/blender/blender_viewport.cpp | 16
-rw-r--r-- intern/cycles/blender/blender_viewport.h | 4
-rw-r--r-- intern/cycles/device/device.cpp | 24
-rw-r--r-- intern/cycles/device/device.h | 1
-rw-r--r-- intern/cycles/device/device_cpu.cpp | 36
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 53
-rw-r--r-- intern/cycles/device/device_memory.h | 5
-rw-r--r-- intern/cycles/device/device_multi.cpp | 136
-rw-r--r-- intern/cycles/device/device_optix.cpp | 64
-rw-r--r-- intern/cycles/device/device_task.cpp | 6
-rw-r--r-- intern/cycles/device/device_task.h | 9
-rw-r--r-- intern/cycles/device/opencl/opencl_split.cpp | 32
-rw-r--r-- intern/cycles/kernel/kernels/cuda/filter.cu | 6
-rw-r--r-- intern/cycles/render/buffers.cpp | 2
-rw-r--r-- intern/cycles/render/session.cpp | 246
-rw-r--r-- intern/cycles/render/session.h | 11
-rw-r--r-- intern/cycles/render/tile.cpp | 131
-rw-r--r-- intern/cycles/render/tile.h | 10
25 files changed, 729 insertions, 257 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 5f163c2510b..eafe37618b3 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -197,7 +197,12 @@ enum_aov_types = (
('COLOR', "Color", "Write a Color pass", 1),
)
-enum_denoising_optix_input_passes= (
+enum_viewport_denoising = (
+ ('NONE', "None", "Disable viewport denoising", 0),
+ ('OPTIX', "OptiX AI-Accelerated", "Use the OptiX denoiser running on the GPU (requires at least one compatible OptiX device)", 1),
+)
+
+enum_denoising_optix_input_passes = (
('RGB', "Color", "Use only color as input", 1),
('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
@@ -229,6 +234,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='PATH',
)
+ preview_pause: BoolProperty(
+ name="Pause Preview",
+ description="Pause all viewport preview renders",
+ default=False,
+ )
+ preview_denoising: EnumProperty(
+ name="Viewport Denoising",
+ description="Denoise the image after each preview update with the selected denoiser engine",
+ items=enum_viewport_denoising,
+ default='NONE',
+ )
+
use_square_samples: BoolProperty(
name="Square Samples",
description="Square sampling values for easier artist control",
@@ -247,11 +264,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=(1 << 24),
default=32,
)
- preview_pause: BoolProperty(
- name="Pause Preview",
- description="Pause all viewport preview renders",
- default=False,
- )
aa_samples: IntProperty(
name="AA Samples",
description="Number of antialiasing samples to render for each pixel",
@@ -264,6 +276,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=2097151,
default=32,
)
+
diffuse_samples: IntProperty(
name="Diffuse Samples",
description="Number of diffuse bounce samples to render for each AA sample",
@@ -294,14 +307,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=1, max=1024,
default=1,
)
-
subsurface_samples: IntProperty(
name="Subsurface Samples",
description="Number of subsurface scattering samples to render for each AA sample",
min=1, max=1024,
default=1,
)
-
volume_samples: IntProperty(
name="Volume Samples",
description="Number of volume scattering samples to render for each AA sample",
@@ -1305,12 +1316,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
- use_optix_denoising: BoolProperty(
- name="Use OptiX AI Denoising",
- description="Denoise the rendered image with the OptiX AI denoiser",
- default=False,
- update=update_render_passes,
- )
denoising_diffuse_direct: BoolProperty(
name="Diffuse Direct",
description="Denoise the direct diffuse lighting",
@@ -1387,11 +1392,18 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
min=0, max=7,
default=0,
)
+
+ use_optix_denoising: BoolProperty(
+ name="OptiX AI-Accelerated",
+ description="Use the OptiX denoiser to denoise the rendered image",
+ default=False,
+ update=update_render_passes,
+ )
denoising_optix_input_passes: EnumProperty(
name="Input Passes",
- description="Controls which passes the OptiX AI denoiser should use as input, which can have different effects on the denoised image",
+ description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)",
items=enum_denoising_optix_input_passes,
- default='RGB',
+ default='RGB_ALBEDO',
)
use_pass_crypto_object: BoolProperty(
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 35d5d3801d2..f23d141e3da 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -112,6 +112,10 @@ def show_device_active(context):
return True
return context.preferences.addons[__package__].preferences.has_active_device()
+def show_optix_denoising(context):
+ # OptiX AI denoiser can be used when at least one device supports OptiX
+ return bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'))
+
def draw_samples_info(layout, context):
cscene = context.scene.cycles
@@ -177,17 +181,23 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
if not use_optix(context):
layout.prop(cscene, "progressive")
- if cscene.progressive == 'PATH' or use_branched_path(context) is False:
+ if not use_branched_path(context):
col = layout.column(align=True)
col.prop(cscene, "samples", text="Render")
col.prop(cscene, "preview_samples", text="Viewport")
-
- draw_samples_info(layout, context)
else:
col = layout.column(align=True)
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
+ # Viewport denoising is currently only supported with OptiX
+ if show_optix_denoising(context):
+ col = layout.column()
+ col.prop(cscene, "preview_denoising")
+
+ if not use_branched_path(context):
+ draw_samples_info(layout, context)
+
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
@@ -195,9 +205,7 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
- scene = context.scene
- cscene = scene.cycles
- return cscene.progressive != 'PATH' and use_branched_path(context)
+ return use_branched_path(context)
def draw(self, context):
layout = self.layout
@@ -635,9 +643,6 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = not rd.use_save_buffers
- for view_layer in scene.view_layers:
- if view_layer.cycles.use_denoising:
- sub.active = False
sub.prop(cscene, "use_progressive_refine")
@@ -981,15 +986,14 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
col = split.column(align=True)
- if use_optix(context):
- col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising")
+ if show_optix_denoising(context):
+ col.prop(cycles_view_layer, "use_optix_denoising")
+ col.separator(factor=2.0)
if cycles_view_layer.use_optix_denoising:
col.prop(cycles_view_layer, "denoising_optix_input_passes")
return
- col.separator(factor=2.0)
-
col.prop(cycles_view_layer, "denoising_radius", text="Radius")
col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
@@ -2192,8 +2196,6 @@ def draw_device(self, context):
col = layout.column()
col.prop(cscene, "feature_set")
- scene = context.scene
-
col = layout.column()
col.active = show_device_active(context)
col.prop(cscene, "device")
diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp
index c84d6e1572b..990061dd9f1 100644
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -863,7 +863,8 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
}
}
-BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
+BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene,
+ BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
@@ -899,7 +900,11 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
params.height = height;
}
- update_viewport_display_passes(b_v3d, params.passes);
+ PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
+
+ /* Can only denoise the combined image pass */
+ params.denoising_data_pass = display_pass == PASS_COMBINED &&
+ update_viewport_display_denoising(b_v3d, b_scene);
return params;
}
diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp
index 111fc8d5192..c3c307318a8 100644
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -19,6 +19,22 @@
CCL_NAMESPACE_BEGIN
+enum DenoiserType {
+ DENOISER_NONE = 0,
+ DENOISER_OPTIX = 1,
+
+ DENOISER_NUM
+};
+
+enum ComputeDevice {
+ COMPUTE_DEVICE_CPU = 0,
+ COMPUTE_DEVICE_CUDA = 1,
+ COMPUTE_DEVICE_OPENCL = 2,
+ COMPUTE_DEVICE_OPTIX = 3,
+
+ COMPUTE_DEVICE_NUM
+};
+
int blender_device_threads(BL::Scene &b_scene)
{
BL::RenderSettings b_r = b_scene.render();
@@ -40,7 +56,7 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if (!devices.empty()) {
- device = devices.front();
+ return devices.front();
}
}
else if (get_enum(cscene, "device") == 1) {
@@ -57,14 +73,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
/* Test if we are using GPU devices. */
- enum ComputeDevice {
- COMPUTE_DEVICE_CPU = 0,
- COMPUTE_DEVICE_CUDA = 1,
- COMPUTE_DEVICE_OPENCL = 2,
- COMPUTE_DEVICE_OPTIX = 3,
- COMPUTE_DEVICE_NUM = 4,
- };
-
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
@@ -106,6 +114,33 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
}
+ /* Ensure there is an OptiX device when using the OptiX denoiser. */
+ bool use_optix_denoising = DENOISER_OPTIX ==
+ get_enum(cscene, "preview_denoising", DENOISER_NUM, DENOISER_NONE);
+ BL::Scene::view_layers_iterator b_view_layer;
+ for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
+ ++b_view_layer) {
+ PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
+ if (get_boolean(crl, "use_optix_denoising")) {
+ use_optix_denoising = true;
+ }
+ }
+
+ if (use_optix_denoising && device.type != DEVICE_OPTIX) {
+ vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
+ if (!optix_devices.empty()) {
+ /* Convert to a special multi device with separate denoising devices. */
+ if (device.multi_devices.empty()) {
+ device.multi_devices.push_back(device);
+ }
+
+ /* Simply use the first available OptiX device. */
+ const DeviceInfo optix_device = optix_devices.front();
+ device.id += optix_device.id; /* Uniquely identify this special multi device. */
+ device.denoising_devices.push_back(optix_device);
+ }
+ }
+
return device;
}
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 1490348743e..2e7a72d8072 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -166,7 +166,7 @@ void BlenderSession::create_session()
/* set buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -244,7 +244,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
+ b_scene, b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -460,7 +460,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
/* render each layer */
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
@@ -706,7 +706,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
@@ -851,7 +851,6 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
free_session();
create_session();
- return;
}
/* increase samples, but never decrease */
@@ -886,10 +885,28 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
else
sync->sync_camera(b_render, b_camera_override, width, height, "");
+ /* get buffer parameters */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+
+ if (session_params.device.type != DEVICE_OPTIX &&
+ session_params.device.denoising_devices.empty()) {
+ /* cannot use OptiX denoising when it is not supported by the device. */
+ buffer_params.denoising_data_pass = false;
+ }
+ else {
+ session->set_denoising(buffer_params.denoising_data_pass, true);
+ }
+
+ if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) {
+ scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
+
+ /* Force a scene and session reset below. */
+ scene->film->tag_update(scene);
+ }
+
/* reset if needed */
if (scene->need_reset()) {
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
/* After session reset, so device is not accessing image data anymore. */
@@ -956,7 +973,7 @@ bool BlenderSession::draw(int w, int h)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
@@ -974,7 +991,7 @@ bool BlenderSession::draw(int w, int h)
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 8960c84567e..0412654d3bd 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -846,20 +846,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* progressive refine */
BL::RenderSettings b_r = b_scene.render();
- params.progressive_refine = (b_engine.is_preview() ||
- get_boolean(cscene, "use_progressive_refine")) &&
- !b_r.use_save_buffers();
-
- if (params.progressive_refine) {
- BL::Scene::view_layers_iterator b_view_layer;
- for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
- ++b_view_layer) {
- PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
- if (get_boolean(crl, "use_denoising")) {
- params.progressive_refine = false;
- }
- }
- }
+ params.progressive_refine = b_engine.is_preview() ||
+ get_boolean(cscene, "use_progressive_refine");
+ if (b_r.use_save_buffers())
+ params.progressive_refine = false;
if (background) {
if (params.progressive_refine)
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index f8134ff8b5c..efd4511eb1e 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -95,7 +95,8 @@ class BlenderSync {
BL::Scene &b_scene,
bool background);
static bool get_session_pause(BL::Scene &b_scene, bool background);
- static BufferParams get_buffer_params(BL::RenderSettings &b_render,
+ static BufferParams get_buffer_params(BL::Scene &b_scene,
+ BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp
index 73ef5f94720..93e84e28032 100644
--- a/intern/cycles/blender/blender_viewport.cpp
+++ b/intern/cycles/blender/blender_viewport.cpp
@@ -61,6 +61,17 @@ const bool BlenderViewportParameters::custom_viewport_parameters() const
return !(use_scene_world && use_scene_lights);
}
+bool BlenderViewportParameters::get_viewport_display_denoising(BL::SpaceView3D &b_v3d,
+ BL::Scene &b_scene)
+{
+ bool use_denoising = false;
+ if (b_v3d) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ use_denoising = get_enum(cscene, "preview_denoising") != 0;
+ }
+ return use_denoising;
+}
+
PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d)
{
PassType display_pass = PASS_NONE;
@@ -72,6 +83,11 @@ PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceVi
return display_pass;
}
+bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene)
+{
+ return BlenderViewportParameters::get_viewport_display_denoising(b_v3d, b_scene);
+}
+
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
{
if (b_v3d) {
diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h
index f26d0d38115..d2b55358c27 100644
--- a/intern/cycles/blender/blender_viewport.h
+++ b/intern/cycles/blender/blender_viewport.h
@@ -44,11 +44,15 @@ class BlenderViewportParameters {
friend class BlenderSync;
public:
+ /* Get whether to enable denoising data pass in viewport. */
+ static bool get_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
/* Retrieve the render pass that needs to be displayed on the given `SpaceView3D`
* When the `b_v3d` parameter is not given `PASS_NONE` will be returned. */
static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d);
};
+bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
+
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 76670351734..c0d02e0605f 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -366,6 +366,15 @@ void Device::draw_pixels(device_memory &rgba,
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
+#ifdef WITH_MULTI
+ if (!info.multi_devices.empty()) {
+ /* Always create a multi device when info contains multiple devices.
+ * This is done so that the type can still be e.g. DEVICE_CPU to indicate
+ * that it is a homogeneous collection of devices, which simplifies checks. */
+ return device_multi_create(info, stats, profiler, background);
+ }
+#endif
+
Device *device;
switch (info.type) {
@@ -388,11 +397,6 @@ Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
device = NULL;
break;
#endif
-#ifdef WITH_MULTI
- case DEVICE_MULTI:
- device = device_multi_create(info, stats, profiler, background);
- break;
-#endif
#ifdef WITH_NETWORK
case DEVICE_NETWORK:
device = device_network_create(info, stats, profiler, "127.0.0.1");
@@ -586,7 +590,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}
DeviceInfo info;
- info.type = DEVICE_MULTI;
+ info.type = subdevices.front().type;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
@@ -624,6 +628,14 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.multi_devices.push_back(device);
}
+ /* Create unique ID for this combination of devices. */
+ info.id += device.id;
+
+ /* Set device type to MULTI if subdevices are not of a common type. */
+ if (device.type != info.type) {
+ info.type = DEVICE_MULTI;
+ }
+
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 66fcac921d3..2aeed3f0e89 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -83,6 +83,7 @@ class DeviceInfo {
bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
+ vector<DeviceInfo> denoising_devices;
DeviceInfo()
{
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c2843a61e6d..42ebf3a8399 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -508,13 +508,14 @@ class CPUDevice : public Device {
void thread_run(DeviceTask *task)
{
- if (task->type == DeviceTask::RENDER) {
+ if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE)
thread_render(*task);
- }
- else if (task->type == DeviceTask::FILM_CONVERT)
- thread_film_convert(*task);
else if (task->type == DeviceTask::SHADER)
thread_shader(*task);
+ else if (task->type == DeviceTask::FILM_CONVERT)
+ thread_film_convert(*task);
+ else if (task->type == DeviceTask::DENOISE_BUFFER)
+ thread_denoise(*task);
}
class CPUDeviceTask : public DeviceTask {
@@ -954,6 +955,33 @@ class CPUDevice : public Device {
delete split_kernel;
}
+ void thread_denoise(DeviceTask &task)
+ {
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
+
+ ProfilingState denoising_profiler_state;
+ profiler.add_state(&denoising_profiler_state);
+ denoising.profiler = &denoising_profiler_state;
+
+ denoise(denoising, tile);
+ task.update_progress(&tile, tile.w * tile.h);
+
+ profiler.remove_state(&denoising_profiler_state);
+ }
+
void thread_film_convert(DeviceTask &task)
{
float sample_scale = 1.0f / (task.sample + 1);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index dfd80d678fd..e3c737cc2e7 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -994,16 +994,16 @@ class CUDADevice : public Device {
else if (mem.type == MEM_TEXTURE) {
assert(!"mem_copy_from not supported for textures.");
}
- else {
- CUDAContextScope scope(this);
- size_t offset = elem * y * w;
- size_t size = elem * w * h;
+ else if (mem.host_pointer) {
+ const size_t size = elem * w * h;
+ const size_t offset = elem * y * w;
- if (mem.host_pointer && mem.device_pointer) {
+ if (mem.device_pointer) {
+ const CUDAContextScope scope(this);
cuda_assert(cuMemcpyDtoH(
- (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size));
+ (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
}
- else if (mem.host_pointer) {
+ else {
memset((char *)mem.host_pointer + offset, 0, size);
}
}
@@ -1014,20 +1014,19 @@ class CUDADevice : public Device {
if (!mem.device_pointer) {
mem_alloc(mem);
}
-
- if (mem.host_pointer) {
- memset(mem.host_pointer, 0, mem.memory_size());
+ if (!mem.device_pointer) {
+ return;
}
- /* If use_mapped_host of mem is false, mem.device_pointer currently
- * refers to device memory regardless of mem.host_pointer and
- * mem.shared_pointer. */
-
- if (mem.device_pointer &&
- (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
- CUDAContextScope scope(this);
+ /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
+ * regardless of mem.host_pointer and mem.shared_pointer. */
+ if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+ const CUDAContextScope scope(this);
cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
}
+ else if (mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
+ }
}
void mem_free(device_memory &mem)
@@ -2240,7 +2239,7 @@ class CUDADevice : public Device {
{
CUDAContextScope scope(this);
- if (task->type == DeviceTask::RENDER) {
+ if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
DeviceRequestedFeatures requested_features;
if (use_split_kernel()) {
if (split_kernel == NULL) {
@@ -2288,6 +2287,24 @@ class CUDADevice : public Device {
cuda_assert(cuCtxSynchronize());
}
+ else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ RenderTile tile;
+ tile.x = task->x;
+ tile.y = task->y;
+ tile.w = task->w;
+ tile.h = task->h;
+ tile.buffer = task->buffer;
+ tile.sample = task->sample + task->num_samples;
+ tile.num_samples = task->num_samples;
+ tile.start_sample = task->sample;
+ tile.offset = task->offset;
+ tile.stride = task->stride;
+ tile.buffers = task->buffers;
+
+ DenoisingTask denoising(this, *task);
+ denoise(tile, denoising);
+ task->update_progress(&tile, tile.w * tile.h);
+ }
}
class CUDADeviceTask : public DeviceTask {
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 60740807568..2949773ef0c 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -427,6 +427,11 @@ template<typename T> class device_vector : public device_memory {
device_copy_to();
}
+ void copy_from_device()
+ {
+ device_copy_from(0, data_width, data_height, sizeof(T));
+ }
+
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index b8587eb0a62..9cbf69a191a 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -42,7 +42,7 @@ class MultiDevice : public Device {
map<device_ptr, device_ptr> ptr_map;
};
- list<SubDevice> devices;
+ list<SubDevice> devices, denoising_devices;
device_ptr unique_key;
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
@@ -61,6 +61,12 @@ class MultiDevice : public Device {
}
}
+ foreach (DeviceInfo &subinfo, info.denoising_devices) {
+ Device *device = Device::create(subinfo, sub_stats_, profiler, background);
+
+ denoising_devices.push_back(SubDevice(device));
+ }
+
#ifdef WITH_NETWORK
/* try to add network devices */
ServerDiscovery discovery(true);
@@ -80,17 +86,18 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
delete sub.device;
+ foreach (SubDevice &sub, denoising_devices)
+ delete sub.device;
}
const string &error_message()
{
- foreach (SubDevice &sub, devices) {
- if (sub.device->error_message() != "") {
- if (error_msg == "")
- error_msg = sub.device->error_message();
- break;
- }
- }
+ error_msg.clear();
+
+ foreach (SubDevice &sub, devices)
+ error_msg += sub.device->error_message();
+ foreach (SubDevice &sub, denoising_devices)
+ error_msg += sub.device->error_message();
return error_msg;
}
@@ -118,6 +125,12 @@ class MultiDevice : public Device {
if (!sub.device->load_kernels(requested_features))
return false;
+ if (requested_features.use_denoising) {
+ foreach (SubDevice &sub, denoising_devices)
+ if (!sub.device->load_kernels(requested_features))
+ return false;
+ }
+
return true;
}
@@ -127,6 +140,12 @@ class MultiDevice : public Device {
if (!sub.device->wait_for_availability(requested_features))
return false;
+ if (requested_features.use_denoising) {
+ foreach (SubDevice &sub, denoising_devices)
+ if (!sub.device->wait_for_availability(requested_features))
+ return false;
+ }
+
return true;
}
@@ -150,16 +169,17 @@ class MultiDevice : public Device {
break;
}
}
+
return result;
}
bool build_optix_bvh(BVH *bvh)
{
- // Broadcast acceleration structure build to all devices
- foreach (SubDevice &sub, devices) {
+ // Broadcast acceleration structure build to all render devices
+ foreach (SubDevice &sub, devices)
if (!sub.device->build_optix_bvh(bvh))
return false;
- }
+
return true;
}
@@ -236,6 +256,17 @@ class MultiDevice : public Device {
sub.ptr_map[key] = mem.device_pointer;
}
+ if (strcmp(mem.name, "RenderBuffers") == 0) {
+ foreach (SubDevice &sub, denoising_devices) {
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
+
+ sub.device->mem_zero(mem);
+ sub.ptr_map[key] = mem.device_pointer;
+ }
+ }
+
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
@@ -255,6 +286,17 @@ class MultiDevice : public Device {
sub.ptr_map.erase(sub.ptr_map.find(key));
}
+ if (strcmp(mem.name, "RenderBuffers") == 0) {
+ foreach (SubDevice &sub, denoising_devices) {
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+ mem.device_size = existing_size;
+
+ sub.device->mem_free(mem);
+ sub.ptr_map.erase(sub.ptr_map.find(key));
+ }
+ }
+
mem.device = this;
mem.device_pointer = 0;
mem.device_size = 0;
@@ -302,10 +344,21 @@ class MultiDevice : public Device {
void map_tile(Device *sub_device, RenderTile &tile)
{
+ if (!tile.buffer) {
+ return;
+ }
+
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
- if (tile.buffer)
- tile.buffer = sub.ptr_map[tile.buffer];
+ tile.buffer = sub.ptr_map[tile.buffer];
+ return;
+ }
+ }
+
+ foreach (SubDevice &sub, denoising_devices) {
+ if (sub.device == sub_device) {
+ tile.buffer = sub.ptr_map[tile.buffer];
+ return;
}
}
}
@@ -320,6 +373,12 @@ class MultiDevice : public Device {
i++;
}
+ foreach (SubDevice &sub, denoising_devices) {
+ if (sub.device == sub_device)
+ return i;
+ i++;
+ }
+
return -1;
}
@@ -330,11 +389,20 @@ class MultiDevice : public Device {
continue;
}
+ device_vector<float> &mem = tiles[i].buffers->buffer;
+ tiles[i].buffer = mem.device_pointer;
+
+ if (mem.device == this && denoising_devices.empty()) {
+ /* Skip unnecessary copies in viewport mode (buffer covers the
+ * whole image), but still need to fix up the tile device pointer. */
+ map_tile(sub_device, tiles[i]);
+ continue;
+ }
+
/* If the tile was rendered on another device, copy its memory to
* to the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
- device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
@@ -342,12 +410,20 @@ class MultiDevice : public Device {
* overwriting the buffer being denoised by the CPU thread. */
if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
- mem.copy_from_device(0, mem.data_size, 1);
+ mem.copy_from_device();
}
- mem.swap_device(sub_device, 0, 0);
+ if (mem.device == this) {
+ /* Can re-use memory if tile is already allocated on the sub device. */
+ map_tile(sub_device, tiles[i]);
+ mem.swap_device(sub_device, mem.device_size, tiles[i].buffer);
+ }
+ else {
+ mem.swap_device(sub_device, 0, 0);
+ }
mem.copy_to_device();
+
tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
@@ -358,11 +434,17 @@ class MultiDevice : public Device {
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
- /* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
+
+ if (mem.device == this && denoising_devices.empty()) {
+ return;
+ }
+
+ /* Copy denoised result back to the host. */
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
- mem.copy_from_device(0, mem.data_size, 1);
+ mem.copy_from_device();
mem.restore_device();
+
/* Copy denoised result to the original device. */
mem.copy_to_device();
@@ -372,7 +454,9 @@ class MultiDevice : public Device {
}
device_vector<float> &mem = tiles[i].buffers->buffer;
- if (mem.device != sub_device) {
+
+ if (mem.device != sub_device && mem.device != this) {
+ /* Free up memory again if it was allocated for the copy above. */
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem);
mem.restore_device();
@@ -398,10 +482,16 @@ class MultiDevice : public Device {
void task_add(DeviceTask &task)
{
+ list<SubDevice> &task_devices = denoising_devices.empty() ||
+ (task.type != DeviceTask::DENOISE &&
+ task.type != DeviceTask::DENOISE_BUFFER) ?
+ devices :
+ denoising_devices;
+
list<DeviceTask> tasks;
- task.split(tasks, devices.size());
+ task.split(tasks, task_devices.size());
- foreach (SubDevice &sub, devices) {
+ foreach (SubDevice &sub, task_devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
@@ -426,12 +516,16 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
sub.device->task_wait();
+ foreach (SubDevice &sub, denoising_devices)
+ sub.device->task_wait();
}
void task_cancel()
{
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
+ foreach (SubDevice &sub, denoising_devices)
+ sub.device->task_cancel();
}
protected:
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 98469fb37b0..ac119a723e3 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -213,6 +213,7 @@ class OptiXDevice : public Device {
OptixDenoiser denoiser = NULL;
vector<pair<int2, CUdeviceptr>> denoiser_state;
+ int denoiser_input_passes = 0;
public:
OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
@@ -632,7 +633,7 @@ class OptiXDevice : public Device {
if (have_error())
return; // Abort early if there was an error previously
- if (task.type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) {
RenderTile tile;
while (task.acquire_tile(this, tile)) {
if (tile.task == RenderTile::PATH_TRACE)
@@ -652,6 +653,22 @@ class OptiXDevice : public Device {
else if (task.type == DeviceTask::FILM_CONVERT) {
launch_film_convert(task, thread_index);
}
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
+ // Set up a single tile that covers the whole task and denoise it
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ launch_denoise(task, tile, thread_index);
+ }
}
void launch_render(DeviceTask &task, RenderTile &rtile, int thread_index)
@@ -740,6 +757,7 @@ class OptiXDevice : public Device {
RenderTile rtiles[10];
rtiles[4] = rtile;
task.map_neighbor_tiles(rtiles, this);
+ rtile = rtiles[4]; // Tile may have been modified by mapping code
// Calculate size of the tile to denoise (including overlap)
int4 rect = make_int4(
@@ -846,7 +864,14 @@ class OptiXDevice : public Device {
}
# endif
- if (denoiser == NULL) {
+ const bool recreate_denoiser = (denoiser == NULL) ||
+ (task.denoising.optix_input_passes != denoiser_input_passes);
+ if (recreate_denoiser) {
+ // Destroy existing handle before creating new one
+ if (denoiser != NULL) {
+ optixDenoiserDestroy(denoiser);
+ }
+
// Create OptiX denoiser handle on demand when it is first used
OptixDenoiserOptions denoiser_options;
assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
@@ -856,6 +881,9 @@ class OptiXDevice : public Device {
check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
check_result_optix_ret(
optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
+
+ // OptiX denoiser handle was created with the requested number of input passes
+ denoiser_input_passes = task.denoising.optix_input_passes;
}
OptixDenoiserSizes sizes = {};
@@ -868,13 +896,16 @@ class OptiXDevice : public Device {
const size_t scratch_offset = sizes.stateSizeInBytes;
// Allocate denoiser state if tile size has changed since last setup
- if (state_size.x != rect_size.x || state_size.y != rect_size.y) {
+ if (state_size.x != rect_size.x || state_size.y != rect_size.y || recreate_denoiser) {
+ // Free existing state before allocating new one
if (state) {
cuMemFree(state);
state = 0;
}
+
check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size));
+ // Initialize denoiser state for the current tile size
check_result_optix_ret(optixDenoiserSetup(denoiser,
cuda_stream[thread_index],
rect_size.x,
@@ -1972,17 +2003,17 @@ class OptiXDevice : public Device {
else if (mem.type == MEM_TEXTURE) {
assert(!"mem_copy_from not supported for textures.");
}
- else {
+ else if (mem.host_pointer) {
// Calculate linear memory offset and size
const size_t size = elem * w * h;
const size_t offset = elem * y * w;
- if (mem.host_pointer && mem.device_pointer) {
+ if (mem.device_pointer) {
const CUDAContextScope scope(cuda_context);
check_result_cuda(cuMemcpyDtoH(
(char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
}
- else if (mem.host_pointer) {
+ else {
memset((char *)mem.host_pointer + offset, 0, size);
}
}
@@ -1990,21 +2021,22 @@ class OptiXDevice : public Device {
void mem_zero(device_memory &mem) override
{
- if (mem.host_pointer)
- memset(mem.host_pointer, 0, mem.memory_size());
-
- if (!mem.device_pointer)
+ if (!mem.device_pointer) {
mem_alloc(mem); // Need to allocate memory first if it does not exist yet
+ }
+ if (!mem.device_pointer) {
+ return;
+ }
- /* If use_mapped_host of mem is false, mem.device_pointer currently
- * refers to device memory regardless of mem.host_pointer and
- * mem.shared_pointer. */
-
- if (mem.device_pointer &&
- (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
+ /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
+ * regardless of mem.host_pointer and mem.shared_pointer. */
+ if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(cuda_context);
check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
}
+ else if (mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
+ }
}
void mem_free(device_memory &mem) override
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 376ad06a734..8f15e8c8c1e 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
if (type == SHADER) {
num = min(shader_w, num);
}
- else if (type == RENDER) {
+ else if (type == RENDER || type == DENOISE) {
}
else {
num = min(h, num);
@@ -94,7 +94,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
tasks.push_back(task);
}
}
- else if (type == RENDER) {
+ else if (type == RENDER || type == DENOISE) {
for (int i = 0; i < num; i++)
tasks.push_back(*this);
}
@@ -115,7 +115,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
- if ((type != RENDER) && (type != SHADER))
+ if (type == FILM_CONVERT)
return;
if (update_progress_sample) {
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 1b1e97cdb10..0f718528b86 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -47,7 +47,7 @@ class DenoiseParams {
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
- /* Controls which passes the OptiX AI denoiser should use as input. */
+ /* Passes handed over to the OptiX denoiser (default to color + albedo). */
int optix_input_passes;
DenoiseParams()
@@ -58,13 +58,13 @@ class DenoiseParams {
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
- optix_input_passes = 1;
+ optix_input_passes = 2;
}
};
class DeviceTask : public Task {
public:
- typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
+ typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type;
Type type;
int x, y, w, h;
@@ -81,7 +81,7 @@ class DeviceTask : public Task {
int shader_filter;
int shader_x, shader_w;
- int passes_size;
+ RenderBuffers *buffers;
explicit DeviceTask(Type type = RENDER);
@@ -114,7 +114,6 @@ class DeviceTask : public Task {
bool need_finish_queue;
bool integrator_branched;
- int2 requested_tile_size;
protected:
double last_update_time;
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 76f9ce7a18f..af40aa89db4 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -1308,13 +1308,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
{
flush_texture_buffers();
- if (task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
- }
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
- }
- else if (task->type == DeviceTask::RENDER) {
+ if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
RenderTile tile;
DenoisingTask denoising(this, *task);
@@ -1352,6 +1346,30 @@ void OpenCLDevice::thread_run(DeviceTask *task)
kgbuffer.free();
}
+ else if (task->type == DeviceTask::SHADER) {
+ shader(*task);
+ }
+ else if (task->type == DeviceTask::FILM_CONVERT) {
+ film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ }
+ else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ RenderTile tile;
+ tile.x = task->x;
+ tile.y = task->y;
+ tile.w = task->w;
+ tile.h = task->h;
+ tile.buffer = task->buffer;
+ tile.sample = task->sample + task->num_samples;
+ tile.num_samples = task->num_samples;
+ tile.start_sample = task->sample;
+ tile.offset = task->offset;
+ tile.stride = task->stride;
+ tile.buffers = task->buffers;
+
+ DenoisingTask denoising(this, *task);
+ denoise(tile, denoising);
+ task->update_progress(&tile, tile.w * tile.h);
+ }
}
void OpenCLDevice::film_convert(DeviceTask &task,
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index fbb773533ce..22fd5ea5634 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -57,9 +57,9 @@ kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int st
if (num_inputs > 0) {
float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float);
float *out = rgb + (x + y * sw) * 3;
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
+ out[0] = clamp(in[0], 0.0f, 10000.0f);
+ out[1] = clamp(in[1], 0.0f, 10000.0f);
+ out[2] = clamp(in[2], 0.0f, 10000.0f);
}
if (num_inputs > 1) {
float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float);
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index b08b6c84c1d..41e1b73fdac 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -146,7 +146,7 @@ void RenderBuffers::reset(BufferParams &params_)
params = params_;
/* re-allocate buffer */
- buffer.alloc(params.width * params.height * params.get_passes_size());
+ buffer.alloc(params.width * params.get_passes_size(), params.height);
buffer.zero_to_device();
}
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index dac7fbac806..160b77d5f14 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -183,7 +183,8 @@ bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (gpu_draw_ready) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
- if (!buffer_params.modified(display->params)) {
+ if (buffer_params.width == display->params.width &&
+ buffer_params.height == display->params.height) {
/* for CUDA we need to do tone-mapping still, since we can
* only access GL buffers from the main thread. */
if (gpu_need_display_buffer_update) {
@@ -211,6 +212,7 @@ void Session::run_gpu()
reset_time = time_dt();
last_update_time = time_dt();
+ last_display_time = last_update_time;
progress.set_render_start_time();
@@ -291,12 +293,21 @@ void Session::run_gpu()
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers_mutex);
+ /* avoid excessive denoising in viewport after reaching a certain amount of samples */
+ bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 ||
+ (time_dt() - last_display_time) >= params.progressive_update_timeout;
+
/* update status and timing */
update_status_time();
/* render */
render();
+ /* denoise */
+ if (need_denoise) {
+ denoise();
+ }
+
device->task_wait();
if (!device->error_message().empty())
@@ -305,7 +316,7 @@ void Session::run_gpu()
/* update status and timing */
update_status_time();
- gpu_need_display_buffer_update = true;
+ gpu_need_display_buffer_update = need_denoise || !params.run_denoising;
gpu_draw_ready = true;
progress.set_update();
@@ -359,7 +370,8 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (display->draw_ready()) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
- if (!buffer_params.modified(display->params)) {
+ if (buffer_params.width == display->params.width &&
+ buffer_params.height == display->params.height) {
display->draw(device, draw_params);
if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
@@ -372,7 +384,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
return false;
}
-bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
+bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::Task task)
{
if (progress.get_cancel()) {
if (params.progressive_refine == false) {
@@ -387,8 +399,14 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
Tile *tile;
int device_num = device->device_number(tile_device);
- if (!tile_manager.next_tile(tile, device_num))
+ while (!tile_manager.next_tile(tile, device_num, task == RenderTile::DENOISE)) {
+ /* Wait for denoising tiles to become available */
+ if (task == RenderTile::DENOISE && !progress.get_cancel() && tile_manager.has_tiles()) {
+ denoising_cond.wait(tile_lock);
+ continue;
+ }
return false;
+ }
/* fill render tile */
rtile.x = tile_manager.state.buffer.full_x + tile->x;
@@ -399,7 +417,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
- rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+ rtile.task = task;
tile_lock.unlock();
@@ -413,6 +431,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
device->map_tile(tile_device, rtile);
+ /* Reset copy state, since buffer contents change after the tile was acquired */
+ buffers->map_neighbor_copied = false;
+
return true;
}
@@ -429,6 +450,8 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
tile->buffers->reset(buffer_params);
}
+ tile->buffers->map_neighbor_copied = false;
+
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
rtile.buffer = tile->buffers->buffer.device_pointer;
@@ -484,45 +507,75 @@ void Session::release_tile(RenderTile &rtile)
}
update_status_time();
+
+ /* Notify denoising thread that a tile was finished. */
+ denoising_cond.notify_all();
}
void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
- int center_idx = tiles[4].tile_index;
- assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
- BufferParams buffer_params = tile_manager.params;
- int4 image_region = make_int4(buffer_params.full_x,
- buffer_params.full_y,
- buffer_params.full_x + buffer_params.width,
- buffer_params.full_y + buffer_params.height);
-
- for (int dy = -1, i = 0; dy <= 1; dy++) {
- for (int dx = -1; dx <= 1; dx++, i++) {
- int px = tiles[4].x + dx * params.tile_size.x;
- int py = tiles[4].y + dy * params.tile_size.y;
- if (px >= image_region.x && py >= image_region.y && px < image_region.z &&
- py < image_region.w) {
- int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx;
- Tile *tile = &tile_manager.state.tiles[tile_index];
- assert(tile->buffers);
-
- tiles[i].buffer = tile->buffers->buffer.device_pointer;
- tiles[i].x = tile_manager.state.buffer.full_x + tile->x;
- tiles[i].y = tile_manager.state.buffer.full_y + tile->y;
- tiles[i].w = tile->w;
- tiles[i].h = tile->h;
- tiles[i].buffers = tile->buffers;
-
- tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
- }
- else {
- tiles[i].buffer = (device_ptr)NULL;
- tiles[i].buffers = NULL;
- tiles[i].x = clamp(px, image_region.x, image_region.z);
- tiles[i].y = clamp(py, image_region.y, image_region.w);
- tiles[i].w = tiles[i].h = 0;
+ const int4 image_region = make_int4(
+ tile_manager.state.buffer.full_x,
+ tile_manager.state.buffer.full_y,
+ tile_manager.state.buffer.full_x + tile_manager.state.buffer.width,
+ tile_manager.state.buffer.full_y + tile_manager.state.buffer.height);
+
+ if (!tile_manager.schedule_denoising) {
+ /* Fix up tile slices with overlap. */
+ if (tile_manager.slice_overlap != 0) {
+ int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y);
+ tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y;
+ tiles[4].y = y;
+ }
+
+ /* Tiles are not being denoised individually, which means the entire image is processed. */
+ tiles[3].x = tiles[4].x;
+ tiles[1].y = tiles[4].y;
+ tiles[5].x = tiles[4].x + tiles[4].w;
+ tiles[7].y = tiles[4].y + tiles[4].h;
+ }
+ else {
+ int center_idx = tiles[4].tile_index;
+ assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
+
+ for (int dy = -1, i = 0; dy <= 1; dy++) {
+ for (int dx = -1; dx <= 1; dx++, i++) {
+ int nindex = tile_manager.get_neighbor_index(center_idx, i);
+ if (nindex >= 0) {
+ Tile *tile = &tile_manager.state.tiles[nindex];
+
+ tiles[i].x = image_region.x + tile->x;
+ tiles[i].y = image_region.y + tile->y;
+ tiles[i].w = tile->w;
+ tiles[i].h = tile->h;
+
+ if (buffers) {
+ tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride);
+
+ tiles[i].buffer = buffers->buffer.device_pointer;
+ tiles[i].buffers = buffers;
+ }
+ else {
+ assert(tile->buffers);
+ tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
+
+ tiles[i].buffer = tile->buffers->buffer.device_pointer;
+ tiles[i].buffers = tile->buffers;
+ }
+ }
+ else {
+ int px = tiles[4].x + dx * params.tile_size.x;
+ int py = tiles[4].y + dy * params.tile_size.y;
+
+ tiles[i].x = clamp(px, image_region.x, image_region.z);
+ tiles[i].y = clamp(py, image_region.y, image_region.w);
+ tiles[i].w = tiles[i].h = 0;
+
+ tiles[i].buffer = (device_ptr)NULL;
+ tiles[i].buffers = NULL;
+ }
}
}
}
@@ -545,6 +598,7 @@ void Session::run_cpu()
bool tiles_written = false;
last_update_time = time_dt();
+ last_display_time = last_update_time;
{
/* reset once to start */
@@ -620,11 +674,6 @@ void Session::run_cpu()
}
if (!no_tiles) {
- /* buffers mutex is locked entirely while rendering each
- * sample, and released/reacquired on each iteration to allow
- * reset and draw in between */
- thread_scoped_lock buffers_lock(buffers_mutex);
-
/* update scene */
scoped_timer update_timer;
if (update_scene()) {
@@ -638,17 +687,31 @@ void Session::run_cpu()
if (progress.get_cancel())
break;
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ /* avoid excessive denoising in viewport after reaching a certain amount of samples */
+ bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 ||
+ (time_dt() - last_display_time) >= params.progressive_update_timeout;
+
/* update status and timing */
update_status_time();
/* render */
render();
+ /* denoise */
+ if (need_denoise) {
+ denoise();
+ }
+
/* update status and timing */
update_status_time();
if (!params.background)
- need_copy_to_display_buffer = true;
+ need_copy_to_display_buffer = need_denoise || !params.run_denoising;
if (!device->error_message().empty())
progress.set_error(device->error_message());
@@ -869,6 +932,20 @@ void Session::set_pause(bool pause_)
pause_cond.notify_all();
}
+void Session::set_denoising(bool denoising, bool optix_denoising)
+{
+ /* Lock buffers so no denoising operation is triggered while the settings are changed here. */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ params.run_denoising = denoising;
+ params.full_denoising = !optix_denoising;
+ params.optix_denoising = optix_denoising;
+
+ // TODO(pmours): Query the required overlap value for denoising from the device?
+ tile_manager.slice_overlap = denoising && !params.background ? 64 : 0;
+ tile_manager.schedule_denoising = denoising && !buffers;
+}
+
void Session::wait()
{
if (session_thread) {
@@ -1016,33 +1093,74 @@ void Session::render()
/* Add path trace task. */
DeviceTask task(DeviceTask::RENDER);
- task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::PATH_TRACE);
task.release_tile = function_bind(&Session::release_tile, this, _1);
- task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
- task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
- task.requested_tile_size = params.tile_size;
- task.passes_size = tile_manager.params.get_passes_size();
- if (params.run_denoising) {
- task.denoising = params.denoising;
-
- assert(!scene->film->need_update);
- task.pass_stride = scene->film->pass_stride;
- task.target_pass_stride = task.pass_stride;
- task.pass_denoising_data = scene->film->denoising_data_offset;
- task.pass_denoising_clean = scene->film->denoising_clean_offset;
-
- task.denoising_from_render = true;
- task.denoising_do_filter = params.full_denoising;
- task.denoising_use_optix = params.optix_denoising;
- task.denoising_write_passes = params.write_denoising_passes;
+ device->task_add(task);
+}
+
+void Session::denoise()
+{
+ if (!params.run_denoising) {
+ return;
+ }
+
+ /* It can happen that denoising was already enabled, but the scene still needs an update. */
+ if (scene->film->need_update || !scene->film->denoising_data_offset) {
+ return;
}
+ /* Add separate denoising task. */
+ DeviceTask task(DeviceTask::DENOISE);
+
+ if (tile_manager.schedule_denoising) {
+ /* Run denoising on each tile. */
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::DENOISE);
+ task.release_tile = function_bind(&Session::release_tile, this, _1);
+ task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
+ task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
+ }
+ else {
+ assert(buffers);
+
+ /* Wait for rendering to finish. */
+ device->task_wait();
+
+ /* Run denoising on the whole image at once. */
+ task.type = DeviceTask::DENOISE_BUFFER;
+ task.x = tile_manager.state.buffer.full_x;
+ task.y = tile_manager.state.buffer.full_y;
+ task.w = tile_manager.state.buffer.width;
+ task.h = tile_manager.state.buffer.height;
+ task.buffer = buffers->buffer.device_pointer;
+ task.sample = tile_manager.state.sample;
+ task.num_samples = tile_manager.state.num_samples;
+ tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
+ task.buffers = buffers;
+ }
+
+ task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
+ task.need_finish_queue = params.progressive_refine;
+ task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
+ task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
+
+ task.denoising = params.denoising;
+
+ task.pass_stride = scene->film->pass_stride;
+ task.target_pass_stride = task.pass_stride;
+ task.pass_denoising_data = scene->film->denoising_data_offset;
+ task.pass_denoising_clean = scene->film->denoising_clean_offset;
+
+ task.denoising_from_render = true;
+ task.denoising_do_filter = params.full_denoising;
+ task.denoising_use_optix = params.optix_denoising;
+ task.denoising_write_passes = params.write_denoising_passes;
+
device->task_add(task);
}
@@ -1067,6 +1185,8 @@ void Session::copy_to_display_buffer(int sample)
/* set display to new size */
display->draw_set(task.w, task.h);
+
+ last_display_time = time_dt();
}
display_outdated = false;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index ec465601541..3ef2b70879a 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -154,6 +154,7 @@ class Session {
void reset(BufferParams &params, int samples);
void set_samples(int samples);
void set_pause(bool pause);
+ void set_denoising(bool denoising, bool optix_denoising);
bool update_scene();
bool load_kernels(bool lock_scene = true);
@@ -178,8 +179,10 @@ class Session {
void update_status_time(bool show_pause = false, bool show_done = false);
- void copy_to_display_buffer(int sample);
void render();
+ void denoise();
+ void copy_to_display_buffer(int sample);
+
void reset_(BufferParams &params, int samples);
void run_cpu();
@@ -190,7 +193,7 @@ class Session {
bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
void reset_gpu(BufferParams &params, int samples);
- bool acquire_tile(Device *tile_device, RenderTile &tile);
+ bool acquire_tile(Device *tile_device, RenderTile &tile, RenderTile::Task task);
void update_tile_sample(RenderTile &tile);
void release_tile(RenderTile &tile);
@@ -213,14 +216,16 @@ class Session {
thread_mutex tile_mutex;
thread_mutex buffers_mutex;
thread_mutex display_mutex;
+ thread_condition_variable denoising_cond;
bool kernels_loaded;
DeviceRequestedFeatures loaded_kernel_features;
double reset_time;
+ double last_update_time;
+ double last_display_time;
/* progressive refine */
- double last_update_time;
bool update_progressive_refine(bool cancel);
DeviceRequestedFeatures get_requested_device_features();
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index 9ef0c695667..4ddfd56cd01 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -101,6 +101,7 @@ TileManager::TileManager(bool progressive_,
tile_order = tile_order_;
start_resolution = start_resolution_;
pixel_size = pixel_size_;
+ slice_overlap = 0;
num_samples = num_samples_;
num_devices = num_devices_;
preserve_tile_device = preserve_tile_device_;
@@ -201,8 +202,7 @@ int TileManager::gen_tiles(bool sliced)
int image_h = max(1, params.height / resolution);
int2 center = make_int2(image_w / 2, image_h / 2);
- int num_logical_devices = preserve_tile_device ? num_devices : 1;
- int num = min(image_h, num_logical_devices);
+ int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1;
int slice_num = sliced ? num : 1;
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
@@ -216,7 +216,7 @@ int TileManager::gen_tiles(bool sliced)
tile_list = state.render_tiles.begin();
if (tile_order == TILE_HILBERT_SPIRAL) {
- assert(!sliced);
+ assert(!sliced && slice_overlap == 0);
int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
state.tiles.resize(tile_w * tile_h);
@@ -319,6 +319,12 @@ int TileManager::gen_tiles(bool sliced)
int slice_h = (slice == slice_num - 1) ? image_h - slice * (image_h / slice_num) :
image_h / slice_num;
+ if (slice_overlap != 0) {
+ int slice_y_offset = max(slice_y - slice_overlap, 0);
+ slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset;
+ slice_y = slice_y_offset;
+ }
+
int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
int tiles_per_device = divide_up(tile_w * tile_h, num);
@@ -363,6 +369,7 @@ void TileManager::gen_render_tiles()
{
/* Regenerate just the render tiles for progressive render. */
foreach (Tile &tile, state.tiles) {
+ tile.state = Tile::RENDER;
state.render_tiles[tile.device].push_back(tile.index);
}
}
@@ -386,17 +393,29 @@ void TileManager::set_tiles()
int TileManager::get_neighbor_index(int index, int neighbor)
{
- static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0};
+ /* Neighbor indices:
+ * 0 1 2
+ * 3 4 5
+ * 6 7 8
+ */
+ static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1};
+ static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1};
int resolution = state.resolution_divider;
int image_w = max(1, params.width / resolution);
int image_h = max(1, params.height / resolution);
+
+ int num = min(image_h, num_devices);
+ int slice_num = !background ? num : 1;
+ int slice_h = image_h / slice_num;
+
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
- int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
+ int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
- int nx = state.tiles[index].x / tile_size.x + dx[neighbor],
- ny = state.tiles[index].y / tile_size.y + dy[neighbor];
- if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h)
+ /* Tiles in the state tile list are always indexed from left to right, top to bottom. */
+ int nx = (index % tile_w) + dx[neighbor];
+ int ny = (index / tile_w) + dy[neighbor];
+ if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num)
return -1;
return ny * state.tile_stride + nx;
@@ -426,15 +445,11 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
{
delete_tile = false;
- if (progressive) {
- return true;
- }
-
switch (state.tiles[index].state) {
case Tile::RENDER: {
if (!schedule_denoising) {
state.tiles[index].state = Tile::DONE;
- delete_tile = true;
+ delete_tile = !progressive;
return true;
}
state.tiles[index].state = Tile::RENDERED;
@@ -457,15 +472,18 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::DENOISED)) {
state.tiles[nindex].state = Tile::DONE;
- /* It can happen that the tile just finished denoising and already can be freed here.
- * However, in that case it still has to be written before deleting, so we can't delete
- * it yet. */
- if (neighbor == 8) {
- delete_tile = true;
- }
- else {
- delete state.tiles[nindex].buffers;
- state.tiles[nindex].buffers = NULL;
+ /* Do not delete finished tiles in progressive mode. */
+ if (!progressive) {
+ /* It can happen that the tile just finished denoising and already can be freed here.
+ * However, in that case it still has to be written before deleting, so we can't delete
+ * it yet. */
+ if (neighbor == 4) {
+ delete_tile = true;
+ }
+ else {
+ delete state.tiles[nindex].buffers;
+ state.tiles[nindex].buffers = NULL;
+ }
}
}
}
@@ -477,27 +495,56 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
}
}
-bool TileManager::next_tile(Tile *&tile, int device)
+bool TileManager::next_tile(Tile *&tile, int device, bool denoising)
{
- int logical_device = preserve_tile_device ? device : 0;
+ /* Preserve device if requested, unless this is a separate denoising device that just wants to
+ * grab any available tile. */
+ const bool preserve_device = preserve_tile_device && device < num_devices;
+
+ int tile_index = -1;
+ int logical_device = preserve_device ? device : 0;
+
+ if (denoising) {
+ while (logical_device < state.denoising_tiles.size()) {
+ if (state.denoising_tiles[logical_device].empty()) {
+ if (preserve_device) {
+ return false;
+ }
+ else {
+ logical_device++;
+ continue;
+ }
+ }
- if (logical_device >= state.render_tiles.size())
- return false;
+ tile_index = state.denoising_tiles[logical_device].front();
+ state.denoising_tiles[logical_device].pop_front();
+ break;
+ }
+ }
+ else {
+ while (logical_device < state.render_tiles.size()) {
+ if (state.render_tiles[logical_device].empty()) {
+ if (preserve_device) {
+ return false;
+ }
+ else {
+ logical_device++;
+ continue;
+ }
+ }
- if (!state.denoising_tiles[logical_device].empty()) {
- int idx = state.denoising_tiles[logical_device].front();
- state.denoising_tiles[logical_device].pop_front();
- tile = &state.tiles[idx];
- return true;
+ tile_index = state.render_tiles[logical_device].front();
+ state.render_tiles[logical_device].pop_front();
+ break;
+ }
}
- if (state.render_tiles[logical_device].empty())
- return false;
+ if (tile_index >= 0) {
+ tile = &state.tiles[tile_index];
+ return true;
+ }
- int idx = state.render_tiles[logical_device].front();
- state.render_tiles[logical_device].pop_front();
- tile = &state.tiles[idx];
- return true;
+ return false;
}
bool TileManager::done()
@@ -508,6 +555,16 @@ bool TileManager::done()
(state.sample + state.num_samples >= end_sample);
}
+bool TileManager::has_tiles()
+{
+ foreach (Tile &tile, state.tiles) {
+ if (tile.state != Tile::DONE) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool TileManager::next()
{
if (done())
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index 017c1af0ead..14c693683c4 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -89,6 +89,7 @@ class TileManager {
} state;
int num_samples;
+ int slice_overlap;
TileManager(bool progressive,
int num_samples,
@@ -105,15 +106,19 @@ class TileManager {
void reset(BufferParams &params, int num_samples);
void set_samples(int num_samples);
bool next();
- bool next_tile(Tile *&tile, int device = 0);
+ bool next_tile(Tile *&tile, int device, bool denoising);
bool finish_tile(int index, bool &delete_tile);
bool done();
+ bool has_tiles();
void set_tile_order(TileOrder tile_order_)
{
tile_order = tile_order_;
}
+ int get_neighbor_index(int index, int neighbor);
+ bool check_neighbor_state(int index, Tile::State state);
+
/* ** Sample range rendering. ** */
/* Start sample in the range. */
@@ -160,9 +165,6 @@ class TileManager {
/* Generate tile list, return number of tiles. */
int gen_tiles(bool sliced);
void gen_render_tiles();
-
- int get_neighbor_index(int index, int neighbor);
- bool check_neighbor_state(int index, Tile::State state);
};
CCL_NAMESPACE_END