diff options
25 files changed, 729 insertions, 257 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 5f163c2510b..eafe37618b3 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -197,7 +197,12 @@ enum_aov_types = ( ('COLOR', "Color", "Write a Color pass", 1), ) -enum_denoising_optix_input_passes= ( +enum_viewport_denoising = ( + ('NONE', "None", "Disable viewport denoising", 0), + ('OPTIX', "OptiX AI-Accelerated", "Use the OptiX denoiser running on the GPU (requires at least one compatible OptiX device)", 1), +) + +enum_denoising_optix_input_passes = ( ('RGB', "Color", "Use only color as input", 1), ('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2), ('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3), @@ -229,6 +234,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default='PATH', ) + preview_pause: BoolProperty( + name="Pause Preview", + description="Pause all viewport preview renders", + default=False, + ) + preview_denoising: EnumProperty( + name="Viewport Denoising", + description="Denoise the image after each preview update with the selected denoiser engine", + items=enum_viewport_denoising, + default='NONE', + ) + use_square_samples: BoolProperty( name="Square Samples", description="Square sampling values for easier artist control", @@ -247,11 +264,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=0, max=(1 << 24), default=32, ) - preview_pause: BoolProperty( - name="Pause Preview", - description="Pause all viewport preview renders", - default=False, - ) aa_samples: IntProperty( name="AA Samples", description="Number of antialiasing samples to render for each pixel", @@ -264,6 +276,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=0, max=2097151, default=32, ) + diffuse_samples: IntProperty( name="Diffuse Samples", description="Number of diffuse bounce samples to render for each AA sample", @@ -294,14 
+307,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=1, max=1024, default=1, ) - subsurface_samples: IntProperty( name="Subsurface Samples", description="Number of subsurface scattering samples to render for each AA sample", min=1, max=1024, default=1, ) - volume_samples: IntProperty( name="Volume Samples", description="Number of volume scattering samples to render for each AA sample", @@ -1305,12 +1316,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) - use_optix_denoising: BoolProperty( - name="Use OptiX AI Denoising", - description="Denoise the rendered image with the OptiX AI denoiser", - default=False, - update=update_render_passes, - ) denoising_diffuse_direct: BoolProperty( name="Diffuse Direct", description="Denoise the direct diffuse lighting", @@ -1387,11 +1392,18 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): min=0, max=7, default=0, ) + + use_optix_denoising: BoolProperty( + name="OptiX AI-Accelerated", + description="Use the OptiX denoiser to denoise the rendered image", + default=False, + update=update_render_passes, + ) denoising_optix_input_passes: EnumProperty( name="Input Passes", - description="Controls which passes the OptiX AI denoiser should use as input, which can have different effects on the denoised image", + description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)", items=enum_denoising_optix_input_passes, - default='RGB', + default='RGB_ALBEDO', ) use_pass_crypto_object: BoolProperty( diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 35d5d3801d2..f23d141e3da 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -112,6 +112,10 @@ def show_device_active(context): return True return context.preferences.addons[__package__].preferences.has_active_device() +def show_optix_denoising(context): + # OptiX AI denoiser can be used when 
at least one device supports OptiX + return bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX')) + def draw_samples_info(layout, context): cscene = context.scene.cycles @@ -177,17 +181,23 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel): if not use_optix(context): layout.prop(cscene, "progressive") - if cscene.progressive == 'PATH' or use_branched_path(context) is False: + if not use_branched_path(context): col = layout.column(align=True) col.prop(cscene, "samples", text="Render") col.prop(cscene, "preview_samples", text="Viewport") - - draw_samples_info(layout, context) else: col = layout.column(align=True) col.prop(cscene, "aa_samples", text="Render") col.prop(cscene, "preview_aa_samples", text="Viewport") + # Viewport denoising is currently only supported with OptiX + if show_optix_denoising(context): + col = layout.column() + col.prop(cscene, "preview_denoising") + + if not use_branched_path(context): + draw_samples_info(layout, context) + class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel): bl_label = "Sub Samples" @@ -195,9 +205,7 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel): @classmethod def poll(cls, context): - scene = context.scene - cscene = scene.cycles - return cscene.progressive != 'PATH' and use_branched_path(context) + return use_branched_path(context) def draw(self, context): layout = self.layout @@ -635,9 +643,6 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel): sub = col.column() sub.active = not rd.use_save_buffers - for view_layer in scene.view_layers: - if view_layer.cycles.use_denoising: - sub.active = False sub.prop(cscene, "use_progressive_refine") @@ -981,15 +986,14 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel): col = split.column(align=True) - if use_optix(context): - col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising") + if show_optix_denoising(context): + 
col.prop(cycles_view_layer, "use_optix_denoising") + col.separator(factor=2.0) if cycles_view_layer.use_optix_denoising: col.prop(cycles_view_layer, "denoising_optix_input_passes") return - col.separator(factor=2.0) - col.prop(cycles_view_layer, "denoising_radius", text="Radius") col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength") col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength") @@ -2192,8 +2196,6 @@ def draw_device(self, context): col = layout.column() col.prop(cscene, "feature_set") - scene = context.scene - col = layout.column() col.active = show_device_active(context) col.prop(cscene, "device") diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index c84d6e1572b..990061dd9f1 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -863,7 +863,8 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d, } } -BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render, +BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene, + BL::RenderSettings &b_render, BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, @@ -899,7 +900,11 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render, params.height = height; } - update_viewport_display_passes(b_v3d, params.passes); + PassType display_pass = update_viewport_display_passes(b_v3d, params.passes); + + /* Can only denoise the combined image pass */ + params.denoising_data_pass = display_pass == PASS_COMBINED && + update_viewport_display_denoising(b_v3d, b_scene); return params; } diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp index 111fc8d5192..c3c307318a8 100644 --- a/intern/cycles/blender/blender_device.cpp +++ b/intern/cycles/blender/blender_device.cpp @@ -19,6 +19,22 @@ CCL_NAMESPACE_BEGIN +enum DenoiserType { + DENOISER_NONE = 0, + DENOISER_OPTIX = 1, + + 
DENOISER_NUM +}; + +enum ComputeDevice { + COMPUTE_DEVICE_CPU = 0, + COMPUTE_DEVICE_CUDA = 1, + COMPUTE_DEVICE_OPENCL = 2, + COMPUTE_DEVICE_OPTIX = 3, + + COMPUTE_DEVICE_NUM +}; + int blender_device_threads(BL::Scene &b_scene) { BL::RenderSettings b_r = b_scene.render(); @@ -40,7 +56,7 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen /* Find network device. */ vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK); if (!devices.empty()) { - device = devices.front(); + return devices.front(); } } else if (get_enum(cscene, "device") == 1) { @@ -57,14 +73,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen } /* Test if we are using GPU devices. */ - enum ComputeDevice { - COMPUTE_DEVICE_CPU = 0, - COMPUTE_DEVICE_CUDA = 1, - COMPUTE_DEVICE_OPENCL = 2, - COMPUTE_DEVICE_OPTIX = 3, - COMPUTE_DEVICE_NUM = 4, - }; - ComputeDevice compute_device = (ComputeDevice)get_enum( cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU); @@ -106,6 +114,33 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen } } + /* Ensure there is an OptiX device when using the OptiX denoiser. */ + bool use_optix_denoising = DENOISER_OPTIX == + get_enum(cscene, "preview_denoising", DENOISER_NUM, DENOISER_NONE); + BL::Scene::view_layers_iterator b_view_layer; + for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); + ++b_view_layer) { + PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles"); + if (get_boolean(crl, "use_optix_denoising")) { + use_optix_denoising = true; + } + } + + if (use_optix_denoising && device.type != DEVICE_OPTIX) { + vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX); + if (!optix_devices.empty()) { + /* Convert to a special multi device with separate denoising devices. 
*/ + if (device.multi_devices.empty()) { + device.multi_devices.push_back(device); + } + + /* Simply use the first available OptiX device. */ + const DeviceInfo optix_device = optix_devices.front(); + device.id += optix_device.id; /* Uniquely identify this special multi device. */ + device.denoising_devices.push_back(optix_device); + } + } + return device; } diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 1490348743e..2e7a72d8072 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -166,7 +166,7 @@ void BlenderSession::create_session() /* set buffer parameters */ BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); session->reset(buffer_params, session_params.samples); b_engine.use_highlight_tiles(session_params.progressive_refine == false); @@ -244,7 +244,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL); BL::RegionView3D b_null_region_view3d(PointerRNA_NULL); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); + b_scene, b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); session->reset(buffer_params, session_params.samples); b_engine.use_highlight_tiles(session_params.progressive_refine == false); @@ -460,7 +460,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); /* render each layer */ BL::ViewLayer b_view_layer 
= b_depsgraph.view_layer_eval(); @@ -706,7 +706,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y()); @@ -851,7 +851,6 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) if (session->params.modified(session_params) || scene->params.modified(scene_params)) { free_session(); create_session(); - return; } /* increase samples, but never decrease */ @@ -886,10 +885,28 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) else sync->sync_camera(b_render, b_camera_override, width, height, ""); + /* get buffer parameters */ + BufferParams buffer_params = BlenderSync::get_buffer_params( + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); + + if (session_params.device.type != DEVICE_OPTIX && + session_params.device.denoising_devices.empty()) { + /* cannot use OptiX denoising when it is not supported by the device. */ + buffer_params.denoising_data_pass = false; + } + else { + session->set_denoising(buffer_params.denoising_data_pass, true); + } + + if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) { + scene->film->denoising_data_pass = buffer_params.denoising_data_pass; + + /* Force a scene and session reset below. */ + scene->film->tag_update(scene); + } + /* reset if needed */ if (scene->need_reset()) { - BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); session->reset(buffer_params, session_params.samples); /* After session reset, so device is not accessing image data anymore. 
*/ @@ -956,7 +973,7 @@ bool BlenderSession::draw(int w, int h) SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); bool session_pause = BlenderSync::get_session_pause(b_scene, background); if (session_pause == false) { @@ -974,7 +991,7 @@ bool BlenderSession::draw(int w, int h) /* draw */ BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); DeviceDrawParams draw_params; if (session->params.display_buffer_linear) { diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 8960c84567e..0412654d3bd 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -846,20 +846,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, /* progressive refine */ BL::RenderSettings b_r = b_scene.render(); - params.progressive_refine = (b_engine.is_preview() || - get_boolean(cscene, "use_progressive_refine")) && - !b_r.use_save_buffers(); - - if (params.progressive_refine) { - BL::Scene::view_layers_iterator b_view_layer; - for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); - ++b_view_layer) { - PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles"); - if (get_boolean(crl, "use_denoising")) { - params.progressive_refine = false; - } - } - } + params.progressive_refine = b_engine.is_preview() || + get_boolean(cscene, "use_progressive_refine"); + if (b_r.use_save_buffers()) + params.progressive_refine = false; if (background) { if (params.progressive_refine) diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 
f8134ff8b5c..efd4511eb1e 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -95,7 +95,8 @@ class BlenderSync { BL::Scene &b_scene, bool background); static bool get_session_pause(BL::Scene &b_scene, bool background); - static BufferParams get_buffer_params(BL::RenderSettings &b_render, + static BufferParams get_buffer_params(BL::Scene &b_scene, + BL::RenderSettings &b_render, BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp index 73ef5f94720..93e84e28032 100644 --- a/intern/cycles/blender/blender_viewport.cpp +++ b/intern/cycles/blender/blender_viewport.cpp @@ -61,6 +61,17 @@ const bool BlenderViewportParameters::custom_viewport_parameters() const return !(use_scene_world && use_scene_lights); } +bool BlenderViewportParameters::get_viewport_display_denoising(BL::SpaceView3D &b_v3d, + BL::Scene &b_scene) +{ + bool use_denoising = false; + if (b_v3d) { + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + use_denoising = get_enum(cscene, "preview_denoising") != 0; + } + return use_denoising; +} + PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d) { PassType display_pass = PASS_NONE; @@ -72,6 +83,11 @@ PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceVi return display_pass; } +bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene) +{ + return BlenderViewportParameters::get_viewport_display_denoising(b_v3d, b_scene); +} + PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes) { if (b_v3d) { diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h index f26d0d38115..d2b55358c27 100644 --- a/intern/cycles/blender/blender_viewport.h +++ b/intern/cycles/blender/blender_viewport.h @@ -44,11 +44,15 @@ class BlenderViewportParameters { 
friend class BlenderSync; public: + /* Get whether to enable denoising data pass in viewport. */ + static bool get_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene); /* Retrieve the render pass that needs to be displayed on the given `SpaceView3D` * When the `b_v3d` parameter is not given `PASS_NONE` will be returned. */ static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d); }; +bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene); + PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes); CCL_NAMESPACE_END diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 76670351734..c0d02e0605f 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -366,6 +366,15 @@ void Device::draw_pixels(device_memory &rgba, Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) { +#ifdef WITH_MULTI + if (!info.multi_devices.empty()) { + /* Always create a multi device when info contains multiple devices. + * This is done so that the type can still be e.g. DEVICE_CPU to indicate + * that it is a homogeneous collection of devices, which simplifies checks. 
*/ + return device_multi_create(info, stats, profiler, background); + } +#endif + Device *device; switch (info.type) { @@ -388,11 +397,6 @@ Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool device = NULL; break; #endif -#ifdef WITH_MULTI - case DEVICE_MULTI: - device = device_multi_create(info, stats, profiler, background); - break; -#endif #ifdef WITH_NETWORK case DEVICE_NETWORK: device = device_network_create(info, stats, profiler, "127.0.0.1"); @@ -586,7 +590,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices, } DeviceInfo info; - info.type = DEVICE_MULTI; + info.type = subdevices.front().type; info.id = "MULTI"; info.description = "Multi Device"; info.num = 0; @@ -624,6 +628,14 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices, info.multi_devices.push_back(device); } + /* Create unique ID for this combination of devices. */ + info.id += device.id; + + /* Set device type to MULTI if subdevices are not of a common type. */ + if (device.type != info.type) { + info.type = DEVICE_MULTI; + } + /* Accumulate device info. */ info.has_half_images &= device.has_half_images; info.has_volume_decoupled &= device.has_volume_decoupled; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 66fcac921d3..2aeed3f0e89 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -83,6 +83,7 @@ class DeviceInfo { bool has_profiling; /* Supports runtime collection of profiling info. 
*/ int cpu_threads; vector<DeviceInfo> multi_devices; + vector<DeviceInfo> denoising_devices; DeviceInfo() { diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c2843a61e6d..42ebf3a8399 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -508,13 +508,14 @@ class CPUDevice : public Device { void thread_run(DeviceTask *task) { - if (task->type == DeviceTask::RENDER) { + if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) thread_render(*task); - } - else if (task->type == DeviceTask::FILM_CONVERT) - thread_film_convert(*task); else if (task->type == DeviceTask::SHADER) thread_shader(*task); + else if (task->type == DeviceTask::FILM_CONVERT) + thread_film_convert(*task); + else if (task->type == DeviceTask::DENOISE_BUFFER) + thread_denoise(*task); } class CPUDeviceTask : public DeviceTask { @@ -954,6 +955,33 @@ class CPUDevice : public Device { delete split_kernel; } + void thread_denoise(DeviceTask &task) + { + RenderTile tile; + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.sample = task.sample + task.num_samples; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + DenoisingTask denoising(this, task); + + ProfilingState denoising_profiler_state; + profiler.add_state(&denoising_profiler_state); + denoising.profiler = &denoising_profiler_state; + + denoise(denoising, tile); + task.update_progress(&tile, tile.w * tile.h); + + profiler.remove_state(&denoising_profiler_state); + } + void thread_film_convert(DeviceTask &task) { float sample_scale = 1.0f / (task.sample + 1); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index dfd80d678fd..e3c737cc2e7 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -994,16 
+994,16 @@ class CUDADevice : public Device { else if (mem.type == MEM_TEXTURE) { assert(!"mem_copy_from not supported for textures."); } - else { - CUDAContextScope scope(this); - size_t offset = elem * y * w; - size_t size = elem * w * h; + else if (mem.host_pointer) { + const size_t size = elem * w * h; + const size_t offset = elem * y * w; - if (mem.host_pointer && mem.device_pointer) { + if (mem.device_pointer) { + const CUDAContextScope scope(this); cuda_assert(cuMemcpyDtoH( - (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); + (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size)); } - else if (mem.host_pointer) { + else { memset((char *)mem.host_pointer + offset, 0, size); } } @@ -1014,20 +1014,19 @@ class CUDADevice : public Device { if (!mem.device_pointer) { mem_alloc(mem); } - - if (mem.host_pointer) { - memset(mem.host_pointer, 0, mem.memory_size()); + if (!mem.device_pointer) { + return; } - /* If use_mapped_host of mem is false, mem.device_pointer currently - * refers to device memory regardless of mem.host_pointer and - * mem.shared_pointer. */ - - if (mem.device_pointer && - (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) { - CUDAContextScope scope(this); + /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory + * regardless of mem.host_pointer and mem.shared_pointer. 
*/ + if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { + const CUDAContextScope scope(this); cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); } + else if (mem.host_pointer) { + memset(mem.host_pointer, 0, mem.memory_size()); + } } void mem_free(device_memory &mem) @@ -2240,7 +2239,7 @@ class CUDADevice : public Device { { CUDAContextScope scope(this); - if (task->type == DeviceTask::RENDER) { + if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) { DeviceRequestedFeatures requested_features; if (use_split_kernel()) { if (split_kernel == NULL) { @@ -2288,6 +2287,24 @@ class CUDADevice : public Device { cuda_assert(cuCtxSynchronize()); } + else if (task->type == DeviceTask::DENOISE_BUFFER) { + RenderTile tile; + tile.x = task->x; + tile.y = task->y; + tile.w = task->w; + tile.h = task->h; + tile.buffer = task->buffer; + tile.sample = task->sample + task->num_samples; + tile.num_samples = task->num_samples; + tile.start_sample = task->sample; + tile.offset = task->offset; + tile.stride = task->stride; + tile.buffers = task->buffers; + + DenoisingTask denoising(this, *task); + denoise(tile, denoising); + task->update_progress(&tile, tile.w * tile.h); + } } class CUDADeviceTask : public DeviceTask { diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 60740807568..2949773ef0c 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -427,6 +427,11 @@ template<typename T> class device_vector : public device_memory { device_copy_to(); } + void copy_from_device() + { + device_copy_from(0, data_width, data_height, sizeof(T)); + } + void copy_from_device(int y, int w, int h) { device_copy_from(y, w, h, sizeof(T)); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index b8587eb0a62..9cbf69a191a 100644 --- a/intern/cycles/device/device_multi.cpp +++ 
b/intern/cycles/device/device_multi.cpp @@ -42,7 +42,7 @@ class MultiDevice : public Device { map<device_ptr, device_ptr> ptr_map; }; - list<SubDevice> devices; + list<SubDevice> devices, denoising_devices; device_ptr unique_key; MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) @@ -61,6 +61,12 @@ class MultiDevice : public Device { } } + foreach (DeviceInfo &subinfo, info.denoising_devices) { + Device *device = Device::create(subinfo, sub_stats_, profiler, background); + + denoising_devices.push_back(SubDevice(device)); + } + #ifdef WITH_NETWORK /* try to add network devices */ ServerDiscovery discovery(true); @@ -80,17 +86,18 @@ class MultiDevice : public Device { { foreach (SubDevice &sub, devices) delete sub.device; + foreach (SubDevice &sub, denoising_devices) + delete sub.device; } const string &error_message() { - foreach (SubDevice &sub, devices) { - if (sub.device->error_message() != "") { - if (error_msg == "") - error_msg = sub.device->error_message(); - break; - } - } + error_msg.clear(); + + foreach (SubDevice &sub, devices) + error_msg += sub.device->error_message(); + foreach (SubDevice &sub, denoising_devices) + error_msg += sub.device->error_message(); return error_msg; } @@ -118,6 +125,12 @@ class MultiDevice : public Device { if (!sub.device->load_kernels(requested_features)) return false; + if (requested_features.use_denoising) { + foreach (SubDevice &sub, denoising_devices) + if (!sub.device->load_kernels(requested_features)) + return false; + } + return true; } @@ -127,6 +140,12 @@ class MultiDevice : public Device { if (!sub.device->wait_for_availability(requested_features)) return false; + if (requested_features.use_denoising) { + foreach (SubDevice &sub, denoising_devices) + if (!sub.device->wait_for_availability(requested_features)) + return false; + } + return true; } @@ -150,16 +169,17 @@ class MultiDevice : public Device { break; } } + return result; } bool build_optix_bvh(BVH *bvh) { - // Broadcast 
acceleration structure build to all devices - foreach (SubDevice &sub, devices) { + // Broadcast acceleration structure build to all render devices + foreach (SubDevice &sub, devices) if (!sub.device->build_optix_bvh(bvh)) return false; - } + return true; } @@ -236,6 +256,17 @@ class MultiDevice : public Device { sub.ptr_map[key] = mem.device_pointer; } + if (strcmp(mem.name, "RenderBuffers") == 0) { + foreach (SubDevice &sub, denoising_devices) { + mem.device = sub.device; + mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0; + mem.device_size = existing_size; + + sub.device->mem_zero(mem); + sub.ptr_map[key] = mem.device_pointer; + } + } + mem.device = this; mem.device_pointer = key; stats.mem_alloc(mem.device_size - existing_size); @@ -255,6 +286,17 @@ class MultiDevice : public Device { sub.ptr_map.erase(sub.ptr_map.find(key)); } + if (strcmp(mem.name, "RenderBuffers") == 0) { + foreach (SubDevice &sub, denoising_devices) { + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + mem.device_size = existing_size; + + sub.device->mem_free(mem); + sub.ptr_map.erase(sub.ptr_map.find(key)); + } + } + mem.device = this; mem.device_pointer = 0; mem.device_size = 0; @@ -302,10 +344,21 @@ class MultiDevice : public Device { void map_tile(Device *sub_device, RenderTile &tile) { + if (!tile.buffer) { + return; + } + foreach (SubDevice &sub, devices) { if (sub.device == sub_device) { - if (tile.buffer) - tile.buffer = sub.ptr_map[tile.buffer]; + tile.buffer = sub.ptr_map[tile.buffer]; + return; + } + } + + foreach (SubDevice &sub, denoising_devices) { + if (sub.device == sub_device) { + tile.buffer = sub.ptr_map[tile.buffer]; + return; } } } @@ -320,6 +373,12 @@ class MultiDevice : public Device { i++; } + foreach (SubDevice &sub, denoising_devices) { + if (sub.device == sub_device) + return i; + i++; + } + return -1; } @@ -330,11 +389,20 @@ class MultiDevice : public Device { continue; } + device_vector<float> &mem = tiles[i].buffers->buffer; 
+ tiles[i].buffer = mem.device_pointer; + + if (mem.device == this && denoising_devices.empty()) { + /* Skip unnecessary copies in viewport mode (buffer covers the + * whole image), but still need to fix up the tile evice pointer. */ + map_tile(sub_device, tiles[i]); + continue; + } + /* If the tile was rendered on another device, copy its memory to * to the current device now, for the duration of the denoising task. * Note that this temporarily modifies the RenderBuffers and calls * the device, so this function is not thread safe. */ - device_vector<float> &mem = tiles[i].buffers->buffer; if (mem.device != sub_device) { /* Only copy from device to host once. This is faster, but * also required for the case where a CPU thread is denoising @@ -342,12 +410,20 @@ class MultiDevice : public Device { * overwriting the buffer being denoised by the CPU thread. */ if (!tiles[i].buffers->map_neighbor_copied) { tiles[i].buffers->map_neighbor_copied = true; - mem.copy_from_device(0, mem.data_size, 1); + mem.copy_from_device(); } - mem.swap_device(sub_device, 0, 0); + if (mem.device == this) { + /* Can re-use memory if tile is already allocated on the sub device. */ + map_tile(sub_device, tiles[i]); + mem.swap_device(sub_device, mem.device_size, tiles[i].buffer); + } + else { + mem.swap_device(sub_device, 0, 0); + } mem.copy_to_device(); + tiles[i].buffer = mem.device_pointer; tiles[i].device_size = mem.device_size; @@ -358,11 +434,17 @@ class MultiDevice : public Device { void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles) { - /* Copy denoised result back to the host. */ device_vector<float> &mem = tiles[9].buffers->buffer; + + if (mem.device == this && denoising_devices.empty()) { + return; + } + + /* Copy denoised result back to the host. */ mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer); - mem.copy_from_device(0, mem.data_size, 1); + mem.copy_from_device(); mem.restore_device(); + /* Copy denoised result to the original device. 
*/ mem.copy_to_device(); @@ -372,7 +454,9 @@ class MultiDevice : public Device { } device_vector<float> &mem = tiles[i].buffers->buffer; - if (mem.device != sub_device) { + + if (mem.device != sub_device && mem.device != this) { + /* Free up memory again if it was allocated for the copy above. */ mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer); sub_device->mem_free(mem); mem.restore_device(); @@ -398,10 +482,16 @@ class MultiDevice : public Device { void task_add(DeviceTask &task) { + list<SubDevice> &task_devices = denoising_devices.empty() || + (task.type != DeviceTask::DENOISE && + task.type != DeviceTask::DENOISE_BUFFER) ? + devices : + denoising_devices; + list<DeviceTask> tasks; - task.split(tasks, devices.size()); + task.split(tasks, task_devices.size()); - foreach (SubDevice &sub, devices) { + foreach (SubDevice &sub, task_devices) { if (!tasks.empty()) { DeviceTask subtask = tasks.front(); tasks.pop_front(); @@ -426,12 +516,16 @@ class MultiDevice : public Device { { foreach (SubDevice &sub, devices) sub.device->task_wait(); + foreach (SubDevice &sub, denoising_devices) + sub.device->task_wait(); } void task_cancel() { foreach (SubDevice &sub, devices) sub.device->task_cancel(); + foreach (SubDevice &sub, denoising_devices) + sub.device->task_cancel(); } protected: diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 98469fb37b0..ac119a723e3 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -213,6 +213,7 @@ class OptiXDevice : public Device { OptixDenoiser denoiser = NULL; vector<pair<int2, CUdeviceptr>> denoiser_state; + int denoiser_input_passes = 0; public: OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) @@ -632,7 +633,7 @@ class OptiXDevice : public Device { if (have_error()) return; // Abort early if there was an error previously - if (task.type == DeviceTask::RENDER) { + if (task.type == 
DeviceTask::RENDER || task.type == DeviceTask::DENOISE) { RenderTile tile; while (task.acquire_tile(this, tile)) { if (tile.task == RenderTile::PATH_TRACE) @@ -652,6 +653,22 @@ class OptiXDevice : public Device { else if (task.type == DeviceTask::FILM_CONVERT) { launch_film_convert(task, thread_index); } + else if (task.type == DeviceTask::DENOISE_BUFFER) { + // Set up a single tile that covers the whole task and denoise it + RenderTile tile; + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + launch_denoise(task, tile, thread_index); + } } void launch_render(DeviceTask &task, RenderTile &rtile, int thread_index) @@ -740,6 +757,7 @@ class OptiXDevice : public Device { RenderTile rtiles[10]; rtiles[4] = rtile; task.map_neighbor_tiles(rtiles, this); + rtile = rtiles[4]; // Tile may have been modified by mapping code // Calculate size of the tile to denoise (including overlap) int4 rect = make_int4( @@ -846,7 +864,14 @@ class OptiXDevice : public Device { } # endif - if (denoiser == NULL) { + const bool recreate_denoiser = (denoiser == NULL) || + (task.denoising.optix_input_passes != denoiser_input_passes); + if (recreate_denoiser) { + // Destroy existing handle before creating new one + if (denoiser != NULL) { + optixDenoiserDestroy(denoiser); + } + // Create OptiX denoiser handle on demand when it is first used OptixDenoiserOptions denoiser_options; assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); @@ -856,6 +881,9 @@ class OptiXDevice : public Device { check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); + + // OptiX denoiser handle was created with the requested number of input 
passes + denoiser_input_passes = task.denoising.optix_input_passes; } OptixDenoiserSizes sizes = {}; @@ -868,13 +896,16 @@ class OptiXDevice : public Device { const size_t scratch_offset = sizes.stateSizeInBytes; // Allocate denoiser state if tile size has changed since last setup - if (state_size.x != rect_size.x || state_size.y != rect_size.y) { + if (state_size.x != rect_size.x || state_size.y != rect_size.y || recreate_denoiser) { + // Free existing state before allocating new one if (state) { cuMemFree(state); state = 0; } + check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size)); + // Initialize denoiser state for the current tile size check_result_optix_ret(optixDenoiserSetup(denoiser, cuda_stream[thread_index], rect_size.x, @@ -1972,17 +2003,17 @@ class OptiXDevice : public Device { else if (mem.type == MEM_TEXTURE) { assert(!"mem_copy_from not supported for textures."); } - else { + else if (mem.host_pointer) { // Calculate linear memory offset and size const size_t size = elem * w * h; const size_t offset = elem * y * w; - if (mem.host_pointer && mem.device_pointer) { + if (mem.device_pointer) { const CUDAContextScope scope(cuda_context); check_result_cuda(cuMemcpyDtoH( (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size)); } - else if (mem.host_pointer) { + else { memset((char *)mem.host_pointer + offset, 0, size); } } @@ -1990,21 +2021,22 @@ class OptiXDevice : public Device { void mem_zero(device_memory &mem) override { - if (mem.host_pointer) - memset(mem.host_pointer, 0, mem.memory_size()); - - if (!mem.device_pointer) + if (!mem.device_pointer) { mem_alloc(mem); // Need to allocate memory first if it does not exist yet + } + if (!mem.device_pointer) { + return; + } - /* If use_mapped_host of mem is false, mem.device_pointer currently - * refers to device memory regardless of mem.host_pointer and - * mem.shared_pointer. 
*/ - - if (mem.device_pointer && - (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) { + /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory + * regardless of mem.host_pointer and mem.shared_pointer. */ + if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { const CUDAContextScope scope(cuda_context); check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); } + else if (mem.host_pointer) { + memset(mem.host_pointer, 0, mem.memory_size()); + } } void mem_free(device_memory &mem) override diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp index 376ad06a734..8f15e8c8c1e 100644 --- a/intern/cycles/device/device_task.cpp +++ b/intern/cycles/device/device_task.cpp @@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size) if (type == SHADER) { num = min(shader_w, num); } - else if (type == RENDER) { + else if (type == RENDER || type == DENOISE) { } else { num = min(h, num); @@ -94,7 +94,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size) tasks.push_back(task); } } - else if (type == RENDER) { + else if (type == RENDER || type == DENOISE) { for (int i = 0; i < num; i++) tasks.push_back(*this); } @@ -115,7 +115,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size) void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples) { - if ((type != RENDER) && (type != SHADER)) + if (type == FILM_CONVERT) return; if (update_progress_sample) { diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 1b1e97cdb10..0f718528b86 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -47,7 +47,7 @@ class DenoiseParams { int neighbor_frames; /* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. 
*/ bool clamp_input; - /* Controls which passes the OptiX AI denoiser should use as input. */ + /* Passes handed over to the OptiX denoiser (default to color + albedo). */ int optix_input_passes; DenoiseParams() @@ -58,13 +58,13 @@ class DenoiseParams { relative_pca = false; neighbor_frames = 2; clamp_input = true; - optix_input_passes = 1; + optix_input_passes = 2; } }; class DeviceTask : public Task { public: - typedef enum { RENDER, FILM_CONVERT, SHADER } Type; + typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type; Type type; int x, y, w, h; @@ -81,7 +81,7 @@ class DeviceTask : public Task { int shader_filter; int shader_x, shader_w; - int passes_size; + RenderBuffers *buffers; explicit DeviceTask(Type type = RENDER); @@ -114,7 +114,6 @@ class DeviceTask : public Task { bool need_finish_queue; bool integrator_branched; - int2 requested_tile_size; protected: double last_update_time; diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 76f9ce7a18f..af40aa89db4 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -1308,13 +1308,7 @@ void OpenCLDevice::thread_run(DeviceTask *task) { flush_texture_buffers(); - if (task->type == DeviceTask::FILM_CONVERT) { - film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); - } - else if (task->type == DeviceTask::SHADER) { - shader(*task); - } - else if (task->type == DeviceTask::RENDER) { + if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) { RenderTile tile; DenoisingTask denoising(this, *task); @@ -1352,6 +1346,30 @@ void OpenCLDevice::thread_run(DeviceTask *task) kgbuffer.free(); } + else if (task->type == DeviceTask::SHADER) { + shader(*task); + } + else if (task->type == DeviceTask::FILM_CONVERT) { + film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); + } + else if (task->type == DeviceTask::DENOISE_BUFFER) { + RenderTile tile; + 
tile.x = task->x; + tile.y = task->y; + tile.w = task->w; + tile.h = task->h; + tile.buffer = task->buffer; + tile.sample = task->sample + task->num_samples; + tile.num_samples = task->num_samples; + tile.start_sample = task->sample; + tile.offset = task->offset; + tile.stride = task->stride; + tile.buffers = task->buffers; + + DenoisingTask denoising(this, *task); + denoise(tile, denoising); + task->update_progress(&tile, tile.w * tile.h); + } } void OpenCLDevice::film_convert(DeviceTask &task, diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index fbb773533ce..22fd5ea5634 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -57,9 +57,9 @@ kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int st if (num_inputs > 0) { float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float); float *out = rgb + (x + y * sw) * 3; - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; + out[0] = clamp(in[0], 0.0f, 10000.0f); + out[1] = clamp(in[1], 0.0f, 10000.0f); + out[2] = clamp(in[2], 0.0f, 10000.0f); } if (num_inputs > 1) { float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float); diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index b08b6c84c1d..41e1b73fdac 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -146,7 +146,7 @@ void RenderBuffers::reset(BufferParams &params_) params = params_; /* re-allocate buffer */ - buffer.alloc(params.width * params.height * params.get_passes_size()); + buffer.alloc(params.width * params.get_passes_size(), params.height); buffer.zero_to_device(); } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index dac7fbac806..160b77d5f14 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -183,7 +183,8 @@ bool Session::draw_gpu(BufferParams
&buffer_params, DeviceDrawParams &draw_param if (gpu_draw_ready) { /* then verify the buffers have the expected size, so we don't * draw previous results in a resized window */ - if (!buffer_params.modified(display->params)) { + if (buffer_params.width == display->params.width && + buffer_params.height == display->params.height) { /* for CUDA we need to do tone-mapping still, since we can * only access GL buffers from the main thread. */ if (gpu_need_display_buffer_update) { @@ -211,6 +212,7 @@ void Session::run_gpu() reset_time = time_dt(); last_update_time = time_dt(); + last_display_time = last_update_time; progress.set_render_start_time(); @@ -291,12 +293,21 @@ void Session::run_gpu() * reset and draw in between */ thread_scoped_lock buffers_lock(buffers_mutex); + /* avoid excessive denoising in viewport after reaching a certain amount of samples */ + bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 || + (time_dt() - last_display_time) >= params.progressive_update_timeout; + /* update status and timing */ update_status_time(); /* render */ render(); + /* denoise */ + if (need_denoise) { + denoise(); + } + device->task_wait(); if (!device->error_message().empty()) @@ -305,7 +316,7 @@ void Session::run_gpu() /* update status and timing */ update_status_time(); - gpu_need_display_buffer_update = true; + gpu_need_display_buffer_update = need_denoise || !params.run_denoising; gpu_draw_ready = true; progress.set_update(); @@ -359,7 +370,8 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param if (display->draw_ready()) { /* then verify the buffers have the expected size, so we don't * draw previous results in a resized window */ - if (!buffer_params.modified(display->params)) { + if (buffer_params.width == display->params.width && + buffer_params.height == display->params.height) { display->draw(device, draw_params); if (display_outdated && (time_dt() - reset_time) > params.text_timeout) @@ -372,7 
+384,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param return false; } -bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) +bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::Task task) { if (progress.get_cancel()) { if (params.progressive_refine == false) { @@ -387,8 +399,14 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) Tile *tile; int device_num = device->device_number(tile_device); - if (!tile_manager.next_tile(tile, device_num)) + while (!tile_manager.next_tile(tile, device_num, task == RenderTile::DENOISE)) { + /* Wait for denoising tiles to become available */ + if (task == RenderTile::DENOISE && !progress.get_cancel() && tile_manager.has_tiles()) { + denoising_cond.wait(tile_lock); + continue; + } return false; + } /* fill render tile */ rtile.x = tile_manager.state.buffer.full_x + tile->x; @@ -399,7 +417,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; rtile.tile_index = tile->index; - rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE; + rtile.task = task; tile_lock.unlock(); @@ -413,6 +431,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) device->map_tile(tile_device, rtile); + /* Reset copy state, since buffer contents change after the tile was acquired */ + buffers->map_neighbor_copied = false; + return true; } @@ -429,6 +450,8 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) tile->buffers->reset(buffer_params); } + tile->buffers->map_neighbor_copied = false; + tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); rtile.buffer = tile->buffers->buffer.device_pointer; @@ -484,45 +507,75 @@ void Session::release_tile(RenderTile &rtile) } update_status_time(); + + /* Notify denoising thread that a tile was finished. 
*/ + denoising_cond.notify_all(); } void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) { thread_scoped_lock tile_lock(tile_mutex); - int center_idx = tiles[4].tile_index; - assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); - BufferParams buffer_params = tile_manager.params; - int4 image_region = make_int4(buffer_params.full_x, - buffer_params.full_y, - buffer_params.full_x + buffer_params.width, - buffer_params.full_y + buffer_params.height); - - for (int dy = -1, i = 0; dy <= 1; dy++) { - for (int dx = -1; dx <= 1; dx++, i++) { - int px = tiles[4].x + dx * params.tile_size.x; - int py = tiles[4].y + dy * params.tile_size.y; - if (px >= image_region.x && py >= image_region.y && px < image_region.z && - py < image_region.w) { - int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx; - Tile *tile = &tile_manager.state.tiles[tile_index]; - assert(tile->buffers); - - tiles[i].buffer = tile->buffers->buffer.device_pointer; - tiles[i].x = tile_manager.state.buffer.full_x + tile->x; - tiles[i].y = tile_manager.state.buffer.full_y + tile->y; - tiles[i].w = tile->w; - tiles[i].h = tile->h; - tiles[i].buffers = tile->buffers; - - tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); - } - else { - tiles[i].buffer = (device_ptr)NULL; - tiles[i].buffers = NULL; - tiles[i].x = clamp(px, image_region.x, image_region.z); - tiles[i].y = clamp(py, image_region.y, image_region.w); - tiles[i].w = tiles[i].h = 0; + const int4 image_region = make_int4( + tile_manager.state.buffer.full_x, + tile_manager.state.buffer.full_y, + tile_manager.state.buffer.full_x + tile_manager.state.buffer.width, + tile_manager.state.buffer.full_y + tile_manager.state.buffer.height); + + if (!tile_manager.schedule_denoising) { + /* Fix up tile slices with overlap. 
*/ + if (tile_manager.slice_overlap != 0) { + int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y); + tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y; + tiles[4].y = y; + } + + /* Tiles are not being denoised individually, which means the entire image is processed. */ + tiles[3].x = tiles[4].x; + tiles[1].y = tiles[4].y; + tiles[5].x = tiles[4].x + tiles[4].w; + tiles[7].y = tiles[4].y + tiles[4].h; + } + else { + int center_idx = tiles[4].tile_index; + assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); + + for (int dy = -1, i = 0; dy <= 1; dy++) { + for (int dx = -1; dx <= 1; dx++, i++) { + int nindex = tile_manager.get_neighbor_index(center_idx, i); + if (nindex >= 0) { + Tile *tile = &tile_manager.state.tiles[nindex]; + + tiles[i].x = image_region.x + tile->x; + tiles[i].y = image_region.y + tile->y; + tiles[i].w = tile->w; + tiles[i].h = tile->h; + + if (buffers) { + tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride); + + tiles[i].buffer = buffers->buffer.device_pointer; + tiles[i].buffers = buffers; + } + else { + assert(tile->buffers); + tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); + + tiles[i].buffer = tile->buffers->buffer.device_pointer; + tiles[i].buffers = tile->buffers; + } + } + else { + int px = tiles[4].x + dx * params.tile_size.x; + int py = tiles[4].y + dy * params.tile_size.y; + + tiles[i].x = clamp(px, image_region.x, image_region.z); + tiles[i].y = clamp(py, image_region.y, image_region.w); + tiles[i].w = tiles[i].h = 0; + + tiles[i].buffer = (device_ptr)NULL; + tiles[i].buffers = NULL; + } } } } @@ -545,6 +598,7 @@ void Session::run_cpu() bool tiles_written = false; last_update_time = time_dt(); + last_display_time = last_update_time; { /* reset once to start */ @@ -620,11 +674,6 @@ void Session::run_cpu() } if (!no_tiles) { - /* buffers mutex is locked entirely while rendering each - * sample, and 
released/reacquired on each iteration to allow - * reset and draw in between */ - thread_scoped_lock buffers_lock(buffers_mutex); - /* update scene */ scoped_timer update_timer; if (update_scene()) { @@ -638,17 +687,31 @@ void Session::run_cpu() if (progress.get_cancel()) break; + /* buffers mutex is locked entirely while rendering each + * sample, and released/reacquired on each iteration to allow + * reset and draw in between */ + thread_scoped_lock buffers_lock(buffers_mutex); + + /* avoid excessive denoising in viewport after reaching a certain amount of samples */ + bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 || + (time_dt() - last_display_time) >= params.progressive_update_timeout; + /* update status and timing */ update_status_time(); /* render */ render(); + /* denoise */ + if (need_denoise) { + denoise(); + } + /* update status and timing */ update_status_time(); if (!params.background) - need_copy_to_display_buffer = true; + need_copy_to_display_buffer = need_denoise || !params.run_denoising; if (!device->error_message().empty()) progress.set_error(device->error_message()); @@ -869,6 +932,20 @@ void Session::set_pause(bool pause_) pause_cond.notify_all(); } +void Session::set_denoising(bool denoising, bool optix_denoising) +{ + /* Lock buffers so no denoising operation is triggered while the settings are changed here. */ + thread_scoped_lock buffers_lock(buffers_mutex); + + params.run_denoising = denoising; + params.full_denoising = !optix_denoising; + params.optix_denoising = optix_denoising; + + // TODO(pmours): Query the required overlap value for denoising from the device? + tile_manager.slice_overlap = denoising && !params.background ? 64 : 0; + tile_manager.schedule_denoising = denoising && !buffers; +} + void Session::wait() { if (session_thread) { @@ -1016,33 +1093,74 @@ void Session::render() /* Add path trace task. 
*/ DeviceTask task(DeviceTask::RENDER); - task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2); + task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::PATH_TRACE); task.release_tile = function_bind(&Session::release_tile, this, _1); - task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); - task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); task.need_finish_queue = params.progressive_refine; task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; - task.requested_tile_size = params.tile_size; - task.passes_size = tile_manager.params.get_passes_size(); - if (params.run_denoising) { - task.denoising = params.denoising; - - assert(!scene->film->need_update); - task.pass_stride = scene->film->pass_stride; - task.target_pass_stride = task.pass_stride; - task.pass_denoising_data = scene->film->denoising_data_offset; - task.pass_denoising_clean = scene->film->denoising_clean_offset; - - task.denoising_from_render = true; - task.denoising_do_filter = params.full_denoising; - task.denoising_use_optix = params.optix_denoising; - task.denoising_write_passes = params.write_denoising_passes; + device->task_add(task); +} + +void Session::denoise() +{ + if (!params.run_denoising) { + return; + } + + /* It can happen that denoising was already enabled, but the scene still needs an update. */ + if (scene->film->need_update || !scene->film->denoising_data_offset) { + return; } + /* Add separate denoising task. */ + DeviceTask task(DeviceTask::DENOISE); + + if (tile_manager.schedule_denoising) { + /* Run denoising on each tile. 
*/ + task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::DENOISE); + task.release_tile = function_bind(&Session::release_tile, this, _1); + task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); + task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); + } + else { + assert(buffers); + + /* Wait for rendering to finish. */ + device->task_wait(); + + /* Run denoising on the whole image at once. */ + task.type = DeviceTask::DENOISE_BUFFER; + task.x = tile_manager.state.buffer.full_x; + task.y = tile_manager.state.buffer.full_y; + task.w = tile_manager.state.buffer.width; + task.h = tile_manager.state.buffer.height; + task.buffer = buffers->buffer.device_pointer; + task.sample = tile_manager.state.sample; + task.num_samples = tile_manager.state.num_samples; + tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); + task.buffers = buffers; + } + + task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); + task.need_finish_queue = params.progressive_refine; + task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); + task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); + + task.denoising = params.denoising; + + task.pass_stride = scene->film->pass_stride; + task.target_pass_stride = task.pass_stride; + task.pass_denoising_data = scene->film->denoising_data_offset; + task.pass_denoising_clean = scene->film->denoising_clean_offset; + + task.denoising_from_render = true; + task.denoising_do_filter = params.full_denoising; + task.denoising_use_optix = params.optix_denoising; + task.denoising_write_passes = params.write_denoising_passes; + device->task_add(task); } @@ -1067,6 +1185,8 @@ void Session::copy_to_display_buffer(int sample) /* set display to new size */ display->draw_set(task.w, task.h); + + last_display_time = time_dt(); } display_outdated = false; diff --git 
a/intern/cycles/render/session.h b/intern/cycles/render/session.h index ec465601541..3ef2b70879a 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -154,6 +154,7 @@ class Session { void reset(BufferParams &params, int samples); void set_samples(int samples); void set_pause(bool pause); + void set_denoising(bool denoising, bool optix_denoising); bool update_scene(); bool load_kernels(bool lock_scene = true); @@ -178,8 +179,10 @@ class Session { void update_status_time(bool show_pause = false, bool show_done = false); - void copy_to_display_buffer(int sample); void render(); + void denoise(); + void copy_to_display_buffer(int sample); + void reset_(BufferParams &params, int samples); void run_cpu(); @@ -190,7 +193,7 @@ class Session { bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params); void reset_gpu(BufferParams &params, int samples); - bool acquire_tile(Device *tile_device, RenderTile &tile); + bool acquire_tile(Device *tile_device, RenderTile &tile, RenderTile::Task task); void update_tile_sample(RenderTile &tile); void release_tile(RenderTile &tile); @@ -213,14 +216,16 @@ class Session { thread_mutex tile_mutex; thread_mutex buffers_mutex; thread_mutex display_mutex; + thread_condition_variable denoising_cond; bool kernels_loaded; DeviceRequestedFeatures loaded_kernel_features; double reset_time; + double last_update_time; + double last_display_time; /* progressive refine */ - double last_update_time; bool update_progressive_refine(bool cancel); DeviceRequestedFeatures get_requested_device_features(); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 9ef0c695667..4ddfd56cd01 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -101,6 +101,7 @@ TileManager::TileManager(bool progressive_, tile_order = tile_order_; start_resolution = start_resolution_; pixel_size = pixel_size_; + slice_overlap = 0; num_samples = num_samples_; num_devices = num_devices_; preserve_tile_device
= preserve_tile_device_; @@ -201,8 +202,7 @@ int TileManager::gen_tiles(bool sliced) int image_h = max(1, params.height / resolution); int2 center = make_int2(image_w / 2, image_h / 2); - int num_logical_devices = preserve_tile_device ? num_devices : 1; - int num = min(image_h, num_logical_devices); + int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1; int slice_num = sliced ? num : 1; int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); @@ -216,7 +216,7 @@ int TileManager::gen_tiles(bool sliced) tile_list = state.render_tiles.begin(); if (tile_order == TILE_HILBERT_SPIRAL) { - assert(!sliced); + assert(!sliced && slice_overlap == 0); int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); state.tiles.resize(tile_w * tile_h); @@ -319,6 +319,12 @@ int TileManager::gen_tiles(bool sliced) int slice_h = (slice == slice_num - 1) ? image_h - slice * (image_h / slice_num) : image_h / slice_num; + if (slice_overlap != 0) { + int slice_y_offset = max(slice_y - slice_overlap, 0); + slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset; + slice_y = slice_y_offset; + } + int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); int tiles_per_device = divide_up(tile_w * tile_h, num); @@ -363,6 +369,7 @@ void TileManager::gen_render_tiles() { /* Regenerate just the render tiles for progressive render. 
*/ foreach (Tile &tile, state.tiles) { + tile.state = Tile::RENDER; state.render_tiles[tile.device].push_back(tile.index); } } @@ -386,17 +393,29 @@ void TileManager::set_tiles() int TileManager::get_neighbor_index(int index, int neighbor) { - static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0}; + /* Neighbor indices: + * 0 1 2 + * 3 4 5 + * 6 7 8 + */ + static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1}; + static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1}; int resolution = state.resolution_divider; int image_w = max(1, params.width / resolution); int image_h = max(1, params.height / resolution); + + int num = min(image_h, num_devices); + int slice_num = !background ? num : 1; + int slice_h = image_h / slice_num; + int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); - int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); + int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); - int nx = state.tiles[index].x / tile_size.x + dx[neighbor], - ny = state.tiles[index].y / tile_size.y + dy[neighbor]; - if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) + /* Tiles in the state tile list are always indexed from left to right, top to bottom. 
*/ + int nx = (index % tile_w) + dx[neighbor]; + int ny = (index / tile_w) + dy[neighbor]; + if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num) return -1; return ny * state.tile_stride + nx; @@ -426,15 +445,11 @@ bool TileManager::finish_tile(int index, bool &delete_tile) { delete_tile = false; - if (progressive) { - return true; - } - switch (state.tiles[index].state) { case Tile::RENDER: { if (!schedule_denoising) { state.tiles[index].state = Tile::DONE; - delete_tile = true; + delete_tile = !progressive; return true; } state.tiles[index].state = Tile::RENDERED; @@ -457,15 +472,18 @@ bool TileManager::finish_tile(int index, bool &delete_tile) int nindex = get_neighbor_index(index, neighbor); if (check_neighbor_state(nindex, Tile::DENOISED)) { state.tiles[nindex].state = Tile::DONE; - /* It can happen that the tile just finished denoising and already can be freed here. - * However, in that case it still has to be written before deleting, so we can't delete - * it yet. */ - if (neighbor == 8) { - delete_tile = true; - } - else { - delete state.tiles[nindex].buffers; - state.tiles[nindex].buffers = NULL; + /* Do not delete finished tiles in progressive mode. */ + if (!progressive) { + /* It can happen that the tile just finished denoising and already can be freed here. + * However, in that case it still has to be written before deleting, so we can't delete + * it yet. */ + if (neighbor == 4) { + delete_tile = true; + } + else { + delete state.tiles[nindex].buffers; + state.tiles[nindex].buffers = NULL; + } } } } @@ -477,27 +495,56 @@ bool TileManager::finish_tile(int index, bool &delete_tile) } } -bool TileManager::next_tile(Tile *&tile, int device) +bool TileManager::next_tile(Tile *&tile, int device, bool denoising) { - int logical_device = preserve_tile_device ? device : 0; + /* Preserve device if requested, unless this is a separate denoising device that just wants to + * grab any available tile. 
*/ + const bool preserve_device = preserve_tile_device && device < num_devices; + + int tile_index = -1; + int logical_device = preserve_device ? device : 0; + + if (denoising) { + while (logical_device < state.denoising_tiles.size()) { + if (state.denoising_tiles[logical_device].empty()) { + if (preserve_device) { + return false; + } + else { + logical_device++; + continue; + } + } - if (logical_device >= state.render_tiles.size()) - return false; + tile_index = state.denoising_tiles[logical_device].front(); + state.denoising_tiles[logical_device].pop_front(); + break; + } + } + else { + while (logical_device < state.render_tiles.size()) { + if (state.render_tiles[logical_device].empty()) { + if (preserve_device) { + return false; + } + else { + logical_device++; + continue; + } + } - if (!state.denoising_tiles[logical_device].empty()) { - int idx = state.denoising_tiles[logical_device].front(); - state.denoising_tiles[logical_device].pop_front(); - tile = &state.tiles[idx]; - return true; + tile_index = state.render_tiles[logical_device].front(); + state.render_tiles[logical_device].pop_front(); + break; + } } - if (state.render_tiles[logical_device].empty()) - return false; + if (tile_index >= 0) { + tile = &state.tiles[tile_index]; + return true; + } - int idx = state.render_tiles[logical_device].front(); - state.render_tiles[logical_device].pop_front(); - tile = &state.tiles[idx]; - return true; + return false; } bool TileManager::done() @@ -508,6 +555,16 @@ bool TileManager::done() (state.sample + state.num_samples >= end_sample); } +bool TileManager::has_tiles() +{ + foreach (Tile &tile, state.tiles) { + if (tile.state != Tile::DONE) { + return true; + } + } + return false; +} + bool TileManager::next() { if (done()) diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 017c1af0ead..14c693683c4 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -89,6 +89,7 @@ class TileManager { } state; int num_samples; + 
int slice_overlap; TileManager(bool progressive, int num_samples, @@ -105,15 +106,19 @@ class TileManager { void reset(BufferParams &params, int num_samples); void set_samples(int num_samples); bool next(); - bool next_tile(Tile *&tile, int device = 0); + bool next_tile(Tile *&tile, int device, bool denoising); bool finish_tile(int index, bool &delete_tile); bool done(); + bool has_tiles(); void set_tile_order(TileOrder tile_order_) { tile_order = tile_order_; } + int get_neighbor_index(int index, int neighbor); + bool check_neighbor_state(int index, Tile::State state); + /* ** Sample range rendering. ** */ /* Start sample in the range. */ @@ -160,9 +165,6 @@ class TileManager { /* Generate tile list, return number of tiles. */ int gen_tiles(bool sliced); void gen_render_tiles(); - - int get_neighbor_index(int index, int neighbor); - bool check_neighbor_state(int index, Tile::State state); }; CCL_NAMESPACE_END |