From 8393ccd07634b3152b18d4d527b1460dab9dbe06 Mon Sep 17 00:00:00 2001
From: Patrick Mours <pmours@nvidia.com>
Date: Tue, 4 Jan 2022 21:39:54 +0100
Subject: Cycles: Add OptiX temporal denoising support

Re-enables the `bpy.ops.cycles.denoise_animation()` operator and modifies it to support
temporal denoising with OptiX. The input renders must include both the "Vector" and
"Denoising Data" passes.

Differential Revision: https://developer.blender.org/D11442
---
 intern/cycles/blender/python.cpp           |  34 +-
 intern/cycles/blender/sync.h               |   2 +-
 intern/cycles/device/denoise.cpp           |   2 +
 intern/cycles/device/denoise.h             |   6 +-
 intern/cycles/device/optix/device_impl.cpp |  81 ++++-
 intern/cycles/device/optix/device_impl.h   |   1 +
 intern/cycles/device/queue.h               |   5 +-
 intern/cycles/kernel/device/gpu/kernel.h   |  14 +-
 intern/cycles/kernel/types.h               |   1 +
 intern/cycles/scene/pass.cpp               |   5 +
 intern/cycles/session/denoising.cpp        | 529 ++++++++---------------------
 intern/cycles/session/denoising.h          |  76 ++---
 12 files changed, 285 insertions(+), 471 deletions(-)

(limited to 'intern')
diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp
index 024dae306b0..f509d5c2eeb 100644
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -735,27 +735,20 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepat
 
 static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
 {
-#if 1
-  (void)args;
-  (void)keywords;
-#else
   static const char *keyword_list[] = {
-      "preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
+      "preferences", "scene", "view_layer", "input", "output", NULL};
   PyObject *pypreferences, *pyscene, *pyviewlayer;
   PyObject *pyinput, *pyoutput = NULL;
-  int tile_size = 0, samples = 0;
 
   if (!PyArg_ParseTupleAndKeywords(args,
                                    keywords,
-                                   "OOOO|Oii",
+                                   "OOOO|O",
                                    (char **)keyword_list,
                                    &pypreferences,
                                    &pyscene,
                                    &pyviewlayer,
                                    &pyinput,
-                                   &pyoutput,
-                                   &tile_size,
-                                   &samples)) {
+                                   &pyoutput)) {
     return NULL;
   }
 
@@ -777,14 +770,10 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
                      &RNA_ViewLayer,
                      PyLong_AsVoidPtr(pyviewlayer),
                      &viewlayerptr);
-  PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
+  BL::ViewLayer b_view_layer(viewlayerptr);
 
-  DenoiseParams params;
-  params.radius = get_int(cviewlayer, "denoising_radius");
-  params.strength = get_float(cviewlayer, "denoising_strength");
-  params.feature_strength = get_float(cviewlayer, "denoising_feature_strength");
-  params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca");
-  params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames");
+  DenoiseParams params = BlenderSync::get_denoise_params(b_scene, b_view_layer, true);
+  params.use = true;
 
   /* Parse file paths list. */
   vector<string> input, output;
@@ -812,24 +801,15 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
   }
 
   /* Create denoiser. */
-  DenoiserPipeline denoiser(device);
-  denoiser.params = params;
+  DenoiserPipeline denoiser(device, params);
   denoiser.input = input;
   denoiser.output = output;
 
-  if (tile_size > 0) {
-    denoiser.tile_size = make_int2(tile_size, tile_size);
-  }
-  if (samples > 0) {
-    denoiser.samples_override = samples;
-  }
-
   /* Run denoiser. */
   if (!denoiser.run()) {
     PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
     return NULL;
   }
-#endif
 
   Py_RETURN_NONE;
 }
diff --git a/intern/cycles/blender/sync.h b/intern/cycles/blender/sync.h
index d074f90bb1b..3722b938863 100644
--- a/intern/cycles/blender/sync.h
+++ b/intern/cycles/blender/sync.h
@@ -105,11 +105,11 @@ class BlenderSync {
   static BufferParams get_buffer_params(
       BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height);
 
- private:
   static DenoiseParams get_denoise_params(BL::Scene &b_scene,
                                           BL::ViewLayer &b_view_layer,
                                           bool background);
 
+ private:
   /* sync */
   void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
   void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
diff --git a/intern/cycles/device/denoise.cpp b/intern/cycles/device/denoise.cpp
index c291a7a0adb..8ae2bb213e4 100644
--- a/intern/cycles/device/denoise.cpp
+++ b/intern/cycles/device/denoise.cpp
@@ -76,6 +76,8 @@ NODE_DEFINE(DenoiseParams)
   SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true);
   SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false);
 
+  SOCKET_BOOLEAN(temporally_stable, "Temporally Stable", false);
+
   SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST);
 
   return type;
diff --git a/intern/cycles/device/denoise.h b/intern/cycles/device/denoise.h
index 3f30506ae06..07868527fc5 100644
--- a/intern/cycles/device/denoise.h
+++ b/intern/cycles/device/denoise.h
@@ -72,6 +72,9 @@ class DenoiseParams : public Node {
   bool use_pass_albedo = true;
   bool use_pass_normal = true;
 
+  /* Configure the denoiser to use motion vectors, previous image and a temporally stable model. */
+  bool temporally_stable = false;
+
   DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST;
 
   static const NodeEnum *get_type_enum();
@@ -83,7 +86,8 @@ class DenoiseParams : public Node {
   {
     return !(use == other.use && type == other.type && start_sample == other.start_sample &&
              use_pass_albedo == other.use_pass_albedo &&
-             use_pass_normal == other.use_pass_normal && prefilter == other.prefilter);
+             use_pass_normal == other.use_pass_normal &&
+             temporally_stable == other.temporally_stable && prefilter == other.prefilter);
   }
 };
 
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 38cc3330ebd..009661b2dec 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -566,6 +566,19 @@ class OptiXDevice::DenoiseContext {
       }
     }
 
+    if (denoise_params.temporally_stable) {
+      prev_output.device_pointer = render_buffers->buffer.device_pointer;
+
+      prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
+
+      prev_output.stride = buffer_params.stride;
+      prev_output.pass_stride = buffer_params.pass_stride;
+
+      num_input_passes += 1;
+      use_pass_flow = true;
+      pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
+    }
+
     use_guiding_passes = (num_input_passes - 1) > 0;
 
     if (use_guiding_passes) {
@@ -574,6 +587,7 @@ class OptiXDevice::DenoiseContext {
 
         guiding_params.pass_albedo = pass_denoising_albedo;
         guiding_params.pass_normal = pass_denoising_normal;
+        guiding_params.pass_flow = pass_motion;
 
         guiding_params.stride = buffer_params.stride;
         guiding_params.pass_stride = buffer_params.pass_stride;
@@ -588,6 +602,10 @@ class OptiXDevice::DenoiseContext {
           guiding_params.pass_normal = guiding_params.pass_stride;
           guiding_params.pass_stride += 3;
         }
+        if (use_pass_flow) {
+          guiding_params.pass_flow = guiding_params.pass_stride;
+          guiding_params.pass_stride += 2;
+        }
 
         guiding_params.stride = buffer_params.width;
 
@@ -605,6 +623,16 @@ class OptiXDevice::DenoiseContext {
   RenderBuffers *render_buffers = nullptr;
   const BufferParams &buffer_params;
 
+  /* Previous output. */
+  struct {
+    device_ptr device_pointer = 0;
+
+    int offset = PASS_UNUSED;
+
+    int stride = -1;
+    int pass_stride = -1;
+  } prev_output;
+
   /* Device-side storage of the guiding passes. */
   device_only_memory<float> guiding_buffer;
 
@@ -614,6 +642,7 @@ class OptiXDevice::DenoiseContext {
     /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
     int pass_albedo = PASS_UNUSED;
     int pass_normal = PASS_UNUSED;
+    int pass_flow = PASS_UNUSED;
 
     int stride = -1;
     int pass_stride = -1;
@@ -624,6 +653,7 @@ class OptiXDevice::DenoiseContext {
   bool use_guiding_passes = false;
   bool use_pass_albedo = false;
   bool use_pass_normal = false;
+  bool use_pass_flow = false;
 
   int num_samples = 0;
 
@@ -632,6 +662,7 @@ class OptiXDevice::DenoiseContext {
   /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
   int pass_denoising_albedo = PASS_UNUSED;
   int pass_denoising_normal = PASS_UNUSED;
+  int pass_motion = PASS_UNUSED;
 
   /* For passes which don't need albedo channel for denoising we replace the actual albedo with
    * the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
@@ -702,6 +733,7 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
                              &context.guiding_params.pass_stride,
                              &context.guiding_params.pass_albedo,
                              &context.guiding_params.pass_normal,
+                             &context.guiding_params.pass_flow,
                              &context.render_buffers->buffer.device_pointer,
                              &buffer_params.offset,
                              &buffer_params.stride,
@@ -709,6 +741,7 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
                              &context.pass_sample_count,
                              &context.pass_denoising_albedo,
                              &context.pass_denoising_normal,
+                             &context.pass_motion,
                              &buffer_params.full_x,
                              &buffer_params.full_y,
                              &buffer_params.width,
@@ -881,7 +914,8 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
 {
   const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
                                  (denoiser_.use_pass_albedo != context.use_pass_albedo) ||
-                                 (denoiser_.use_pass_normal != context.use_pass_normal);
+                                 (denoiser_.use_pass_normal != context.use_pass_normal) ||
+                                 (denoiser_.use_pass_flow != context.use_pass_flow);
   if (!recreate_denoiser) {
     return true;
   }
@@ -895,8 +929,14 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
   OptixDenoiserOptions denoiser_options = {};
   denoiser_options.guideAlbedo = context.use_pass_albedo;
   denoiser_options.guideNormal = context.use_pass_normal;
+
+  OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
+  if (context.use_pass_flow) {
+    model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
+  }
+
   const OptixResult result = optixDenoiserCreate(
-      this->context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser_.optix_denoiser);
+      this->context, model, &denoiser_options, &denoiser_.optix_denoiser);
 
   if (result != OPTIX_SUCCESS) {
     set_error("Failed to create OptiX denoiser");
@@ -906,6 +946,7 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
   /* OptiX denoiser handle was created with the requested number of input passes. */
   denoiser_.use_pass_albedo = context.use_pass_albedo;
   denoiser_.use_pass_normal = context.use_pass_normal;
+  denoiser_.use_pass_flow = context.use_pass_flow;
 
   /* OptiX denoiser has been created, but it needs configuration. */
   denoiser_.is_configured = false;
@@ -965,8 +1006,10 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
   OptixImage2D color_layer = {0};
   OptixImage2D albedo_layer = {0};
   OptixImage2D normal_layer = {0};
+  OptixImage2D flow_layer = {0};
 
   OptixImage2D output_layer = {0};
+  OptixImage2D prev_output_layer = {0};
 
   /* Color pass. */
   {
@@ -982,6 +1025,19 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
     color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
   }
 
+  /* Previous output. */
+  if (context.prev_output.offset != PASS_UNUSED) {
+    const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
+
+    prev_output_layer.data = context.prev_output.device_pointer +
+                             context.prev_output.offset * sizeof(float);
+    prev_output_layer.width = width;
+    prev_output_layer.height = height;
+    prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
+    prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
+    prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+  }
+
   /* Optional albedo and color passes. */
   if (context.num_input_passes > 1) {
     const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@@ -1005,21 +1061,32 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
       normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
       normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
     }
+
+    if (context.use_pass_flow) {
+      flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
+      flow_layer.width = width;
+      flow_layer.height = height;
+      flow_layer.rowStrideInBytes = row_stride_in_bytes;
+      flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
+      flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
+    }
   }
 
   /* Denoise in-place of the noisy input in the render buffers. */
   output_layer = color_layer;
 
-  /* Finally run denoising. */
-  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
+  OptixDenoiserGuideLayer guide_layers = {};
+  guide_layers.albedo = albedo_layer;
+  guide_layers.normal = normal_layer;
+  guide_layers.flow = flow_layer;
 
   OptixDenoiserLayer image_layers = {};
   image_layers.input = color_layer;
+  image_layers.previousOutput = prev_output_layer;
   image_layers.output = output_layer;
 
-  OptixDenoiserGuideLayer guide_layers = {};
-  guide_layers.albedo = albedo_layer;
-  guide_layers.normal = normal_layer;
+  /* Finally run denoising. */
+  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
 
   optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
                                             denoiser_.queue.stream(),
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 25073c60e69..a1865527c2d 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -104,6 +104,7 @@ class OptiXDevice : public CUDADevice {
 
     bool use_pass_albedo = false;
     bool use_pass_normal = false;
+    bool use_pass_flow = false;
   };
   Denoiser denoiser_;
 
diff --git a/intern/cycles/device/queue.h b/intern/cycles/device/queue.h
index 4e9f41f7875..926b7cba78a 100644
--- a/intern/cycles/device/queue.h
+++ b/intern/cycles/device/queue.h
@@ -19,6 +19,7 @@
 #include "device/kernel.h"
 
 #include "device/graphics_interop.h"
+#include "util/debug.h"
 #include "util/log.h"
 #include "util/map.h"
 #include "util/string.h"
@@ -42,7 +43,7 @@ struct DeviceKernelArguments {
     KERNEL_FILM_CONVERT,
   };
 
-  static const int MAX_ARGS = 16;
+  static const int MAX_ARGS = 18;
   Type types[MAX_ARGS];
   void *values[MAX_ARGS];
   size_t sizes[MAX_ARGS];
@@ -85,6 +86,8 @@ struct DeviceKernelArguments {
   }
   void add(const Type type, const void *value, size_t size)
   {
+    assert(count < MAX_ARGS);
+
     types[count] = type;
     values[count] = (void *)value;
     sizes[count] = size;
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index b50f492e8c7..027b2a7a8c7 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -756,6 +756,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
                              int guiding_pass_stride,
                              int guiding_pass_albedo,
                              int guiding_pass_normal,
+                             int guiding_pass_flow,
                              ccl_global const float *render_buffer,
                              int render_offset,
                              int render_stride,
@@ -763,6 +764,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
                              int render_pass_sample_count,
                              int render_pass_denoising_albedo,
                              int render_pass_denoising_normal,
+                             int render_pass_motion,
                              int full_x,
                              int full_y,
                              int width,
@@ -814,6 +816,17 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
     normal_out[1] = normal_in[1] * pixel_scale;
     normal_out[2] = normal_in[2] * pixel_scale;
   }
+
+  /* Flow pass. */
+  if (guiding_pass_flow != PASS_UNUSED) {
+    kernel_assert(render_pass_motion != PASS_UNUSED);
+
+    const float *motion_in = buffer + render_pass_motion;
+    float *flow_out = guiding_pixel + guiding_pass_flow;
+
+    flow_out[0] = -motion_in[0] * pixel_scale;
+    flow_out[1] = -motion_in[1] * pixel_scale;
+  }
 }
 
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
@@ -899,7 +912,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
   else {
     /* Assigning to zero since this is a default alpha value for 3-component passes, and it
      * is an opaque pixel for 4 component passes. */
-
     denoised_pixel[3] = 0;
   }
 }
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 1d0537f9547..5d41abb53c4 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -388,6 +388,7 @@ typedef enum PassType {
   PASS_DENOISING_NORMAL,
   PASS_DENOISING_ALBEDO,
   PASS_DENOISING_DEPTH,
+  PASS_DENOISING_PREVIOUS,
 
   /* PASS_SHADOW_CATCHER accumulates contribution of shadow catcher object which is not affected by
    * any other object. The pass accessor will divide the combined pass by the shadow catcher. The
diff --git a/intern/cycles/scene/pass.cpp b/intern/cycles/scene/pass.cpp
index a885ede50a4..ca5687e6b4d 100644
--- a/intern/cycles/scene/pass.cpp
+++ b/intern/cycles/scene/pass.cpp
@@ -101,6 +101,7 @@ const NodeEnum *Pass::get_type_enum()
     pass_type_enum.insert("denoising_normal", PASS_DENOISING_NORMAL);
     pass_type_enum.insert("denoising_albedo", PASS_DENOISING_ALBEDO);
     pass_type_enum.insert("denoising_depth", PASS_DENOISING_DEPTH);
+    pass_type_enum.insert("denoising_previous", PASS_DENOISING_PREVIOUS);
 
     pass_type_enum.insert("shadow_catcher", PASS_SHADOW_CATCHER);
     pass_type_enum.insert("shadow_catcher_sample_count", PASS_SHADOW_CATCHER_SAMPLE_COUNT);
@@ -299,6 +300,10 @@ PassInfo Pass::get_info(const PassType type, const bool include_albedo)
     case PASS_DENOISING_DEPTH:
       pass_info.num_components = 1;
       break;
+    case PASS_DENOISING_PREVIOUS:
+      pass_info.num_components = 3;
+      pass_info.use_exposure = true;
+      break;
 
     case PASS_SHADOW_CATCHER:
       pass_info.num_components = 3;
diff --git a/intern/cycles/session/denoising.cpp b/intern/cycles/session/denoising.cpp
index 21df068092a..91a18928bd2 100644
--- a/intern/cycles/session/denoising.cpp
+++ b/intern/cycles/session/denoising.cpp
@@ -16,17 +16,12 @@
 
 #include "session/denoising.h"
 
-#if 0
+#include "util/map.h"
+#include "util/system.h"
+#include "util/task.h"
+#include "util/time.h"
 
-#  include "kernel/filter/filter_defines.h"
-
-#  include "util/util_foreach.h"
-#  include "util/util_map.h"
-#  include "util/util_system.h"
-#  include "util/util_task.h"
-#  include "util/util_time.h"
-
-#  include <OpenImageIO/filesystem.h>
+#include <OpenImageIO/filesystem.h>
 
 CCL_NAMESPACE_BEGIN
 
@@ -125,24 +120,18 @@ static void fill_mapping(vector<ChannelMapping> &map, int pos, string name, stri
   }
 }
 
-static const int INPUT_NUM_CHANNELS = 15;
-static const int INPUT_DENOISING_DEPTH = 0;
-static const int INPUT_DENOISING_NORMAL = 1;
-static const int INPUT_DENOISING_SHADOWING = 4;
-static const int INPUT_DENOISING_ALBEDO = 5;
-static const int INPUT_NOISY_IMAGE = 8;
-static const int INPUT_DENOISING_VARIANCE = 11;
-static const int INPUT_DENOISING_INTENSITY = 14;
+static const int INPUT_NUM_CHANNELS = 13;
+static const int INPUT_NOISY_IMAGE = 0;
+static const int INPUT_DENOISING_NORMAL = 3;
+static const int INPUT_DENOISING_ALBEDO = 6;
+static const int INPUT_MOTION = 9;
 static vector<ChannelMapping> input_channels()
 {
   vector<ChannelMapping> map;
-  fill_mapping(map, INPUT_DENOISING_DEPTH, "Denoising Depth", "Z");
+  fill_mapping(map, INPUT_NOISY_IMAGE, "Combined", "RGB");
   fill_mapping(map, INPUT_DENOISING_NORMAL, "Denoising Normal", "XYZ");
-  fill_mapping(map, INPUT_DENOISING_SHADOWING, "Denoising Shadowing", "X");
   fill_mapping(map, INPUT_DENOISING_ALBEDO, "Denoising Albedo", "RGB");
-  fill_mapping(map, INPUT_NOISY_IMAGE, "Noisy Image", "RGB");
-  fill_mapping(map, INPUT_DENOISING_VARIANCE, "Denoising Variance", "RGB");
-  fill_mapping(map, INPUT_DENOISING_INTENSITY, "Denoising Intensity", "X");
+  fill_mapping(map, INPUT_MOTION, "Vector", "XYZW");
   return map;
 }
 
@@ -162,7 +151,7 @@ bool DenoiseImageLayer::detect_denoising_channels()
   input_to_image_channel.clear();
   input_to_image_channel.resize(INPUT_NUM_CHANNELS, -1);
 
-  foreach (const ChannelMapping &mapping, input_channels()) {
+  for (const ChannelMapping &mapping : input_channels()) {
     vector<string>::iterator i = find(channels.begin(), channels.end(), mapping.name);
     if (i == channels.end()) {
       return false;
@@ -177,7 +166,7 @@ bool DenoiseImageLayer::detect_denoising_channels()
   output_to_image_channel.clear();
   output_to_image_channel.resize(OUTPUT_NUM_CHANNELS, -1);
 
-  foreach (const ChannelMapping &mapping, output_channels()) {
+  for (const ChannelMapping &mapping : output_channels()) {
     vector<string>::iterator i = find(channels.begin(), channels.end(), mapping.name);
     if (i == channels.end()) {
       return false;
@@ -199,18 +188,16 @@ bool DenoiseImageLayer::detect_denoising_channels()
   return true;
 }
 
-bool DenoiseImageLayer::match_channels(int neighbor,
-                                       const std::vector<string> &channelnames,
+bool DenoiseImageLayer::match_channels(const std::vector<string> &channelnames,
                                        const std::vector<string> &neighbor_channelnames)
 {
-  neighbor_input_to_image_channel.resize(neighbor + 1);
-  vector<int> &mapping = neighbor_input_to_image_channel[neighbor];
+  vector<int> &mapping = previous_output_to_image_channel;
 
   assert(mapping.size() == 0);
-  mapping.resize(input_to_image_channel.size(), -1);
+  mapping.resize(output_to_image_channel.size(), -1);
 
-  for (int i = 0; i < input_to_image_channel.size(); i++) {
-    const string &channel = channelnames[input_to_image_channel[i]];
+  for (int i = 0; i < output_to_image_channel.size(); i++) {
+    const string &channel = channelnames[output_to_image_channel[i]];
     std::vector<string>::const_iterator frame_channel = find(
         neighbor_channelnames.begin(), neighbor_channelnames.end(), channel);
 
@@ -226,19 +213,9 @@ bool DenoiseImageLayer::match_channels(int neighbor,
 
 /* Denoise Task */
 
-DenoiseTask::DenoiseTask(Device *device,
-                         DenoiserPipeline *denoiser,
-                         int frame,
-                         const vector<int> &neighbor_frames)
-    : denoiser(denoiser),
-      device(device),
-      frame(frame),
-      neighbor_frames(neighbor_frames),
-      current_layer(0),
-      input_pixels(device, "filter input buffer", MEM_READ_ONLY),
-      num_tiles(0)
+DenoiseTask::DenoiseTask(Device *device, DenoiserPipeline *denoiser, int frame)
+    : denoiser(denoiser), device(device), frame(frame), current_layer(0), buffers(device)
 {
-  image.samples = denoiser->samples_override;
 }
 
 DenoiseTask::~DenoiseTask()
@@ -246,284 +223,39 @@ DenoiseTask::~DenoiseTask()
   free();
 }
 
-/* Device callbacks */
-
-bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &tile)
-{
-  thread_scoped_lock tile_lock(tiles_mutex);
-
-  if (tiles.empty()) {
-    return false;
-  }
-
-  tile = tiles.front();
-  tiles.pop_front();
-
-  device->map_tile(tile_device, tile);
-
-  print_progress(num_tiles - tiles.size(), num_tiles, frame, denoiser->num_frames);
-
-  return true;
-}
-
-/* Mapping tiles is required for regular rendering since each tile has its separate memory
- * which may be allocated on a different device.
- * For standalone denoising, there is a single memory that is present on all devices, so the only
- * thing that needs to be done here is to specify the surrounding tile geometry.
- *
- * However, since there is only one large memory, the denoised result has to be written to
- * a different buffer to avoid having to copy an entire horizontal slice of the image. */
-void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
-{
-  RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
-  RenderTile &target_tile = neighbors.target;
-
-  /* Fill tile information. */
-  for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
-    if (i == RenderTileNeighbors::CENTER) {
-      continue;
-    }
-
-    RenderTile &tile = neighbors.tiles[i];
-    int dx = (i % 3) - 1;
-    int dy = (i / 3) - 1;
-    tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width);
-    tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x;
-    tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height);
-    tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y;
-
-    tile.buffer = center_tile.buffer;
-    tile.offset = center_tile.offset;
-    tile.stride = image.width;
-  }
-
-  /* Allocate output buffer. */
-  device_vector<float> *output_mem = new device_vector<float>(
-      tile_device, "denoising_output", MEM_READ_WRITE);
-  output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h);
-
-  /* Fill output buffer with noisy image, assumed by kernel_filter_finalize
-   * when skipping denoising of some pixels. */
-  float *result = output_mem->data();
-  float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)];
-
-  const DenoiseImageLayer &layer = image.layers[current_layer];
-  const int *input_to_image_channel = layer.input_to_image_channel.data();
-
-  for (int y = 0; y < center_tile.h; y++) {
-    for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
-      for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
-        result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]];
-      }
-    }
-    in += image.num_channels * image.width;
-  }
-
-  output_mem->copy_to_device();
-
-  /* Fill output tile info. */
-  target_tile = center_tile;
-  target_tile.buffer = output_mem->device_pointer;
-  target_tile.stride = target_tile.w;
-  target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride;
-
-  thread_scoped_lock output_lock(output_mutex);
-  assert(output_pixels.count(center_tile.tile_index) == 0);
-  output_pixels[target_tile.tile_index] = output_mem;
-}
-
-void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors)
-{
-  RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
-  RenderTile &target_tile = neighbors.target;
-
-  thread_scoped_lock output_lock(output_mutex);
-  assert(output_pixels.count(center_tile.tile_index) == 1);
-  device_vector<float> *output_mem = output_pixels[target_tile.tile_index];
-  output_pixels.erase(center_tile.tile_index);
-  output_lock.unlock();
-
-  /* Copy denoised pixels from device. */
-  output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h);
-
-  float *result = output_mem->data();
-  float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)];
-
-  const DenoiseImageLayer &layer = image.layers[current_layer];
-  const int *output_to_image_channel = layer.output_to_image_channel.data();
-
-  for (int y = 0; y < target_tile.h; y++) {
-    for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
-      for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
-        out[image.num_channels * x + output_to_image_channel[i]] = result[i];
-      }
-    }
-    out += image.num_channels * image.width;
-  }
-
-  /* Free device buffer. */
-  output_mem->free();
-  delete output_mem;
-}
-
-void DenoiseTask::release_tile()
-{
-}
-
-bool DenoiseTask::get_cancel()
-{
-  return false;
-}
-
-void DenoiseTask::create_task(DeviceTask &task)
-{
-  /* Callback functions. */
-  task.acquire_tile = function_bind(&DenoiseTask::acquire_tile, this, device, _1, _2);
-  task.map_neighbor_tiles = function_bind(&DenoiseTask::map_neighboring_tiles, this, _1, _2);
-  task.unmap_neighbor_tiles = function_bind(&DenoiseTask::unmap_neighboring_tiles, this, _1);
-  task.release_tile = function_bind(&DenoiseTask::release_tile, this);
-  task.get_cancel = function_bind(&DenoiseTask::get_cancel, this);
-
-  /* Denoising parameters. */
-  task.denoising = denoiser->params;
-  task.denoising.type = DENOISER_NLM;
-  task.denoising.use = true;
-  task.denoising_from_render = false;
-
-  task.denoising_frames.resize(neighbor_frames.size());
-  for (int i = 0; i < neighbor_frames.size(); i++) {
-    task.denoising_frames[i] = neighbor_frames[i] - frame;
-  }
-
-  /* Buffer parameters. */
-  task.pass_stride = INPUT_NUM_CHANNELS;
-  task.target_pass_stride = OUTPUT_NUM_CHANNELS;
-  task.pass_denoising_data = 0;
-  task.pass_denoising_clean = -1;
-  task.frame_stride = image.width * image.height * INPUT_NUM_CHANNELS;
-
-  /* Create tiles. */
-  thread_scoped_lock tile_lock(tiles_mutex);
-  thread_scoped_lock output_lock(output_mutex);
-
-  tiles.clear();
-  assert(output_pixels.empty());
-  output_pixels.clear();
-
-  int tiles_x = divide_up(image.width, denoiser->tile_size.x);
-  int tiles_y = divide_up(image.height, denoiser->tile_size.y);
-
-  for (int ty = 0; ty < tiles_y; ty++) {
-    for (int tx = 0; tx < tiles_x; tx++) {
-      RenderTile tile;
-      tile.x = tx * denoiser->tile_size.x;
-      tile.y = ty * denoiser->tile_size.y;
-      tile.w = min(image.width - tile.x, denoiser->tile_size.x);
-      tile.h = min(image.height - tile.y, denoiser->tile_size.y);
-      tile.start_sample = 0;
-      tile.num_samples = image.layers[current_layer].samples;
-      tile.sample = 0;
-      tile.offset = 0;
-      tile.stride = image.width;
-      tile.tile_index = ty * tiles_x + tx;
-      tile.task = RenderTile::DENOISE;
-      tile.buffers = NULL;
-      tile.buffer = input_pixels.device_pointer;
-      tiles.push_back(tile);
-    }
-  }
-
-  num_tiles = tiles.size();
-}
-
 /* Denoiser Operations */
 
 bool DenoiseTask::load_input_pixels(int layer)
 {
-  int w = image.width;
-  int h = image.height;
-  int num_pixels = image.width * image.height;
-  int frame_stride = num_pixels * INPUT_NUM_CHANNELS;
-
   /* Load center image */
   DenoiseImageLayer &image_layer = image.layers[layer];
 
-  float *buffer_data = input_pixels.data();
-  image.read_pixels(image_layer, buffer_data);
-  buffer_data += frame_stride;
-
-  /* Load neighbor images */
-  for (int i = 0; i < image.in_neighbors.size(); i++) {
-    if (!image.read_neighbor_pixels(i, image_layer, buffer_data)) {
-      error = "Failed to read neighbor frame pixels";
-      return false;
-    }
-    buffer_data += frame_stride;
-  }
-
-  /* Preprocess */
-  buffer_data = input_pixels.data();
-  for (int neighbor = 0; neighbor < image.in_neighbors.size() + 1; neighbor++) {
-    /* Clamp */
-    if (denoiser->params.clamp_input) {
-      for (int i = 0; i < num_pixels * INPUT_NUM_CHANNELS; i++) {
-        buffer_data[i] = clamp(buffer_data[i], -1e8f, 1e8f);
-      }
-    }
-
-    /* Box blur */
-    int r = 5 * denoiser->params.radius;
-    float *data = buffer_data + 14;
-    array<float> temp(num_pixels);
-
-    for (int y = 0; y < h; y++) {
-      for (int x = 0; x < w; x++) {
-        int n = 0;
-        float sum = 0.0f;
-        for (int dx = max(x - r, 0); dx < min(x + r + 1, w); dx++, n++) {
-          sum += data[INPUT_NUM_CHANNELS * (y * w + dx)];
-        }
-        temp[y * w + x] = sum / n;
-      }
-    }
-
-    for (int y = 0; y < h; y++) {
-      for (int x = 0; x < w; x++) {
-        int n = 0;
-        float sum = 0.0f;
-
-        for (int dy = max(y - r, 0); dy < min(y + r + 1, h); dy++, n++) {
-          sum += temp[dy * w + x];
-        }
-
-        data[INPUT_NUM_CHANNELS * (y * w + x)] = sum / n;
-      }
-    }
-
-    /* Highlight compression */
-    data = buffer_data + 8;
-    for (int y = 0; y < h; y++) {
-      for (int x = 0; x < w; x++) {
-        int idx = INPUT_NUM_CHANNELS * (y * w + x);
-        float3 color = make_float3(data[idx], data[idx + 1], data[idx + 2]);
-        color = color_highlight_compress(color, NULL);
-        data[idx] = color.x;
-        data[idx + 1] = color.y;
-        data[idx + 2] = color.z;
-      }
-    }
+  float *buffer_data = buffers.buffer.data();
+  image.read_pixels(image_layer, buffers.params, buffer_data);
 
-    buffer_data += frame_stride;
+  /* Load previous image */
+  if (frame > 0 && !image.read_previous_pixels(image_layer, buffers.params, buffer_data)) {
+    error = "Failed to read neighbor frame pixels";
+    return false;
   }
 
   /* Copy to device */
-  input_pixels.copy_to_device();
+  buffers.buffer.copy_to_device();
 
   return true;
 }
 
 /* Task stages */
 
+static void add_pass(vector<Pass *> &passes, PassType type, PassMode mode = PassMode::NOISY)
+{
+  Pass *pass = new Pass();
+  pass->set_type(type);
+  pass->set_mode(mode);
+
+  passes.push_back(pass);
+}
+
 bool DenoiseTask::load()
 {
   string center_filepath = denoiser->input[frame];
@@ -531,7 +263,8 @@ bool DenoiseTask::load()
     return false;
   }
 
-  if (!image.load_neighbors(denoiser->input, neighbor_frames, error)) {
+  /* Use previous frame output as input for subsequent frames. */
+  if (frame > 0 && !image.load_previous(denoiser->output[frame - 1], error)) {
     return false;
   }
 
@@ -540,10 +273,35 @@ bool DenoiseTask::load()
     return false;
   }
 
+  /* Enable temporal denoising for frames after the first (which will use the output from the
+   * previous frames). */
+  DenoiseParams params = denoiser->denoiser->get_params();
+  params.temporally_stable = frame > 0;
+  denoiser->denoiser->set_params(params);
+
   /* Allocate device buffer. */
-  int num_frames = image.in_neighbors.size() + 1;
-  input_pixels.alloc(image.width * INPUT_NUM_CHANNELS, image.height * num_frames);
-  input_pixels.zero_to_device();
+  vector<Pass *> passes;
+  add_pass(passes, PassType::PASS_COMBINED);
+  add_pass(passes, PassType::PASS_DENOISING_ALBEDO);
+  add_pass(passes, PassType::PASS_DENOISING_NORMAL);
+  add_pass(passes, PassType::PASS_MOTION);
+  add_pass(passes, PassType::PASS_DENOISING_PREVIOUS);
+  add_pass(passes, PassType::PASS_COMBINED, PassMode::DENOISED);
+
+  BufferParams buffer_params;
+  buffer_params.width = image.width;
+  buffer_params.height = image.height;
+  buffer_params.full_x = 0;
+  buffer_params.full_y = 0;
+  buffer_params.full_width = image.width;
+  buffer_params.full_height = image.height;
+  buffer_params.update_passes(passes);
+
+  for (Pass *pass : passes) {
+    delete pass;
+  }
+
+  buffers.reset(buffer_params);
 
   /* Read pixels for first layer. */
   current_layer = 0;
@@ -565,10 +323,26 @@ bool DenoiseTask::exec()
     }
 
     /* Run task on device. */
-    DeviceTask task(DeviceTask::RENDER);
-    create_task(task);
-    device->task_add(task);
-    device->task_wait();
+    denoiser->denoiser->denoise_buffer(buffers.params, &buffers, 1, true);
+
+    /* Copy denoised pixels from device. */
+    buffers.buffer.copy_from_device();
+
+    float *result = buffers.buffer.data(), *out = image.pixels.data();
+
+    const DenoiseImageLayer &layer = image.layers[current_layer];
+    const int *output_to_image_channel = layer.output_to_image_channel.data();
+
+    for (int y = 0; y < image.height; y++) {
+      for (int x = 0; x < image.width; x++, result += buffers.params.pass_stride) {
+        for (int j = 0; j < OUTPUT_NUM_CHANNELS; j++) {
+          int offset = buffers.params.get_pass_offset(PASS_COMBINED, PassMode::DENOISED);
+          int image_channel = output_to_image_channel[j];
+          out[image.num_channels * x + image_channel] = result[offset + j];
+        }
+      }
+      out += image.num_channels * image.width;
+    }
 
     printf("\n");
   }
@@ -586,8 +360,7 @@ bool DenoiseTask::save()
 void DenoiseTask::free()
 {
   image.free();
-  input_pixels.free();
-  assert(output_pixels.empty());
+  buffers.buffer.free();
 }
 
 /* Denoise Image Storage */
@@ -607,7 +380,7 @@ DenoiseImage::~DenoiseImage()
 
 void DenoiseImage::close_input()
 {
-  in_neighbors.clear();
+  in_previous.reset();
 }
 
 void DenoiseImage::free()
@@ -677,39 +450,61 @@ bool DenoiseImage::parse_channels(const ImageSpec &in_spec, string &error)
   return true;
 }
 
-void DenoiseImage::read_pixels(const DenoiseImageLayer &layer, float *input_pixels)
+void DenoiseImage::read_pixels(const DenoiseImageLayer &layer,
+                               const BufferParams &params,
+                               float *input_pixels)
 {
-  /* Pixels from center file have already been loaded into pixels.
-   * We copy a subset into the device input buffer with channels reshuffled. */
+  /* Pixels from center file have already been loaded into pixels. We copy a subset into the
+   * device input buffer with channels reshuffled, indexing in size_t to avoid int overflow. */
   const int *input_to_image_channel = layer.input_to_image_channel.data();
 
   for (int i = 0; i < width * height; i++) {
-    for (int j = 0; j < INPUT_NUM_CHANNELS; j++) {
-      int image_channel = input_to_image_channel[j];
-      input_pixels[i * INPUT_NUM_CHANNELS + j] =
+    for (int j = 0; j < 3; ++j) {
+      int offset = params.get_pass_offset(PASS_COMBINED);
+      int image_channel = input_to_image_channel[INPUT_NOISY_IMAGE + j];
+      input_pixels[((size_t)i) * params.pass_stride + offset + j] =
+          pixels[((size_t)i) * num_channels + image_channel];
+    }
+    for (int j = 0; j < 3; ++j) {
+      int offset = params.get_pass_offset(PASS_DENOISING_NORMAL);
+      int image_channel = input_to_image_channel[INPUT_DENOISING_NORMAL + j];
+      input_pixels[((size_t)i) * params.pass_stride + offset + j] =
+          pixels[((size_t)i) * num_channels + image_channel];
+    }
+    for (int j = 0; j < 3; ++j) {
+      int offset = params.get_pass_offset(PASS_DENOISING_ALBEDO);
+      int image_channel = input_to_image_channel[INPUT_DENOISING_ALBEDO + j];
+      input_pixels[((size_t)i) * params.pass_stride + offset + j] =
+          pixels[((size_t)i) * num_channels + image_channel];
+    }
+    for (int j = 0; j < 4; ++j) {
+      int offset = params.get_pass_offset(PASS_MOTION);
+      int image_channel = input_to_image_channel[INPUT_MOTION + j];
+      input_pixels[((size_t)i) * params.pass_stride + offset + j] =
           pixels[((size_t)i) * num_channels + image_channel];
     }
   }
 }
 
-bool DenoiseImage::read_neighbor_pixels(int neighbor,
-                                        const DenoiseImageLayer &layer,
+bool DenoiseImage::read_previous_pixels(const DenoiseImageLayer &layer,
+                                        const BufferParams &params,
                                         float *input_pixels)
 {
-  /* Load pixels from neighboring frames, and copy them into device buffer
+  /* Load pixels from the previous frame, and copy them into the device buffer
    * with channels reshuffled. */
   size_t num_pixels = (size_t)width * (size_t)height;
   array<float> neighbor_pixels(num_pixels * num_channels);
-  if (!in_neighbors[neighbor]->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
+  if (!in_previous->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
     return false;
   }
 
-  const int *input_to_image_channel = layer.neighbor_input_to_image_channel[neighbor].data();
+  const int *output_to_image_channel = layer.previous_output_to_image_channel.data();
 
   for (int i = 0; i < width * height; i++) {
-    for (int j = 0; j < INPUT_NUM_CHANNELS; j++) {
-      int image_channel = input_to_image_channel[j];
-      input_pixels[i * INPUT_NUM_CHANNELS + j] =
+    for (int j = 0; j < 3; ++j) {
+      int offset = params.get_pass_offset(PASS_DENOISING_PREVIOUS);
+      int image_channel = output_to_image_channel[j];
+      input_pixels[((size_t)i) * params.pass_stride + offset + j] =
           neighbor_pixels[((size_t)i) * num_channels + image_channel];
     }
   }
@@ -739,8 +534,8 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
     return false;
   }
 
-  if (layers.size() == 0) {
-    error = "Could not find a render layer containing denoising info";
+  if (layers.empty()) {
+    error = "Could not find a render layer containing denoising data and motion vector passes";
     return false;
   }
 
@@ -757,46 +552,34 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
   return true;
 }
 
-bool DenoiseImage::load_neighbors(const vector<string> &filepaths,
-                                  const vector<int> &frames,
-                                  string &error)
+bool DenoiseImage::load_previous(const string &filepath, string &error)
 {
-  if (frames.size() > DENOISE_MAX_FRAMES - 1) {
-    error = string_printf("Maximum number of neighbors (%d) exceeded\n", DENOISE_MAX_FRAMES - 1);
+  if (!Filesystem::is_regular(filepath)) {
+    error = "Couldn't find neighbor frame: " + filepath;
     return false;
   }
 
-  for (int neighbor = 0; neighbor < frames.size(); neighbor++) {
-    int frame = frames[neighbor];
-    const string &filepath = filepaths[frame];
-
-    if (!Filesystem::is_regular(filepath)) {
-      error = "Couldn't find neighbor frame: " + filepath;
-      return false;
-    }
+  unique_ptr<ImageInput> in_neighbor(ImageInput::open(filepath));
+  if (!in_neighbor) {
+    error = "Couldn't open neighbor frame: " + filepath;
+    return false;
+  }
 
-    unique_ptr<ImageInput> in_neighbor(ImageInput::open(filepath));
-    if (!in_neighbor) {
-      error = "Couldn't open neighbor frame: " + filepath;
-      return false;
-    }
+  const ImageSpec &neighbor_spec = in_neighbor->spec();
+  if (neighbor_spec.width != width || neighbor_spec.height != height) {
+    error = "Neighbor frame has different dimensions: " + filepath;
+    return false;
+  }
 
-    const ImageSpec &neighbor_spec = in_neighbor->spec();
-    if (neighbor_spec.width != width || neighbor_spec.height != height) {
-      error = "Neighbor frame has different dimensions: " + filepath;
+  for (DenoiseImageLayer &layer : layers) {
+    if (!layer.match_channels(in_spec.channelnames, neighbor_spec.channelnames)) {
+      error = "Neighbor frame misses denoising data passes: " + filepath;
       return false;
     }
-
-    foreach (DenoiseImageLayer &layer, layers) {
-      if (!layer.match_channels(neighbor, in_spec.channelnames, neighbor_spec.channelnames)) {
-        error = "Neighbor frame misses denoising data passes: " + filepath;
-        return false;
-      }
-    }
-
-    in_neighbors.push_back(std::move(in_neighbor));
   }
 
+  in_previous = std::move(in_neighbor);
+
   return true;
 }
 
@@ -864,24 +647,22 @@ bool DenoiseImage::save_output(const string &out_filepath, string &error)
 
 /* File pattern handling and outer loop over frames */
 
-DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info)
+DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info, const DenoiseParams &params)
 {
-  samples_override = 0;
-  tile_size = make_int2(64, 64);
-
-  num_frames = 0;
-
   /* Initialize task scheduler. */
   TaskScheduler::init();
 
   /* Initialize device. */
-  device = Device::create(device_info, stats, profiler, true);
-
+  device = Device::create(device_info, stats, profiler);
   device->load_kernels(KERNEL_FEATURE_DENOISING);
+
+  denoiser = Denoiser::create(device, params);
+  denoiser->load_kernels(nullptr);
 }
 
 DenoiserPipeline::~DenoiserPipeline()
 {
+  denoiser.reset();
   delete device;
   TaskScheduler::exit();
 }
@@ -890,7 +671,7 @@ bool DenoiserPipeline::run()
 {
   assert(input.size() == output.size());
 
-  num_frames = output.size();
+  int num_frames = output.size();
 
   for (int frame = 0; frame < num_frames; frame++) {
     /* Skip empty output paths. */
@@ -898,16 +679,8 @@ bool DenoiserPipeline::run()
       continue;
     }
 
-    /* Determine neighbor frame numbers that should be used for filtering. */
-    vector<int> neighbor_frames;
-    for (int f = frame - params.neighbor_frames; f <= frame + params.neighbor_frames; f++) {
-      if (f >= 0 && f < num_frames && f != frame) {
-        neighbor_frames.push_back(f);
-      }
-    }
-
     /* Execute task. */
-    DenoiseTask task(device, this, frame, neighbor_frames);
+    DenoiseTask task(device, this, frame);
     if (!task.load()) {
       error = task.error;
       return false;
@@ -930,5 +703,3 @@ bool DenoiserPipeline::run()
 }
 
 CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/session/denoising.h b/intern/cycles/session/denoising.h
index 097cc570d06..15e691f73fd 100644
--- a/intern/cycles/session/denoising.h
+++ b/intern/cycles/session/denoising.h
@@ -17,20 +17,17 @@
 #ifndef __DENOISING_H__
 #define __DENOISING_H__
 
-#if 0
-
 /* TODO(sergey): Make it explicit and clear when something is a denoiser, its pipeline or
  * parameters. Currently it is an annoying mixture of terms used interchangeably. */
 
-#  include "device/device.h"
-
-#  include "render/buffers.h"
+#include "device/device.h"
+#include "integrator/denoiser.h"
 
-#  include "util/util_string.h"
-#  include "util/util_unique_ptr.h"
-#  include "util/util_vector.h"
+#include "util/string.h"
+#include "util/unique_ptr.h"
+#include "util/vector.h"
 
-#  include <OpenImageIO/imageio.h>
+#include <OpenImageIO/imageio.h>
 
 OIIO_NAMESPACE_USING
 
@@ -40,7 +37,7 @@ CCL_NAMESPACE_BEGIN
 
 class DenoiserPipeline {
  public:
-  DenoiserPipeline(DeviceInfo &device_info);
+  DenoiserPipeline(DeviceInfo &device_info, const DenoiseParams &params);
   ~DenoiserPipeline();
 
   bool run();
@@ -55,22 +52,13 @@ class DenoiserPipeline {
    * taking into account all input frames. */
   vector<string> output;
 
-  /* Sample number override, takes precedence over values from input frames. */
-  int samples_override;
-  /* Tile size for processing on device. */
-  int2 tile_size;
-
-  /* Equivalent to the settings in the regular denoiser. */
-  DenoiseParams params;
-
  protected:
   friend class DenoiseTask;
 
   Stats stats;
   Profiler profiler;
   Device *device;
-
-  int num_frames;
+  std::unique_ptr<Denoiser> denoiser;
 };
 
 /* Denoise Image Layer */
@@ -88,13 +76,13 @@ struct DenoiseImageLayer {
   /* Device input channel will be copied from image channel input_to_image_channel[i]. */
   vector<int> input_to_image_channel;
 
-  /* input_to_image_channel of the secondary frames, if any are used. */
-  vector<vector<int>> neighbor_input_to_image_channel;
-
   /* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the
    * file. */
   vector<int> output_to_image_channel;
 
+  /* output_to_image_channel of the previous frame, if used. */
+  vector<int> previous_output_to_image_channel;
+
   /* Detect whether this layer contains a full set of channels and set up the offsets accordingly.
    */
   bool detect_denoising_channels();
@@ -102,8 +90,7 @@ struct DenoiseImageLayer {
   /* Map the channels of a secondary frame to the channels that are required for processing,
-   * fill neighbor_input_to_image_channel if all are present or return false if a channel are
+   * fill previous_output_to_image_channel if all are present or return false if a channel is
    * missing. */
-  bool match_channels(int neighbor,
-                      const std::vector<string> &channelnames,
+  bool match_channels(const std::vector<string> &channelnames,
                       const std::vector<string> &neighbor_channelnames);
 };
 
@@ -125,7 +112,7 @@ class DenoiseImage {
 
   /* Image file handles */
   ImageSpec in_spec;
-  vector<unique_ptr<ImageInput>> in_neighbors;
+  unique_ptr<ImageInput> in_previous;
 
   /* Render layers */
   vector<DenoiseImageLayer> layers;
@@ -137,12 +124,16 @@ class DenoiseImage {
   bool load(const string &in_filepath, string &error);
 
-  /* Load neighboring frames. */
+  /* Load the previous frame, i.e. the output file written for the preceding frame. */
-  bool load_neighbors(const vector<string> &filepaths, const vector<int> &frames, string &error);
+  bool load_previous(const string &in_filepath, string &error);
 
   /* Load subset of pixels from file buffer into input buffer, as needed for denoising
    * on the device. Channels are reshuffled following the provided mapping. */
-  void read_pixels(const DenoiseImageLayer &layer, float *input_pixels);
-  bool read_neighbor_pixels(int neighbor, const DenoiseImageLayer &layer, float *input_pixels);
+  void read_pixels(const DenoiseImageLayer &layer,
+                   const BufferParams &params,
+                   float *input_pixels);
+  bool read_previous_pixels(const DenoiseImageLayer &layer,
+                            const BufferParams &params,
+                            float *input_pixels);
 
   bool save_output(const string &out_filepath, string &error);
 
@@ -159,10 +150,7 @@ class DenoiseImage {
 
 class DenoiseTask {
  public:
-  DenoiseTask(Device *device,
-              DenoiserPipeline *denoiser,
-              int frame,
-              const vector<int> &neighbor_frames);
+  DenoiseTask(Device *device, DenoiserPipeline *denoiser, int frame);
   ~DenoiseTask();
 
   /* Task stages */
@@ -180,37 +168,17 @@ class DenoiseTask {
 
   /* Frame number to be denoised */
   int frame;
-  vector<int> neighbor_frames;
 
   /* Image file data */
   DenoiseImage image;
   int current_layer;
 
-  /* Device input buffer */
-  device_vector<float> input_pixels;
-
-  /* Tiles */
-  thread_mutex tiles_mutex;
-  list<RenderTile> tiles;
-  int num_tiles;
-
-  thread_mutex output_mutex;
-  map<int, device_vector<float> *> output_pixels;
+  RenderBuffers buffers;
 
   /* Task handling */
   bool load_input_pixels(int layer);
-  void create_task(DeviceTask &task);
-
-  /* Device task callbacks */
-  bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile);
-  void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
-  void unmap_neighboring_tiles(RenderTileNeighbors &neighbors);
-  void release_tile();
-  bool get_cancel();
 };
 
 CCL_NAMESPACE_END
 
-#endif
-
 #endif /* __DENOISING_H__ */
-- 
cgit v1.2.3