diff options
Diffstat (limited to 'intern/cycles/render')
42 files changed, 3475 insertions, 3786 deletions
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index feead27c5ca..6edb5261b32 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -32,10 +32,10 @@ set(SRC camera.cpp colorspace.cpp constant_fold.cpp - coverage.cpp denoising.cpp film.cpp geometry.cpp + gpu_display.cpp graph.cpp hair.cpp image.cpp @@ -54,6 +54,7 @@ set(SRC object.cpp osl.cpp particles.cpp + pass.cpp curves.cpp scene.cpp session.cpp @@ -76,10 +77,10 @@ set(SRC_HEADERS camera.h colorspace.h constant_fold.h - coverage.h denoising.h film.h geometry.h + gpu_display.h graph.h hair.h image.h @@ -95,6 +96,7 @@ set(SRC_HEADERS object.h osl.h particles.h + pass.h procedural.h curves.h scene.h @@ -111,6 +113,7 @@ set(SRC_HEADERS set(LIB cycles_bvh cycles_device + cycles_integrator cycles_subd cycles_util ) diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp index b925e755434..ae6290ac27b 100644 --- a/intern/cycles/render/background.cpp +++ b/intern/cycles/render/background.cpp @@ -34,11 +34,7 @@ NODE_DEFINE(Background) { NodeType *type = NodeType::add("background", create); - SOCKET_FLOAT(ao_factor, "AO Factor", 0.0f); - SOCKET_FLOAT(ao_distance, "AO Distance", FLT_MAX); - SOCKET_BOOLEAN(use_shader, "Use Shader", true); - SOCKET_BOOLEAN(use_ao, "Use AO", false); SOCKET_UINT(visibility, "Visibility", PATH_RAY_ALL_VISIBILITY); SOCKET_BOOLEAN(transparent, "Transparent", false); @@ -80,10 +76,6 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene /* set shader index and transparent option */ KernelBackground *kbackground = &dscene->data.background; - kbackground->ao_factor = (use_ao) ? 
ao_factor : 0.0f; - kbackground->ao_bounces_factor = ao_factor; - kbackground->ao_distance = ao_distance; - kbackground->transparent = transparent; kbackground->surface_shader = scene->shader_manager->get_shader_id(bg_shader); @@ -138,10 +130,6 @@ void Background::tag_update(Scene *scene) * and to avoid doing unnecessary updates anywhere else. */ tag_use_shader_modified(); } - - if (ao_factor_is_modified() || use_ao_is_modified()) { - scene->integrator->tag_update(scene, Integrator::BACKGROUND_AO_MODIFIED); - } } Shader *Background::get_shader(const Scene *scene) diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h index e89ffbc2445..2f7ef0f7737 100644 --- a/intern/cycles/render/background.h +++ b/intern/cycles/render/background.h @@ -32,11 +32,7 @@ class Background : public Node { public: NODE_DECLARE - NODE_SOCKET_API(float, ao_factor) - NODE_SOCKET_API(float, ao_distance) - NODE_SOCKET_API(bool, use_shader) - NODE_SOCKET_API(bool, use_ao) NODE_SOCKET_API(uint, visibility) NODE_SOCKET_API(Shader *, shader) diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 317a3937cab..54e496caed6 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -26,58 +26,8 @@ CCL_NAMESPACE_BEGIN -static int aa_samples(Scene *scene, Object *object, ShaderEvalType type) -{ - if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) { - return 1; - } - else if (type == SHADER_EVAL_NORMAL) { - /* Only antialias normal if mesh has bump mapping. 
*/ - if (object->get_geometry()) { - foreach (Node *node, object->get_geometry()->get_used_shaders()) { - Shader *shader = static_cast<Shader *>(node); - if (shader->has_bump) { - return scene->integrator->get_aa_samples(); - } - } - } - - return 1; - } - else { - return scene->integrator->get_aa_samples(); - } -} - -/* Keep it synced with kernel_bake.h logic */ -static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter) -{ - const int component_flags = pass_filter & - (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR); - - switch (type) { - case SHADER_EVAL_AO: - return BAKE_FILTER_AO; - case SHADER_EVAL_SHADOW: - return BAKE_FILTER_DIRECT; - case SHADER_EVAL_DIFFUSE: - return BAKE_FILTER_DIFFUSE | component_flags; - case SHADER_EVAL_GLOSSY: - return BAKE_FILTER_GLOSSY | component_flags; - case SHADER_EVAL_TRANSMISSION: - return BAKE_FILTER_TRANSMISSION | component_flags; - case SHADER_EVAL_COMBINED: - return pass_filter; - default: - return 0; - } -} - BakeManager::BakeManager() { - type = SHADER_EVAL_BAKE; - pass_filter = 0; - need_update_ = true; } @@ -85,32 +35,14 @@ BakeManager::~BakeManager() { } -bool BakeManager::get_baking() +bool BakeManager::get_baking() const { return !object_name.empty(); } -void BakeManager::set(Scene *scene, - const std::string &object_name_, - ShaderEvalType type_, - int pass_filter_) +void BakeManager::set(Scene *scene, const std::string &object_name_) { object_name = object_name_; - type = type_; - pass_filter = shader_type_to_pass_filter(type_, pass_filter_); - - Pass::add(PASS_BAKE_PRIMITIVE, scene->passes); - Pass::add(PASS_BAKE_DIFFERENTIAL, scene->passes); - - if (type == SHADER_EVAL_UV) { - /* force UV to be available */ - Pass::add(PASS_UV, scene->passes); - } - - /* force use_light_pass to be true if we bake more than just colors */ - if (pass_filter & ~BAKE_FILTER_COLOR) { - Pass::add(PASS_LIGHT, scene->passes); - } /* create device and update scene */ scene->film->tag_modified(); @@ -127,29 
+59,29 @@ void BakeManager::device_update(Device * /*device*/, if (!need_update()) return; - scoped_callback_timer timer([scene](double time) { - if (scene->update_stats) { - scene->update_stats->bake.times.add_entry({"device_update", time}); - } - }); - - KernelIntegrator *kintegrator = &dscene->data.integrator; KernelBake *kbake = &dscene->data.bake; + memset(kbake, 0, sizeof(*kbake)); - kbake->type = type; - kbake->pass_filter = pass_filter; - - int object_index = 0; - foreach (Object *object, scene->objects) { - const Geometry *geom = object->get_geometry(); - if (object->name == object_name && geom->geometry_type == Geometry::MESH) { - kbake->object_index = object_index; - kbake->tri_offset = geom->prim_offset; - kintegrator->aa_samples = aa_samples(scene, object, type); - break; - } + if (!object_name.empty()) { + scoped_callback_timer timer([scene](double time) { + if (scene->update_stats) { + scene->update_stats->bake.times.add_entry({"device_update", time}); + } + }); + + kbake->use = true; - object_index++; + int object_index = 0; + foreach (Object *object, scene->objects) { + const Geometry *geom = object->get_geometry(); + if (object->name == object_name && geom->geometry_type == Geometry::MESH) { + kbake->object_index = object_index; + kbake->tri_offset = geom->prim_offset; + break; + } + + object_index++; + } } need_update_ = false; diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 655b9b1cf7e..39e504490c2 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -30,8 +30,8 @@ class BakeManager { BakeManager(); ~BakeManager(); - void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter); - bool get_baking(); + void set(Scene *scene, const std::string &object_name); + bool get_baking() const; void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); void device_free(Device *device, DeviceScene *dscene); @@ -42,8 +42,6 @@ class BakeManager 
{ private: bool need_update_; - ShaderEvalType type; - int pass_filter; std::string object_name; }; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index fcfad58995e..1882510cd70 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -28,537 +28,335 @@ CCL_NAMESPACE_BEGIN -/* Buffer Params */ +/* -------------------------------------------------------------------- + * Convert part information to an index of `BufferParams::pass_offset_`. + */ -BufferParams::BufferParams() +static int pass_type_mode_to_index(PassType pass_type, PassMode mode) { - width = 0; - height = 0; - - full_x = 0; - full_y = 0; - full_width = 0; - full_height = 0; + int index = static_cast<int>(pass_type) * 2; - denoising_data_pass = false; - denoising_clean_pass = false; - denoising_prefiltered_pass = false; + if (mode == PassMode::DENOISED) { + ++index; + } - Pass::add(PASS_COMBINED, passes); + return index; } -void BufferParams::get_offset_stride(int &offset, int &stride) +static int pass_to_index(const BufferPass &pass) { - offset = -(full_x + full_y * width); - stride = width; + return pass_type_mode_to_index(pass.type, pass.mode); } -bool BufferParams::modified(const BufferParams ¶ms) -{ - return !(full_x == params.full_x && full_y == params.full_y && width == params.width && - height == params.height && full_width == params.full_width && - full_height == params.full_height && Pass::equals(passes, params.passes) && - denoising_data_pass == params.denoising_data_pass && - denoising_clean_pass == params.denoising_clean_pass && - denoising_prefiltered_pass == params.denoising_prefiltered_pass); -} +/* -------------------------------------------------------------------- + * Buffer pass. 
+ */ -int BufferParams::get_passes_size() +NODE_DEFINE(BufferPass) { - int size = 0; + NodeType *type = NodeType::add("buffer_pass", create); - for (size_t i = 0; i < passes.size(); i++) - size += passes[i].components; + const NodeEnum *pass_type_enum = Pass::get_type_enum(); + const NodeEnum *pass_mode_enum = Pass::get_mode_enum(); - if (denoising_data_pass) { - size += DENOISING_PASS_SIZE_BASE; - if (denoising_clean_pass) - size += DENOISING_PASS_SIZE_CLEAN; - if (denoising_prefiltered_pass) - size += DENOISING_PASS_SIZE_PREFILTERED; - } + SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); + SOCKET_ENUM(mode, "Mode", *pass_mode_enum, static_cast<int>(PassMode::DENOISED)); + SOCKET_STRING(name, "Name", ustring()); + SOCKET_BOOLEAN(include_albedo, "Include Albedo", false); - return align_up(size, 4); -} + SOCKET_INT(offset, "Offset", -1); -int BufferParams::get_denoising_offset() -{ - int offset = 0; - - for (size_t i = 0; i < passes.size(); i++) - offset += passes[i].components; - - return offset; + return type; } -int BufferParams::get_denoising_prefiltered_offset() +BufferPass::BufferPass() : Node(get_node_type()) { - assert(denoising_prefiltered_pass); - - int offset = get_denoising_offset(); - - offset += DENOISING_PASS_SIZE_BASE; - if (denoising_clean_pass) { - offset += DENOISING_PASS_SIZE_CLEAN; - } - - return offset; } -/* Render Buffer Task */ - -RenderTile::RenderTile() +BufferPass::BufferPass(const Pass *scene_pass) + : Node(get_node_type()), + type(scene_pass->get_type()), + mode(scene_pass->get_mode()), + name(scene_pass->get_name()), + include_albedo(scene_pass->get_include_albedo()) { - x = 0; - y = 0; - w = 0; - h = 0; - - sample = 0; - start_sample = 0; - num_samples = 0; - resolution = 0; - - offset = 0; - stride = 0; - - buffer = 0; - - buffers = NULL; - stealing_state = NO_STEALING; } -/* Render Buffers */ - -RenderBuffers::RenderBuffers(Device *device) - : buffer(device, "RenderBuffers", MEM_READ_WRITE), - map_neighbor_copied(false), - 
render_time(0.0f) +PassInfo BufferPass::get_info() const { + return Pass::get_info(type, include_albedo); } -RenderBuffers::~RenderBuffers() -{ - buffer.free(); -} +/* -------------------------------------------------------------------- + * Buffer Params. + */ -void RenderBuffers::reset(BufferParams ¶ms_) +NODE_DEFINE(BufferParams) { - params = params_; - - /* re-allocate buffer */ - buffer.alloc(params.width * params.get_passes_size(), params.height); - buffer.zero_to_device(); + NodeType *type = NodeType::add("buffer_params", create); + + SOCKET_INT(width, "Width", 0); + SOCKET_INT(height, "Height", 0); + + SOCKET_INT(full_x, "Full X", 0); + SOCKET_INT(full_y, "Full Y", 0); + SOCKET_INT(full_width, "Full Width", 0); + SOCKET_INT(full_height, "Full Height", 0); + + SOCKET_STRING(layer, "Layer", ustring()); + SOCKET_STRING(view, "View", ustring()); + SOCKET_INT(samples, "Samples", 0); + SOCKET_FLOAT(exposure, "Exposure", 1.0f); + SOCKET_BOOLEAN(use_approximate_shadow_catcher, "Use Approximate Shadow Catcher", false); + SOCKET_BOOLEAN(use_transparent_background, "Transparent Background", false); + + /* Notes: + * - Skip passes since they do not follow typical container socket definition. + * Might look into covering those as a socket in the future. + * + * - Skip offset, stride, and pass stride since those can be delivered from the passes and + * rest of the sockets. 
*/ + + return type; } -void RenderBuffers::zero() +BufferParams::BufferParams() : Node(get_node_type()) { - buffer.zero_to_device(); + reset_pass_offset(); } -bool RenderBuffers::copy_from_device() +void BufferParams::update_passes() { - if (!buffer.device_pointer) - return false; - - buffer.copy_from_device(0, params.width * params.get_passes_size(), params.height); - - return true; -} - -bool RenderBuffers::get_denoising_pass_rect( - int type, float exposure, int sample, int components, float *pixels) -{ - if (buffer.data() == NULL) { - return false; - } - - float scale = 1.0f; - float alpha_scale = 1.0f / sample; - if (type == DENOISING_PASS_PREFILTERED_COLOR || type == DENOISING_PASS_CLEAN || - type == DENOISING_PASS_PREFILTERED_INTENSITY) { - scale *= exposure; - } - else if (type == DENOISING_PASS_PREFILTERED_VARIANCE) { - scale *= exposure * exposure * (sample - 1); - } + update_offset_stride(); + reset_pass_offset(); + + pass_stride = 0; + for (const BufferPass &pass : passes) { + if (pass.offset != PASS_UNUSED) { + const int index = pass_to_index(pass); + if (pass_offset_[index] == PASS_UNUSED) { + pass_offset_[index] = pass_stride; + } - int offset; - if (type == DENOISING_PASS_CLEAN) { - /* The clean pass isn't changed by prefiltering, so we use the original one there. */ - offset = type + params.get_denoising_offset(); - scale /= sample; - } - else if (params.denoising_prefiltered_pass) { - offset = type + params.get_denoising_prefiltered_offset(); - } - else { - switch (type) { - case DENOISING_PASS_PREFILTERED_DEPTH: - offset = params.get_denoising_offset() + DENOISING_PASS_DEPTH; - break; - case DENOISING_PASS_PREFILTERED_NORMAL: - offset = params.get_denoising_offset() + DENOISING_PASS_NORMAL; - break; - case DENOISING_PASS_PREFILTERED_ALBEDO: - offset = params.get_denoising_offset() + DENOISING_PASS_ALBEDO; - break; - case DENOISING_PASS_PREFILTERED_COLOR: - /* If we're not saving the prefiltering result, return the original noisy pass. 
*/ - offset = params.get_denoising_offset() + DENOISING_PASS_COLOR; - break; - default: - return false; + pass_stride += pass.get_info().num_components; } - scale /= sample; } +} - int pass_stride = params.get_passes_size(); - int size = params.width * params.height; +void BufferParams::update_passes(const vector<Pass *> &scene_passes) +{ + passes.clear(); - float *in = buffer.data() + offset; + pass_stride = 0; + for (const Pass *scene_pass : scene_passes) { + BufferPass buffer_pass(scene_pass); - if (components == 1) { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - pixels[0] = in[0] * scale; + if (scene_pass->is_written()) { + buffer_pass.offset = pass_stride; + pass_stride += scene_pass->get_info().num_components; } - } - else if (components == 3) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - pixels[0] = in[0] * scale; - pixels[1] = in[1] * scale; - pixels[2] = in[2] * scale; - } - } - else if (components == 4) { - /* Since the alpha channel is not involved in denoising, output the Combined alpha channel. */ - assert(params.passes[0].type == PASS_COMBINED); - float *in_combined = buffer.data(); - - for (int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) { - float3 val = make_float3(in[0], in[1], in[2]); - if (type == DENOISING_PASS_PREFILTERED_COLOR && params.denoising_prefiltered_pass) { - /* Remove highlight compression from the image. 
*/ - val = color_highlight_uncompress(val); - } - pixels[0] = val.x * scale; - pixels[1] = val.y * scale; - pixels[2] = val.z * scale; - pixels[3] = saturate(in_combined[3] * alpha_scale); + else { + buffer_pass.offset = PASS_UNUSED; } - } - else { - return false; + + passes.emplace_back(std::move(buffer_pass)); } - return true; + update_passes(); } -bool RenderBuffers::get_pass_rect( - const string &name, float exposure, int sample, int components, float *pixels) +void BufferParams::reset_pass_offset() { - if (buffer.data() == NULL) { - return false; + for (int i = 0; i < kNumPassOffsets; ++i) { + pass_offset_[i] = PASS_UNUSED; } +} - float *sample_count = NULL; - if (name == "Combined") { - int sample_offset = 0; - for (size_t j = 0; j < params.passes.size(); j++) { - Pass &pass = params.passes[j]; - if (pass.type != PASS_SAMPLE_COUNT) { - sample_offset += pass.components; - continue; - } - else { - sample_count = buffer.data() + sample_offset; - break; - } - } +int BufferParams::get_pass_offset(PassType pass_type, PassMode mode) const +{ + if (pass_type == PASS_NONE || pass_type == PASS_UNUSED) { + return PASS_UNUSED; } - int pass_offset = 0; - - for (size_t j = 0; j < params.passes.size(); j++) { - Pass &pass = params.passes[j]; + const int index = pass_type_mode_to_index(pass_type, mode); + return pass_offset_[index]; +} - /* Pass is identified by both type and name, multiple of the same type - * may exist with a different name. */ - if (pass.name != name) { - pass_offset += pass.components; - continue; +const BufferPass *BufferParams::find_pass(string_view name) const +{ + for (const BufferPass &pass : passes) { + if (pass.name == name) { + return &pass; } + } - PassType type = pass.type; - - float *in = buffer.data() + pass_offset; - int pass_stride = params.get_passes_size(); - - float scale = (pass.filter) ? 1.0f / (float)sample : 1.0f; - float scale_exposure = (pass.exposure) ? 
scale * exposure : scale; - - int size = params.width * params.height; + return nullptr; +} - if (components == 1 && type == PASS_RENDER_TIME) { - /* Render time is not stored by kernel, but measured per tile. */ - float val = (float)(1000.0 * render_time / (params.width * params.height * sample)); - for (int i = 0; i < size; i++, pixels++) { - pixels[0] = val; - } - } - else if (components == 1) { - assert(pass.components == components); - - /* Scalar */ - if (type == PASS_DEPTH) { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = (f == 0.0f) ? 1e10f : f * scale_exposure; - } - } - else if (type == PASS_MIST) { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = saturate(f * scale_exposure); - } - } - else { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = f * scale_exposure; - } - } - } - else if (components == 3) { - assert(pass.components == 4); - - /* RGBA */ - if (type == PASS_SHADOW) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - float invw = (f.w > 0.0f) ? 
1.0f / f.w : 1.0f; - - pixels[0] = f.x * invw; - pixels[1] = f.y * invw; - pixels[2] = f.z * invw; - } - } - else if (pass.divide_type != PASS_NONE) { - /* RGB lighting passes that need to divide out color */ - pass_offset = 0; - for (size_t k = 0; k < params.passes.size(); k++) { - Pass &color_pass = params.passes[k]; - if (color_pass.type == pass.divide_type) - break; - pass_offset += color_pass.components; - } - - float *in_divide = buffer.data() + pass_offset; - - for (int i = 0; i < size; i++, in += pass_stride, in_divide += pass_stride, pixels += 3) { - float3 f = make_float3(in[0], in[1], in[2]); - float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]); - - f = safe_divide_even_color(f * exposure, f_divide); - - pixels[0] = f.x; - pixels[1] = f.y; - pixels[2] = f.z; - } - } - else { - /* RGB/vector */ - for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - float3 f = make_float3(in[0], in[1], in[2]); - - pixels[0] = f.x * scale_exposure; - pixels[1] = f.y * scale_exposure; - pixels[2] = f.z * scale_exposure; - } - } - } - else if (components == 4) { - assert(pass.components == components); - - /* RGBA */ - if (type == PASS_SHADOW) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - float invw = (f.w > 0.0f) ? 
1.0f / f.w : 1.0f; - - pixels[0] = f.x * invw; - pixels[1] = f.y * invw; - pixels[2] = f.z * invw; - pixels[3] = 1.0f; - } - } - else if (type == PASS_MOTION) { - /* need to normalize by number of samples accumulated for motion */ - pass_offset = 0; - for (size_t k = 0; k < params.passes.size(); k++) { - Pass &color_pass = params.passes[k]; - if (color_pass.type == PASS_MOTION_WEIGHT) - break; - pass_offset += color_pass.components; - } - - float *in_weight = buffer.data() + pass_offset; - - for (int i = 0; i < size; i++, in += pass_stride, in_weight += pass_stride, pixels += 4) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - float w = in_weight[0]; - float invw = (w > 0.0f) ? 1.0f / w : 0.0f; - - pixels[0] = f.x * invw; - pixels[1] = f.y * invw; - pixels[2] = f.z * invw; - pixels[3] = f.w * invw; - } - } - else if (type == PASS_CRYPTOMATTE) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - /* x and z contain integer IDs, don't rescale them. - y and w contain matte weights, they get scaled. */ - pixels[0] = f.x; - pixels[1] = f.y * scale; - pixels[2] = f.z; - pixels[3] = f.w * scale; - } - } - else { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { - if (sample_count && sample_count[i * pass_stride] < 0.0f) { - scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f; - scale_exposure = (pass.exposure) ? scale * exposure : scale; - } - - float4 f = make_float4(in[0], in[1], in[2], in[3]); - - pixels[0] = f.x * scale_exposure; - pixels[1] = f.y * scale_exposure; - pixels[2] = f.z * scale_exposure; - - /* Clamp since alpha might be > 1.0 due to Russian roulette. 
*/ - pixels[3] = saturate(f.w * scale); - } - } +const BufferPass *BufferParams::find_pass(PassType type, PassMode mode) const +{ + for (const BufferPass &pass : passes) { + if (pass.type == type && pass.mode == mode) { + return &pass; } - - return true; } - return false; + return nullptr; } -bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels, int samples) +const BufferPass *BufferParams::get_actual_display_pass(PassType type, PassMode mode) const { - if (buffer.data() == NULL) { - return false; - } - - int pass_offset = 0; + const BufferPass *pass = find_pass(type, mode); + return get_actual_display_pass(pass); +} - for (size_t j = 0; j < params.passes.size(); j++) { - Pass &pass = params.passes[j]; +const BufferPass *BufferParams::get_actual_display_pass(const BufferPass *pass) const +{ + if (!pass) { + return nullptr; + } - if (pass.type != type) { - pass_offset += pass.components; - continue; + if (pass->type == PASS_COMBINED) { + const BufferPass *shadow_catcher_matte_pass = find_pass(PASS_SHADOW_CATCHER_MATTE, pass->mode); + if (shadow_catcher_matte_pass) { + pass = shadow_catcher_matte_pass; } + } - float *out = buffer.data() + pass_offset; - int pass_stride = params.get_passes_size(); - int size = params.width * params.height; - - assert(pass.components == components); + return pass; +} - for (int i = 0; i < size; i++, out += pass_stride, pixels += components) { - if (pass.filter) { - /* Scale by the number of samples, inverse of what we do in get_pass_rect. - * A better solution would be to remove the need for set_pass_rect entirely, - * and change baking to bake multiple objects in a tile at once. */ - for (int j = 0; j < components; j++) { - out[j] = pixels[j] * samples; - } - } - else { - /* For non-filtered passes just straight copy, these may contain non-float data. 
*/ - memcpy(out, pixels, sizeof(float) * components); - } - } +void BufferParams::update_offset_stride() +{ + offset = -(full_x + full_y * width); + stride = width; +} +bool BufferParams::modified(const BufferParams &other) const +{ + if (!(width == other.width && height == other.height && full_x == other.full_x && + full_y == other.full_y && full_width == other.full_width && + full_height == other.full_height && offset == other.offset && stride == other.stride && + pass_stride == other.pass_stride && layer == other.layer && view == other.view && + exposure == other.exposure && + use_approximate_shadow_catcher == other.use_approximate_shadow_catcher && + use_transparent_background == other.use_transparent_background)) { return true; } - return false; + return !(passes == other.passes); } -/* Display Buffer */ +/* -------------------------------------------------------------------- + * Render Buffers. + */ -DisplayBuffer::DisplayBuffer(Device *device, bool linear) - : draw_width(0), - draw_height(0), - transparent(true), /* todo: determine from background */ - half_float(linear), - rgba_byte(device, "display buffer byte"), - rgba_half(device, "display buffer half") +RenderBuffers::RenderBuffers(Device *device) : buffer(device, "RenderBuffers", MEM_READ_WRITE) { } -DisplayBuffer::~DisplayBuffer() +RenderBuffers::~RenderBuffers() { - rgba_byte.free(); - rgba_half.free(); + buffer.free(); } -void DisplayBuffer::reset(BufferParams ¶ms_) +void RenderBuffers::reset(const BufferParams ¶ms_) { - draw_width = 0; - draw_height = 0; + DCHECK(params_.pass_stride != -1); params = params_; - /* allocate display pixels */ - if (half_float) { - rgba_half.alloc_to_device(params.width, params.height); - } - else { - rgba_byte.alloc_to_device(params.width, params.height); - } + /* re-allocate buffer */ + buffer.alloc(params.width * params.pass_stride, params.height); } -void DisplayBuffer::draw_set(int width, int height) +void RenderBuffers::zero() { - assert(width <= params.width && 
height <= params.height); + buffer.zero_to_device(); +} - draw_width = width; - draw_height = height; +bool RenderBuffers::copy_from_device() +{ + DCHECK(params.pass_stride != -1); + + if (!buffer.device_pointer) + return false; + + buffer.copy_from_device(0, params.width * params.pass_stride, params.height); + + return true; } -void DisplayBuffer::draw(Device *device, const DeviceDrawParams &draw_params) +void RenderBuffers::copy_to_device() { - if (draw_width != 0 && draw_height != 0) { - device_memory &rgba = (half_float) ? (device_memory &)rgba_half : (device_memory &)rgba_byte; - - device->draw_pixels(rgba, - 0, - draw_width, - draw_height, - params.width, - params.height, - params.full_x, - params.full_y, - params.full_width, - params.full_height, - transparent, - draw_params); - } + buffer.copy_to_device(); } -bool DisplayBuffer::draw_ready() +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset) { - return (draw_width != 0 && draw_height != 0); + DCHECK_EQ(dst_params.width, src_params.width); + /* TODO(sergey): More sanity checks to avoid buffer overrun. */ + + /* Create a map of pass offsets to be copied. + * Assume offsets are different to allow copying passes between buffers with different set of + * passes. 
*/ + + struct { + int dst_offset; + int src_offset; + } pass_offsets[PASS_NUM]; + + int num_passes = 0; + + for (int i = 0; i < PASS_NUM; ++i) { + const PassType pass_type = static_cast<PassType>(i); + + const int dst_pass_offset = dst_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (dst_pass_offset == PASS_UNUSED) { + continue; + } + + const int src_pass_offset = src_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (src_pass_offset == PASS_UNUSED) { + continue; + } + + pass_offsets[num_passes].dst_offset = dst_pass_offset; + pass_offsets[num_passes].src_offset = src_pass_offset; + ++num_passes; + } + + /* Copy passes. */ + /* TODO(sergey): Make it more reusable, allowing implement copy of noisy passes. */ + + const int64_t dst_width = dst_params.width; + const int64_t dst_height = dst_params.height; + const int64_t dst_pass_stride = dst_params.pass_stride; + const int64_t dst_num_pixels = dst_width * dst_height; + + const int64_t src_pass_stride = src_params.pass_stride; + const int64_t src_offset_in_floats = src_offset * src_pass_stride; + + const float *src_pixel = src->buffer.data() + src_offset_in_floats; + float *dst_pixel = dst->buffer.data(); + + for (int i = 0; i < dst_num_pixels; + ++i, src_pixel += src_pass_stride, dst_pixel += dst_pass_stride) { + for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) { + const int dst_pass_offset = pass_offsets[pass_offset_idx].dst_offset; + const int src_pass_offset = pass_offsets[pass_offset_idx].src_offset; + + /* TODO(sergey): Support non-RGBA passes. 
*/ + dst_pixel[dst_pass_offset + 0] = src_pixel[src_pass_offset + 0]; + dst_pixel[dst_pass_offset + 1] = src_pixel[src_pass_offset + 1]; + dst_pixel[dst_pass_offset + 2] = src_pixel[src_pass_offset + 2]; + dst_pixel[dst_pass_offset + 3] = src_pixel[src_pass_offset + 3]; + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 4ffc628bb52..184ac7197af 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -18,8 +18,8 @@ #define __BUFFERS_H__ #include "device/device_memory.h" - -#include "render/film.h" +#include "graph/node.h" +#include "render/pass.h" #include "kernel/kernel_types.h" @@ -34,170 +34,157 @@ class Device; struct DeviceDrawParams; struct float4; +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. */ +class BufferPass : public Node { + public: + NODE_DECLARE + + PassType type = PASS_NONE; + PassMode mode = PassMode::NOISY; + ustring name; + bool include_albedo = false; + + int offset = -1; + + BufferPass(); + explicit BufferPass(const Pass *scene_pass); + + BufferPass(BufferPass &&other) noexcept = default; + BufferPass(const BufferPass &other) = default; + + BufferPass &operator=(BufferPass &&other) = default; + BufferPass &operator=(const BufferPass &other) = default; + + ~BufferPass() = default; + + PassInfo get_info() const; + + inline bool operator==(const BufferPass &other) const + { + return type == other.type && mode == other.mode && name == other.name && + include_albedo == other.include_albedo && offset == other.offset; + } + inline bool operator!=(const BufferPass &other) const + { + return !(*this == other); + } +}; + /* Buffer Parameters * Size of render buffer and how it fits in the full image (border render). */ -class BufferParams { +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. 
*/ +class BufferParams : public Node { public: - /* width/height of the physical buffer */ - int width; - int height; - - /* offset into and width/height of the full buffer */ - int full_x; - int full_y; - int full_width; - int full_height; - - /* passes */ - vector<Pass> passes; - bool denoising_data_pass; - /* If only some light path types should be target, an additional pass is needed. */ - bool denoising_clean_pass; - /* When we're prefiltering the passes during rendering, we need to keep both the - * original and the prefiltered data around because neighboring tiles might still - * need the original data. */ - bool denoising_prefiltered_pass; - - /* functions */ - BufferParams(); + NODE_DECLARE - void get_offset_stride(int &offset, int &stride); - bool modified(const BufferParams ¶ms); - int get_passes_size(); - int get_denoising_offset(); - int get_denoising_prefiltered_offset(); -}; + /* Width/height of the physical buffer. */ + int width = 0; + int height = 0; -/* Render Buffers */ + /* Offset into and width/height of the full buffer. */ + int full_x = 0; + int full_y = 0; + int full_width = 0; + int full_height = 0; -class RenderBuffers { - public: - /* buffer parameters */ - BufferParams params; + /* Runtime fields, only valid after `update_passes()` or `update_offset_stride()`. */ + int offset = -1, stride = -1; - /* float buffer */ - device_vector<float> buffer; - bool map_neighbor_copied; - double render_time; + /* Runtime fields, only valid after `update_passes()`. */ + int pass_stride = -1; - explicit RenderBuffers(Device *device); - ~RenderBuffers(); + /* Properties which are used for accessing buffer pixels outside of scene graph. 
*/ + vector<BufferPass> passes; + ustring layer; + ustring view; + int samples = 0; + float exposure = 1.0f; + bool use_approximate_shadow_catcher = false; + bool use_transparent_background = false; - void reset(BufferParams ¶ms); - void zero(); + BufferParams(); - bool copy_from_device(); - bool get_pass_rect( - const string &name, float exposure, int sample, int components, float *pixels); - bool get_denoising_pass_rect( - int offset, float exposure, int sample, int components, float *pixels); - bool set_pass_rect(PassType type, int components, float *pixels, int samples); -}; + BufferParams(BufferParams &&other) noexcept = default; + BufferParams(const BufferParams &other) = default; -/* Display Buffer - * - * The buffer used for drawing during render, filled by converting the render - * buffers to byte of half float storage */ + BufferParams &operator=(BufferParams &&other) = default; + BufferParams &operator=(const BufferParams &other) = default; -class DisplayBuffer { - public: - /* buffer parameters */ - BufferParams params; - /* dimensions for how much of the buffer is actually ready for display. - * with progressive render we can be using only a subset of the buffer. - * if these are zero, it means nothing can be drawn yet */ - int draw_width, draw_height; - /* draw alpha channel? */ - bool transparent; - /* use half float? */ - bool half_float; - /* byte buffer for converted result */ - device_pixels<uchar4> rgba_byte; - device_pixels<half4> rgba_half; - - DisplayBuffer(Device *device, bool linear = false); - ~DisplayBuffer(); - - void reset(BufferParams ¶ms); - - void draw_set(int width, int height); - void draw(Device *device, const DeviceDrawParams &draw_params); - bool draw_ready(); -}; + ~BufferParams() = default; -/* Render Tile - * Rendering task on a buffer */ + /* Pre-calculate all fields which depends on the passes. 
+ * + * When the scene passes are given, the buffer passes will be created from them and stored in + * this params, and then params are updated for those passes. + * The `update_passes()` without parameters updates offsets and strides which are stored outside + * of the passes. */ + void update_passes(); + void update_passes(const vector<Pass *> &scene_passes); -class RenderTile { - public: - typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task; + /* Returns PASS_UNUSED if there is no such pass in the buffer. */ + int get_pass_offset(PassType type, PassMode mode = PassMode::NOISY) const; - Task task; - int x, y, w, h; - int start_sample; - int num_samples; - int sample; - int resolution; - int offset; - int stride; - int tile_index; + /* Returns nullptr if pass with given name does not exist. */ + const BufferPass *find_pass(string_view name) const; + const BufferPass *find_pass(PassType type, PassMode mode = PassMode::NOISY) const; - device_ptr buffer; - int device_size; + /* Get display pass from its name. + * Will do special logic to replace combined pass with shadow catcher matte. */ + const BufferPass *get_actual_display_pass(PassType type, PassMode mode = PassMode::NOISY) const; + const BufferPass *get_actual_display_pass(const BufferPass *pass) const; - typedef enum { NO_STEALING = 0, CAN_BE_STOLEN = 1, WAS_STOLEN = 2 } StealingState; - StealingState stealing_state; + void update_offset_stride(); - RenderBuffers *buffers; + bool modified(const BufferParams &other) const; - RenderTile(); + protected: + void reset_pass_offset(); - int4 bounds() const - { - return make_int4(x, /* xmin */ - y, /* ymin */ - x + w, /* xmax */ - y + h); /* ymax */ - } + /* Multiplied by 2 to be able to store noisy and denoised pass types. */ + static constexpr int kNumPassOffsets = PASS_NUM * 2; + + /* Indexed by an index derived from pass type and mode, indicates offset of the corresponding + * pass in the buffer. 
+ * If there are multiple passes with same type and mode contains lowest offset of all of them. */ + int pass_offset_[kNumPassOffsets]; }; -/* Render Tile Neighbors - * Set of neighboring tiles used for denoising. Tile order: - * 0 1 2 - * 3 4 5 - * 6 7 8 */ +/* Render Buffers */ -class RenderTileNeighbors { +class RenderBuffers { public: - static const int SIZE = 9; - static const int CENTER = 4; + /* buffer parameters */ + BufferParams params; - RenderTile tiles[SIZE]; - RenderTile target; + /* float buffer */ + device_vector<float> buffer; - RenderTileNeighbors(const RenderTile ¢er) - { - tiles[CENTER] = center; - } + explicit RenderBuffers(Device *device); + ~RenderBuffers(); - int4 bounds() const - { - return make_int4(tiles[3].x, /* xmin */ - tiles[1].y, /* ymin */ - tiles[5].x + tiles[5].w, /* xmax */ - tiles[7].y + tiles[7].h); /* ymax */ - } + void reset(const BufferParams ¶ms); + void zero(); - void set_bounds_from_center() - { - tiles[3].x = tiles[CENTER].x; - tiles[1].y = tiles[CENTER].y; - tiles[5].x = tiles[CENTER].x + tiles[CENTER].w; - tiles[7].y = tiles[CENTER].y + tiles[CENTER].h; - } + bool copy_from_device(); + void copy_to_device(); }; +/* Copy denoised passes form source to destination. + * + * Buffer parameters are provided explicitly, allowing to copy pixels between render buffers which + * content corresponds to a render result at a non-unit resolution divider. + * + * `src_offset` allows to offset source pixel index which is used when a fraction of the source + * buffer is to be copied. + * + * Copy happens of the number of pixels in the destination. 
*/ +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset = 0); + CCL_NAMESPACE_END #endif /* __BUFFERS_H__ */ diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 327f166f9d8..8b69c971991 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -33,9 +33,9 @@ /* needed for calculating differentials */ // clang-format off -#include "kernel/kernel_compat_cpu.h" -#include "kernel/split/kernel_split_data.h" -#include "kernel/kernel_globals.h" +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + #include "kernel/kernel_projection.h" #include "kernel/kernel_differential.h" #include "kernel/kernel_montecarlo.h" @@ -169,7 +169,6 @@ Camera::Camera() : Node(get_node_type()) width = 1024; height = 512; - resolution = 1; use_perspective_motion = false; @@ -455,7 +454,6 @@ void Camera::update(Scene *scene) /* render size */ kcam->width = width; kcam->height = height; - kcam->resolution = resolution; /* store differentials */ kcam->dx = float3_to_float4(dx); @@ -776,9 +774,11 @@ float Camera::world_to_raster_size(float3 P) &ray); #endif - differential_transfer(&ray.dP, ray.dP, ray.D, ray.dD, ray.D, dist); + /* TODO: would it help to use more accurate differentials here? 
*/ + differential3 dP; + differential_transfer_compact(&dP, ray.dP, ray.D, ray.dD, ray.D, dist); - return max(len(ray.dP.dx), len(ray.dP.dy)); + return max(len(dP.dx), len(dP.dy)); } return res; @@ -789,12 +789,11 @@ bool Camera::use_motion() const return motion.size() > 1; } -void Camera::set_screen_size_and_resolution(int width_, int height_, int resolution_) +void Camera::set_screen_size(int width_, int height_) { - if (width_ != width || height_ != height || resolution_ != resolution) { + if (width_ != width || height_ != height) { width = width_; height = height_; - resolution = resolution_; tag_modified(); } } diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index 5abb4750764..cb8ecac1a7e 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -199,7 +199,6 @@ class Camera : public Node { private: int width; int height; - int resolution; public: /* functions */ @@ -225,7 +224,7 @@ class Camera : public Node { int motion_step(float time) const; bool use_motion() const; - void set_screen_size_and_resolution(int width_, int height_, int resolution_); + void set_screen_size(int width_, int height_); private: /* Private utility functions. */ diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp deleted file mode 100644 index 99d4daa6961..00000000000 --- a/intern/cycles/render/coverage.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "render/coverage.h" -#include "render/buffers.h" - -#include "kernel/kernel_compat_cpu.h" -#include "kernel/kernel_types.h" -#include "kernel/split/kernel_split_data.h" - -#include "kernel/kernel_globals.h" -#include "kernel/kernel_id_passes.h" - -#include "util/util_map.h" - -CCL_NAMESPACE_BEGIN - -static bool crypomatte_comp(const pair<float, float> &i, const pair<float, float> j) -{ - return i.first > j.first; -} - -void Coverage::finalize() -{ - int pass_offset = 0; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - finalize_buffer(coverage_object, pass_offset); - pass_offset += kernel_data.film.cryptomatte_depth * 4; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - finalize_buffer(coverage_material, pass_offset); - pass_offset += kernel_data.film.cryptomatte_depth * 4; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - finalize_buffer(coverage_asset, pass_offset); - } -} - -void Coverage::init_path_trace() -{ - kg->coverage_object = kg->coverage_material = kg->coverage_asset = NULL; - - if (kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - coverage_object.clear(); - coverage_object.resize(tile.w * tile.h); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - coverage_material.clear(); - coverage_material.resize(tile.w * tile.h); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - coverage_asset.clear(); - coverage_asset.resize(tile.w * tile.h); - } - } -} - -void Coverage::init_pixel(int x, int y) -{ - if (kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { - const int pixel_index = tile.w * (y - tile.y) + x - tile.x; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - kg->coverage_object = &coverage_object[pixel_index]; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - 
kg->coverage_material = &coverage_material[pixel_index]; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - kg->coverage_asset = &coverage_asset[pixel_index]; - } - } -} - -void Coverage::finalize_buffer(vector<CoverageMap> &coverage, const int pass_offset) -{ - if (kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { - flatten_buffer(coverage, pass_offset); - } - else { - sort_buffer(pass_offset); - } -} - -void Coverage::flatten_buffer(vector<CoverageMap> &coverage, const int pass_offset) -{ - /* Sort the coverage map and write it to the output */ - int pixel_index = 0; - int pass_stride = tile.buffers->params.get_passes_size(); - for (int y = 0; y < tile.h; ++y) { - for (int x = 0; x < tile.w; ++x) { - const CoverageMap &pixel = coverage[pixel_index]; - if (!pixel.empty()) { - /* buffer offset */ - int index = x + y * tile.stride; - float *buffer = (float *)tile.buffer + index * pass_stride; - - /* sort the cryptomatte pixel */ - vector<pair<float, float>> sorted_pixel; - for (CoverageMap::const_iterator it = pixel.begin(); it != pixel.end(); ++it) { - sorted_pixel.push_back(std::make_pair(it->second, it->first)); - } - sort(sorted_pixel.begin(), sorted_pixel.end(), crypomatte_comp); - int num_slots = 2 * (kernel_data.film.cryptomatte_depth); - if (sorted_pixel.size() > num_slots) { - float leftover = 0.0f; - for (vector<pair<float, float>>::iterator it = sorted_pixel.begin() + num_slots; - it != sorted_pixel.end(); - ++it) { - leftover += it->first; - } - sorted_pixel[num_slots - 1].first += leftover; - } - int limit = min(num_slots, sorted_pixel.size()); - for (int i = 0; i < limit; ++i) { - kernel_write_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, - 2 * (kernel_data.film.cryptomatte_depth), - sorted_pixel[i].second, - sorted_pixel[i].first); - } - } - ++pixel_index; - } - } -} - -void Coverage::sort_buffer(const int pass_offset) -{ - /* Sort the coverage map and write it to the output */ - int pass_stride = 
tile.buffers->params.get_passes_size(); - for (int y = 0; y < tile.h; ++y) { - for (int x = 0; x < tile.w; ++x) { - /* buffer offset */ - int index = x + y * tile.stride; - float *buffer = (float *)tile.buffer + index * pass_stride; - kernel_sort_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, - 2 * (kernel_data.film.cryptomatte_depth)); - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h deleted file mode 100644 index 12182c614da..00000000000 --- a/intern/cycles/render/coverage.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __COVERAGE_H__ -#define __COVERAGE_H__ - -#include "util/util_map.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -struct KernelGlobals; -class RenderTile; - -typedef unordered_map<float, float> CoverageMap; - -class Coverage { - public: - Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_) - { - } - void init_path_trace(); - void init_pixel(int x, int y); - void finalize(); - - private: - vector<CoverageMap> coverage_object; - vector<CoverageMap> coverage_material; - vector<CoverageMap> coverage_asset; - KernelGlobals *kg; - RenderTile &tile; - void finalize_buffer(vector<CoverageMap> &coverage, const int pass_offset); - void flatten_buffer(vector<CoverageMap> &coverage, const int pass_offset); - void sort_buffer(const int pass_offset); -}; - -CCL_NAMESPACE_END - -#endif /* __COVERAGE_H__ */ diff --git a/intern/cycles/render/denoising.cpp b/intern/cycles/render/denoising.cpp index ddbe7484800..bcf8d3fa204 100644 --- a/intern/cycles/render/denoising.cpp +++ b/intern/cycles/render/denoising.cpp @@ -16,15 +16,17 @@ #include "render/denoising.h" -#include "kernel/filter/filter_defines.h" +#if 0 -#include "util/util_foreach.h" -#include "util/util_map.h" -#include "util/util_system.h" -#include "util/util_task.h" -#include "util/util_time.h" +# include "kernel/filter/filter_defines.h" -#include <OpenImageIO/filesystem.h> +# include "util/util_foreach.h" +# include "util/util_map.h" +# include "util/util_system.h" +# include "util/util_task.h" +# include "util/util_time.h" + +# include <OpenImageIO/filesystem.h> CCL_NAMESPACE_BEGIN @@ -225,7 +227,7 @@ bool DenoiseImageLayer::match_channels(int neighbor, /* Denoise Task */ DenoiseTask::DenoiseTask(Device *device, - Denoiser *denoiser, + DenoiserPipeline *denoiser, int frame, const vector<int> &neighbor_frames) : denoiser(denoiser), @@ -386,7 +388,6 @@ void DenoiseTask::create_task(DeviceTask &task) task.denoising = denoiser->params; task.denoising.type = DENOISER_NLM; 
task.denoising.use = true; - task.denoising.store_passes = false; task.denoising_from_render = false; task.denoising_frames.resize(neighbor_frames.size()); @@ -863,7 +864,7 @@ bool DenoiseImage::save_output(const string &out_filepath, string &error) /* File pattern handling and outer loop over frames */ -Denoiser::Denoiser(DeviceInfo &device_info) +DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info) { samples_override = 0; tile_size = make_int2(64, 64); @@ -876,18 +877,16 @@ Denoiser::Denoiser(DeviceInfo &device_info) /* Initialize device. */ device = Device::create(device_info, stats, profiler, true); - DeviceRequestedFeatures req; - req.use_denoising = true; - device->load_kernels(req); + device->load_kernels(KERNEL_FEATURE_DENOISING); } -Denoiser::~Denoiser() +DenoiserPipeline::~DenoiserPipeline() { delete device; TaskScheduler::exit(); } -bool Denoiser::run() +bool DenoiserPipeline::run() { assert(input.size() == output.size()); @@ -931,3 +930,5 @@ bool Denoiser::run() } CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/render/denoising.h b/intern/cycles/render/denoising.h index c1b4d0a5596..097cc570d06 100644 --- a/intern/cycles/render/denoising.h +++ b/intern/cycles/render/denoising.h @@ -17,27 +17,31 @@ #ifndef __DENOISING_H__ #define __DENOISING_H__ -#include "device/device.h" -#include "device/device_denoising.h" +#if 0 -#include "render/buffers.h" +/* TODO(sergey): Make it explicit and clear when something is a denoiser, its pipeline or + * parameters. Currently it is an annoying mixture of terms used interchangeably. 
*/ -#include "util/util_string.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +# include "device/device.h" -#include <OpenImageIO/imageio.h> +# include "render/buffers.h" + +# include "util/util_string.h" +# include "util/util_unique_ptr.h" +# include "util/util_vector.h" + +# include <OpenImageIO/imageio.h> OIIO_NAMESPACE_USING CCL_NAMESPACE_BEGIN -/* Denoiser */ +/* Denoiser pipeline */ -class Denoiser { +class DenoiserPipeline { public: - Denoiser(DeviceInfo &device_info); - ~Denoiser(); + DenoiserPipeline(DeviceInfo &device_info); + ~DenoiserPipeline(); bool run(); @@ -155,7 +159,10 @@ class DenoiseImage { class DenoiseTask { public: - DenoiseTask(Device *device, Denoiser *denoiser, int frame, const vector<int> &neighbor_frames); + DenoiseTask(Device *device, + DenoiserPipeline *denoiser, + int frame, + const vector<int> &neighbor_frames); ~DenoiseTask(); /* Task stages */ @@ -168,7 +175,7 @@ class DenoiseTask { protected: /* Denoiser parameters and device */ - Denoiser *denoiser; + DenoiserPipeline *denoiser; Device *device; /* Frame number to be denoised */ @@ -204,4 +211,6 @@ class DenoiseTask { CCL_NAMESPACE_END +#endif + #endif /* __DENOISING_H__ */ diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 5df396394c4..8e14b338bd3 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -16,9 +16,12 @@ #include "render/film.h" #include "device/device.h" +#include "render/background.h" +#include "render/bake.h" #include "render/camera.h" #include "render/integrator.h" #include "render/mesh.h" +#include "render/object.h" #include "render/scene.h" #include "render/stats.h" #include "render/tables.h" @@ -31,261 +34,6 @@ CCL_NAMESPACE_BEGIN -/* Pass */ - -static bool compare_pass_order(const Pass &a, const Pass &b) -{ - if (a.components == b.components) - return (a.type < b.type); - return (a.components > b.components); -} - -static NodeEnum *get_pass_type_enum() -{ - static NodeEnum 
pass_type_enum; - pass_type_enum.insert("combined", PASS_COMBINED); - pass_type_enum.insert("depth", PASS_DEPTH); - pass_type_enum.insert("normal", PASS_NORMAL); - pass_type_enum.insert("uv", PASS_UV); - pass_type_enum.insert("object_id", PASS_OBJECT_ID); - pass_type_enum.insert("material_id", PASS_MATERIAL_ID); - pass_type_enum.insert("motion", PASS_MOTION); - pass_type_enum.insert("motion_weight", PASS_MOTION_WEIGHT); - pass_type_enum.insert("render_time", PASS_RENDER_TIME); - pass_type_enum.insert("cryptomatte", PASS_CRYPTOMATTE); - pass_type_enum.insert("aov_color", PASS_AOV_COLOR); - pass_type_enum.insert("aov_value", PASS_AOV_VALUE); - pass_type_enum.insert("adaptive_aux_buffer", PASS_ADAPTIVE_AUX_BUFFER); - pass_type_enum.insert("sample_count", PASS_SAMPLE_COUNT); - pass_type_enum.insert("mist", PASS_MIST); - pass_type_enum.insert("emission", PASS_EMISSION); - pass_type_enum.insert("background", PASS_BACKGROUND); - pass_type_enum.insert("ambient_occlusion", PASS_AO); - pass_type_enum.insert("shadow", PASS_SHADOW); - pass_type_enum.insert("diffuse_direct", PASS_DIFFUSE_DIRECT); - pass_type_enum.insert("diffuse_indirect", PASS_DIFFUSE_INDIRECT); - pass_type_enum.insert("diffuse_color", PASS_DIFFUSE_COLOR); - pass_type_enum.insert("glossy_direct", PASS_GLOSSY_DIRECT); - pass_type_enum.insert("glossy_indirect", PASS_GLOSSY_INDIRECT); - pass_type_enum.insert("glossy_color", PASS_GLOSSY_COLOR); - pass_type_enum.insert("transmission_direct", PASS_TRANSMISSION_DIRECT); - pass_type_enum.insert("transmission_indirect", PASS_TRANSMISSION_INDIRECT); - pass_type_enum.insert("transmission_color", PASS_TRANSMISSION_COLOR); - pass_type_enum.insert("volume_direct", PASS_VOLUME_DIRECT); - pass_type_enum.insert("volume_indirect", PASS_VOLUME_INDIRECT); - pass_type_enum.insert("bake_primitive", PASS_BAKE_PRIMITIVE); - pass_type_enum.insert("bake_differential", PASS_BAKE_DIFFERENTIAL); - - return &pass_type_enum; -} - -NODE_DEFINE(Pass) -{ - NodeType *type = 
NodeType::add("pass", create); - - NodeEnum *pass_type_enum = get_pass_type_enum(); - SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); - SOCKET_STRING(name, "Name", ustring()); - - return type; -} - -Pass::Pass() : Node(get_node_type()) -{ -} - -void Pass::add(PassType type, vector<Pass> &passes, const char *name) -{ - for (size_t i = 0; i < passes.size(); i++) { - if (passes[i].type != type) { - continue; - } - - /* An empty name is used as a placeholder to signal that any pass of - * that type is fine (because the content always is the same). - * This is important to support divide_type: If the pass that has a - * divide_type is added first, a pass for divide_type with an empty - * name will be added. Then, if a matching pass with a name is later - * requested, the existing placeholder will be renamed to that. - * If the divide_type is explicitly allocated with a name first and - * then again as part of another pass, the second one will just be - * skipped because that type already exists. */ - - /* If no name is specified, any pass of the correct type will match. */ - if (name == NULL) { - return; - } - - /* If we already have a placeholder pass, rename that one. */ - if (passes[i].name.empty()) { - passes[i].name = name; - return; - } - - /* If neither existing nor requested pass have placeholder name, they - * must match. 
*/ - if (name == passes[i].name) { - return; - } - } - - Pass pass; - - pass.type = type; - pass.filter = true; - pass.exposure = false; - pass.divide_type = PASS_NONE; - if (name) { - pass.name = name; - } - - switch (type) { - case PASS_NONE: - pass.components = 0; - break; - case PASS_COMBINED: - pass.components = 4; - pass.exposure = true; - break; - case PASS_DEPTH: - pass.components = 1; - pass.filter = false; - break; - case PASS_MIST: - pass.components = 1; - break; - case PASS_NORMAL: - pass.components = 4; - break; - case PASS_UV: - pass.components = 4; - break; - case PASS_MOTION: - pass.components = 4; - pass.divide_type = PASS_MOTION_WEIGHT; - break; - case PASS_MOTION_WEIGHT: - pass.components = 1; - break; - case PASS_OBJECT_ID: - case PASS_MATERIAL_ID: - pass.components = 1; - pass.filter = false; - break; - - case PASS_EMISSION: - case PASS_BACKGROUND: - pass.components = 4; - pass.exposure = true; - break; - case PASS_AO: - pass.components = 4; - break; - case PASS_SHADOW: - pass.components = 4; - pass.exposure = false; - break; - case PASS_LIGHT: - /* This isn't a real pass, used by baking to see whether - * light data is needed or not. - * - * Set components to 0 so pass sort below happens in a - * determined way. - */ - pass.components = 0; - break; - case PASS_RENDER_TIME: - /* This pass is handled entirely on the host side. 
*/ - pass.components = 0; - break; - - case PASS_DIFFUSE_COLOR: - case PASS_GLOSSY_COLOR: - case PASS_TRANSMISSION_COLOR: - pass.components = 4; - break; - case PASS_DIFFUSE_DIRECT: - case PASS_DIFFUSE_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_DIFFUSE_COLOR; - break; - case PASS_GLOSSY_DIRECT: - case PASS_GLOSSY_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_GLOSSY_COLOR; - break; - case PASS_TRANSMISSION_DIRECT: - case PASS_TRANSMISSION_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_TRANSMISSION_COLOR; - break; - case PASS_VOLUME_DIRECT: - case PASS_VOLUME_INDIRECT: - pass.components = 4; - pass.exposure = true; - break; - case PASS_CRYPTOMATTE: - pass.components = 4; - break; - case PASS_ADAPTIVE_AUX_BUFFER: - pass.components = 4; - break; - case PASS_SAMPLE_COUNT: - pass.components = 1; - pass.exposure = false; - break; - case PASS_AOV_COLOR: - pass.components = 4; - break; - case PASS_AOV_VALUE: - pass.components = 1; - break; - case PASS_BAKE_PRIMITIVE: - case PASS_BAKE_DIFFERENTIAL: - pass.components = 4; - pass.exposure = false; - pass.filter = false; - break; - default: - assert(false); - break; - } - - passes.push_back(pass); - - /* Order from by components, to ensure alignment so passes with size 4 - * come first and then passes with size 1. Note this must use stable sort - * so cryptomatte passes remain in the right order. 
*/ - stable_sort(&passes[0], &passes[0] + passes.size(), compare_pass_order); - - if (pass.divide_type != PASS_NONE) - Pass::add(pass.divide_type, passes); -} - -bool Pass::equals(const vector<Pass> &A, const vector<Pass> &B) -{ - if (A.size() != B.size()) - return false; - - for (int i = 0; i < A.size(); i++) - if (A[i].type != B[i].type || A[i].name != B[i].name) - return false; - - return true; -} - -bool Pass::contains(const vector<Pass> &passes, PassType type) -{ - for (size_t i = 0; i < passes.size(); i++) - if (passes[i].type == type) - return true; - - return false; -} - /* Pixel Filter */ static float filter_func_box(float /*v*/, float /*width*/) @@ -368,17 +116,11 @@ NODE_DEFINE(Film) SOCKET_FLOAT(mist_depth, "Mist Depth", 100.0f); SOCKET_FLOAT(mist_falloff, "Mist Falloff", 1.0f); - SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false); - SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); - SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false); - SOCKET_INT(denoising_flags, "Denoising Flags", 0); - SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false); - - SOCKET_BOOLEAN(use_light_visibility, "Use Light Visibility", false); - - NodeEnum *pass_type_enum = get_pass_type_enum(); + const NodeEnum *pass_type_enum = Pass::get_type_enum(); SOCKET_ENUM(display_pass, "Display Pass", *pass_type_enum, PASS_COMBINED); + SOCKET_BOOLEAN(show_active_pixels, "Show Active Pixels", false); + static NodeEnum cryptomatte_passes_enum; cryptomatte_passes_enum.insert("none", CRYPT_NONE); cryptomatte_passes_enum.insert("object", CRYPT_OBJECT); @@ -389,15 +131,13 @@ NODE_DEFINE(Film) SOCKET_INT(cryptomatte_depth, "Cryptomatte Depth", 0); + SOCKET_BOOLEAN(use_approximate_shadow_catcher, "Use Approximate Shadow Catcher", false); + return type; } -Film::Film() : Node(get_node_type()) +Film::Film() : Node(get_node_type()), filter_table_offset_(TABLE_OFFSET_INVALID) { - 
use_light_visibility = false; - filter_table_offset = TABLE_OFFSET_INVALID; - cryptomatte_passes = CRYPT_NONE; - display_pass = PASS_COMBINED; } Film::~Film() @@ -406,7 +146,8 @@ Film::~Film() void Film::add_default(Scene *scene) { - Pass::add(PASS_COMBINED, scene->passes); + Pass *pass = scene->create_node<Pass>(); + pass->set_type(PASS_COMBINED); } void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) @@ -426,50 +167,77 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) /* update __data */ kfilm->exposure = exposure; + kfilm->pass_alpha_threshold = pass_alpha_threshold; kfilm->pass_flag = 0; - kfilm->display_pass_stride = -1; - kfilm->display_pass_components = 0; - kfilm->display_divide_pass_stride = -1; - kfilm->use_display_exposure = false; - kfilm->use_display_pass_alpha = (display_pass == PASS_COMBINED); + kfilm->use_approximate_shadow_catcher = get_use_approximate_shadow_catcher(); kfilm->light_pass_flag = 0; kfilm->pass_stride = 0; - kfilm->use_light_pass = use_light_visibility; - kfilm->pass_aov_value_num = 0; - kfilm->pass_aov_color_num = 0; + + /* Mark with PASS_UNUSED to avoid mask test in the kernel. */ + kfilm->pass_background = PASS_UNUSED; + kfilm->pass_emission = PASS_UNUSED; + kfilm->pass_ao = PASS_UNUSED; + kfilm->pass_diffuse_direct = PASS_UNUSED; + kfilm->pass_diffuse_indirect = PASS_UNUSED; + kfilm->pass_glossy_direct = PASS_UNUSED; + kfilm->pass_glossy_indirect = PASS_UNUSED; + kfilm->pass_transmission_direct = PASS_UNUSED; + kfilm->pass_transmission_indirect = PASS_UNUSED; + kfilm->pass_volume_direct = PASS_UNUSED; + kfilm->pass_volume_indirect = PASS_UNUSED; + kfilm->pass_volume_direct = PASS_UNUSED; + kfilm->pass_volume_indirect = PASS_UNUSED; + kfilm->pass_shadow = PASS_UNUSED; + + /* Mark passes as unused so that the kernel knows the pass is inaccessible. 
*/ + kfilm->pass_denoising_normal = PASS_UNUSED; + kfilm->pass_denoising_albedo = PASS_UNUSED; + kfilm->pass_sample_count = PASS_UNUSED; + kfilm->pass_adaptive_aux_buffer = PASS_UNUSED; + kfilm->pass_shadow_catcher = PASS_UNUSED; + kfilm->pass_shadow_catcher_sample_count = PASS_UNUSED; + kfilm->pass_shadow_catcher_matte = PASS_UNUSED; bool have_cryptomatte = false; + bool have_aov_color = false; + bool have_aov_value = false; for (size_t i = 0; i < scene->passes.size(); i++) { - Pass &pass = scene->passes[i]; + const Pass *pass = scene->passes[i]; - if (pass.type == PASS_NONE) { + if (pass->get_type() == PASS_NONE || !pass->is_written()) { + continue; + } + + if (pass->get_mode() == PassMode::DENOISED) { + /* Generally we only storing offsets of the noisy passes. The display pass is an exception + * since it is a read operation and not a write. */ + kfilm->pass_stride += pass->get_info().num_components; continue; } /* Can't do motion pass if no motion vectors are available. */ - if (pass.type == PASS_MOTION || pass.type == PASS_MOTION_WEIGHT) { + if (pass->get_type() == PASS_MOTION || pass->get_type() == PASS_MOTION_WEIGHT) { if (scene->need_motion() != Scene::MOTION_PASS) { - kfilm->pass_stride += pass.components; + kfilm->pass_stride += pass->get_info().num_components; continue; } } - int pass_flag = (1 << (pass.type % 32)); - if (pass.type <= PASS_CATEGORY_MAIN_END) { - kfilm->pass_flag |= pass_flag; - } - else if (pass.type <= PASS_CATEGORY_LIGHT_END) { - kfilm->use_light_pass = 1; + const int pass_flag = (1 << (pass->get_type() % 32)); + if (pass->get_type() <= PASS_CATEGORY_LIGHT_END) { kfilm->light_pass_flag |= pass_flag; } + else if (pass->get_type() <= PASS_CATEGORY_DATA_END) { + kfilm->pass_flag |= pass_flag; + } else { - assert(pass.type <= PASS_CATEGORY_BAKE_END); + assert(pass->get_type() <= PASS_CATEGORY_BAKE_END); } - switch (pass.type) { + switch (pass->get_type()) { case PASS_COMBINED: kfilm->pass_combined = kfilm->pass_stride; break; @@ -479,6 
+247,12 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_NORMAL: kfilm->pass_normal = kfilm->pass_stride; break; + case PASS_POSITION: + kfilm->pass_position = kfilm->pass_stride; + break; + case PASS_ROUGHNESS: + kfilm->pass_roughness = kfilm->pass_stride; + break; case PASS_UV: kfilm->pass_uv = kfilm->pass_stride; break; @@ -511,9 +285,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_shadow = kfilm->pass_stride; break; - case PASS_LIGHT: - break; - case PASS_DIFFUSE_COLOR: kfilm->pass_diffuse_color = kfilm->pass_stride; break; @@ -563,78 +334,56 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride; have_cryptomatte = true; break; + + case PASS_DENOISING_NORMAL: + kfilm->pass_denoising_normal = kfilm->pass_stride; + break; + case PASS_DENOISING_ALBEDO: + kfilm->pass_denoising_albedo = kfilm->pass_stride; + break; + + case PASS_SHADOW_CATCHER: + kfilm->pass_shadow_catcher = kfilm->pass_stride; + break; + case PASS_SHADOW_CATCHER_SAMPLE_COUNT: + kfilm->pass_shadow_catcher_sample_count = kfilm->pass_stride; + break; + case PASS_SHADOW_CATCHER_MATTE: + kfilm->pass_shadow_catcher_matte = kfilm->pass_stride; + break; + case PASS_ADAPTIVE_AUX_BUFFER: kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride; break; case PASS_SAMPLE_COUNT: kfilm->pass_sample_count = kfilm->pass_stride; break; + case PASS_AOV_COLOR: - if (kfilm->pass_aov_color_num == 0) { + if (!have_aov_color) { kfilm->pass_aov_color = kfilm->pass_stride; + have_aov_color = true; } - kfilm->pass_aov_color_num++; break; case PASS_AOV_VALUE: - if (kfilm->pass_aov_value_num == 0) { + if (!have_aov_value) { kfilm->pass_aov_value = kfilm->pass_stride; + have_aov_value = true; } - kfilm->pass_aov_value_num++; break; default: assert(false); break; } - if (pass.type == display_pass) { - kfilm->display_pass_stride = kfilm->pass_stride; - kfilm->display_pass_components = pass.components; - 
kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f); - } - else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR || - pass.type == PASS_GLOSSY_COLOR) { - kfilm->display_divide_pass_stride = kfilm->pass_stride; - } - - kfilm->pass_stride += pass.components; - } - - kfilm->pass_denoising_data = 0; - kfilm->pass_denoising_clean = 0; - kfilm->denoising_flags = 0; - if (denoising_data_pass) { - kfilm->pass_denoising_data = kfilm->pass_stride; - kfilm->pass_stride += DENOISING_PASS_SIZE_BASE; - kfilm->denoising_flags = denoising_flags; - if (denoising_clean_pass) { - kfilm->pass_denoising_clean = kfilm->pass_stride; - kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN; - kfilm->use_light_pass = 1; - } - if (denoising_prefiltered_pass) { - kfilm->pass_stride += DENOISING_PASS_SIZE_PREFILTERED; - } - } - - kfilm->pass_stride = align_up(kfilm->pass_stride, 4); - - /* When displaying the normal/uv pass in the viewport we need to disable - * transparency. - * - * We also don't need to perform light accumulations. Later we want to optimize this to suppress - * light calculations. 
*/ - if (display_pass == PASS_NORMAL || display_pass == PASS_UV) { - kfilm->use_light_pass = 0; - } - else { - kfilm->pass_alpha_threshold = pass_alpha_threshold; + kfilm->pass_stride += pass->get_info().num_components; } /* update filter table */ vector<float> table = filter_table(filter_type, filter_width); - scene->lookup_tables->remove_table(&filter_table_offset); - filter_table_offset = scene->lookup_tables->add_table(dscene, table); - kfilm->filter_table_offset = (int)filter_table_offset; + scene->lookup_tables->remove_table(&filter_table_offset_); + filter_table_offset_ = scene->lookup_tables->add_table(dscene, table); + kfilm->filter_table_offset = (int)filter_table_offset_; /* mist pass parameters */ kfilm->mist_start = mist_start; @@ -644,79 +393,298 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->cryptomatte_passes = cryptomatte_passes; kfilm->cryptomatte_depth = cryptomatte_depth; - pass_stride = kfilm->pass_stride; - denoising_data_offset = kfilm->pass_denoising_data; - denoising_clean_offset = kfilm->pass_denoising_clean; - clear_modified(); } void Film::device_free(Device * /*device*/, DeviceScene * /*dscene*/, Scene *scene) { - scene->lookup_tables->remove_table(&filter_table_offset); + scene->lookup_tables->remove_table(&filter_table_offset_); } -void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes) +int Film::get_aov_offset(Scene *scene, string name, bool &is_color) { - if (Pass::contains(scene->passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) { - scene->geometry_manager->tag_update(scene, GeometryManager::UV_PASS_NEEDED); + int offset_color = 0, offset_value = 0; + foreach (const Pass *pass, scene->passes) { + if (pass->get_name() == name) { + if (pass->get_type() == PASS_AOV_VALUE) { + is_color = false; + return offset_value; + } + else if (pass->get_type() == PASS_AOV_COLOR) { + is_color = true; + return offset_color; + } + } + + if (pass->get_type() == 
PASS_AOV_VALUE) { + offset_value += pass->get_info().num_components; + } + else if (pass->get_type() == PASS_AOV_COLOR) { + offset_color += pass->get_info().num_components; + } + } + + return -1; +} + +void Film::update_passes(Scene *scene, bool add_sample_count_pass) +{ + const Background *background = scene->background; + const BakeManager *bake_manager = scene->bake_manager; + const ObjectManager *object_manager = scene->object_manager; + Integrator *integrator = scene->integrator; + + if (!is_modified() && !object_manager->need_update() && !integrator->is_modified()) { + return; + } + + /* Remove auto generated passes and recreate them. */ + remove_auto_passes(scene); + + /* Display pass for viewport. */ + const PassType display_pass = get_display_pass(); + add_auto_pass(scene, display_pass); + + /* Assumption is that a combined pass always exists for now, for example + * adaptive sampling is always based on a combined pass. But we should + * try to lift this limitation in the future for faster rendering of + * individual passes. */ + if (display_pass != PASS_COMBINED) { + add_auto_pass(scene, PASS_COMBINED); + } + + /* Create passes needed for adaptive sampling. */ + const AdaptiveSampling adaptive_sampling = integrator->get_adaptive_sampling(); + if (adaptive_sampling.use) { + add_auto_pass(scene, PASS_SAMPLE_COUNT); + add_auto_pass(scene, PASS_ADAPTIVE_AUX_BUFFER); + } + + /* Create passes needed for denoising. */ + const bool use_denoise = integrator->get_use_denoise(); + if (use_denoise) { + if (integrator->get_use_denoise_pass_normal()) { + add_auto_pass(scene, PASS_DENOISING_NORMAL); + } + if (integrator->get_use_denoise_pass_albedo()) { + add_auto_pass(scene, PASS_DENOISING_ALBEDO); + } + } + + /* Create passes for shadow catcher. 
*/ + if (scene->has_shadow_catcher()) { + const bool need_background = get_use_approximate_shadow_catcher() && + !background->get_transparent(); + + add_auto_pass(scene, PASS_SHADOW_CATCHER); + add_auto_pass(scene, PASS_SHADOW_CATCHER_SAMPLE_COUNT); + add_auto_pass(scene, PASS_SHADOW_CATCHER_MATTE); + + if (need_background) { + add_auto_pass(scene, PASS_BACKGROUND); + } + } + else if (Pass::contains(scene->passes, PASS_SHADOW_CATCHER)) { + add_auto_pass(scene, PASS_SHADOW_CATCHER); + add_auto_pass(scene, PASS_SHADOW_CATCHER_SAMPLE_COUNT); + } + + const vector<Pass *> passes_immutable = scene->passes; + for (const Pass *pass : passes_immutable) { + const PassInfo info = pass->get_info(); + /* Add utility passes needed to generate some light passes. */ + if (info.divide_type != PASS_NONE) { + add_auto_pass(scene, info.divide_type); + } + if (info.direct_type != PASS_NONE) { + add_auto_pass(scene, info.direct_type); + } + if (info.indirect_type != PASS_NONE) { + add_auto_pass(scene, info.indirect_type); + } + + /* NOTE: Enable all denoised passes when storage is requested. + * This way it is possible to tweak denoiser parameters later on. */ + if (info.support_denoise && use_denoise) { + add_auto_pass(scene, pass->get_type(), PassMode::DENOISED); + } + } + + if (bake_manager->get_baking()) { + add_auto_pass(scene, PASS_BAKE_PRIMITIVE, "BakePrimitive"); + add_auto_pass(scene, PASS_BAKE_DIFFERENTIAL, "BakeDifferential"); + } + + if (add_sample_count_pass) { + if (!Pass::contains(scene->passes, PASS_SAMPLE_COUNT)) { + add_auto_pass(scene, PASS_SAMPLE_COUNT); + } + } + + /* Remove duplicates and initialize internal pass info. */ + finalize_passes(scene, use_denoise); + /* Flush scene updates. 
*/ + const bool have_uv_pass = Pass::contains(scene->passes, PASS_UV); + const bool have_motion_pass = Pass::contains(scene->passes, PASS_MOTION); + const bool have_ao_pass = Pass::contains(scene->passes, PASS_AO); + + if (have_uv_pass != prev_have_uv_pass) { + scene->geometry_manager->tag_update(scene, GeometryManager::UV_PASS_NEEDED); foreach (Shader *shader, scene->shaders) shader->need_update_uvs = true; } - else if (Pass::contains(scene->passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) { + if (have_motion_pass != prev_have_motion_pass) { scene->geometry_manager->tag_update(scene, GeometryManager::MOTION_PASS_NEEDED); } - else if (Pass::contains(scene->passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) { + if (have_ao_pass != prev_have_ao_pass) { scene->integrator->tag_update(scene, Integrator::AO_PASS_MODIFIED); } - if (update_passes) { - scene->passes = passes_; + prev_have_uv_pass = have_uv_pass; + prev_have_motion_pass = have_motion_pass; + prev_have_ao_pass = have_ao_pass; + + tag_modified(); + + /* Debug logging. */ + if (VLOG_IS_ON(2)) { + VLOG(2) << "Effective scene passes:"; + for (const Pass *pass : scene->passes) { + VLOG(2) << "- " << *pass; + } } } -int Film::get_aov_offset(Scene *scene, string name, bool &is_color) +void Film::add_auto_pass(Scene *scene, PassType type, const char *name) { - int num_color = 0, num_value = 0; - foreach (const Pass &pass, scene->passes) { - if (pass.type == PASS_AOV_COLOR) { - num_color++; - } - else if (pass.type == PASS_AOV_VALUE) { - num_value++; + add_auto_pass(scene, type, PassMode::NOISY, name); +} + +void Film::add_auto_pass(Scene *scene, PassType type, PassMode mode, const char *name) +{ + Pass *pass = new Pass(); + pass->set_type(type); + pass->set_mode(mode); + pass->set_name(ustring((name) ? 
name : "")); + pass->is_auto_ = true; + + pass->set_owner(scene); + scene->passes.push_back(pass); +} + +void Film::remove_auto_passes(Scene *scene) +{ + /* Remove all passes which were automatically created. */ + vector<Pass *> new_passes; + + for (Pass *pass : scene->passes) { + if (!pass->is_auto_) { + new_passes.push_back(pass); } else { - continue; - } - - if (pass.name == name) { - is_color = (pass.type == PASS_AOV_COLOR); - return (is_color ? num_color : num_value) - 1; + delete pass; } } - return -1; + scene->passes = new_passes; } -int Film::get_pass_stride() const +static bool compare_pass_order(const Pass *a, const Pass *b) { - return pass_stride; -} + const int num_components_a = a->get_info().num_components; + const int num_components_b = b->get_info().num_components; -int Film::get_denoising_data_offset() const -{ - return denoising_data_offset; + if (num_components_a == num_components_b) { + return (a->get_type() < b->get_type()); + } + + return num_components_a > num_components_b; } -int Film::get_denoising_clean_offset() const +void Film::finalize_passes(Scene *scene, const bool use_denoise) { - return denoising_clean_offset; + /* Remove duplicate passes. */ + vector<Pass *> new_passes; + + for (Pass *pass : scene->passes) { + /* Disable denoising on passes if denoising is disabled, or if the + * pass does not support it. */ + pass->set_mode((use_denoise && pass->get_info().support_denoise) ? pass->get_mode() : + PassMode::NOISY); + + /* Merge duplicate passes. */ + bool duplicate_found = false; + for (Pass *new_pass : new_passes) { + /* If different type or denoising, don't merge. */ + if (new_pass->get_type() != pass->get_type() || new_pass->get_mode() != pass->get_mode()) { + continue; + } + + /* If both passes have a name and the names are different, don't merge. + * If either pass has a name, we'll use that name. 
*/ + if (!pass->get_name().empty() && !new_pass->get_name().empty() && + pass->get_name() != new_pass->get_name()) { + continue; + } + + if (!pass->get_name().empty() && new_pass->get_name().empty()) { + new_pass->set_name(pass->get_name()); + } + + new_pass->is_auto_ &= pass->is_auto_; + duplicate_found = true; + break; + } + + if (!duplicate_found) { + new_passes.push_back(pass); + } + else { + delete pass; + } + } + + /* Order from by components and type, This is required to for AOVs and cryptomatte passes, + * which the kernel assumes to be in order. Note this must use stable sort so cryptomatte + * passes remain in the right order. */ + stable_sort(new_passes.begin(), new_passes.end(), compare_pass_order); + + scene->passes = new_passes; } -size_t Film::get_filter_table_offset() const +uint Film::get_kernel_features(const Scene *scene) const { - return filter_table_offset; + uint kernel_features = 0; + + for (const Pass *pass : scene->passes) { + if (!pass->is_written()) { + continue; + } + + const PassType pass_type = pass->get_type(); + const PassMode pass_mode = pass->get_mode(); + + if (pass_mode == PassMode::DENOISED || pass_type == PASS_DENOISING_NORMAL || + pass_type == PASS_DENOISING_ALBEDO) { + kernel_features |= KERNEL_FEATURE_DENOISING; + } + + if (pass_type != PASS_NONE && pass_type != PASS_COMBINED && + pass_type <= PASS_CATEGORY_LIGHT_END) { + kernel_features |= KERNEL_FEATURE_LIGHT_PASSES; + + if (pass_type == PASS_SHADOW) { + kernel_features |= KERNEL_FEATURE_SHADOW_PASS; + } + } + + if (pass_type == PASS_AO) { + kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE; + } + } + + return kernel_features; } CCL_NAMESPACE_END diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 462a7275491..5d327353361 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -17,6 +17,7 @@ #ifndef __FILM_H__ #define __FILM_H__ +#include "render/pass.h" #include "util/util_string.h" #include "util/util_vector.h" @@ -38,36 
+39,15 @@ typedef enum FilterType { FILTER_NUM_TYPES, } FilterType; -class Pass : public Node { - public: - NODE_DECLARE - - Pass(); - - PassType type; - int components; - bool filter; - bool exposure; - PassType divide_type; - ustring name; - - static void add(PassType type, vector<Pass> &passes, const char *name = NULL); - static bool equals(const vector<Pass> &A, const vector<Pass> &B); - static bool contains(const vector<Pass> &passes, PassType); -}; - class Film : public Node { public: NODE_DECLARE NODE_SOCKET_API(float, exposure) - NODE_SOCKET_API(bool, denoising_data_pass) - NODE_SOCKET_API(bool, denoising_clean_pass) - NODE_SOCKET_API(bool, denoising_prefiltered_pass) - NODE_SOCKET_API(int, denoising_flags) NODE_SOCKET_API(float, pass_alpha_threshold) NODE_SOCKET_API(PassType, display_pass) + NODE_SOCKET_API(bool, show_active_pixels) NODE_SOCKET_API(FilterType, filter_type) NODE_SOCKET_API(float, filter_width) @@ -76,17 +56,18 @@ class Film : public Node { NODE_SOCKET_API(float, mist_depth) NODE_SOCKET_API(float, mist_falloff) - NODE_SOCKET_API(bool, use_light_visibility) NODE_SOCKET_API(CryptomatteType, cryptomatte_passes) NODE_SOCKET_API(int, cryptomatte_depth) - NODE_SOCKET_API(bool, use_adaptive_sampling) + /* Approximate shadow catcher pass into its matte pass, so that both artificial objects and + * shadows can be alpha-overed onto a backdrop. 
*/ + NODE_SOCKET_API(bool, use_approximate_shadow_catcher) private: - int pass_stride; - int denoising_data_offset; - int denoising_clean_offset; - size_t filter_table_offset; + size_t filter_table_offset_; + bool prev_have_uv_pass = false; + bool prev_have_motion_pass = false; + bool prev_have_ao_pass = false; public: Film(); @@ -98,14 +79,20 @@ class Film : public Node { void device_update(Device *device, DeviceScene *dscene, Scene *scene); void device_free(Device *device, DeviceScene *dscene, Scene *scene); - void tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes = true); - int get_aov_offset(Scene *scene, string name, bool &is_color); - int get_pass_stride() const; - int get_denoising_data_offset() const; - int get_denoising_clean_offset() const; - size_t get_filter_table_offset() const; + /* Update passes so that they contain all passes required for the configured functionality. + * + * If `add_sample_count_pass` is true then the SAMPLE_COUNT pass is ensured to be added. */ + void update_passes(Scene *scene, bool add_sample_count_pass); + + uint get_kernel_features(const Scene *scene) const; + + private: + void add_auto_pass(Scene *scene, PassType type, const char *name = nullptr); + void add_auto_pass(Scene *scene, PassType type, PassMode mode, const char *name = nullptr); + void remove_auto_passes(Scene *scene); + void finalize_passes(Scene *scene, const bool use_denoise); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp index 7ec1d2d9abb..6804a006fe6 100644 --- a/intern/cycles/render/geometry.cpp +++ b/intern/cycles/render/geometry.cpp @@ -215,6 +215,12 @@ void Geometry::compute_bvh( msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total); Object object; + + /* Ensure all visibility bits are set at the geometry level BVH. In + * the object level BVH is where actual visibility is tested. 
*/ + object.set_is_shadow_catcher(true); + object.set_visibility(~0); + object.set_geometry(this); vector<Geometry *> geometry; @@ -315,7 +321,7 @@ void GeometryManager::update_osl_attributes(Device *device, { #ifdef WITH_OSL /* for OSL, a hash map is used to lookup the attribute by name. */ - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); og->object_name_map.clear(); og->attribute_map.clear(); @@ -1855,8 +1861,8 @@ void GeometryManager::device_update(Device *device, }); Camera *dicing_camera = scene->dicing_camera; - dicing_camera->set_screen_size_and_resolution( - dicing_camera->get_full_width(), dicing_camera->get_full_height(), 1); + dicing_camera->set_screen_size(dicing_camera->get_full_width(), + dicing_camera->get_full_height()); dicing_camera->update(scene); size_t i = 0; @@ -2157,7 +2163,7 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE; #ifdef WITH_OSL - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); if (og) { og->object_name_map.clear(); diff --git a/intern/cycles/render/gpu_display.cpp b/intern/cycles/render/gpu_display.cpp new file mode 100644 index 00000000000..a8f0cc50583 --- /dev/null +++ b/intern/cycles/render/gpu_display.cpp @@ -0,0 +1,227 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/gpu_display.h" + +#include "render/buffers.h" +#include "util/util_logging.h" + +CCL_NAMESPACE_BEGIN + +void GPUDisplay::reset(const BufferParams &buffer_params) +{ + thread_scoped_lock lock(mutex_); + + const GPUDisplayParams old_params = params_; + + params_.offset = make_int2(buffer_params.full_x, buffer_params.full_y); + params_.full_size = make_int2(buffer_params.full_width, buffer_params.full_height); + params_.size = make_int2(buffer_params.width, buffer_params.height); + + /* If the parameters did change, tag texture as unusable. This avoids drawing old texture content + * in an updated configuration of the viewport. For example, avoids drawing old frame when render + * border did change. + * If the parameters did not change, allow drawing the current state of the texture, which will + * not count as an up-to-date redraw. This will avoid flickering when doing camera navigation by + * showing a previously rendered frame until the new one is ready. */ + if (old_params.modified(params_)) { + texture_state_.is_usable = false; + } + + texture_state_.is_outdated = true; +} + +void GPUDisplay::mark_texture_updated() +{ + texture_state_.is_outdated = false; + texture_state_.is_usable = true; +} + +/* -------------------------------------------------------------------- + * Update procedure. + */ + +bool GPUDisplay::update_begin(int texture_width, int texture_height) +{ + DCHECK(!update_state_.is_active); + + if (update_state_.is_active) { + LOG(ERROR) << "Attempt to re-activate update process."; + return false; + } + + /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time. + * The update itself is non-blocking however, for better performance and to avoid + * potential deadlocks due to locks held by the subclass. 
*/ + GPUDisplayParams params; + { + thread_scoped_lock lock(mutex_); + params = params_; + texture_state_.size = make_int2(texture_width, texture_height); + } + + if (!do_update_begin(params, texture_width, texture_height)) { + LOG(ERROR) << "GPUDisplay implementation could not begin update."; + return false; + } + + update_state_.is_active = true; + + return true; +} + +void GPUDisplay::update_end() +{ + DCHECK(update_state_.is_active); + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to deactivate inactive update process."; + return; + } + + do_update_end(); + + update_state_.is_active = false; +} + +int2 GPUDisplay::get_texture_size() const +{ + return texture_state_.size; +} + +/* -------------------------------------------------------------------- + * Texture update from CPU buffer. + */ + +void GPUDisplay::copy_pixels_to_texture( + const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height) +{ + DCHECK(update_state_.is_active); + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update."; + return; + } + + mark_texture_updated(); + do_copy_pixels_to_texture(rgba_pixels, texture_x, texture_y, pixels_width, pixels_height); +} + +/* -------------------------------------------------------------------- + * Texture buffer mapping. 
+ */ + +half4 *GPUDisplay::map_texture_buffer() +{ + DCHECK(!texture_buffer_state_.is_mapped); + DCHECK(update_state_.is_active); + + if (texture_buffer_state_.is_mapped) { + LOG(ERROR) << "Attempt to re-map an already mapped texture buffer."; + return nullptr; + } + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update."; + return nullptr; + } + + half4 *mapped_rgba_pixels = do_map_texture_buffer(); + + if (mapped_rgba_pixels) { + texture_buffer_state_.is_mapped = true; + } + + return mapped_rgba_pixels; +} + +void GPUDisplay::unmap_texture_buffer() +{ + DCHECK(texture_buffer_state_.is_mapped); + + if (!texture_buffer_state_.is_mapped) { + LOG(ERROR) << "Attempt to unmap non-mapped texture buffer."; + return; + } + + texture_buffer_state_.is_mapped = false; + + mark_texture_updated(); + do_unmap_texture_buffer(); +} + +/* -------------------------------------------------------------------- + * Graphics interoperability. + */ + +DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get() +{ + DCHECK(!texture_buffer_state_.is_mapped); + DCHECK(update_state_.is_active); + + if (texture_buffer_state_.is_mapped) { + LOG(ERROR) + << "Attempt to use graphics interoperability mode while the texture buffer is mapped."; + return DeviceGraphicsInteropDestination(); + } + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to use graphics interoperability outside of GPUDisplay update."; + return DeviceGraphicsInteropDestination(); + } + + /* Assume that interop will write new values to the texture. */ + mark_texture_updated(); + + return do_graphics_interop_get(); +} + +void GPUDisplay::graphics_interop_activate() +{ +} + +void GPUDisplay::graphics_interop_deactivate() +{ +} + +/* -------------------------------------------------------------------- + * Drawing. + */ + +bool GPUDisplay::draw() +{ + /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time. 
+ * The drawing itself is non-blocking however, for better performance and to avoid + * potential deadlocks due to locks held by the subclass. */ + GPUDisplayParams params; + bool is_usable; + bool is_outdated; + + { + thread_scoped_lock lock(mutex_); + params = params_; + is_usable = texture_state_.is_usable; + is_outdated = texture_state_.is_outdated; + } + + if (is_usable) { + do_draw(params); + } + + return !is_outdated; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/gpu_display.h b/intern/cycles/render/gpu_display.h new file mode 100644 index 00000000000..a01348d28d5 --- /dev/null +++ b/intern/cycles/render/gpu_display.h @@ -0,0 +1,247 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "device/device_graphics_interop.h" +#include "util/util_half.h" +#include "util/util_thread.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +class BufferParams; + +/* GPUDisplay class takes care of drawing render result in a viewport. The render result is stored + * in a GPU-side texture, which is updated from a path tracer and drawn by an application. + * + * The base GPUDisplay does some special texture state tracking, which allows render Session to + * make decisions on whether reset for an updated state is possible or not. This state should only + * be tracked in a base class and a particular implementation should not worry about it. 
+ * + * The subclasses should only implement the pure virtual methods, which allows them to not worry + * about parent method calls, which helps them to be as small and reliable as possible. */ + +class GPUDisplayParams { + public: + /* Offset of the display within a viewport. + * For example, set to a lower-bottom corner of border render in Blender's viewport. */ + int2 offset = make_int2(0, 0); + + /* Full viewport size. + * + * NOTE: Is not affected by the resolution divider. */ + int2 full_size = make_int2(0, 0); + + /* Effective viewport size. + * In the case of border render, size of the border rectangle. + * + * NOTE: Is not affected by the resolution divider. */ + int2 size = make_int2(0, 0); + + bool modified(const GPUDisplayParams &other) const + { + return !(offset == other.offset && full_size == other.full_size && size == other.size); + } +}; + +class GPUDisplay { + public: + GPUDisplay() = default; + virtual ~GPUDisplay() = default; + + /* Reset the display for the new state of render session. Is called whenever session is reset, + * which happens on changes like viewport navigation or viewport dimension change. + * + * This call will configure parameters for a changed buffer and reset the texture state. */ + void reset(const BufferParams &buffer_params); + + const GPUDisplayParams &get_params() const + { + return params_; + } + + /* -------------------------------------------------------------------- + * Update procedure. + * + * These calls indicate a desire of the caller to update content of the displayed texture. */ + + /* Returns true when update is ready. Update should be finished with update_end(). + * + * If false is returned then no update is possible, and no update_end() call is needed. + * + * The texture width and height denote an actual resolution of the underlying render result. 
*/ + bool update_begin(int texture_width, int texture_height); + + void update_end(); + + /* Get currently configured texture size of the display (as configured by `update_begin()`). */ + int2 get_texture_size() const; + + /* -------------------------------------------------------------------- + * Texture update from CPU buffer. + * + * NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`. + * + * Most portable implementation, which must be supported by all platforms. Might not be the most + * efficient one. + */ + + /* Copy buffer of rendered pixels of a given size into a given position of the texture. + * + * This function does not acquire a lock. The reason for this is to allow use of this function + * for partial updates from different devices. In this case the caller will acquire the lock + * once, update all the slices and release + * the lock once. This will ensure that draw() will never use partially updated texture. */ + void copy_pixels_to_texture( + const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height); + + /* -------------------------------------------------------------------- + * Texture buffer mapping. + * + * This functionality is used to update GPU-side texture content without need to maintain CPU + * side buffer on the caller. + * + * NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`. + * + * NOTE: Texture buffer can not be mapped while graphics interoperability is active. This means + * that `map_texture_buffer()` is not allowed between `graphics_interop_begin()` and + * `graphics_interop_end()` calls. + */ + + /* Map pixels memory from texture to a buffer available for write from CPU. Width and height will + * define a requested size of the texture to write to. + * Upon success a non-null pointer is returned and the texture buffer is to be unmapped. 
+ * If an error happens during mapping, or if mapping is not supported by this GPU display a + * null pointer is returned and the buffer is NOT to be unmapped. + * + * NOTE: Usually the implementation will rely on a GPU context of some sort, and the GPU context + * is often can not be bound to two threads simultaneously, and can not be released from a + * different thread. This means that the mapping API should be used from the single thread only, + */ + half4 *map_texture_buffer(); + void unmap_texture_buffer(); + + /* -------------------------------------------------------------------- + * Graphics interoperability. + * + * A special code path which allows to update texture content directly from the GPU compute + * device. Complementary part of DeviceGraphicsInterop. + * + * NOTE: Graphics interoperability can not be used while the texture buffer is mapped. This means + * that `graphics_interop_get()` is not allowed between `map_texture_buffer()` and + * `unmap_texture_buffer()` calls. */ + + /* Get GPUDisplay graphics interoperability information which acts as a destination for the + * device API. */ + DeviceGraphicsInteropDestination graphics_interop_get(); + + /* (De)activate GPU display for graphics interoperability outside of regular display update + * routines. */ + virtual void graphics_interop_activate(); + virtual void graphics_interop_deactivate(); + + /* -------------------------------------------------------------------- + * Drawing. + */ + + /* Clear the texture by filling it with all zeroes. + * + * This call might happen in parallel with draw, but can never happen in parallel with the + * update. + * + * The actual zero-ing can be deferred to a later moment. What is important is that after clear + * and before pixels update the drawing texture will be fully empty, and that partial update + * after clear will write new pixel values for an updating area, leaving everything else zeroed. 
+ * + * If the GPU display supports graphics interoperability then the zeroing the display is to be + * delegated to the device via the `DeviceGraphicsInteropDestination`. */ + virtual void clear() = 0; + + /* Draw the current state of the texture. + * + * Returns true if this call did draw an updated state of the texture. */ + bool draw(); + + protected: + /* Implementation-specific calls which subclasses are to implement. + * These `do_foo()` method corresponds to their `foo()` calls, but they are purely virtual to + * simplify their particular implementation. */ + virtual bool do_update_begin(const GPUDisplayParams ¶ms, + int texture_width, + int texture_height) = 0; + virtual void do_update_end() = 0; + + virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels, + int texture_x, + int texture_y, + int pixels_width, + int pixels_height) = 0; + + virtual half4 *do_map_texture_buffer() = 0; + virtual void do_unmap_texture_buffer() = 0; + + /* Note that this might be called in parallel to do_update_begin() and do_update_end(), + * the subclass is responsible for appropriate mutex locks to avoid multiple threads + * editing and drawing the texture at the same time. */ + virtual void do_draw(const GPUDisplayParams ¶ms) = 0; + + virtual DeviceGraphicsInteropDestination do_graphics_interop_get() = 0; + + private: + thread_mutex mutex_; + GPUDisplayParams params_; + + /* Mark texture as its content has been updated. + * Used from places which knows that the texture content has been brought up-to-date, so that the + * drawing knows whether it can be performed, and whether drawing happened with an up-to-date + * texture state. */ + void mark_texture_updated(); + + /* State of the update process. */ + struct { + /* True when update is in process, indicated by `update_begin()` / `update_end()`. */ + bool is_active = false; + } update_state_; + + /* State of the texture, which is needed for an integration with render session and interactive + * updates and navigation. 
*/ + struct { + /* Denotes whether possibly existing state of GPU side texture is still usable. + * It will not be usable in cases like render border did change (in this case we don't want + * previous texture to be rendered at all). + * + * However, if only navigation or object in scene did change, then the outdated state of the + * texture is still usable for draw, preventing display viewport flickering on navigation and + * object modifications. */ + bool is_usable = false; + + /* Texture is considered outdated after `reset()` until the next call of + * `copy_pixels_to_texture()`. */ + bool is_outdated = true; + + /* Texture size in pixels. */ + int2 size = make_int2(0, 0); + } texture_state_; + + /* State of the texture buffer. Is tracked to perform sanity checks. */ + struct { + /* True when the texture buffer is mapped with `map_texture_buffer()`. */ + bool is_mapped = false; + } texture_buffer_state_; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index 5102b182593..3584754fad1 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -224,10 +224,6 @@ class ShaderNode : public Node { { return false; } - virtual bool has_raytrace() - { - return false; - } vector<ShaderInput *> inputs; vector<ShaderOutput *> outputs; @@ -242,22 +238,13 @@ class ShaderNode : public Node { * that those functions are for selective compilation only? */ - /* Nodes are split into several groups, group of level 0 contains - * nodes which are most commonly used, further levels are extension - * of previous one and includes less commonly used nodes. - */ - virtual int get_group() - { - return NODE_GROUP_LEVEL_0; - } - /* Node feature are used to disable huge nodes inside the group, * so it's possible to disable huge nodes inside of the required * nodes group. */ virtual int get_feature() { - return bump == SHADER_BUMP_NONE ? 0 : NODE_FEATURE_BUMP; + return bump == SHADER_BUMP_NONE ? 
0 : KERNEL_FEATURE_NODE_BUMP; } /* Get closure ID to which the node compiles into. */ diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index d8749cec9fa..d74d14242bb 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -53,6 +53,8 @@ NODE_DEFINE(Integrator) SOCKET_INT(transparent_max_bounce, "Transparent Max Bounce", 7); SOCKET_INT(ao_bounces, "AO Bounces", 0); + SOCKET_FLOAT(ao_factor, "AO Factor", 0.0f); + SOCKET_FLOAT(ao_distance, "AO Distance", FLT_MAX); SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024); SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f); @@ -66,33 +68,39 @@ NODE_DEFINE(Integrator) SOCKET_BOOLEAN(motion_blur, "Motion Blur", false); SOCKET_INT(aa_samples, "AA Samples", 0); - SOCKET_INT(diffuse_samples, "Diffuse Samples", 1); - SOCKET_INT(glossy_samples, "Glossy Samples", 1); - SOCKET_INT(transmission_samples, "Transmission Samples", 1); - SOCKET_INT(ao_samples, "AO Samples", 1); - SOCKET_INT(mesh_light_samples, "Mesh Light Samples", 1); - SOCKET_INT(subsurface_samples, "Subsurface Samples", 1); - SOCKET_INT(volume_samples, "Volume Samples", 1); SOCKET_INT(start_sample, "Start Sample", 0); + SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false); SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f); SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0); - SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true); - SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true); SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f); - static NodeEnum method_enum; - method_enum.insert("path", PATH); - method_enum.insert("branched_path", BRANCHED_PATH); - SOCKET_ENUM(method, "Method", method_enum, PATH); - static NodeEnum sampling_pattern_enum; sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); - sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ); 
sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ); SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); + static NodeEnum denoiser_type_enum; + denoiser_type_enum.insert("optix", DENOISER_OPTIX); + denoiser_type_enum.insert("openimagedenoise", DENOISER_OPENIMAGEDENOISE); + + static NodeEnum denoiser_prefilter_enum; + denoiser_prefilter_enum.insert("none", DENOISER_PREFILTER_NONE); + denoiser_prefilter_enum.insert("fast", DENOISER_PREFILTER_FAST); + denoiser_prefilter_enum.insert("accurate", DENOISER_PREFILTER_ACCURATE); + + /* Default to accurate denoising with OpenImageDenoise. For interactive viewport + * it's best use OptiX and disable the normal pass since it does not always have + * the desired effect for that denoiser. */ + SOCKET_BOOLEAN(use_denoise, "Use Denoiser", false); + SOCKET_ENUM(denoiser_type, "Denoiser Type", denoiser_type_enum, DENOISER_OPENIMAGEDENOISE); + SOCKET_INT(denoise_start_sample, "Start Sample to Denoise", 0); + SOCKET_BOOLEAN(use_denoise_pass_albedo, "Use Albedo Pass for Denoiser", true); + SOCKET_BOOLEAN(use_denoise_pass_normal, "Use Normal Pass for Denoiser", true); + SOCKET_ENUM( + denoiser_prefilter, "Denoiser Type", denoiser_prefilter_enum, DENOISER_PREFILTER_ACCURATE); + return type; } @@ -115,13 +123,20 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene } }); - const bool need_update_lut = ao_samples_is_modified() || diffuse_samples_is_modified() || - glossy_samples_is_modified() || max_bounce_is_modified() || - max_transmission_bounce_is_modified() || - mesh_light_samples_is_modified() || method_is_modified() || - sampling_pattern_is_modified() || - subsurface_samples_is_modified() || - transmission_samples_is_modified() || volume_samples_is_modified(); + KernelIntegrator *kintegrator = &dscene->data.integrator; + + /* Adaptive sampling requires PMJ samples. 
+ * + * This also makes detection of sampling pattern a bit more involved: can not rely on the changed + * state of socket, since its value might be different from the effective value used here. So + * instead compare with previous value in the KernelIntegrator. Only do it if the device was + * updated once (in which case the `sample_pattern_lut` will be allocated to a non-zero size). */ + const SamplingPattern new_sampling_pattern = (use_adaptive_sampling) ? SAMPLING_PATTERN_PMJ : + sampling_pattern; + + const bool need_update_lut = max_bounce_is_modified() || max_transmission_bounce_is_modified() || + dscene->sample_pattern_lut.size() == 0 || + kintegrator->sampling_pattern != new_sampling_pattern; if (need_update_lut) { dscene->sample_pattern_lut.tag_realloc(); @@ -129,8 +144,6 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene device_free(device, dscene); - KernelIntegrator *kintegrator = &dscene->data.integrator; - /* integrator parameters */ kintegrator->min_bounce = min_bounce + 1; kintegrator->max_bounce = max_bounce + 1; @@ -143,12 +156,9 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->transparent_min_bounce = transparent_min_bounce + 1; kintegrator->transparent_max_bounce = transparent_max_bounce + 1; - if (ao_bounces == 0) { - kintegrator->ao_bounces = INT_MAX; - } - else { - kintegrator->ao_bounces = ao_bounces - 1; - } + kintegrator->ao_bounces = ao_bounces; + kintegrator->ao_bounces_distance = ao_distance; + kintegrator->ao_bounces_factor = ao_factor; /* Transparent Shadows * We only need to enable transparent shadows, if we actually have @@ -171,10 +181,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->caustics_refractive = caustics_refractive; kintegrator->filter_glossy = (filter_glossy == 0.0f) ? 
FLT_MAX : 1.0f / filter_glossy; - kintegrator->seed = hash_uint2(seed, 0); - - kintegrator->use_ambient_occlusion = ((Pass::contains(scene->passes, PASS_AO)) || - dscene->data.background.ao_factor != 0.0f); + kintegrator->seed = seed; kintegrator->sample_clamp_direct = (sample_clamp_direct == 0.0f) ? FLT_MAX : sample_clamp_direct * 3.0f; @@ -182,51 +189,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene FLT_MAX : sample_clamp_indirect * 3.0f; - kintegrator->branched = (method == BRANCHED_PATH) && device->info.has_branched_path; - kintegrator->volume_decoupled = device->info.has_volume_decoupled; - kintegrator->diffuse_samples = diffuse_samples; - kintegrator->glossy_samples = glossy_samples; - kintegrator->transmission_samples = transmission_samples; - kintegrator->ao_samples = ao_samples; - kintegrator->mesh_light_samples = mesh_light_samples; - kintegrator->subsurface_samples = subsurface_samples; - kintegrator->volume_samples = volume_samples; - kintegrator->start_sample = start_sample; - - if (kintegrator->branched) { - kintegrator->sample_all_lights_direct = sample_all_lights_direct; - kintegrator->sample_all_lights_indirect = sample_all_lights_indirect; - } - else { - kintegrator->sample_all_lights_direct = false; - kintegrator->sample_all_lights_indirect = false; - } - - kintegrator->sampling_pattern = sampling_pattern; - kintegrator->aa_samples = aa_samples; - if (aa_samples > 0 && adaptive_min_samples == 0) { - kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples)); - VLOG(1) << "Cycles adaptive sampling: automatic min samples = " - << kintegrator->adaptive_min_samples; - } - else { - kintegrator->adaptive_min_samples = max(4, adaptive_min_samples); - } - - kintegrator->adaptive_step = 4; - kintegrator->adaptive_stop_per_sample = device->info.has_adaptive_stop_per_sample; - - /* Adaptive step must be a power of two for bitwise operations to work. 
*/ - assert((kintegrator->adaptive_step & (kintegrator->adaptive_step - 1)) == 0); - - if (aa_samples > 0 && adaptive_threshold == 0.0f) { - kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples); - VLOG(1) << "Cycles adaptive sampling: automatic threshold = " - << kintegrator->adaptive_threshold; - } - else { - kintegrator->adaptive_threshold = adaptive_threshold; - } + kintegrator->sampling_pattern = new_sampling_pattern; if (light_sampling_threshold > 0.0f) { kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold; @@ -236,29 +199,15 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene } /* sobol directions table */ - int max_samples = 1; - - if (kintegrator->branched) { - foreach (Light *light, scene->lights) - max_samples = max(max_samples, light->get_samples()); - - max_samples = max(max_samples, - max(diffuse_samples, max(glossy_samples, transmission_samples))); - max_samples = max(max_samples, max(ao_samples, max(mesh_light_samples, subsurface_samples))); - max_samples = max(max_samples, volume_samples); - } - - uint total_bounces = max_bounce + transparent_max_bounce + 3 + VOLUME_BOUNDS_MAX + - max(BSSRDF_MAX_HITS, BSSRDF_MAX_BOUNCES); - - max_samples *= total_bounces; + int max_samples = max_bounce + transparent_max_bounce + 3 + VOLUME_BOUNDS_MAX + + max(BSSRDF_MAX_HITS, BSSRDF_MAX_BOUNCES); int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM; dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS); if (need_update_lut) { - if (sampling_pattern == SAMPLING_PATTERN_SOBOL) { - uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions); + if (kintegrator->sampling_pattern == SAMPLING_PATTERN_SOBOL) { + uint *directions = (uint *)dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions); sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); @@ -276,10 +225,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene 
function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j)); } pool.wait_work(); + dscene->sample_pattern_lut.copy_to_device(); } } + kintegrator->has_shadow_catcher = scene->has_shadow_catcher(); + dscene->sample_pattern_lut.clear_modified(); clear_modified(); } @@ -295,17 +247,12 @@ void Integrator::tag_update(Scene *scene, uint32_t flag) tag_modified(); } - if (flag & (AO_PASS_MODIFIED | BACKGROUND_AO_MODIFIED)) { + if (flag & AO_PASS_MODIFIED) { /* tag only the ao_bounces socket as modified so we avoid updating sample_pattern_lut * unnecessarily */ tag_ao_bounces_modified(); } - if ((flag & LIGHT_SAMPLES_MODIFIED) && (method == BRANCHED_PATH)) { - /* the number of light samples may affect the size of the sample_pattern_lut */ - tag_sampling_pattern_modified(); - } - if (filter_glossy_is_modified()) { foreach (Shader *shader, scene->shaders) { if (shader->has_integrator_dependency) { @@ -321,4 +268,65 @@ void Integrator::tag_update(Scene *scene, uint32_t flag) } } +AdaptiveSampling Integrator::get_adaptive_sampling() const +{ + AdaptiveSampling adaptive_sampling; + + adaptive_sampling.use = use_adaptive_sampling; + + if (!adaptive_sampling.use) { + return adaptive_sampling; + } + + if (aa_samples > 0 && adaptive_threshold == 0.0f) { + adaptive_sampling.threshold = max(0.001f, 1.0f / (float)aa_samples); + VLOG(1) << "Cycles adaptive sampling: automatic threshold = " << adaptive_sampling.threshold; + } + else { + adaptive_sampling.threshold = adaptive_threshold; + } + + if (adaptive_sampling.threshold > 0 && adaptive_min_samples == 0) { + /* Threshold 0.1 -> 32, 0.01 -> 64, 0.001 -> 128. + * This is highly scene dependent, we make a guess that seemed to work well + * in various test scenes. 
*/ + const int min_samples = (int)ceilf(16.0f / powf(adaptive_sampling.threshold, 0.3f)); + adaptive_sampling.min_samples = max(4, min_samples); + VLOG(1) << "Cycles adaptive sampling: automatic min samples = " + << adaptive_sampling.min_samples; + } + else { + adaptive_sampling.min_samples = max(4, adaptive_min_samples); + } + + /* Arbitrary factor that makes the threshold more similar to what is was before, + * and gives arguably more intuitive values. */ + adaptive_sampling.threshold *= 5.0f; + + adaptive_sampling.adaptive_step = 16; + + DCHECK(is_power_of_two(adaptive_sampling.adaptive_step)) + << "Adaptive step must be a power of two for bitwise operations to work"; + + return adaptive_sampling; +} + +DenoiseParams Integrator::get_denoise_params() const +{ + DenoiseParams denoise_params; + + denoise_params.use = use_denoise; + + denoise_params.type = denoiser_type; + + denoise_params.start_sample = denoise_start_sample; + + denoise_params.use_pass_albedo = use_denoise_pass_albedo; + denoise_params.use_pass_normal = use_denoise_pass_normal; + + denoise_params.prefilter = denoiser_prefilter; + + return denoise_params; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 4eeeda92d41..32e108d62ca 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -19,7 +19,9 @@ #include "kernel/kernel_types.h" +#include "device/device_denoise.h" /* For the paramaters and type enum. 
*/ #include "graph/node.h" +#include "integrator/adaptive_sampling.h" CCL_NAMESPACE_BEGIN @@ -43,6 +45,8 @@ class Integrator : public Node { NODE_SOCKET_API(int, transparent_max_bounce) NODE_SOCKET_API(int, ao_bounces) + NODE_SOCKET_API(float, ao_factor) + NODE_SOCKET_API(float, ao_distance) NODE_SOCKET_API(int, volume_max_steps) NODE_SOCKET_API(float, volume_step_rate) @@ -62,37 +66,26 @@ class Integrator : public Node { static const int MAX_SAMPLES = (1 << 24); NODE_SOCKET_API(int, aa_samples) - NODE_SOCKET_API(int, diffuse_samples) - NODE_SOCKET_API(int, glossy_samples) - NODE_SOCKET_API(int, transmission_samples) - NODE_SOCKET_API(int, ao_samples) - NODE_SOCKET_API(int, mesh_light_samples) - NODE_SOCKET_API(int, subsurface_samples) - NODE_SOCKET_API(int, volume_samples) NODE_SOCKET_API(int, start_sample) - NODE_SOCKET_API(bool, sample_all_lights_direct) - NODE_SOCKET_API(bool, sample_all_lights_indirect) NODE_SOCKET_API(float, light_sampling_threshold) + NODE_SOCKET_API(bool, use_adaptive_sampling) NODE_SOCKET_API(int, adaptive_min_samples) NODE_SOCKET_API(float, adaptive_threshold) - enum Method { - BRANCHED_PATH = 0, - PATH = 1, - - NUM_METHODS, - }; - - NODE_SOCKET_API(Method, method) - NODE_SOCKET_API(SamplingPattern, sampling_pattern) + NODE_SOCKET_API(bool, use_denoise); + NODE_SOCKET_API(DenoiserType, denoiser_type); + NODE_SOCKET_API(int, denoise_start_sample); + NODE_SOCKET_API(bool, use_denoise_pass_albedo); + NODE_SOCKET_API(bool, use_denoise_pass_normal); + NODE_SOCKET_API(DenoiserPrefilter, denoiser_prefilter); + enum : uint32_t { AO_PASS_MODIFIED = (1 << 0), - BACKGROUND_AO_MODIFIED = (1 << 1), - LIGHT_SAMPLES_MODIFIED = (1 << 2), + OBJECT_MANAGER = (1 << 1), /* tag everything in the manager for an update */ UPDATE_ALL = ~0u, @@ -107,6 +100,9 @@ class Integrator : public Node { void device_free(Device *device, DeviceScene *dscene, bool force_free = false); void tag_update(Scene *scene, uint32_t flag); + + AdaptiveSampling get_adaptive_sampling() 
const; + DenoiseParams get_denoise_params() const; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp index fc47b0e8f0a..e31f8abd446 100644 --- a/intern/cycles/render/jitter.cpp +++ b/intern/cycles/render/jitter.cpp @@ -242,12 +242,6 @@ class PMJ02_Generator : public PMJ_Generator { static void shuffle(float2 points[], int size, int rng_seed) { - /* Offset samples by 1.0 for faster scrambling in kernel_random.h */ - for (int i = 0; i < size; ++i) { - points[i].x += 1.0f; - points[i].y += 1.0f; - } - if (rng_seed == 0) { return; } diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 15aa4e047b5..ae1150fc07b 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -14,12 +14,13 @@ * limitations under the License. */ -#include "render/light.h" #include "device/device.h" + #include "render/background.h" #include "render/film.h" #include "render/graph.h" #include "render/integrator.h" +#include "render/light.h" #include "render/mesh.h" #include "render/nodes.h" #include "render/object.h" @@ -27,6 +28,8 @@ #include "render/shader.h" #include "render/stats.h" +#include "integrator/shader_eval.h" + #include "util/util_foreach.h" #include "util/util_hash.h" #include "util/util_logging.h" @@ -43,63 +46,49 @@ static void shade_background_pixels(Device *device, vector<float3> &pixels, Progress &progress) { - /* create input */ - device_vector<uint4> d_input(device, "background_input", MEM_READ_ONLY); - device_vector<float4> d_output(device, "background_output", MEM_READ_WRITE); - - uint4 *d_input_data = d_input.alloc(width * height); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - float u = (x + 0.5f) / width; - float v = (y + 0.5f) / height; - - uint4 in = make_uint4(__float_as_int(u), __float_as_int(v), 0, 0); - d_input_data[x + y * width] = in; - } - } - - /* compute on device */ - d_output.alloc(width * height); - d_output.zero_to_device(); 
- d_input.copy_to_device(); - + /* Needs to be up to data for attribute access. */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - DeviceTask main_task(DeviceTask::SHADER); - main_task.shader_input = d_input.device_pointer; - main_task.shader_output = d_output.device_pointer; - main_task.shader_eval_type = SHADER_EVAL_BACKGROUND; - main_task.shader_x = 0; - main_task.shader_w = width * height; - main_task.num_samples = 1; - main_task.get_cancel = function_bind(&Progress::get_cancel, &progress); - - /* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */ - list<DeviceTask> split_tasks; - main_task.split(split_tasks, 1, 128 * 128); - - foreach (DeviceTask &task, split_tasks) { - device->task_add(task); - device->task_wait(); - d_output.copy_from_device(task.shader_x, 1, task.shader_w); - } - - d_input.free(); - - float4 *d_output_data = d_output.data(); - - pixels.resize(width * height); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - pixels[y * width + x].x = d_output_data[y * width + x].x; - pixels[y * width + x].y = d_output_data[y * width + x].y; - pixels[y * width + x].z = d_output_data[y * width + x].z; - } - } + const int size = width * height; + pixels.resize(size); + + /* Evaluate shader on device. */ + ShaderEval shader_eval(device, progress); + shader_eval.eval( + SHADER_EVAL_BACKGROUND, + size, + [&](device_vector<KernelShaderEvalInput> &d_input) { + /* Fill coordinates for shading. */ + KernelShaderEvalInput *d_input_data = d_input.data(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float u = (x + 0.5f) / width; + float v = (y + 0.5f) / height; + + KernelShaderEvalInput in; + in.object = OBJECT_NONE; + in.prim = PRIM_NONE; + in.u = u; + in.v = v; + d_input_data[x + y * width] = in; + } + } - d_output.free(); + return size; + }, + [&](device_vector<float4> &d_output) { + /* Copy output to pixel buffer. 
*/ + float4 *d_output_data = d_output.data(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + pixels[y * width + x].x = d_output_data[y * width + x].x; + pixels[y * width + x].y = d_output_data[y * width + x].y; + pixels[y * width + x].z = d_output_data[y * width + x].z; + } + } + }); } /* Light */ @@ -140,15 +129,16 @@ NODE_DEFINE(Light) SOCKET_BOOLEAN(cast_shadow, "Cast Shadow", true); SOCKET_BOOLEAN(use_mis, "Use Mis", false); + SOCKET_BOOLEAN(use_camera, "Use Camera", true); SOCKET_BOOLEAN(use_diffuse, "Use Diffuse", true); SOCKET_BOOLEAN(use_glossy, "Use Glossy", true); SOCKET_BOOLEAN(use_transmission, "Use Transmission", true); SOCKET_BOOLEAN(use_scatter, "Use Scatter", true); - SOCKET_INT(samples, "Samples", 1); SOCKET_INT(max_bounces, "Max Bounces", 1024); SOCKET_UINT(random_id, "Random ID", 0); + SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", true); SOCKET_BOOLEAN(is_portal, "Is Portal", false); SOCKET_BOOLEAN(is_enabled, "Is Enabled", true); @@ -166,10 +156,6 @@ void Light::tag_update(Scene *scene) { if (is_modified()) { scene->light_manager->tag_update(scene, LightManager::LIGHT_MODIFIED); - - if (samples_is_modified()) { - scene->integrator->tag_update(scene, Integrator::LIGHT_SAMPLES_MODIFIED); - } } } @@ -193,7 +179,6 @@ LightManager::LightManager() { update_flags = UPDATE_ALL; need_update_background = true; - use_light_visibility = false; last_background_enabled = false; last_background_resolution = 0; } @@ -357,21 +342,23 @@ void LightManager::device_update_distribution(Device *, int object_id = j; int shader_flag = 0; + if (!(object->get_visibility() & PATH_RAY_CAMERA)) { + shader_flag |= SHADER_EXCLUDE_CAMERA; + } if (!(object->get_visibility() & PATH_RAY_DIFFUSE)) { shader_flag |= SHADER_EXCLUDE_DIFFUSE; - use_light_visibility = true; } if (!(object->get_visibility() & PATH_RAY_GLOSSY)) { shader_flag |= SHADER_EXCLUDE_GLOSSY; - use_light_visibility = true; } if (!(object->get_visibility() & PATH_RAY_TRANSMIT)) { 
shader_flag |= SHADER_EXCLUDE_TRANSMIT; - use_light_visibility = true; } if (!(object->get_visibility() & PATH_RAY_VOLUME_SCATTER)) { shader_flag |= SHADER_EXCLUDE_SCATTER; - use_light_visibility = true; + } + if (!(object->get_is_shadow_catcher())) { + shader_flag |= SHADER_EXCLUDE_SHADOW_CATCHER; } size_t mesh_num_triangles = mesh->num_triangles(); @@ -496,10 +483,10 @@ void LightManager::device_update_distribution(Device *, kfilm->pass_shadow_scale = 1.0f; if (kintegrator->pdf_triangles != 0.0f) - kfilm->pass_shadow_scale *= 0.5f; + kfilm->pass_shadow_scale *= 0.5f; if (num_background_lights < num_lights) - kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights) / (float)num_lights; + kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights) / (float)num_lights; /* CDF */ dscene->light_distribution.copy_to_device(); @@ -766,25 +753,26 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc if (!light->cast_shadow) shader_id &= ~SHADER_CAST_SHADOW; + if (!light->use_camera) { + shader_id |= SHADER_EXCLUDE_CAMERA; + } if (!light->use_diffuse) { shader_id |= SHADER_EXCLUDE_DIFFUSE; - use_light_visibility = true; } if (!light->use_glossy) { shader_id |= SHADER_EXCLUDE_GLOSSY; - use_light_visibility = true; } if (!light->use_transmission) { shader_id |= SHADER_EXCLUDE_TRANSMIT; - use_light_visibility = true; } if (!light->use_scatter) { shader_id |= SHADER_EXCLUDE_SCATTER; - use_light_visibility = true; + } + if (!light->is_shadow_catcher) { + shader_id |= SHADER_EXCLUDE_SHADOW_CATCHER; } klights[light_index].type = light->light_type; - klights[light_index].samples = light->samples; klights[light_index].strength[0] = light->strength.x; klights[light_index].strength[1] = light->strength.y; klights[light_index].strength[2] = light->strength.z; @@ -836,19 +824,15 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc if (!(visibility & PATH_RAY_DIFFUSE)) { shader_id |= 
SHADER_EXCLUDE_DIFFUSE; - use_light_visibility = true; } if (!(visibility & PATH_RAY_GLOSSY)) { shader_id |= SHADER_EXCLUDE_GLOSSY; - use_light_visibility = true; } if (!(visibility & PATH_RAY_TRANSMIT)) { shader_id |= SHADER_EXCLUDE_TRANSMIT; - use_light_visibility = true; } if (!(visibility & PATH_RAY_VOLUME_SCATTER)) { shader_id |= SHADER_EXCLUDE_SCATTER; - use_light_visibility = true; } } else if (light->light_type == LIGHT_AREA) { @@ -998,8 +982,6 @@ void LightManager::device_update(Device *device, device_free(device, dscene, need_update_background); - use_light_visibility = false; - device_update_points(device, dscene, scene); if (progress.get_cancel()) return; @@ -1018,8 +1000,6 @@ void LightManager::device_update(Device *device, if (progress.get_cancel()) return; - scene->film->set_use_light_visibility(use_light_visibility); - update_flags = UPDATE_NONE; need_update_background = false; } diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h index fbd709125ff..7f86237c8b3 100644 --- a/intern/cycles/render/light.h +++ b/intern/cycles/render/light.h @@ -69,16 +69,17 @@ class Light : public Node { NODE_SOCKET_API(bool, cast_shadow) NODE_SOCKET_API(bool, use_mis) + NODE_SOCKET_API(bool, use_camera) NODE_SOCKET_API(bool, use_diffuse) NODE_SOCKET_API(bool, use_glossy) NODE_SOCKET_API(bool, use_transmission) NODE_SOCKET_API(bool, use_scatter) + NODE_SOCKET_API(bool, is_shadow_catcher) NODE_SOCKET_API(bool, is_portal) NODE_SOCKET_API(bool, is_enabled) NODE_SOCKET_API(Shader *, shader) - NODE_SOCKET_API(int, samples) NODE_SOCKET_API(int, max_bounces) NODE_SOCKET_API(uint, random_id) @@ -108,8 +109,6 @@ class LightManager { UPDATE_NONE = 0u, }; - bool use_light_visibility; - /* Need to update background (including multiple importance map) */ bool need_update_background; diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index b39d81023d9..c00c4c24211 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ 
b/intern/cycles/render/mesh_displace.cpp @@ -16,6 +16,8 @@ #include "device/device.h" +#include "integrator/shader_eval.h" + #include "render/mesh.h" #include "render/object.h" #include "render/scene.h" @@ -43,40 +45,28 @@ static float3 compute_face_normal(const Mesh::Triangle &t, float3 *verts) return norm / normlen; } -bool GeometryManager::displace( - Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress) +/* Fill in coordinates for mesh displacement shader evaluation on device. */ +static int fill_shader_input(const Scene *scene, + const Mesh *mesh, + const int object_index, + device_vector<KernelShaderEvalInput> &d_input) { - /* verify if we have a displacement shader */ - if (!mesh->has_true_displacement()) { - return false; - } - - string msg = string_printf("Computing Displacement %s", mesh->name.c_str()); - progress.set_status("Updating Mesh", msg); + int d_input_size = 0; + KernelShaderEvalInput *d_input_data = d_input.data(); - /* find object index. 
todo: is arbitrary */ - size_t object_index = OBJECT_NONE; + const array<int> &mesh_shaders = mesh->get_shader(); + const array<Node *> &mesh_used_shaders = mesh->get_used_shaders(); + const array<float3> &mesh_verts = mesh->get_verts(); - for (size_t i = 0; i < scene->objects.size(); i++) { - if (scene->objects[i]->get_geometry() == mesh) { - object_index = i; - break; - } - } - - /* setup input for device task */ - const size_t num_verts = mesh->verts.size(); + const int num_verts = mesh_verts.size(); vector<bool> done(num_verts, false); - device_vector<uint4> d_input(device, "displace_input", MEM_READ_ONLY); - uint4 *d_input_data = d_input.alloc(num_verts); - size_t d_input_size = 0; - size_t num_triangles = mesh->num_triangles(); - for (size_t i = 0; i < num_triangles; i++) { + int num_triangles = mesh->num_triangles(); + for (int i = 0; i < num_triangles; i++) { Mesh::Triangle t = mesh->get_triangle(i); - int shader_index = mesh->shader[i]; - Shader *shader = (shader_index < mesh->used_shaders.size()) ? - static_cast<Shader *>(mesh->used_shaders[shader_index]) : + int shader_index = mesh_shaders[i]; + Shader *shader = (shader_index < mesh_used_shaders.size()) ? 
+ static_cast<Shader *>(mesh_used_shaders[shader_index]) : scene->default_surface; if (!shader->has_displacement || shader->get_displacement_method() == DISPLACE_BUMP) { @@ -110,57 +100,41 @@ bool GeometryManager::displace( } /* back */ - uint4 in = make_uint4(object, prim, __float_as_int(u), __float_as_int(v)); + KernelShaderEvalInput in; + in.object = object; + in.prim = prim; + in.u = u; + in.v = v; d_input_data[d_input_size++] = in; } } - if (d_input_size == 0) - return false; - - /* run device task */ - device_vector<float4> d_output(device, "displace_output", MEM_READ_WRITE); - d_output.alloc(d_input_size); - d_output.zero_to_device(); - d_input.copy_to_device(); - - /* needs to be up to data for attribute access */ - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - - DeviceTask task(DeviceTask::SHADER); - task.shader_input = d_input.device_pointer; - task.shader_output = d_output.device_pointer; - task.shader_eval_type = SHADER_EVAL_DISPLACE; - task.shader_x = 0; - task.shader_w = d_output.size(); - task.num_samples = 1; - task.get_cancel = function_bind(&Progress::get_cancel, &progress); - - device->task_add(task); - device->task_wait(); - - if (progress.get_cancel()) { - d_input.free(); - d_output.free(); - return false; - } + return d_input_size; +} - d_output.copy_from_device(0, 1, d_output.size()); - d_input.free(); +/* Read back mesh displacement shader output. 
*/ +static void read_shader_output(const Scene *scene, + Mesh *mesh, + const device_vector<float4> &d_output) +{ + const array<int> &mesh_shaders = mesh->get_shader(); + const array<Node *> &mesh_used_shaders = mesh->get_used_shaders(); + array<float3> &mesh_verts = mesh->get_verts(); - /* read result */ - done.clear(); - done.resize(num_verts, false); - int k = 0; + const int num_verts = mesh_verts.size(); + const int num_motion_steps = mesh->get_motion_steps(); + vector<bool> done(num_verts, false); - float4 *offset = d_output.data(); + const float4 *d_output_data = d_output.data(); + int d_output_index = 0; Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - for (size_t i = 0; i < num_triangles; i++) { + int num_triangles = mesh->num_triangles(); + for (int i = 0; i < num_triangles; i++) { Mesh::Triangle t = mesh->get_triangle(i); - int shader_index = mesh->shader[i]; - Shader *shader = (shader_index < mesh->used_shaders.size()) ? - static_cast<Shader *>(mesh->used_shaders[shader_index]) : + int shader_index = mesh_shaders[i]; + Shader *shader = (shader_index < mesh_used_shaders.size()) ? + static_cast<Shader *>(mesh_used_shaders[shader_index]) : scene->default_surface; if (!shader->has_displacement || shader->get_displacement_method() == DISPLACE_BUMP) { @@ -170,12 +144,12 @@ bool GeometryManager::displace( for (int j = 0; j < 3; j++) { if (!done[t.v[j]]) { done[t.v[j]] = true; - float3 off = float4_to_float3(offset[k++]); + float3 off = float4_to_float3(d_output_data[d_output_index++]); /* Avoid illegal vertex coordinates. 
*/ off = ensure_finite3(off); - mesh->verts[t.v[j]] += off; + mesh_verts[t.v[j]] += off; if (attr_mP != NULL) { - for (int step = 0; step < mesh->motion_steps - 1; step++) { + for (int step = 0; step < num_motion_steps - 1; step++) { float3 *mP = attr_mP->data_float3() + step * num_verts; mP[t.v[j]] += off; } @@ -183,8 +157,47 @@ bool GeometryManager::displace( } } } +} - d_output.free(); +bool GeometryManager::displace( + Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress) +{ + /* verify if we have a displacement shader */ + if (!mesh->has_true_displacement()) { + return false; + } + + const size_t num_verts = mesh->verts.size(); + const size_t num_triangles = mesh->num_triangles(); + + if (num_triangles == 0) { + return false; + } + + string msg = string_printf("Computing Displacement %s", mesh->name.c_str()); + progress.set_status("Updating Mesh", msg); + + /* find object index. todo: is arbitrary */ + size_t object_index = OBJECT_NONE; + + for (size_t i = 0; i < scene->objects.size(); i++) { + if (scene->objects[i]->get_geometry() == mesh) { + object_index = i; + break; + } + } + + /* Needs to be up to data for attribute access. */ + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + + /* Evaluate shader on device. 
*/ + ShaderEval shader_eval(device, progress); + if (!shader_eval.eval(SHADER_EVAL_DISPLACE, + num_verts, + function_bind(&fill_shader_input, scene, mesh, object_index, _1), + function_bind(&read_shader_output, scene, mesh, _1))) { + return false; + } /* stitch */ unordered_set<int> stitch_keys; @@ -297,8 +310,7 @@ bool GeometryManager::displace( } /* normalize vertex normals */ - done.clear(); - done.resize(num_verts, false); + vector<bool> done(num_verts, false); for (size_t i = 0; i < num_triangles; i++) { if (tri_has_true_disp[i]) { @@ -368,8 +380,7 @@ bool GeometryManager::displace( } /* normalize vertex normals */ - done.clear(); - done.resize(num_verts, false); + vector<bool> done(num_verts, false); for (size_t i = 0; i < num_triangles; i++) { if (tri_has_true_disp[i]) { diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 795166bcf4c..5303d55242e 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -2736,18 +2736,21 @@ NODE_DEFINE(PrincipledBsdfNode) distribution, "Distribution", distribution_enum, CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); static NodeEnum subsurface_method_enum; - subsurface_method_enum.insert("burley", CLOSURE_BSSRDF_PRINCIPLED_ID); - subsurface_method_enum.insert("random_walk", CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID); + subsurface_method_enum.insert("random_walk_fixed_radius", + CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + subsurface_method_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); SOCKET_ENUM(subsurface_method, "Subsurface Method", subsurface_method_enum, - CLOSURE_BSSRDF_PRINCIPLED_ID); + CLOSURE_BSSRDF_RANDOM_WALK_ID); SOCKET_IN_COLOR(base_color, "Base Color", make_float3(0.8f, 0.8f, 0.8f)); SOCKET_IN_COLOR(subsurface_color, "Subsurface Color", make_float3(0.8f, 0.8f, 0.8f)); SOCKET_IN_FLOAT(metallic, "Metallic", 0.0f); SOCKET_IN_FLOAT(subsurface, "Subsurface", 0.0f); SOCKET_IN_VECTOR(subsurface_radius, "Subsurface Radius", make_float3(0.1f, 0.1f, 
0.1f)); + SOCKET_IN_FLOAT(subsurface_ior, "Subsurface IOR", 1.4f); + SOCKET_IN_FLOAT(subsurface_anisotropy, "Subsurface Anisotropy", 0.0f); SOCKET_IN_FLOAT(specular, "Specular", 0.0f); SOCKET_IN_FLOAT(roughness, "Roughness", 0.5f); SOCKET_IN_FLOAT(specular_tint, "Specular Tint", 0.0f); @@ -2857,6 +2860,8 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler, ShaderInput *p_metallic, ShaderInput *p_subsurface, ShaderInput *p_subsurface_radius, + ShaderInput *p_subsurface_ior, + ShaderInput *p_subsurface_anisotropy, ShaderInput *p_specular, ShaderInput *p_roughness, ShaderInput *p_specular_tint, @@ -2896,6 +2901,8 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler, int transmission_roughness_offset = compiler.stack_assign(p_transmission_roughness); int anisotropic_rotation_offset = compiler.stack_assign(p_anisotropic_rotation); int subsurface_radius_offset = compiler.stack_assign(p_subsurface_radius); + int subsurface_ior_offset = compiler.stack_assign(p_subsurface_ior); + int subsurface_anisotropy_offset = compiler.stack_assign(p_subsurface_anisotropy); compiler.add_node(NODE_CLOSURE_BSDF, compiler.encode_uchar4(closure, @@ -2929,8 +2936,10 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler, __float_as_int(bc_default.y), __float_as_int(bc_default.z)); - compiler.add_node( - clearcoat_normal_offset, subsurface_radius_offset, SVM_STACK_INVALID, SVM_STACK_INVALID); + compiler.add_node(clearcoat_normal_offset, + subsurface_radius_offset, + subsurface_ior_offset, + subsurface_anisotropy_offset); float3 ss_default = get_float3(subsurface_color_in->socket_type); @@ -2953,6 +2962,8 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler) input("Metallic"), input("Subsurface"), input("Subsurface Radius"), + input("Subsurface IOR"), + input("Subsurface Anisotropy"), input("Specular"), input("Roughness"), input("Specular Tint"), @@ -3048,16 +3059,16 @@ NODE_DEFINE(SubsurfaceScatteringNode) SOCKET_IN_NORMAL(normal, "Normal", zero_float3(), 
SocketType::LINK_NORMAL); SOCKET_IN_FLOAT(surface_mix_weight, "SurfaceMixWeight", 0.0f, SocketType::SVM_INTERNAL); - static NodeEnum falloff_enum; - falloff_enum.insert("cubic", CLOSURE_BSSRDF_CUBIC_ID); - falloff_enum.insert("gaussian", CLOSURE_BSSRDF_GAUSSIAN_ID); - falloff_enum.insert("burley", CLOSURE_BSSRDF_BURLEY_ID); - falloff_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); - SOCKET_ENUM(falloff, "Falloff", falloff_enum, CLOSURE_BSSRDF_BURLEY_ID); + static NodeEnum method_enum; + method_enum.insert("random_walk_fixed_radius", CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + method_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); + SOCKET_ENUM(method, "Method", method_enum, CLOSURE_BSSRDF_RANDOM_WALK_ID); + SOCKET_IN_FLOAT(scale, "Scale", 0.01f); SOCKET_IN_VECTOR(radius, "Radius", make_float3(0.1f, 0.1f, 0.1f)); - SOCKET_IN_FLOAT(sharpness, "Sharpness", 0.0f); - SOCKET_IN_FLOAT(texture_blur, "Texture Blur", 1.0f); + + SOCKET_IN_FLOAT(subsurface_ior, "IOR", 1.4f); + SOCKET_IN_FLOAT(subsurface_anisotropy, "Anisotropy", 0.0f); SOCKET_OUT_CLOSURE(BSSRDF, "BSSRDF"); @@ -3066,20 +3077,19 @@ NODE_DEFINE(SubsurfaceScatteringNode) SubsurfaceScatteringNode::SubsurfaceScatteringNode() : BsdfNode(get_node_type()) { - closure = falloff; + closure = method; } void SubsurfaceScatteringNode::compile(SVMCompiler &compiler) { - closure = falloff; - BsdfNode::compile( - compiler, input("Scale"), input("Texture Blur"), input("Radius"), input("Sharpness")); + closure = method; + BsdfNode::compile(compiler, input("Scale"), input("IOR"), input("Radius"), input("Anisotropy")); } void SubsurfaceScatteringNode::compile(OSLCompiler &compiler) { - closure = falloff; - compiler.parameter(this, "falloff"); + closure = method; + compiler.parameter(this, "method"); compiler.add(this, "node_subsurface_scattering"); } @@ -3786,20 +3796,6 @@ void GeometryNode::compile(OSLCompiler &compiler) compiler.add(this, "node_geometry"); } -int GeometryNode::get_group() -{ - 
ShaderOutput *out; - int result = ShaderNode::get_group(); - - /* Backfacing uses NODE_LIGHT_PATH */ - out = output("Backfacing"); - if (!out->links.empty()) { - result = max(result, NODE_GROUP_LEVEL_1); - } - - return result; -} - /* TextureCoordinate */ NODE_DEFINE(TextureCoordinateNode) @@ -5926,33 +5922,33 @@ NODE_DEFINE(OutputAOVNode) OutputAOVNode::OutputAOVNode() : ShaderNode(get_node_type()) { special_type = SHADER_SPECIAL_TYPE_OUTPUT_AOV; - slot = -1; + offset = -1; } void OutputAOVNode::simplify_settings(Scene *scene) { - slot = scene->film->get_aov_offset(scene, name.string(), is_color); - if (slot == -1) { - slot = scene->film->get_aov_offset(scene, name.string(), is_color); + offset = scene->film->get_aov_offset(scene, name.string(), is_color); + if (offset == -1) { + offset = scene->film->get_aov_offset(scene, name.string(), is_color); } - if (slot == -1 || is_color) { + if (offset == -1 || is_color) { input("Value")->disconnect(); } - if (slot == -1 || !is_color) { + if (offset == -1 || !is_color) { input("Color")->disconnect(); } } void OutputAOVNode::compile(SVMCompiler &compiler) { - assert(slot >= 0); + assert(offset >= 0); if (is_color) { - compiler.add_node(NODE_AOV_COLOR, compiler.stack_assign(input("Color")), slot); + compiler.add_node(NODE_AOV_COLOR, compiler.stack_assign(input("Color")), offset); } else { - compiler.add_node(NODE_AOV_VALUE, compiler.stack_assign(input("Value")), slot); + compiler.add_node(NODE_AOV_VALUE, compiler.stack_assign(input("Value")), offset); } } diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 3013e9b1866..22bdb06b059 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -143,10 +143,6 @@ class EnvironmentTextureNode : public ImageSlotTextureNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } virtual bool equals(const ShaderNode &other) { @@ -170,11 +166,6 @@ class SkyTextureNode : public TextureNode { public: 
SHADER_NODE_CLASS(SkyTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(NodeSkyType, sky_type) NODE_SOCKET_API(float3, sun_direction) NODE_SOCKET_API(float, turbidity) @@ -224,18 +215,13 @@ class OutputAOVNode : public ShaderNode { NODE_SOCKET_API(ustring, name) - virtual int get_group() - { - return NODE_GROUP_LEVEL_4; - } - /* Don't allow output node de-duplication. */ virtual bool equals(const ShaderNode & /*other*/) { return false; } - int slot; + int offset; bool is_color; }; @@ -243,11 +229,6 @@ class GradientTextureNode : public TextureNode { public: SHADER_NODE_CLASS(GradientTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(NodeGradientType, gradient_type) NODE_SOCKET_API(float3, vector) }; @@ -269,19 +250,14 @@ class VoronoiTextureNode : public TextureNode { public: SHADER_NODE_CLASS(VoronoiTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - virtual int get_feature() { int result = ShaderNode::get_feature(); if (dimensions == 4) { - result |= NODE_FEATURE_VORONOI_EXTRA; + result |= KERNEL_FEATURE_NODE_VORONOI_EXTRA; } else if (dimensions >= 2 && feature == NODE_VORONOI_SMOOTH_F1) { - result |= NODE_FEATURE_VORONOI_EXTRA; + result |= KERNEL_FEATURE_NODE_VORONOI_EXTRA; } return result; } @@ -301,11 +277,6 @@ class MusgraveTextureNode : public TextureNode { public: SHADER_NODE_CLASS(MusgraveTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(int, dimensions) NODE_SOCKET_API(NodeMusgraveType, musgrave_type) NODE_SOCKET_API(float, w) @@ -322,11 +293,6 @@ class WaveTextureNode : public TextureNode { public: SHADER_NODE_CLASS(WaveTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(NodeWaveType, wave_type) NODE_SOCKET_API(NodeWaveBandsDirection, bands_direction) NODE_SOCKET_API(NodeWaveRingsDirection, rings_direction) @@ -345,11 +311,6 @@ class MagicTextureNode : public 
TextureNode { public: SHADER_NODE_CLASS(MagicTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(int, depth) NODE_SOCKET_API(float3, vector) NODE_SOCKET_API(float, scale) @@ -364,11 +325,6 @@ class CheckerTextureNode : public TextureNode { NODE_SOCKET_API(float3, color1) NODE_SOCKET_API(float3, color2) NODE_SOCKET_API(float, scale) - - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } }; class BrickTextureNode : public TextureNode { @@ -390,20 +346,11 @@ class BrickTextureNode : public TextureNode { NODE_SOCKET_API(float, brick_width) NODE_SOCKET_API(float, row_height) NODE_SOCKET_API(float3, vector) - - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } }; class PointDensityTextureNode : public ShaderNode { public: SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_4; - } ~PointDensityTextureNode(); ShaderNode *clone(ShaderGraph *graph) const; @@ -443,10 +390,6 @@ class IESLightNode : public TextureNode { ~IESLightNode(); ShaderNode *clone(ShaderGraph *graph) const; - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(ustring, filename) NODE_SOCKET_API(ustring, ies) @@ -464,10 +407,6 @@ class IESLightNode : public TextureNode { class WhiteNoiseTextureNode : public ShaderNode { public: SHADER_NODE_CLASS(WhiteNoiseTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(int, dimensions) NODE_SOCKET_API(float3, vector) @@ -477,10 +416,6 @@ class WhiteNoiseTextureNode : public ShaderNode { class MappingNode : public ShaderNode { public: SHADER_NODE_CLASS(MappingNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } void constant_fold(const ConstantFolder &folder); NODE_SOCKET_API(float3, vector) @@ -546,6 +481,11 @@ class BsdfBaseNode : public ShaderNode { return false; } + virtual int get_feature() + { + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_BSDF; + } + 
protected: ClosureType closure; }; @@ -606,6 +546,8 @@ class PrincipledBsdfNode : public BsdfBaseNode { ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius, + ShaderInput *subsurface_ior, + ShaderInput *subsurface_anisotropy, ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, @@ -622,6 +564,8 @@ class PrincipledBsdfNode : public BsdfBaseNode { NODE_SOCKET_API(float3, base_color) NODE_SOCKET_API(float3, subsurface_color) NODE_SOCKET_API(float3, subsurface_radius) + NODE_SOCKET_API(float, subsurface_ior) + NODE_SOCKET_API(float, subsurface_anisotropy) NODE_SOCKET_API(float, metallic) NODE_SOCKET_API(float, subsurface) NODE_SOCKET_API(float, specular) @@ -758,14 +702,14 @@ class SubsurfaceScatteringNode : public BsdfNode { bool has_bssrdf_bump(); ClosureType get_closure_type() { - return falloff; + return method; } NODE_SOCKET_API(float, scale) NODE_SOCKET_API(float3, radius) - NODE_SOCKET_API(float, sharpness) - NODE_SOCKET_API(float, texture_blur) - NODE_SOCKET_API(ClosureType, falloff) + NODE_SOCKET_API(float, subsurface_ior) + NODE_SOCKET_API(float, subsurface_anisotropy) + NODE_SOCKET_API(ClosureType, method) }; class EmissionNode : public ShaderNode { @@ -782,6 +726,11 @@ class EmissionNode : public ShaderNode { return true; } + virtual int get_feature() + { + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_EMISSION; + } + NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, strength) NODE_SOCKET_API(float, surface_mix_weight) @@ -792,6 +741,11 @@ class BackgroundNode : public ShaderNode { SHADER_NODE_CLASS(BackgroundNode) void constant_fold(const ConstantFolder &folder); + virtual int get_feature() + { + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_EMISSION; + } + NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, strength) NODE_SOCKET_API(float, surface_mix_weight) @@ -800,10 +754,6 @@ class BackgroundNode : public ShaderNode { class HoldoutNode : public ShaderNode { public: 
SHADER_NODE_CLASS(HoldoutNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } virtual ClosureType get_closure_type() { return CLOSURE_HOLDOUT_ID; @@ -821,13 +771,9 @@ class AmbientOcclusionNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - virtual bool has_raytrace() + virtual int get_feature() { - return true; + return KERNEL_FEATURE_NODE_RAYTRACE; } NODE_SOCKET_API(float3, color) @@ -845,13 +791,9 @@ class VolumeNode : public ShaderNode { SHADER_NODE_BASE_CLASS(VolumeNode) void compile(SVMCompiler &compiler, ShaderInput *param1, ShaderInput *param2); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } virtual int get_feature() { - return ShaderNode::get_feature() | NODE_FEATURE_VOLUME; + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_VOLUME; } virtual ClosureType get_closure_type() { @@ -1013,10 +955,6 @@ class UVMapNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(ustring, attribute) NODE_SOCKET_API(bool, from_dupli) @@ -1025,10 +963,6 @@ class UVMapNode : public ShaderNode { class LightPathNode : public ShaderNode { public: SHADER_NODE_CLASS(LightPathNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } }; class LightFalloffNode : public ShaderNode { @@ -1038,10 +972,6 @@ class LightFalloffNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(float, strength) NODE_SOCKET_API(float, smooth) @@ -1050,10 +980,6 @@ class LightFalloffNode : public ShaderNode { class ObjectInfoNode : public ShaderNode { public: SHADER_NODE_CLASS(ObjectInfoNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } }; class ParticleInfoNode : public ShaderNode { @@ -1064,10 +990,6 @@ class ParticleInfoNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } }; class HairInfoNode : 
public ShaderNode { @@ -1083,13 +1005,9 @@ class HairInfoNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } virtual int get_feature() { - return ShaderNode::get_feature() | NODE_FEATURE_HAIR; + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_HAIR; } }; @@ -1168,10 +1086,6 @@ class InvertNode : public ShaderNode { public: SHADER_NODE_CLASS(InvertNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, fac) NODE_SOCKET_API(float3, color) @@ -1182,11 +1096,6 @@ class MixNode : public ShaderNode { SHADER_NODE_CLASS(MixNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - NODE_SOCKET_API(NodeMix, mix_type) NODE_SOCKET_API(bool, use_clamp) NODE_SOCKET_API(float3, color1) @@ -1198,10 +1107,6 @@ class CombineRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineRGBNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, r) NODE_SOCKET_API(float, g) @@ -1212,10 +1117,6 @@ class CombineHSVNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineHSVNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, h) NODE_SOCKET_API(float, s) @@ -1226,10 +1127,6 @@ class CombineXYZNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineXYZNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, x) NODE_SOCKET_API(float, y) @@ -1240,10 +1137,6 @@ class GammaNode : public ShaderNode { public: SHADER_NODE_CLASS(GammaNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, gamma) @@ -1253,10 +1146,6 
@@ class BrightContrastNode : public ShaderNode { public: SHADER_NODE_CLASS(BrightContrastNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, bright) @@ -1267,10 +1156,6 @@ class SeparateRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateRGBNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float3, color) }; @@ -1279,10 +1164,6 @@ class SeparateHSVNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateHSVNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float3, color) }; @@ -1291,10 +1172,6 @@ class SeparateXYZNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateXYZNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float3, vector) }; @@ -1333,10 +1210,6 @@ class CameraNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } }; class FresnelNode : public ShaderNode { @@ -1346,10 +1219,6 @@ class FresnelNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, normal) NODE_SOCKET_API(float, IOR) @@ -1362,10 +1231,6 @@ class LayerWeightNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, normal) NODE_SOCKET_API(float, blend) @@ -1378,10 +1243,6 @@ class WireframeNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, size) NODE_SOCKET_API(bool, use_pixel_size) @@ -1390,10 +1251,6 @@ class WireframeNode : public ShaderNode { class WavelengthNode : public ShaderNode { public: 
SHADER_NODE_CLASS(WavelengthNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, wavelength) }; @@ -1402,10 +1259,6 @@ class BlackbodyNode : public ShaderNode { public: SHADER_NODE_CLASS(BlackbodyNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, temperature) }; @@ -1413,10 +1266,6 @@ class BlackbodyNode : public ShaderNode { class MapRangeNode : public ShaderNode { public: SHADER_NODE_CLASS(MapRangeNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } void expand(ShaderGraph *graph); NODE_SOCKET_API(float, value) @@ -1433,10 +1282,6 @@ class ClampNode : public ShaderNode { public: SHADER_NODE_CLASS(ClampNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, value) NODE_SOCKET_API(float, min) NODE_SOCKET_API(float, max) @@ -1446,10 +1291,6 @@ class ClampNode : public ShaderNode { class MathNode : public ShaderNode { public: SHADER_NODE_CLASS(MathNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } void expand(ShaderGraph *graph); void constant_fold(const ConstantFolder &folder); @@ -1463,10 +1304,6 @@ class MathNode : public ShaderNode { class NormalNode : public ShaderNode { public: SHADER_NODE_CLASS(NormalNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(float3, direction) NODE_SOCKET_API(float3, normal) @@ -1475,10 +1312,6 @@ class NormalNode : public ShaderNode { class VectorMathNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorMathNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } void constant_fold(const ConstantFolder &folder); NODE_SOCKET_API(float3, vector1) @@ -1492,10 +1325,6 @@ class VectorRotateNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorRotateNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } 
NODE_SOCKET_API(NodeVectorRotateType, rotate_type) NODE_SOCKET_API(bool, invert) NODE_SOCKET_API(float3, vector) @@ -1509,11 +1338,6 @@ class VectorTransformNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorTransformNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - NODE_SOCKET_API(NodeVectorTransformType, transform_type) NODE_SOCKET_API(NodeVectorTransformConvertSpace, convert_from) NODE_SOCKET_API(NodeVectorTransformConvertSpace, convert_to) @@ -1530,7 +1354,7 @@ class BumpNode : public ShaderNode { } virtual int get_feature() { - return NODE_FEATURE_BUMP; + return KERNEL_FEATURE_NODE_BUMP; } NODE_SOCKET_API(bool, invert) @@ -1549,11 +1373,6 @@ class CurvesNode : public ShaderNode { explicit CurvesNode(const NodeType *node_type); SHADER_NODE_BASE_CLASS(CurvesNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - NODE_SOCKET_API_ARRAY(array<float3>, curves) NODE_SOCKET_API(float, min_x) NODE_SOCKET_API(float, max_x) @@ -1583,10 +1402,6 @@ class RGBRampNode : public ShaderNode { public: SHADER_NODE_CLASS(RGBRampNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API_ARRAY(array<float3>, ramp) NODE_SOCKET_API_ARRAY(array<float>, ramp_alpha) @@ -1656,10 +1471,6 @@ class NormalMapNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(NodeNormalMapSpace, space) NODE_SOCKET_API(ustring, attribute) @@ -1680,10 +1491,6 @@ class TangentNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(NodeTangentDirectionType, direction_type) NODE_SOCKET_API(NodeTangentAxis, axis) @@ -1698,13 +1505,9 @@ class BevelNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - virtual bool has_raytrace() + virtual int get_feature() { - return true; + return 
KERNEL_FEATURE_NODE_RAYTRACE; } NODE_SOCKET_API(float, radius) @@ -1718,7 +1521,7 @@ class DisplacementNode : public ShaderNode { void constant_fold(const ConstantFolder &folder); virtual int get_feature() { - return NODE_FEATURE_BUMP; + return KERNEL_FEATURE_NODE_BUMP; } NODE_SOCKET_API(NodeNormalMapSpace, space) @@ -1739,7 +1542,7 @@ class VectorDisplacementNode : public ShaderNode { void constant_fold(const ConstantFolder &folder); virtual int get_feature() { - return NODE_FEATURE_BUMP; + return KERNEL_FEATURE_NODE_BUMP; } NODE_SOCKET_API(NodeNormalMapSpace, space) diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index c88d94fe4c2..4637f8fe989 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -216,6 +216,10 @@ void Object::tag_update(Scene *scene) if (use_holdout_is_modified()) { flag |= ObjectManager::HOLDOUT_MODIFIED; } + + if (is_shadow_catcher_is_modified()) { + scene->tag_shadow_catcher_modified(); + } } if (geometry) { @@ -273,14 +277,7 @@ bool Object::is_traceable() const uint Object::visibility_for_tracing() const { - uint trace_visibility = visibility; - if (is_shadow_catcher) { - trace_visibility &= ~PATH_RAY_SHADOW_NON_CATCHER; - } - else { - trace_visibility &= ~PATH_RAY_SHADOW_CATCHER; - } - return trace_visibility; + return SHADOW_CATCHER_OBJECT_VISIBILITY(is_shadow_catcher, visibility & PATH_RAY_ALL_VISIBILITY); } float Object::compute_volume_step_size() const @@ -680,7 +677,7 @@ void ObjectManager::device_update(Device *device, /* prepare for static BVH building */ /* todo: do before to support getting object level coords? 
*/ - if (scene->params.bvh_type == SceneParams::BVH_STATIC) { + if (scene->params.bvh_type == BVH_TYPE_STATIC) { scoped_callback_timer timer([scene](double time) { if (scene->update_stats) { scene->update_stats->object.times.add_entry( @@ -932,6 +929,11 @@ void ObjectManager::tag_update(Scene *scene, uint32_t flag) } scene->light_manager->tag_update(scene, LightManager::OBJECT_MANAGER); + + /* Integrator's shadow catcher settings depends on object visibility settings. */ + if (flag & (OBJECT_ADDED | OBJECT_REMOVED | OBJECT_MODIFIED)) { + scene->integrator->tag_update(scene, Integrator::OBJECT_MANAGER); + } } bool ObjectManager::need_update() const diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 7dc79f48145..d28b222c10e 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -113,7 +113,7 @@ void OSLShaderManager::device_update_specific(Device *device, scene->image_manager->set_osl_texture_system((void *)ts); /* create shaders */ - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); Shader *background_shader = scene->background->get_shader(scene); foreach (Shader *shader, scene->shaders) { @@ -174,7 +174,7 @@ void OSLShaderManager::device_update_specific(Device *device, void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene) { - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); device_free_common(device, dscene, scene); @@ -257,25 +257,36 @@ void OSLShaderManager::shading_system_init() /* our own ray types */ static const char *raytypes[] = { - "camera", /* PATH_RAY_CAMERA */ - "reflection", /* PATH_RAY_REFLECT */ - "refraction", /* PATH_RAY_TRANSMIT */ - "diffuse", /* PATH_RAY_DIFFUSE */ - "glossy", /* PATH_RAY_GLOSSY */ - "singular", /* PATH_RAY_SINGULAR */ - "transparent", /* PATH_RAY_TRANSPARENT */ - - "shadow", /* PATH_RAY_SHADOW_OPAQUE_NON_CATCHER 
*/ - "shadow", /* PATH_RAY_SHADOW_OPAQUE_CATCHER */ - "shadow", /* PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER */ - "shadow", /* PATH_RAY_SHADOW_TRANSPARENT_CATCHER */ - - "__unused__", "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */ - "__unused__", - - "__unused__", "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */ - "__unused__", "__unused__", "__unused__", "__unused__", - "__unused__", "__unused__", "__unused__", + "camera", /* PATH_RAY_CAMERA */ + "reflection", /* PATH_RAY_REFLECT */ + "refraction", /* PATH_RAY_TRANSMIT */ + "diffuse", /* PATH_RAY_DIFFUSE */ + "glossy", /* PATH_RAY_GLOSSY */ + "singular", /* PATH_RAY_SINGULAR */ + "transparent", /* PATH_RAY_TRANSPARENT */ + "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */ + + "shadow", /* PATH_RAY_SHADOW_OPAQUE */ + "shadow", /* PATH_RAY_SHADOW_TRANSPARENT */ + + "__unused__", /* PATH_RAY_NODE_UNALIGNED */ + "__unused__", /* PATH_RAY_MIS_SKIP */ + + "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */ + + "__unused__", /* PATH_RAY_SINGLE_PASS_DONE */ + "__unused__", /* PATH_RAY_TRANSPARENT_BACKGROUND */ + "__unused__", /* PATH_RAY_TERMINATE_IMMEDIATE */ + "__unused__", /* PATH_RAY_TERMINATE_AFTER_TRANSPARENT */ + "__unused__", /* PATH_RAY_EMISSION */ + "__unused__", /* PATH_RAY_SUBSURFACE */ + "__unused__", /* PATH_RAY_DENOISING_FEATURES */ + "__unused__", /* PATH_RAY_REFLECT_PASS */ + "__unused__", /* PATH_RAY_TRANSMISSION_PASS */ + "__unused__", /* PATH_RAY_VOLUME_PASS */ + "__unused__", /* PATH_RAY_SHADOW_FOR_LIGHT */ + "__unused__", /* PATH_RAY_SHADOW_CATCHER_HIT */ + "__unused__", /* PATH_RAY_SHADOW_CATCHER_PASS */ }; const int nraytypes = sizeof(raytypes) / sizeof(raytypes[0]); @@ -758,7 +769,8 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) current_shader->has_surface_bssrdf = true; current_shader->has_bssrdf_bump = true; /* can't detect yet */ } - current_shader->has_bump = true; /* can't detect yet */ + current_shader->has_bump = true; /* can't detect yet */ + 
current_shader->has_surface_raytrace = true; /* can't detect yet */ } if (node->has_spatial_varying()) { @@ -1054,6 +1066,8 @@ void OSLCompiler::generate_nodes(const ShaderNodeSet &nodes) current_shader->has_surface_emission = true; if (node->has_surface_transparent()) current_shader->has_surface_transparent = true; + if (node->get_feature() & KERNEL_FEATURE_NODE_RAYTRACE) + current_shader->has_surface_raytrace = true; if (node->has_spatial_varying()) current_shader->has_surface_spatial_varying = true; if (node->has_surface_bssrdf()) { diff --git a/intern/cycles/render/pass.cpp b/intern/cycles/render/pass.cpp new file mode 100644 index 00000000000..27ad7c0db97 --- /dev/null +++ b/intern/cycles/render/pass.cpp @@ -0,0 +1,427 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "render/pass.h" + +#include "util/util_algorithm.h" +#include "util/util_logging.h" + +CCL_NAMESPACE_BEGIN + +const char *pass_type_as_string(const PassType type) +{ + const int type_int = static_cast<int>(type); + + const NodeEnum *type_enum = Pass::get_type_enum(); + + if (!type_enum->exists(type_int)) { + LOG(DFATAL) << "Unhandled pass type " << static_cast<int>(type) << ", not supposed to happen."; + return "UNKNOWN"; + } + + return (*type_enum)[type_int].c_str(); +} + +const char *pass_mode_as_string(PassMode mode) +{ + switch (mode) { + case PassMode::NOISY: + return "NOISY"; + case PassMode::DENOISED: + return "DENOISED"; + } + + LOG(DFATAL) << "Unhandled pass mode " << static_cast<int>(mode) << ", should never happen."; + return "UNKNOWN"; +} + +std::ostream &operator<<(std::ostream &os, PassMode mode) +{ + os << pass_mode_as_string(mode); + return os; +} + +const NodeEnum *Pass::get_type_enum() +{ + static NodeEnum pass_type_enum; + + if (pass_type_enum.empty()) { + + /* Light Passes. 
*/ + pass_type_enum.insert("combined", PASS_COMBINED); + pass_type_enum.insert("emission", PASS_EMISSION); + pass_type_enum.insert("background", PASS_BACKGROUND); + pass_type_enum.insert("ao", PASS_AO); + pass_type_enum.insert("shadow", PASS_SHADOW); + pass_type_enum.insert("diffuse", PASS_DIFFUSE); + pass_type_enum.insert("diffuse_direct", PASS_DIFFUSE_DIRECT); + pass_type_enum.insert("diffuse_indirect", PASS_DIFFUSE_INDIRECT); + pass_type_enum.insert("glossy", PASS_GLOSSY); + pass_type_enum.insert("glossy_direct", PASS_GLOSSY_DIRECT); + pass_type_enum.insert("glossy_indirect", PASS_GLOSSY_INDIRECT); + pass_type_enum.insert("transmission", PASS_TRANSMISSION); + pass_type_enum.insert("transmission_direct", PASS_TRANSMISSION_DIRECT); + pass_type_enum.insert("transmission_indirect", PASS_TRANSMISSION_INDIRECT); + pass_type_enum.insert("volume", PASS_VOLUME); + pass_type_enum.insert("volume_direct", PASS_VOLUME_DIRECT); + pass_type_enum.insert("volume_indirect", PASS_VOLUME_INDIRECT); + + /* Data passes. 
*/ + pass_type_enum.insert("depth", PASS_DEPTH); + pass_type_enum.insert("position", PASS_POSITION); + pass_type_enum.insert("normal", PASS_NORMAL); + pass_type_enum.insert("roughness", PASS_ROUGHNESS); + pass_type_enum.insert("uv", PASS_UV); + pass_type_enum.insert("object_id", PASS_OBJECT_ID); + pass_type_enum.insert("material_id", PASS_MATERIAL_ID); + pass_type_enum.insert("motion", PASS_MOTION); + pass_type_enum.insert("motion_weight", PASS_MOTION_WEIGHT); + pass_type_enum.insert("render_time", PASS_RENDER_TIME); + pass_type_enum.insert("cryptomatte", PASS_CRYPTOMATTE); + pass_type_enum.insert("aov_color", PASS_AOV_COLOR); + pass_type_enum.insert("aov_value", PASS_AOV_VALUE); + pass_type_enum.insert("adaptive_aux_buffer", PASS_ADAPTIVE_AUX_BUFFER); + pass_type_enum.insert("sample_count", PASS_SAMPLE_COUNT); + pass_type_enum.insert("diffuse_color", PASS_DIFFUSE_COLOR); + pass_type_enum.insert("glossy_color", PASS_GLOSSY_COLOR); + pass_type_enum.insert("transmission_color", PASS_TRANSMISSION_COLOR); + pass_type_enum.insert("mist", PASS_MIST); + pass_type_enum.insert("denoising_normal", PASS_DENOISING_NORMAL); + pass_type_enum.insert("denoising_albedo", PASS_DENOISING_ALBEDO); + + pass_type_enum.insert("shadow_catcher", PASS_SHADOW_CATCHER); + pass_type_enum.insert("shadow_catcher_sample_count", PASS_SHADOW_CATCHER_SAMPLE_COUNT); + pass_type_enum.insert("shadow_catcher_matte", PASS_SHADOW_CATCHER_MATTE); + + pass_type_enum.insert("bake_primitive", PASS_BAKE_PRIMITIVE); + pass_type_enum.insert("bake_differential", PASS_BAKE_DIFFERENTIAL); + } + + return &pass_type_enum; +} + +const NodeEnum *Pass::get_mode_enum() +{ + static NodeEnum pass_mode_enum; + + if (pass_mode_enum.empty()) { + pass_mode_enum.insert("noisy", static_cast<int>(PassMode::NOISY)); + pass_mode_enum.insert("denoised", static_cast<int>(PassMode::DENOISED)); + } + + return &pass_mode_enum; +} + +NODE_DEFINE(Pass) +{ + NodeType *type = NodeType::add("pass", create); + + const NodeEnum *pass_type_enum 
= get_type_enum(); + const NodeEnum *pass_mode_enum = get_mode_enum(); + + SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); + SOCKET_ENUM(mode, "Mode", *pass_mode_enum, static_cast<int>(PassMode::DENOISED)); + SOCKET_STRING(name, "Name", ustring()); + SOCKET_BOOLEAN(include_albedo, "Include Albedo", false); + + return type; +} + +Pass::Pass() : Node(get_node_type()), is_auto_(false) +{ +} + +PassInfo Pass::get_info() const +{ + return get_info(type, include_albedo); +} + +bool Pass::is_written() const +{ + return get_info().is_written; +} + +PassInfo Pass::get_info(const PassType type, const bool include_albedo) +{ + PassInfo pass_info; + + pass_info.use_filter = true; + pass_info.use_exposure = false; + pass_info.divide_type = PASS_NONE; + pass_info.use_compositing = false; + pass_info.use_denoising_albedo = true; + + switch (type) { + case PASS_NONE: + pass_info.num_components = 0; + break; + case PASS_COMBINED: + pass_info.num_components = 4; + pass_info.use_exposure = true; + pass_info.support_denoise = true; + break; + case PASS_DEPTH: + pass_info.num_components = 1; + pass_info.use_filter = false; + break; + case PASS_MIST: + pass_info.num_components = 1; + break; + case PASS_POSITION: + pass_info.num_components = 3; + break; + case PASS_NORMAL: + pass_info.num_components = 3; + break; + case PASS_ROUGHNESS: + pass_info.num_components = 1; + break; + case PASS_UV: + pass_info.num_components = 3; + break; + case PASS_MOTION: + pass_info.num_components = 4; + pass_info.divide_type = PASS_MOTION_WEIGHT; + break; + case PASS_MOTION_WEIGHT: + pass_info.num_components = 1; + break; + case PASS_OBJECT_ID: + case PASS_MATERIAL_ID: + pass_info.num_components = 1; + pass_info.use_filter = false; + break; + + case PASS_EMISSION: + case PASS_BACKGROUND: + pass_info.num_components = 3; + pass_info.use_exposure = true; + break; + case PASS_AO: + pass_info.num_components = 3; + break; + case PASS_SHADOW: + pass_info.num_components = 3; + pass_info.use_exposure = 
false; + break; + case PASS_RENDER_TIME: + /* This pass is handled entirely on the host side. */ + pass_info.num_components = 0; + break; + + case PASS_DIFFUSE_COLOR: + case PASS_GLOSSY_COLOR: + case PASS_TRANSMISSION_COLOR: + pass_info.num_components = 3; + break; + case PASS_DIFFUSE: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_DIFFUSE_DIRECT; + pass_info.indirect_type = PASS_DIFFUSE_INDIRECT; + pass_info.divide_type = (!include_albedo) ? PASS_DIFFUSE_COLOR : PASS_NONE; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_DIFFUSE_DIRECT: + case PASS_DIFFUSE_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.divide_type = (!include_albedo) ? PASS_DIFFUSE_COLOR : PASS_NONE; + pass_info.use_compositing = true; + break; + case PASS_GLOSSY: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_GLOSSY_DIRECT; + pass_info.indirect_type = PASS_GLOSSY_INDIRECT; + pass_info.divide_type = (!include_albedo) ? PASS_GLOSSY_COLOR : PASS_NONE; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_GLOSSY_DIRECT: + case PASS_GLOSSY_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.divide_type = (!include_albedo) ? PASS_GLOSSY_COLOR : PASS_NONE; + pass_info.use_compositing = true; + break; + case PASS_TRANSMISSION: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_TRANSMISSION_DIRECT; + pass_info.indirect_type = PASS_TRANSMISSION_INDIRECT; + pass_info.divide_type = (!include_albedo) ? PASS_TRANSMISSION_COLOR : PASS_NONE; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_TRANSMISSION_DIRECT: + case PASS_TRANSMISSION_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.divide_type = (!include_albedo) ? 
PASS_TRANSMISSION_COLOR : PASS_NONE; + pass_info.use_compositing = true; + break; + case PASS_VOLUME: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_VOLUME_DIRECT; + pass_info.indirect_type = PASS_VOLUME_INDIRECT; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_VOLUME_DIRECT: + case PASS_VOLUME_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + break; + + case PASS_CRYPTOMATTE: + pass_info.num_components = 4; + break; + + case PASS_DENOISING_NORMAL: + pass_info.num_components = 3; + break; + case PASS_DENOISING_ALBEDO: + pass_info.num_components = 3; + break; + + case PASS_SHADOW_CATCHER: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.use_compositing = true; + pass_info.use_denoising_albedo = false; + pass_info.support_denoise = true; + break; + case PASS_SHADOW_CATCHER_SAMPLE_COUNT: + pass_info.num_components = 1; + break; + case PASS_SHADOW_CATCHER_MATTE: + pass_info.num_components = 4; + pass_info.use_exposure = true; + pass_info.support_denoise = true; + /* Without shadow catcher approximation compositing is not needed. + * Since we don't know here whether approximation is used or not, leave the decision up to + * the caller which will know that. 
*/ + break; + + case PASS_ADAPTIVE_AUX_BUFFER: + pass_info.num_components = 4; + break; + case PASS_SAMPLE_COUNT: + pass_info.num_components = 1; + pass_info.use_exposure = false; + break; + + case PASS_AOV_COLOR: + pass_info.num_components = 3; + break; + case PASS_AOV_VALUE: + pass_info.num_components = 1; + break; + + case PASS_BAKE_PRIMITIVE: + case PASS_BAKE_DIFFERENTIAL: + pass_info.num_components = 4; + pass_info.use_exposure = false; + pass_info.use_filter = false; + break; + + case PASS_CATEGORY_LIGHT_END: + case PASS_CATEGORY_DATA_END: + case PASS_CATEGORY_BAKE_END: + case PASS_NUM: + LOG(DFATAL) << "Unexpected pass type is used " << type; + pass_info.num_components = 0; + break; + } + + return pass_info; +} + +bool Pass::contains(const vector<Pass *> &passes, PassType type) +{ + for (const Pass *pass : passes) { + if (pass->get_type() != type) { + continue; + } + + return true; + } + + return false; +} + +const Pass *Pass::find(const vector<Pass *> &passes, const string &name) +{ + for (const Pass *pass : passes) { + if (pass->get_name() == name) { + return pass; + } + } + + return nullptr; +} + +const Pass *Pass::find(const vector<Pass *> &passes, PassType type, PassMode mode) +{ + for (const Pass *pass : passes) { + if (pass->get_type() != type || pass->get_mode() != mode) { + continue; + } + + return pass; + } + + return nullptr; +} + +int Pass::get_offset(const vector<Pass *> &passes, const Pass *pass) +{ + int pass_offset = 0; + + for (const Pass *current_pass : passes) { + /* Note that pass name is allowed to be empty. This is why we check for type and mode. 
*/ + if (current_pass->get_type() == pass->get_type() && + current_pass->get_mode() == pass->get_mode() && + current_pass->get_name() == pass->get_name()) { + if (current_pass->is_written()) { + return pass_offset; + } + else { + return PASS_UNUSED; + } + } + if (current_pass->is_written()) { + pass_offset += current_pass->get_info().num_components; + } + } + + return PASS_UNUSED; +} + +std::ostream &operator<<(std::ostream &os, const Pass &pass) +{ + os << "type: " << pass_type_as_string(pass.get_type()); + os << ", name: \"" << pass.get_name() << "\""; + os << ", mode: " << pass.get_mode(); + os << ", is_written: " << string_from_bool(pass.is_written()); + + return os; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/pass.h b/intern/cycles/render/pass.h new file mode 100644 index 00000000000..82230c62cb0 --- /dev/null +++ b/intern/cycles/render/pass.h @@ -0,0 +1,106 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include <ostream> // NOLINT + +#include "util/util_string.h" +#include "util/util_vector.h" + +#include "kernel/kernel_types.h" + +#include "graph/node.h" + +CCL_NAMESPACE_BEGIN + +const char *pass_type_as_string(const PassType type); + +enum class PassMode { + NOISY, + DENOISED, +}; +const char *pass_mode_as_string(PassMode mode); +std::ostream &operator<<(std::ostream &os, PassMode mode); + +struct PassInfo { + int num_components = -1; + bool use_filter = false; + bool use_exposure = false; + bool is_written = true; + PassType divide_type = PASS_NONE; + PassType direct_type = PASS_NONE; + PassType indirect_type = PASS_NONE; + + /* Pass access for read can not happen directly and needs some sort of compositing (for example, + * light passes due to divide_type, or shadow catcher pass. */ + bool use_compositing = false; + + /* Used to disable albedo pass for denoising. + * Light and shadow catcher passes should not have discontinuity in the denoised result based on + * the underlying albedo. */ + bool use_denoising_albedo = true; + + /* Pass supports denoising. */ + bool support_denoise = false; +}; + +class Pass : public Node { + public: + NODE_DECLARE + + NODE_SOCKET_API(PassType, type) + NODE_SOCKET_API(PassMode, mode) + NODE_SOCKET_API(ustring, name) + NODE_SOCKET_API(bool, include_albedo) + + Pass(); + + PassInfo get_info() const; + + /* The pass is written by the render pipeline (kernel or denoiser). If the pass is written it + * will have pixels allocated in a RenderBuffer. Passes which are not written do not have their + * pixels allocated to save memory. */ + bool is_written() const; + + protected: + /* The has been created automatically as a requirement to various rendering functionality (such + * as adaptive sampling). 
*/ + bool is_auto_; + + public: + static const NodeEnum *get_type_enum(); + static const NodeEnum *get_mode_enum(); + + static PassInfo get_info(PassType type, const bool include_albedo = false); + + static bool contains(const vector<Pass *> &passes, PassType type); + + /* Returns nullptr if there is no pass with the given name or type+mode. */ + static const Pass *find(const vector<Pass *> &passes, const string &name); + static const Pass *find(const vector<Pass *> &passes, + PassType type, + PassMode mode = PassMode::NOISY); + + /* Returns PASS_UNUSED if there is no corresponding pass. */ + static int get_offset(const vector<Pass *> &passes, const Pass *pass); + + friend class Film; +}; + +std::ostream &operator<<(std::ostream &os, const Pass &pass); + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index c4e7d2c79d6..a4b030190dc 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -163,12 +163,15 @@ void Scene::free_memory(bool final) delete p; foreach (Light *l, lights) delete l; + foreach (Pass *p, passes) + delete p; geometry.clear(); objects.clear(); lights.clear(); particle_systems.clear(); procedurals.clear(); + passes.clear(); if (device) { camera->device_free(device, &dscene, this); @@ -253,7 +256,6 @@ void Scene::device_update(Device *device_, Progress &progress) * - Camera may be used for adaptive subdivision. * - Displacement shader must have all shader data available. * - Light manager needs lookup tables and final mesh data to compute emission CDF. 
- * - Film needs light manager to run for use_light_visibility * - Lookup tables are done a second time to handle film tables */ @@ -469,88 +471,110 @@ void Scene::enable_update_stats() } } -DeviceRequestedFeatures Scene::get_requested_device_features() +void Scene::update_kernel_features() { - DeviceRequestedFeatures requested_features; + if (!need_update()) { + return; + } - shader_manager->get_requested_features(this, &requested_features); + /* These features are not being tweaked as often as shaders, + * so could be done selective magic for the viewport as well. */ + uint kernel_features = shader_manager->get_kernel_features(this); - /* This features are not being tweaked as often as shaders, - * so could be done selective magic for the viewport as well. - */ bool use_motion = need_motion() == Scene::MotionType::MOTION_BLUR; - requested_features.use_hair = false; - requested_features.use_hair_thick = (params.hair_shape == CURVE_THICK); - requested_features.use_object_motion = false; - requested_features.use_camera_motion = use_motion && camera->use_motion(); + kernel_features |= KERNEL_FEATURE_PATH_TRACING; + if (params.hair_shape == CURVE_THICK) { + kernel_features |= KERNEL_FEATURE_HAIR_THICK; + } + if (use_motion && camera->use_motion()) { + kernel_features |= KERNEL_FEATURE_CAMERA_MOTION; + } foreach (Object *object, objects) { Geometry *geom = object->get_geometry(); if (use_motion) { - requested_features.use_object_motion |= object->use_motion() | geom->get_use_motion_blur(); - requested_features.use_camera_motion |= geom->get_use_motion_blur(); + if (object->use_motion() || geom->get_use_motion_blur()) { + kernel_features |= KERNEL_FEATURE_OBJECT_MOTION; + } + if (geom->get_use_motion_blur()) { + kernel_features |= KERNEL_FEATURE_CAMERA_MOTION; + } } if (object->get_is_shadow_catcher()) { - requested_features.use_shadow_tricks = true; + kernel_features |= KERNEL_FEATURE_SHADOW_CATCHER; } if (geom->is_mesh()) { Mesh *mesh = static_cast<Mesh *>(geom); 
#ifdef WITH_OPENSUBDIV if (mesh->get_subdivision_type() != Mesh::SUBDIVISION_NONE) { - requested_features.use_patch_evaluation = true; + kernel_features |= KERNEL_FEATURE_PATCH_EVALUATION; } #endif - requested_features.use_true_displacement |= mesh->has_true_displacement(); } else if (geom->is_hair()) { - requested_features.use_hair = true; + kernel_features |= KERNEL_FEATURE_HAIR; } } - requested_features.use_background_light = light_manager->has_background_light(this); - - requested_features.use_baking = bake_manager->get_baking(); - requested_features.use_integrator_branched = (integrator->get_method() == - Integrator::BRANCHED_PATH); - if (film->get_denoising_data_pass()) { - requested_features.use_denoising = true; - requested_features.use_shadow_tricks = true; + if (bake_manager->get_baking()) { + kernel_features |= KERNEL_FEATURE_BAKING; } - return requested_features; -} + kernel_features |= film->get_kernel_features(this); -bool Scene::update(Progress &progress, bool &kernel_switch_needed) -{ - /* update scene */ - if (need_update()) { - /* Update max_closures. */ - KernelIntegrator *kintegrator = &dscene.data.integrator; - if (params.background) { - kintegrator->max_closures = get_max_closure_count(); - } - else { - /* Currently viewport render is faster with higher max_closures, needs investigating. */ - kintegrator->max_closures = MAX_CLOSURE; - } - - /* Load render kernels, before device update where we upload data to the GPU. 
*/ - bool new_kernels_needed = load_kernels(progress, false); - - progress.set_status("Updating Scene"); - MEM_GUARDED_CALL(&progress, device_update, device, progress); + dscene.data.kernel_features = kernel_features; - DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state(); - kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE || - kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID; - if (new_kernels_needed || kernel_switch_needed) { - progress.set_kernel_status("Compiling render kernels"); - device->wait_for_availability(loaded_kernel_features); - progress.set_kernel_status(""); - } + /* Currently viewport render is faster with higher max_closures, needs investigating. */ + const uint max_closures = (params.background) ? get_max_closure_count() : MAX_CLOSURE; + dscene.data.max_closures = max_closures; + dscene.data.max_shaders = shaders.size(); +} - return true; +bool Scene::update(Progress &progress) +{ + if (!need_update()) { + return false; } - return false; + + /* Load render kernels, before device update where we upload data to the GPU. */ + load_kernels(progress, false); + + /* Upload scene data to the GPU. 
*/ + progress.set_status("Updating Scene"); + MEM_GUARDED_CALL(&progress, device_update, device, progress); + + return true; +} + +static void log_kernel_features(const uint features) +{ + VLOG(2) << "Requested features:\n"; + VLOG(2) << "Use BSDF " << string_from_bool(features & KERNEL_FEATURE_NODE_BSDF) << "\n"; + VLOG(2) << "Use Principled BSDF " << string_from_bool(features & KERNEL_FEATURE_PRINCIPLED) + << "\n"; + VLOG(2) << "Use Emission " << string_from_bool(features & KERNEL_FEATURE_NODE_EMISSION) << "\n"; + VLOG(2) << "Use Volume " << string_from_bool(features & KERNEL_FEATURE_NODE_VOLUME) << "\n"; + VLOG(2) << "Use Hair " << string_from_bool(features & KERNEL_FEATURE_NODE_HAIR) << "\n"; + VLOG(2) << "Use Bump " << string_from_bool(features & KERNEL_FEATURE_NODE_BUMP) << "\n"; + VLOG(2) << "Use Voronoi " << string_from_bool(features & KERNEL_FEATURE_NODE_VORONOI_EXTRA) + << "\n"; + VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE) + << "\n"; + VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n"; + VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n"; + VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING) + << "\n"; + VLOG(2) << "Use Hair " << string_from_bool(features & KERNEL_FEATURE_HAIR) << "\n"; + VLOG(2) << "Use Object Motion " << string_from_bool(features & KERNEL_FEATURE_OBJECT_MOTION) + << "\n"; + VLOG(2) << "Use Camera Motion " << string_from_bool(features & KERNEL_FEATURE_CAMERA_MOTION) + << "\n"; + VLOG(2) << "Use Baking " << string_from_bool(features & KERNEL_FEATURE_BAKING) << "\n"; + VLOG(2) << "Use Subsurface " << string_from_bool(features & KERNEL_FEATURE_SUBSURFACE) << "\n"; + VLOG(2) << "Use Volume " << string_from_bool(features & KERNEL_FEATURE_VOLUME) << "\n"; + VLOG(2) << "Use Patch Evaluation " + << string_from_bool(features & KERNEL_FEATURE_PATCH_EVALUATION) << "\n"; + 
VLOG(2) << "Use Shadow Catcher " << string_from_bool(features & KERNEL_FEATURE_SHADOW_CATCHER) + << "\n"; } bool Scene::load_kernels(Progress &progress, bool lock_scene) @@ -560,15 +584,15 @@ bool Scene::load_kernels(Progress &progress, bool lock_scene) scene_lock = thread_scoped_lock(mutex); } - DeviceRequestedFeatures requested_features = get_requested_device_features(); + const uint kernel_features = dscene.data.kernel_features; - if (!kernels_loaded || loaded_kernel_features.modified(requested_features)) { + if (!kernels_loaded || loaded_kernel_features != kernel_features) { progress.set_status("Loading render kernels (may take a few minutes the first time)"); scoped_timer timer; - VLOG(2) << "Requested features:\n" << requested_features; - if (!device->load_kernels(requested_features)) { + log_kernel_features(kernel_features); + if (!device->load_kernels(kernel_features)) { string message = device->error_message(); if (message.empty()) message = "Failed loading render kernel, see console for errors"; @@ -580,7 +604,7 @@ bool Scene::load_kernels(Progress &progress, bool lock_scene) } kernels_loaded = true; - loaded_kernel_features = requested_features; + loaded_kernel_features = kernel_features; return true; } return false; @@ -618,6 +642,28 @@ int Scene::get_max_closure_count() return max_closure_global; } +bool Scene::has_shadow_catcher() +{ + if (shadow_catcher_modified_) { + has_shadow_catcher_ = false; + for (Object *object : objects) { + if (object->get_is_shadow_catcher()) { + has_shadow_catcher_ = true; + break; + } + } + + shadow_catcher_modified_ = false; + } + + return has_shadow_catcher_; +} + +void Scene::tag_shadow_catcher_modified() +{ + shadow_catcher_modified_ = true; +} + template<> Light *Scene::create_node<Light>() { Light *node = new Light(); @@ -694,6 +740,15 @@ template<> AlembicProcedural *Scene::create_node<AlembicProcedural>() #endif } +template<> Pass *Scene::create_node<Pass>() +{ + Pass *node = new Pass(); + node->set_owner(this); + 
passes.push_back(node); + film->tag_modified(); + return node; +} + template<typename T> void delete_node_from_array(vector<T> &nodes, T node) { for (size_t i = 0; i < nodes.size(); ++i) { @@ -779,6 +834,12 @@ template<> void Scene::delete_node_impl(AlembicProcedural *node) #endif } +template<> void Scene::delete_node_impl(Pass *node) +{ + delete_node_from_array(passes, node); + film->tag_modified(); +} + template<typename T> static void remove_nodes_in_set(const set<T *> &nodes_set, vector<T *> &nodes_array, @@ -842,4 +903,10 @@ template<> void Scene::delete_nodes(const set<Procedural *> &nodes, const NodeOw procedural_manager->tag_update(); } +template<> void Scene::delete_nodes(const set<Pass *> &nodes, const NodeOwner *owner) +{ + remove_nodes_in_set(nodes, passes, owner); + film->tag_modified(); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 7d8a6774381..cf4a3ba6b12 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -128,7 +128,7 @@ class DeviceScene { device_vector<float> lookup_table; /* integrator */ - device_vector<uint> sample_pattern_lut; + device_vector<float> sample_pattern_lut; /* ies lights */ device_vector<float> ies_lights; @@ -142,27 +142,6 @@ class DeviceScene { class SceneParams { public: - /* Type of BVH, in terms whether it is supported dynamic updates of meshes - * or whether modifying geometry requires full BVH rebuild. - */ - enum BVHType { - /* BVH supports dynamic updates of geometry. - * - * Faster for updating BVH tree when doing modifications in viewport, - * but slower for rendering. - */ - BVH_DYNAMIC = 0, - /* BVH tree is calculated for specific scene, updates in geometry - * requires full tree rebuild. - * - * Slower to update BVH tree when modifying objects in viewport, also - * slower to build final BVH tree but gives best possible render speed. - */ - BVH_STATIC = 1, - - BVH_NUM_TYPES, - }; - ShadingSystem shadingsystem; /* Requested BVH layout. 
@@ -186,7 +165,7 @@ class SceneParams { { shadingsystem = SHADINGSYSTEM_SVM; bvh_layout = BVH_LAYOUT_BVH2; - bvh_type = BVH_DYNAMIC; + bvh_type = BVH_TYPE_DYNAMIC; use_bvh_spatial_split = false; use_bvh_unaligned_nodes = true; num_bvh_time_steps = 0; @@ -196,7 +175,7 @@ class SceneParams { background = true; } - bool modified(const SceneParams ¶ms) + bool modified(const SceneParams ¶ms) const { return !(shadingsystem == params.shadingsystem && bvh_layout == params.bvh_layout && bvh_type == params.bvh_type && @@ -236,7 +215,7 @@ class Scene : public NodeOwner { vector<Shader *> shaders; vector<Light *> lights; vector<ParticleSystem *> particle_systems; - vector<Pass> passes; + vector<Pass *> passes; vector<Procedural *> procedurals; /* data managers */ @@ -291,7 +270,11 @@ class Scene : public NodeOwner { void enable_update_stats(); - bool update(Progress &progress, bool &kernel_switch_needed); + void update_kernel_features(); + bool update(Progress &progress); + + bool has_shadow_catcher(); + void tag_shadow_catcher_modified(); /* This function is used to create a node of a specified type instead of * calling 'new', and sets the scene as the owner of the node. @@ -348,13 +331,12 @@ class Scene : public NodeOwner { void free_memory(bool final); bool kernels_loaded; - DeviceRequestedFeatures loaded_kernel_features; + uint loaded_kernel_features; bool load_kernels(Progress &progress, bool lock_scene = true); - /* ** Split kernel routines ** */ - - DeviceRequestedFeatures get_requested_device_features(); + bool has_shadow_catcher_ = false; + bool shadow_catcher_modified_ = true; /* Maximum number of closure during session lifetime. 
*/ int max_closure_global; @@ -384,6 +366,8 @@ template<> Shader *Scene::create_node<Shader>(); template<> AlembicProcedural *Scene::create_node<AlembicProcedural>(); +template<> Pass *Scene::create_node<Pass>(); + template<> void Scene::delete_node_impl(Light *node); template<> void Scene::delete_node_impl(Mesh *node); @@ -404,6 +388,8 @@ template<> void Scene::delete_node_impl(Procedural *node); template<> void Scene::delete_node_impl(AlembicProcedural *node); +template<> void Scene::delete_node_impl(Pass *node); + template<> void Scene::delete_nodes(const set<Light *> &nodes, const NodeOwner *owner); template<> void Scene::delete_nodes(const set<Geometry *> &nodes, const NodeOwner *owner); @@ -416,6 +402,8 @@ template<> void Scene::delete_nodes(const set<Shader *> &nodes, const NodeOwner template<> void Scene::delete_nodes(const set<Procedural *> &nodes, const NodeOwner *owner); +template<> void Scene::delete_nodes(const set<Pass *> &nodes, const NodeOwner *owner); + CCL_NAMESPACE_END #endif /* __SCENE_H__ */ diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 1b91c49f0ea..47eeffd97fe 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -17,10 +17,15 @@ #include <limits.h> #include <string.h> +#include "device/cpu/device.h" #include "device/device.h" +#include "integrator/pass_accessor_cpu.h" +#include "integrator/path_trace.h" +#include "render/background.h" #include "render/bake.h" #include "render/buffers.h" #include "render/camera.h" +#include "render/gpu_display.h" #include "render/graph.h" #include "render/integrator.h" #include "render/light.h" @@ -39,70 +44,63 @@ CCL_NAMESPACE_BEGIN -/* Note about preserve_tile_device option for tile manager: - * progressive refine and viewport rendering does requires tiles to - * always be allocated for the same device - */ -Session::Session(const SessionParams ¶ms_) - : params(params_), - tile_manager(params.progressive, - params.samples, - 
params.tile_size, - params.start_resolution, - params.background == false || params.progressive_refine, - params.background, - params.tile_order, - max(params.device.multi_devices.size(), 1), - params.pixel_size), - stats(), - profiler() +Session::Session(const SessionParams ¶ms_, const SceneParams &scene_params) + : params(params_), render_scheduler_(tile_manager_, params) { - device_use_gl_ = ((params.device.type != DEVICE_CPU) && !params.background); - TaskScheduler::init(params.threads); - session_thread_ = NULL; - scene = NULL; - - reset_time_ = 0.0; - last_update_time_ = 0.0; + session_thread_ = nullptr; delayed_reset_.do_reset = false; - delayed_reset_.samples = 0; - - display_outdated_ = false; - gpu_draw_ready_ = false; - gpu_need_display_buffer_update_ = false; pause_ = false; cancel_ = false; new_work_added_ = false; - buffers = NULL; - display = NULL; + device = Device::create(params.device, stats, profiler); - /* Validate denoising parameters. */ - set_denoising(params.denoising); + scene = new Scene(scene_params, device); - /* Create CPU/GPU devices. */ - device = Device::create(params.device, stats, profiler, params.background); - - if (!device->error_message().empty()) { - progress.set_error(device->error_message()); - return; - } + /* Configure path tracer. */ + path_trace_ = make_unique<PathTrace>( + device, scene->film, &scene->dscene, render_scheduler_, tile_manager_); + path_trace_->set_progress(&progress); + path_trace_->tile_buffer_update_cb = [&]() { + if (!update_render_tile_cb) { + return; + } + update_render_tile_cb(); + }; + path_trace_->tile_buffer_write_cb = [&]() { + if (!write_render_tile_cb) { + return; + } + write_render_tile_cb(); + }; + path_trace_->tile_buffer_read_cb = [&]() -> bool { + if (!read_render_tile_cb) { + return false; + } + read_render_tile_cb(); + return true; + }; + path_trace_->progress_update_cb = [&]() { update_status_time(); }; - /* Create buffers for interactive rendering. 
*/ - if (!(params.background && !params.write_render_cb)) { - buffers = new RenderBuffers(device); - display = new DisplayBuffer(device, params.display_buffer_linear); - } + tile_manager_.full_buffer_written_cb = [&](string_view filename) { + if (!full_buffer_written_cb) { + return; + } + full_buffer_written_cb(filename); + }; } Session::~Session() { cancel(); + /* TODO(sergey): Bring the passes in viewport back. + * It is unclear why there is such an exception needed though. */ +#if 0 if (buffers && params.write_render_cb) { /* Copy to display buffer and write out image if requested */ delete display; @@ -116,12 +114,14 @@ Session::~Session() uchar4 *pixels = display->rgba_byte.copy_from_device(0, w, h); params.write_render_cb((uchar *)pixels, w, h, 4); } +#endif - /* clean up */ - tile_manager.device_free(); + /* Make sure path tracer is destroyed before the device. This is needed because destruction might + * need to access device for device memory free. */ + /* TODO(sergey): Convert device to be unique_ptr, and rely on C++ to destruct objects in the + * pre-defined order. 
*/ + path_trace_.reset(); - delete buffers; - delete display; delete scene; delete device; @@ -135,15 +135,16 @@ void Session::start() } } -void Session::cancel() +void Session::cancel(bool quick) { + if (quick && path_trace_) { + path_trace_->cancel(); + } + if (session_thread_) { /* wait for session thread to end */ progress.set_cancel("Exiting"); - gpu_need_display_buffer_update_ = false; - gpu_need_display_buffer_update_cond_.notify_all(); - { thread_scoped_lock pause_lock(pause_mutex_); pause_ = false; @@ -157,570 +158,43 @@ void Session::cancel() bool Session::ready_to_reset() { - double dt = time_dt() - reset_time_; - - if (!display_outdated_) - return (dt > params.reset_timeout); - else - return (dt > params.cancel_timeout); + return path_trace_->ready_to_reset(); } -/* GPU Session */ - -void Session::reset_gpu(BufferParams &buffer_params, int samples) +void Session::run_main_render_loop() { - thread_scoped_lock pause_lock(pause_mutex_); - - /* block for buffer access and reset immediately. 
we can't do this - * in the thread, because we need to allocate an OpenGL buffer, and - * that only works in the main thread */ - thread_scoped_lock display_lock(display_mutex_); - thread_scoped_lock buffers_lock(buffers_mutex_); + path_trace_->clear_gpu_display(); - display_outdated_ = true; - reset_time_ = time_dt(); + while (true) { + RenderWork render_work = run_update_for_next_iteration(); - reset_(buffer_params, samples); - - gpu_need_display_buffer_update_ = false; - gpu_need_display_buffer_update_cond_.notify_all(); - - new_work_added_ = true; - - pause_cond_.notify_all(); -} - -bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_params) -{ - /* block for buffer access */ - thread_scoped_lock display_lock(display_mutex_); - - /* first check we already rendered something */ - if (gpu_draw_ready_) { - /* then verify the buffers have the expected size, so we don't - * draw previous results in a resized window */ - if (buffer_params.width == display->params.width && - buffer_params.height == display->params.height) { - /* for CUDA we need to do tone-mapping still, since we can - * only access GL buffers from the main thread. 
*/ - if (gpu_need_display_buffer_update_) { - thread_scoped_lock buffers_lock(buffers_mutex_); - copy_to_display_buffer(tile_manager.state.sample); - gpu_need_display_buffer_update_ = false; - gpu_need_display_buffer_update_cond_.notify_all(); + if (!render_work) { + if (VLOG_IS_ON(2)) { + double total_time, render_time; + progress.get_time(total_time, render_time); + VLOG(2) << "Rendering in main loop is done in " << render_time << " seconds."; + VLOG(2) << path_trace_->full_report(); } - display->draw(device, draw_params); - - if (display_outdated_ && (time_dt() - reset_time_) > params.text_timeout) - return false; - - return true; - } - } - - return false; -} - -void Session::run_gpu() -{ - bool tiles_written = false; - - reset_time_ = time_dt(); - last_update_time_ = time_dt(); - last_display_time_ = last_update_time_; - - progress.set_render_start_time(); - - while (!progress.get_cancel()) { - const bool no_tiles = !run_update_for_next_iteration(); - - if (no_tiles) { if (params.background) { - /* if no work left and in background mode, we can stop immediately */ + /* if no work left and in background mode, we can stop immediately. 
*/ progress.set_status("Finished"); break; } } - if (run_wait_for_work(no_tiles)) { - continue; - } - - if (progress.get_cancel()) { - break; - } - - if (!no_tiles) { - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - if (progress.get_cancel()) - break; - - /* buffers mutex is locked entirely while rendering each - * sample, and released/reacquired on each iteration to allow - * reset and draw in between */ - thread_scoped_lock buffers_lock(buffers_mutex_); - - /* update status and timing */ - update_status_time(); - - /* render */ - bool delayed_denoise = false; - const bool need_denoise = render_need_denoise(delayed_denoise); - render(need_denoise); - - device->task_wait(); - - if (!device->error_message().empty()) - progress.set_cancel(device->error_message()); - - /* update status and timing */ - update_status_time(); - - gpu_need_display_buffer_update_ = !delayed_denoise; - gpu_draw_ready_ = true; - progress.set_update(); - - /* wait for until display buffer is updated */ - if (!params.background) { - while (gpu_need_display_buffer_update_) { - if (progress.get_cancel()) - break; - - gpu_need_display_buffer_update_cond_.wait(buffers_lock); - } - } - - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - tiles_written = update_progressive_refine(progress.get_cancel()); - - if (progress.get_cancel()) - break; - } - } - - if (!tiles_written) - update_progressive_refine(true); -} - -/* CPU Session */ - -void Session::reset_cpu(BufferParams &buffer_params, int samples) -{ - thread_scoped_lock reset_lock(delayed_reset_.mutex); - thread_scoped_lock pause_lock(pause_mutex_); - - display_outdated_ = true; - reset_time_ = time_dt(); - - delayed_reset_.params = buffer_params; - delayed_reset_.samples = samples; - delayed_reset_.do_reset = true; - device->task_cancel(); - - pause_cond_.notify_all(); -} - -bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_params) -{ - 
thread_scoped_lock display_lock(display_mutex_); - - /* first check we already rendered something */ - if (display->draw_ready()) { - /* then verify the buffers have the expected size, so we don't - * draw previous results in a resized window */ - if (buffer_params.width == display->params.width && - buffer_params.height == display->params.height) { - display->draw(device, draw_params); - - if (display_outdated_ && (time_dt() - reset_time_) > params.text_timeout) - return false; - - return true; - } - } - - return false; -} - -bool Session::steal_tile(RenderTile &rtile, Device *tile_device, thread_scoped_lock &tile_lock) -{ - /* Devices that can get their tiles stolen don't steal tiles themselves. - * Additionally, if there are no stealable tiles in flight, give up here. */ - if (tile_device->info.type == DEVICE_CPU || stealable_tiles_ == 0) { - return false; - } - - /* Wait until no other thread is trying to steal a tile. */ - while (tile_stealing_state_ != NOT_STEALING && stealable_tiles_ > 0) { - /* Someone else is currently trying to get a tile. - * Wait on the condition variable and try later. */ - tile_steal_cond_.wait(tile_lock); - } - /* If another thread stole the last stealable tile in the meantime, give up. */ - if (stealable_tiles_ == 0) { - return false; - } - - /* There are stealable tiles in flight, so signal that one should be released. */ - tile_stealing_state_ = WAITING_FOR_TILE; - - /* Wait until a device notices the signal and releases its tile. */ - while (tile_stealing_state_ != GOT_TILE && stealable_tiles_ > 0) { - tile_steal_cond_.wait(tile_lock); - } - /* If the last stealable tile finished on its own, give up. */ - if (tile_stealing_state_ != GOT_TILE) { - tile_stealing_state_ = NOT_STEALING; - return false; - } - - /* Successfully stole a tile, now move it to the new device. 
*/ - rtile = stolen_tile_; - rtile.buffers->buffer.move_device(tile_device); - rtile.buffer = rtile.buffers->buffer.device_pointer; - rtile.stealing_state = RenderTile::NO_STEALING; - rtile.num_samples -= (rtile.sample - rtile.start_sample); - rtile.start_sample = rtile.sample; - - tile_stealing_state_ = NOT_STEALING; - - /* Poke any threads which might be waiting for NOT_STEALING above. */ - tile_steal_cond_.notify_one(); - - return true; -} - -bool Session::get_tile_stolen() -{ - /* If tile_stealing_state is WAITING_FOR_TILE, atomically set it to RELEASING_TILE - * and return true. */ - TileStealingState expected = WAITING_FOR_TILE; - return tile_stealing_state_.compare_exchange_weak(expected, RELEASING_TILE); -} - -bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types) -{ - if (progress.get_cancel()) { - if (params.progressive_refine == false) { - /* for progressive refine current sample should be finished for all tiles */ - return false; - } - } - - thread_scoped_lock tile_lock(tile_mutex_); - - /* get next tile from manager */ - Tile *tile; - int device_num = device->device_number(tile_device); - - while (!tile_manager.next_tile(tile, device_num, tile_types)) { - /* Can only steal tiles on devices that support rendering - * This is because denoising tiles cannot be stolen (see below) - */ - if ((tile_types & (RenderTile::PATH_TRACE | RenderTile::BAKE)) && - steal_tile(rtile, tile_device, tile_lock)) { - return true; - } - - /* Wait for denoising tiles to become available */ - if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) { - denoising_cond_.wait(tile_lock); - continue; - } - - return false; - } - - /* fill render tile */ - rtile.x = tile_manager.state.buffer.full_x + tile->x; - rtile.y = tile_manager.state.buffer.full_y + tile->y; - rtile.w = tile->w; - rtile.h = tile->h; - rtile.start_sample = tile_manager.state.sample; - rtile.num_samples = tile_manager.state.num_samples; - 
rtile.resolution = tile_manager.state.resolution_divider; - rtile.tile_index = tile->index; - rtile.stealing_state = RenderTile::NO_STEALING; - - if (tile->state == Tile::DENOISE) { - rtile.task = RenderTile::DENOISE; - } - else { - if (tile_device->info.type == DEVICE_CPU) { - stealable_tiles_++; - rtile.stealing_state = RenderTile::CAN_BE_STOLEN; - } - - if (read_bake_tile_cb) { - rtile.task = RenderTile::BAKE; - } - else { - rtile.task = RenderTile::PATH_TRACE; - } - } - - tile_lock.unlock(); - - /* in case of a permanent buffer, return it, otherwise we will allocate - * a new temporary buffer */ - if (buffers) { - tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = buffers->buffer.device_pointer; - rtile.buffers = buffers; - - device->map_tile(tile_device, rtile); - - /* Reset copy state, since buffer contents change after the tile was acquired */ - buffers->map_neighbor_copied = false; - - /* This hack ensures that the copy in 'MultiDevice::map_neighbor_tiles' accounts - * for the buffer resolution divider. */ - buffers->buffer.data_width = (buffers->params.width * buffers->params.get_passes_size()) / - tile_manager.state.resolution_divider; - buffers->buffer.data_height = buffers->params.height / tile_manager.state.resolution_divider; - - return true; - } - - if (tile->buffers == NULL) { - /* fill buffer parameters */ - BufferParams buffer_params = tile_manager.params; - buffer_params.full_x = rtile.x; - buffer_params.full_y = rtile.y; - buffer_params.width = rtile.w; - buffer_params.height = rtile.h; - - /* allocate buffers */ - tile->buffers = new RenderBuffers(tile_device); - tile->buffers->reset(buffer_params); - } - else if (tile->buffers->buffer.device != tile_device) { - /* Move buffer to current tile device again in case it was stolen before. - * Not needed for denoising since that already handles mapping of tiles and - * neighbors to its own device. 
*/ - if (rtile.task != RenderTile::DENOISE) { - tile->buffers->buffer.move_device(tile_device); - } - } - - tile->buffers->map_neighbor_copied = false; - - tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = tile->buffers->buffer.device_pointer; - rtile.buffers = tile->buffers; - rtile.sample = tile_manager.state.sample; - - if (read_bake_tile_cb) { - /* This will read any passes needed as input for baking. */ - if (tile_manager.state.sample == tile_manager.range_start_sample) { - { - thread_scoped_lock tile_lock(tile_mutex_); - read_bake_tile_cb(rtile); - } - rtile.buffers->buffer.copy_to_device(); - } - } - else { - /* This will tag tile as IN PROGRESS in blender-side render pipeline, - * which is needed to highlight currently rendering tile before first - * sample was processed for it. */ - update_tile_sample(rtile); - } - - return true; -} - -void Session::update_tile_sample(RenderTile &rtile) -{ - thread_scoped_lock tile_lock(tile_mutex_); - - if (update_render_tile_cb) { - if (params.progressive_refine == false) { - /* todo: optimize this by making it thread safe and removing lock */ - - update_render_tile_cb(rtile, true); - } - } - - update_status_time(); -} - -void Session::release_tile(RenderTile &rtile, const bool need_denoise) -{ - thread_scoped_lock tile_lock(tile_mutex_); - - if (rtile.stealing_state != RenderTile::NO_STEALING) { - stealable_tiles_--; - if (rtile.stealing_state == RenderTile::WAS_STOLEN) { - /* If the tile is being stolen, don't release it here - the new device will pick up where - * the old one left off. */ - - assert(tile_stealing_state_ == RELEASING_TILE); - assert(rtile.sample < rtile.start_sample + rtile.num_samples); - - tile_stealing_state_ = GOT_TILE; - stolen_tile_ = rtile; - tile_steal_cond_.notify_all(); - return; - } - else if (stealable_tiles_ == 0) { - /* If this was the last stealable tile, wake up any threads still waiting for one. 
*/ - tile_steal_cond_.notify_all(); - } - } - - progress.add_finished_tile(rtile.task == RenderTile::DENOISE); - - bool delete_tile; - - if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) { - /* Finished tile pixels write. */ - if (write_render_tile_cb && params.progressive_refine == false) { - write_render_tile_cb(rtile); - } - - if (delete_tile) { - delete rtile.buffers; - tile_manager.state.tiles[rtile.tile_index].buffers = NULL; - } - } - else { - /* In progress tile pixels update. */ - if (update_render_tile_cb && params.progressive_refine == false) { - update_render_tile_cb(rtile, false); - } - } - - update_status_time(); - - /* Notify denoising thread that a tile was finished. */ - denoising_cond_.notify_all(); -} - -void Session::map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device) -{ - thread_scoped_lock tile_lock(tile_mutex_); - - const int4 image_region = make_int4( - tile_manager.state.buffer.full_x, - tile_manager.state.buffer.full_y, - tile_manager.state.buffer.full_x + tile_manager.state.buffer.width, - tile_manager.state.buffer.full_y + tile_manager.state.buffer.height); - - RenderTile ¢er_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; - - if (!tile_manager.schedule_denoising) { - /* Fix up tile slices with overlap. */ - if (tile_manager.slice_overlap != 0) { - int y = max(center_tile.y - tile_manager.slice_overlap, image_region.y); - center_tile.h = min(center_tile.y + center_tile.h + tile_manager.slice_overlap, - image_region.w) - - y; - center_tile.y = y; - } - - /* Tiles are not being denoised individually, which means the entire image is processed. 
*/ - neighbors.set_bounds_from_center(); - } - else { - int center_idx = center_tile.tile_index; - assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); - - for (int dy = -1, i = 0; dy <= 1; dy++) { - for (int dx = -1; dx <= 1; dx++, i++) { - RenderTile &rtile = neighbors.tiles[i]; - int nindex = tile_manager.get_neighbor_index(center_idx, i); - if (nindex >= 0) { - Tile *tile = &tile_manager.state.tiles[nindex]; - - rtile.x = image_region.x + tile->x; - rtile.y = image_region.y + tile->y; - rtile.w = tile->w; - rtile.h = tile->h; - - if (buffers) { - tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = buffers->buffer.device_pointer; - rtile.buffers = buffers; - } - else { - assert(tile->buffers); - tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = tile->buffers->buffer.device_pointer; - rtile.buffers = tile->buffers; - } - } - else { - int px = center_tile.x + dx * params.tile_size.x; - int py = center_tile.y + dy * params.tile_size.y; - - rtile.x = clamp(px, image_region.x, image_region.z); - rtile.y = clamp(py, image_region.y, image_region.w); - rtile.w = rtile.h = 0; - - rtile.buffer = (device_ptr)NULL; - rtile.buffers = NULL; - } - } - } - } - - assert(center_tile.buffers); - device->map_neighbor_tiles(tile_device, neighbors); - - /* The denoised result is written back to the original tile. 
*/ - neighbors.target = center_tile; -} - -void Session::unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device) -{ - thread_scoped_lock tile_lock(tile_mutex_); - device->unmap_neighbor_tiles(tile_device, neighbors); -} - -void Session::run_cpu() -{ - bool tiles_written = false; - - last_update_time_ = time_dt(); - last_display_time_ = last_update_time_; - - while (!progress.get_cancel()) { - const bool no_tiles = !run_update_for_next_iteration(); - bool need_copy_to_display_buffer = false; - - if (no_tiles) { - if (params.background) { - /* if no work left and in background mode, we can stop immediately */ - progress.set_status("Finished"); + const bool did_cancel = progress.get_cancel(); + if (did_cancel) { + render_scheduler_.render_work_reschedule_on_cancel(render_work); + if (!render_work) { break; } } - - if (run_wait_for_work(no_tiles)) { + else if (run_wait_for_work(render_work)) { continue; } - if (progress.get_cancel()) { - break; - } - - if (!no_tiles) { - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - if (progress.get_cancel()) - break; - + { /* buffers mutex is locked entirely while rendering each * sample, and released/reacquired on each iteration to allow * reset and draw in between */ @@ -730,49 +204,25 @@ void Session::run_cpu() update_status_time(); /* render */ - bool delayed_denoise = false; - const bool need_denoise = render_need_denoise(delayed_denoise); - render(need_denoise); + path_trace_->render(render_work); /* update status and timing */ update_status_time(); - if (!params.background) - need_copy_to_display_buffer = !delayed_denoise; - - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - } - - device->task_wait(); - - { - thread_scoped_lock reset_lock(delayed_reset_.mutex); - thread_scoped_lock buffers_lock(buffers_mutex_); - thread_scoped_lock display_lock(display_mutex_); - - if (delayed_reset_.do_reset) { - /* reset rendering if request 
from main thread */ - delayed_reset_.do_reset = false; - reset_(delayed_reset_.params, delayed_reset_.samples); - } - else if (need_copy_to_display_buffer) { - /* Only copy to display_buffer if we do not reset, we don't - * want to show the result of an incomplete sample */ - copy_to_display_buffer(tile_manager.state.sample); + if (device->have_error()) { + const string &error_message = device->error_message(); + progress.set_error(error_message); + progress.set_cancel(error_message); + break; } - - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - tiles_written = update_progressive_refine(progress.get_cancel()); } progress.set_update(); - } - if (!tiles_written) - update_progressive_refine(true); + if (did_cancel) { + break; + } + } } void Session::run() @@ -789,10 +239,7 @@ void Session::run() /* reset number of rendered samples */ progress.reset_sample(); - if (device_use_gl_) - run_gpu(); - else - run_cpu(); + run_main_render_loop(); } profiler.stop(); @@ -804,31 +251,92 @@ void Session::run() progress.set_update(); } -bool Session::run_update_for_next_iteration() +RenderWork Session::run_update_for_next_iteration() { + RenderWork render_work; + thread_scoped_lock scene_lock(scene->mutex); thread_scoped_lock reset_lock(delayed_reset_.mutex); + bool have_tiles = true; + bool switched_to_new_tile = false; + if (delayed_reset_.do_reset) { thread_scoped_lock buffers_lock(buffers_mutex_); - reset_(delayed_reset_.params, delayed_reset_.samples); - delayed_reset_.do_reset = false; + do_delayed_reset(); + + /* After reset make sure the tile manager is at the first big tile. */ + have_tiles = tile_manager_.next(); + switched_to_new_tile = true; + } + + /* Update number of samples in the integrator. + * Ideally this would need to happen once in `Session::set_samples()`, but the issue there is + * the initial configuration when Session is created where the `set_samples()` is not used. 
*/ + scene->integrator->set_aa_samples(params.samples); + + /* Update denoiser settings. */ + { + const DenoiseParams denoise_params = scene->integrator->get_denoise_params(); + path_trace_->set_denoiser_params(denoise_params); + } + + /* Update adaptive sampling. */ + { + const AdaptiveSampling adaptive_sampling = scene->integrator->get_adaptive_sampling(); + path_trace_->set_adaptive_sampling(adaptive_sampling); } - const bool have_tiles = tile_manager.next(); + render_scheduler_.set_num_samples(params.samples); + render_scheduler_.set_time_limit(params.time_limit); + + while (have_tiles) { + render_work = render_scheduler_.get_render_work(); + if (render_work) { + break; + } - if (have_tiles) { + progress.add_finished_tile(false); + + have_tiles = tile_manager_.next(); + if (have_tiles) { + render_scheduler_.reset_for_next_tile(); + switched_to_new_tile = true; + } + } + + if (render_work) { scoped_timer update_timer; - if (update_scene()) { + + if (switched_to_new_tile) { + BufferParams tile_params = buffer_params_; + + const Tile &tile = tile_manager_.get_current_tile(); + tile_params.width = tile.width; + tile_params.height = tile.height; + tile_params.full_x = tile.x + buffer_params_.full_x; + tile_params.full_y = tile.y + buffer_params_.full_y; + tile_params.full_width = buffer_params_.full_width; + tile_params.full_height = buffer_params_.full_height; + tile_params.update_offset_stride(); + + path_trace_->reset(buffer_params_, tile_params); + } + + const int resolution = render_work.resolution_divider; + const int width = max(1, buffer_params_.full_width / resolution); + const int height = max(1, buffer_params_.full_height / resolution); + + if (update_scene(width, height)) { profiler.reset(scene->shaders.size(), scene->objects.size()); } progress.add_skip_time(update_timer, params.background); } - return have_tiles; + return render_work; } -bool Session::run_wait_for_work(bool no_tiles) +bool Session::run_wait_for_work(const RenderWork &render_work) { /* 
In an offline rendering there is no pause, and no tiles will mean the job is fully done. */ if (params.background) { @@ -837,19 +345,20 @@ bool Session::run_wait_for_work(bool no_tiles) thread_scoped_lock pause_lock(pause_mutex_); - if (!pause_ && !no_tiles) { + if (!pause_ && render_work) { /* Rendering is not paused and there is work to be done. No need to wait for anything. */ return false; } - update_status_time(pause_, no_tiles); + const bool no_work = !render_work; + update_status_time(pause_, no_work); /* Only leave the loop when rendering is not paused. But even if the current render is un-paused * but there is nothing to render keep waiting until new work is added. */ while (!cancel_) { scoped_timer pause_timer; - if (!pause_ && (!no_tiles || new_work_added_ || delayed_reset_.do_reset)) { + if (!pause_ && (render_work || new_work_added_ || delayed_reset_.do_reset)) { break; } @@ -860,52 +369,89 @@ bool Session::run_wait_for_work(bool no_tiles) progress.add_skip_time(pause_timer, params.background); } - update_status_time(pause_, no_tiles); + update_status_time(pause_, no_work); progress.set_update(); } new_work_added_ = false; - return no_tiles; + return no_work; } -bool Session::draw(BufferParams &buffer_params, DeviceDrawParams &draw_params) +void Session::draw() { - if (device_use_gl_) - return draw_gpu(buffer_params, draw_params); - else - return draw_cpu(buffer_params, draw_params); + path_trace_->draw(); } -void Session::reset_(BufferParams &buffer_params, int samples) +int2 Session::get_effective_tile_size() const { - if (buffers && buffer_params.modified(tile_manager.params)) { - gpu_draw_ready_ = false; - buffers->reset(buffer_params); - if (display) { - display->reset(buffer_params); - } + /* No support yet for baking with tiles. 
*/ + if (!params.use_auto_tile || scene->bake_manager->get_baking()) { + return make_int2(buffer_params_.width, buffer_params_.height); } - tile_manager.reset(buffer_params, samples); - stealable_tiles_ = 0; - tile_stealing_state_ = NOT_STEALING; - progress.reset_sample(); + /* TODO(sergey): Take available memory into account, and if there is enough memory do not tile + * and prefer optimal performance. */ + + return make_int2(params.tile_size, params.tile_size); +} + +void Session::do_delayed_reset() +{ + if (!delayed_reset_.do_reset) { + return; + } + delayed_reset_.do_reset = false; + + params = delayed_reset_.session_params; + buffer_params_ = delayed_reset_.buffer_params; + + /* Store parameters used for buffers access outside of scene graph. */ + buffer_params_.samples = params.samples; + buffer_params_.exposure = scene->film->get_exposure(); + buffer_params_.use_approximate_shadow_catcher = + scene->film->get_use_approximate_shadow_catcher(); + buffer_params_.use_transparent_background = scene->background->get_transparent(); - bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX; - progress.set_total_pixel_samples(show_progress ? tile_manager.state.total_pixel_samples : 0); + /* Tile and work scheduling. */ + tile_manager_.reset_scheduling(buffer_params_, get_effective_tile_size()); + render_scheduler_.reset(buffer_params_, params.samples); - if (!params.background) + /* Passes. */ + /* When multiple tiles are used SAMPLE_COUNT pass is used to keep track of possible partial + * tile results. It is safe to use generic update function here which checks for changes since + * changes in tile settings re-creates session, which ensures film is fully updated on tile + * changes. */ + scene->film->update_passes(scene, tile_manager_.has_multiple_tiles()); + + /* Update for new state of scene and passes. */ + buffer_params_.update_passes(scene->passes); + tile_manager_.update(buffer_params_, scene); + + /* Progress. 
*/ + progress.reset_sample(); + progress.set_total_pixel_samples(buffer_params_.width * buffer_params_.height * params.samples); + + if (!params.background) { progress.set_start_time(); + } progress.set_render_start_time(); } -void Session::reset(BufferParams &buffer_params, int samples) +void Session::reset(const SessionParams &session_params, const BufferParams &buffer_params) { - if (device_use_gl_) - reset_gpu(buffer_params, samples); - else - reset_cpu(buffer_params, samples); + { + thread_scoped_lock reset_lock(delayed_reset_.mutex); + thread_scoped_lock pause_lock(pause_mutex_); + + delayed_reset_.do_reset = true; + delayed_reset_.session_params = session_params; + delayed_reset_.buffer_params = buffer_params; + + path_trace_->cancel(); + } + + pause_cond_.notify_all(); } void Session::set_samples(int samples) @@ -915,7 +461,22 @@ void Session::set_samples(int samples) } params.samples = samples; - tile_manager.set_samples(samples); + + { + thread_scoped_lock pause_lock(pause_mutex_); + new_work_added_ = true; + } + + pause_cond_.notify_all(); +} + +void Session::set_time_limit(double time_limit) +{ + if (time_limit == params.time_limit) { + return; + } + + params.time_limit = time_limit; { thread_scoped_lock pause_lock(pause_mutex_); @@ -948,38 +509,9 @@ void Session::set_pause(bool pause) } } -void Session::set_denoising(const DenoiseParams &denoising) +void Session::set_gpu_display(unique_ptr<GPUDisplay> gpu_display) { - bool need_denoise = denoising.need_denoising_task(); - - /* Lock buffers so no denoising operation is triggered while the settings are changed here. */ - thread_scoped_lock buffers_lock(buffers_mutex_); - params.denoising = denoising; - - if (!(params.device.denoisers & denoising.type)) { - if (need_denoise) { - progress.set_error("Denoiser type not supported by compute device"); - } - - params.denoising.use = false; - need_denoise = false; - } - - // TODO(pmours): Query the required overlap value for denoising from the device? 
- tile_manager.slice_overlap = need_denoise && !params.background ? 64 : 0; - - /* Schedule per tile denoising for final renders if we are either denoising or - * need prefiltered passes for the native denoiser. */ - tile_manager.schedule_denoising = need_denoise && !buffers; -} - -void Session::set_denoising_start_sample(int sample) -{ - if (sample != params.denoising.start_sample) { - params.denoising.start_sample = sample; - - pause_cond_.notify_all(); - } + path_trace_->set_gpu_display(move(gpu_display)); } void Session::wait() @@ -989,81 +521,67 @@ void Session::wait() delete session_thread_; } - session_thread_ = NULL; + session_thread_ = nullptr; } -bool Session::update_scene() +bool Session::update_scene(int width, int height) { - /* update camera if dimensions changed for progressive render. the camera + /* Update camera if dimensions changed for progressive render. the camera * knows nothing about progressive or cropped rendering, it just gets the - * image dimensions passed in */ + * image dimensions passed in. */ Camera *cam = scene->camera; - int width = tile_manager.state.buffer.full_width; - int height = tile_manager.state.buffer.full_height; - int resolution = tile_manager.state.resolution_divider; - - cam->set_screen_size_and_resolution(width, height, resolution); + cam->set_screen_size(width, height); - /* number of samples is needed by multi jittered - * sampling pattern and by baking */ - Integrator *integrator = scene->integrator; - BakeManager *bake_manager = scene->bake_manager; + /* First detect which kernel features are used and allocate working memory. + * This helps estimate how may device memory is available for the scene and + * how much we need to allocate on the host instead. 
*/ + scene->update_kernel_features(); - if (integrator->get_sampling_pattern() != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) { - integrator->set_aa_samples(tile_manager.num_samples); - } + path_trace_->load_kernels(); + path_trace_->alloc_work_memory(); - bool kernel_switch_needed = false; - if (scene->update(progress, kernel_switch_needed)) { - if (kernel_switch_needed) { - reset(tile_manager.params, params.samples); - } + if (scene->update(progress)) { return true; } + return false; } +static string status_append(const string &status, const string &suffix) +{ + string prefix = status; + if (!prefix.empty()) { + prefix += ", "; + } + return prefix + suffix; +} + void Session::update_status_time(bool show_pause, bool show_done) { - int progressive_sample = tile_manager.state.sample; - int num_samples = tile_manager.get_num_effective_samples(); + string status, substatus; - int tile = progress.get_rendered_tiles(); - int num_tiles = tile_manager.state.num_tiles; + const int current_tile = progress.get_rendered_tiles(); + const int num_tiles = tile_manager_.get_num_tiles(); - /* update status */ - string status, substatus; + const int current_sample = progress.get_current_sample(); + const int num_samples = render_scheduler_.get_num_samples(); - if (!params.progressive) { - const bool is_cpu = params.device.type == DEVICE_CPU; - const bool rendering_finished = (tile == num_tiles); - const bool is_last_tile = (tile + 1) == num_tiles; - - substatus = string_printf("Rendered %d/%d Tiles", tile, num_tiles); - - if (!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) { - /* Some devices automatically support showing the sample number: - * - CUDADevice - * - OpenCLDevice when using the megakernel (the split kernel renders multiple - * samples at the same time, so the current sample isn't really defined) - * - CPUDevice when using one thread - * For these devices, the current sample is always shown. 
- * - * The other option is when the last tile is currently being rendered by the CPU. - */ - substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples); - } - if (params.denoising.use && params.denoising.type != DENOISER_OPENIMAGEDENOISE) { - substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles()); - } - else if (params.denoising.store_passes && params.denoising.type == DENOISER_NLM) { - substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles()); - } + /* TIle. */ + if (tile_manager_.has_multiple_tiles()) { + substatus = status_append(substatus, + string_printf("Rendered %d/%d Tiles", current_tile, num_tiles)); } - else if (tile_manager.num_samples == Integrator::MAX_SAMPLES) - substatus = string_printf("Path Tracing Sample %d", progressive_sample + 1); - else - substatus = string_printf("Path Tracing Sample %d/%d", progressive_sample + 1, num_samples); + + /* Sample. */ + if (num_samples == Integrator::MAX_SAMPLES) { + substatus = status_append(substatus, string_printf("Sample %d", current_sample)); + } + else { + substatus = status_append(substatus, + string_printf("Sample %d/%d", current_sample, num_samples)); + } + + /* TODO(sergey): Denoising status from the path trace. */ if (show_pause) { status = "Rendering Paused"; @@ -1080,210 +598,122 @@ void Session::update_status_time(bool show_pause, bool show_done) progress.set_status(status, substatus); } -bool Session::render_need_denoise(bool &delayed) +void Session::device_free() { - delayed = false; - - /* Not supported yet for baking. */ - if (read_bake_tile_cb) { - return false; - } - - /* Denoising enabled? */ - if (!params.denoising.need_denoising_task()) { - return false; - } - - if (params.background) { - /* Background render, only denoise when rendering the last sample. */ - return tile_manager.done(); - } - - /* Viewport render. */ - - /* It can happen that denoising was already enabled, but the scene still needs an update. 
*/ - if (scene->film->is_modified() || !scene->film->get_denoising_data_offset()) { - return false; - } + scene->device_free(); + path_trace_->device_free(); +} - /* Immediately denoise when we reach the start sample or last sample. */ - const int num_samples_finished = tile_manager.state.sample + 1; - if (num_samples_finished == params.denoising.start_sample || - num_samples_finished == params.samples) { - return true; +void Session::collect_statistics(RenderStats *render_stats) +{ + scene->collect_statistics(render_stats); + if (params.use_profiling && (params.device.type == DEVICE_CPU)) { + render_stats->collect_profiling(scene, profiler); } +} - /* Do not denoise until the sample at which denoising should start is reached. */ - if (num_samples_finished < params.denoising.start_sample) { - return false; - } +/* -------------------------------------------------------------------- + * Tile and tile pixels access. + */ - /* Avoid excessive denoising in viewport after reaching a certain amount of samples. */ - delayed = (tile_manager.state.sample >= 20 && - (time_dt() - last_display_time_) < params.progressive_update_timeout); - return !delayed; +bool Session::has_multiple_render_tiles() const +{ + return tile_manager_.has_multiple_tiles(); } -void Session::render(bool need_denoise) +int2 Session::get_render_tile_size() const { - if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) { - /* Clear buffers. */ - buffers->zero(); - } - - if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) { - return; /* Avoid empty launches. */ - } + return path_trace_->get_render_tile_size(); +} - /* Add path trace task. 
*/ - DeviceTask task(DeviceTask::RENDER); - - task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3); - task.release_tile = function_bind(&Session::release_tile, this, _1, need_denoise); - task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); - task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); - task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); - task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); - task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); - task.get_tile_stolen = function_bind(&Session::get_tile_stolen, this); - task.need_finish_queue = params.progressive_refine; - task.integrator_branched = scene->integrator->get_method() == Integrator::BRANCHED_PATH; - - task.adaptive_sampling.use = (scene->integrator->get_sampling_pattern() == - SAMPLING_PATTERN_PMJ) && - scene->dscene.data.film.pass_adaptive_aux_buffer; - task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples; - task.adaptive_sampling.adaptive_step = scene->dscene.data.integrator.adaptive_step; - - /* Acquire render tiles by default. */ - task.tile_types = RenderTile::PATH_TRACE; - - if (need_denoise) { - task.denoising = params.denoising; - - task.pass_stride = scene->film->get_pass_stride(); - task.target_pass_stride = task.pass_stride; - task.pass_denoising_data = scene->film->get_denoising_data_offset(); - task.pass_denoising_clean = scene->film->get_denoising_clean_offset(); - - task.denoising_from_render = true; - - if (tile_manager.schedule_denoising) { - /* Acquire denoising tiles during rendering. */ - task.tile_types |= RenderTile::DENOISE; - } - else { - assert(buffers); - - /* Schedule rendering and wait for it to finish. */ - device->task_add(task); - device->task_wait(); - - /* Then run denoising on the whole image at once. 
*/ - task.type = DeviceTask::DENOISE_BUFFER; - task.x = tile_manager.state.buffer.full_x; - task.y = tile_manager.state.buffer.full_y; - task.w = tile_manager.state.buffer.width; - task.h = tile_manager.state.buffer.height; - task.buffer = buffers->buffer.device_pointer; - task.sample = tile_manager.state.sample; - task.num_samples = tile_manager.state.num_samples; - tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); - task.buffers = buffers; - } - } +int2 Session::get_render_tile_offset() const +{ + return path_trace_->get_render_tile_offset(); +} - device->task_add(task); +string_view Session::get_render_tile_layer() const +{ + const BufferParams &buffer_params = path_trace_->get_render_tile_params(); + return buffer_params.layer; } -void Session::copy_to_display_buffer(int sample) +string_view Session::get_render_tile_view() const { - /* add film conversion task */ - DeviceTask task(DeviceTask::FILM_CONVERT); - - task.x = tile_manager.state.buffer.full_x; - task.y = tile_manager.state.buffer.full_y; - task.w = tile_manager.state.buffer.width; - task.h = tile_manager.state.buffer.height; - task.rgba_byte = display->rgba_byte.device_pointer; - task.rgba_half = display->rgba_half.device_pointer; - task.buffer = buffers->buffer.device_pointer; - task.sample = sample; - tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); - - if (task.w > 0 && task.h > 0) { - device->task_add(task); - device->task_wait(); - - /* set display to new size */ - display->draw_set(task.w, task.h); - - last_display_time_ = time_dt(); - } + const BufferParams &buffer_params = path_trace_->get_render_tile_params(); + return buffer_params.view; +} - display_outdated_ = false; +bool Session::copy_render_tile_from_device() +{ + return path_trace_->copy_render_tile_from_device(); } -bool Session::update_progressive_refine(bool cancel) +bool Session::get_render_tile_pixels(const string &pass_name, int num_components, float *pixels) { - int sample = 
tile_manager.state.sample + 1; - bool write = sample == tile_manager.num_samples || cancel; + /* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification + * is happening while this function runs. */ - double current_time = time_dt(); + const BufferParams &buffer_params = path_trace_->get_render_tile_params(); - if (current_time - last_update_time_ < params.progressive_update_timeout) { - /* If last sample was processed, we need to write buffers anyway. */ - if (!write && sample != 1) - return false; + const BufferPass *pass = buffer_params.find_pass(pass_name); + if (pass == nullptr) { + return false; } - if (params.progressive_refine) { - foreach (Tile &tile, tile_manager.state.tiles) { - if (!tile.buffers) { - continue; - } - - RenderTile rtile; - rtile.x = tile_manager.state.buffer.full_x + tile.x; - rtile.y = tile_manager.state.buffer.full_y + tile.y; - rtile.w = tile.w; - rtile.h = tile.h; - rtile.sample = sample; - rtile.buffers = tile.buffers; - - if (write) { - if (write_render_tile_cb) - write_render_tile_cb(rtile); - } - else { - if (update_render_tile_cb) - update_render_tile_cb(rtile, true); - } + const bool has_denoised_result = path_trace_->has_denoised_result(); + if (pass->mode == PassMode::DENOISED && !has_denoised_result) { + pass = buffer_params.find_pass(pass->type); + if (pass == nullptr) { + /* Happens when denoised result pass is requested but is never written by the kernel. 
*/ + return false; } } - last_update_time_ = current_time; + pass = buffer_params.get_actual_display_pass(pass); + + const float exposure = buffer_params.exposure; + const int num_samples = path_trace_->get_num_render_tile_samples(); - return write; + PassAccessor::PassAccessInfo pass_access_info(*pass); + pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher; + pass_access_info.use_approximate_shadow_catcher_background = + pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background; + + const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples); + const PassAccessor::Destination destination(pixels, num_components); + + return path_trace_->get_render_tile_pixels(pass_accessor, destination); } -void Session::device_free() +bool Session::set_render_tile_pixels(const string &pass_name, + int num_components, + const float *pixels) { - scene->device_free(); + /* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification + * is happening while this function runs. */ + + const BufferPass *pass = buffer_params_.find_pass(pass_name); + if (!pass) { + return false; + } + + const float exposure = scene->film->get_exposure(); + const int num_samples = render_scheduler_.get_num_rendered_samples(); - tile_manager.device_free(); + const PassAccessor::PassAccessInfo pass_access_info(*pass); + PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples); + PassAccessor::Source source(pixels, num_components); - /* used from background render only, so no need to - * re-create render/display buffers here - */ + return path_trace_->set_render_tile_pixels(pass_accessor, source); } -void Session::collect_statistics(RenderStats *render_stats) +/* -------------------------------------------------------------------- + * Full-frame on-disk storage. 
+ */ + +void Session::process_full_buffer_from_disk(string_view filename) { - scene->collect_statistics(render_stats); - if (params.use_profiling && (params.device.type == DEVICE_CPU)) { - render_stats->collect_profiling(scene, profiler); - } + path_trace_->process_full_buffer_from_disk(filename); } CCL_NAMESPACE_END diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 05025c10f9c..5623604bfe8 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -18,6 +18,7 @@ #define __SESSION_H__ #include "device/device.h" +#include "integrator/render_scheduler.h" #include "render/buffers.h" #include "render/shader.h" #include "render/stats.h" @@ -26,6 +27,7 @@ #include "util/util_progress.h" #include "util/util_stats.h" #include "util/util_thread.h" +#include "util/util_unique_ptr.h" #include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -33,41 +35,35 @@ CCL_NAMESPACE_BEGIN class BufferParams; class Device; class DeviceScene; -class DeviceRequestedFeatures; -class DisplayBuffer; +class PathTrace; class Progress; +class GPUDisplay; class RenderBuffers; class Scene; +class SceneParams; /* Session Parameters */ class SessionParams { public: DeviceInfo device; + + bool headless; bool background; - bool progressive_refine; - bool progressive; bool experimental; int samples; - int2 tile_size; - TileOrder tile_order; - int start_resolution; - int denoising_start_sample; int pixel_size; int threads; - bool adaptive_sampling; - - bool use_profiling; - bool display_buffer_linear; + /* Limit in seconds for how long path tracing is allowed to happen. + * Zero means no limit is applied. 
*/ + double time_limit; - DenoiseParams denoising; + bool use_profiling; - double cancel_timeout; - double reset_timeout; - double text_timeout; - double progressive_update_timeout; + bool use_auto_tile; + int tile_size; ShadingSystem shadingsystem; @@ -75,50 +71,32 @@ class SessionParams { SessionParams() { + headless = false; background = false; - progressive_refine = false; - progressive = false; experimental = false; samples = 1024; - tile_size = make_int2(64, 64); - start_resolution = INT_MAX; - denoising_start_sample = 0; pixel_size = 1; threads = 0; - adaptive_sampling = false; + time_limit = 0.0; use_profiling = false; - display_buffer_linear = false; - - cancel_timeout = 0.1; - reset_timeout = 0.1; - text_timeout = 1.0; - progressive_update_timeout = 1.0; + use_auto_tile = true; + tile_size = 2048; shadingsystem = SHADINGSYSTEM_SVM; - tile_order = TILE_CENTER; } - bool modified(const SessionParams ¶ms) + bool modified(const SessionParams ¶ms) const { /* Modified means we have to recreate the session, any parameter changes * that can be handled by an existing Session are omitted. 
*/ - return !(device == params.device && background == params.background && - progressive_refine == params.progressive_refine && - progressive == params.progressive && experimental == params.experimental && - tile_size == params.tile_size && start_resolution == params.start_resolution && + return !(device == params.device && headless == params.headless && + background == params.background && experimental == params.experimental && pixel_size == params.pixel_size && threads == params.threads && - adaptive_sampling == params.adaptive_sampling && - use_profiling == params.use_profiling && - display_buffer_linear == params.display_buffer_linear && - cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout && - text_timeout == params.text_timeout && - progressive_update_timeout == params.progressive_update_timeout && - tile_order == params.tile_order && shadingsystem == params.shadingsystem && - denoising.type == params.denoising.type && - (denoising.use == params.denoising.use || (device.denoisers & denoising.type))); + use_profiling == params.use_profiling && shadingsystem == params.shadingsystem && + use_auto_tile == params.use_auto_tile && tile_size == params.tile_size); } }; @@ -131,34 +109,41 @@ class Session { public: Device *device; Scene *scene; - RenderBuffers *buffers; - DisplayBuffer *display; Progress progress; SessionParams params; - TileManager tile_manager; Stats stats; Profiler profiler; - function<void(RenderTile &)> write_render_tile_cb; - function<void(RenderTile &, bool)> update_render_tile_cb; - function<void(RenderTile &)> read_bake_tile_cb; + function<void(void)> write_render_tile_cb; + function<void(void)> update_render_tile_cb; + function<void(void)> read_render_tile_cb; + + /* Callback is invoked by tile manager whenever on-dist tiles storage file is closed after + * writing. 
Allows an engine integration to keep track of those files without worry about + * transferring the information when it needs to re-create session during rendering. */ + function<void(string_view)> full_buffer_written_cb; - explicit Session(const SessionParams ¶ms); + explicit Session(const SessionParams ¶ms, const SceneParams &scene_params); ~Session(); void start(); - void cancel(); - bool draw(BufferParams ¶ms, DeviceDrawParams &draw_params); + + /* When quick cancel is requested path tracing is cancels as soon as possible, without waiting + * for the buffer to be uniformly sampled. */ + void cancel(bool quick = false); + + void draw(); void wait(); bool ready_to_reset(); - void reset(BufferParams ¶ms, int samples); + void reset(const SessionParams &session_params, const BufferParams &buffer_params); + void set_pause(bool pause); + void set_samples(int samples); - void set_denoising(const DenoiseParams &denoising); - void set_denoising_start_sample(int sample); + void set_time_limit(double time_limit); - bool update_scene(); + void set_gpu_display(unique_ptr<GPUDisplay> gpu_display); void device_free(); @@ -168,83 +153,95 @@ class Session { void collect_statistics(RenderStats *stats); - protected: - struct DelayedReset { - thread_mutex mutex; - bool do_reset; - BufferParams params; - int samples; - } delayed_reset_; + /* -------------------------------------------------------------------- + * Tile and tile pixels access. + */ - void run(); + bool has_multiple_render_tiles() const; - bool run_update_for_next_iteration(); - bool run_wait_for_work(bool no_tiles); + /* Get size and offset (relative to the buffer's full x/y) of the currently rendering tile. 
*/ + int2 get_render_tile_size() const; + int2 get_render_tile_offset() const; - void update_status_time(bool show_pause = false, bool show_done = false); + string_view get_render_tile_layer() const; + string_view get_render_tile_view() const; - void render(bool use_denoise); - void copy_to_display_buffer(int sample); + bool copy_render_tile_from_device(); - void reset_(BufferParams ¶ms, int samples); + bool get_render_tile_pixels(const string &pass_name, int num_components, float *pixels); + bool set_render_tile_pixels(const string &pass_name, int num_components, const float *pixels); - void run_cpu(); - bool draw_cpu(BufferParams ¶ms, DeviceDrawParams &draw_params); - void reset_cpu(BufferParams ¶ms, int samples); + /* -------------------------------------------------------------------- + * Full-frame on-disk storage. + */ - void run_gpu(); - bool draw_gpu(BufferParams ¶ms, DeviceDrawParams &draw_params); - void reset_gpu(BufferParams ¶ms, int samples); + /* Read given full-frame file from disk, perform needed processing and write it to the software + * via the write callback. */ + void process_full_buffer_from_disk(string_view filename); - bool render_need_denoise(bool &delayed); + protected: + struct DelayedReset { + thread_mutex mutex; + bool do_reset; + SessionParams session_params; + BufferParams buffer_params; + } delayed_reset_; - bool steal_tile(RenderTile &tile, Device *tile_device, thread_scoped_lock &tile_lock); - bool get_tile_stolen(); - bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types); - void update_tile_sample(RenderTile &tile); - void release_tile(RenderTile &tile, const bool need_denoise); + void run(); - void map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device); - void unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device); + /* Update for the new iteration of the main loop in run implementation (run_cpu and run_gpu). 
+ * + * Will take care of the following things: + * - Delayed reset + * - Scene update + * - Tile manager advance + * - Render scheduler work request + * + * The updates are done in a proper order with proper locking around them, which guarantees + * that the device side of scene and render buffers are always in a consistent state. + * + * Returns render work which is to be rendered next. */ + RenderWork run_update_for_next_iteration(); + + /* Wait for rendering to be unpaused, or for new tiles for render to arrive. + * Returns true if new main render loop iteration is required after this function call. + * + * The `render_work` is the work which was scheduled by the render scheduler right before + * checking the pause. */ + bool run_wait_for_work(const RenderWork &render_work); + + void run_main_render_loop(); + + bool update_scene(int width, int height); - bool device_use_gl_; + void update_status_time(bool show_pause = false, bool show_done = false); - thread *session_thread_; + void do_delayed_reset(); - volatile bool display_outdated_; + int2 get_effective_tile_size() const; - volatile bool gpu_draw_ready_; - volatile bool gpu_need_display_buffer_update_; - thread_condition_variable gpu_need_display_buffer_update_cond_; + thread *session_thread_; - bool pause_; - bool cancel_; - bool new_work_added_; + bool pause_ = false; + bool cancel_ = false; + bool new_work_added_ = false; thread_condition_variable pause_cond_; thread_mutex pause_mutex_; thread_mutex tile_mutex_; thread_mutex buffers_mutex_; - thread_mutex display_mutex_; - thread_condition_variable denoising_cond_; - thread_condition_variable tile_steal_cond_; - - double reset_time_; - double last_update_time_; - double last_display_time_; - - RenderTile stolen_tile_; - typedef enum { - NOT_STEALING, /* There currently is no tile stealing in progress. */ - WAITING_FOR_TILE, /* A device is waiting for another device to release a tile. */ - RELEASING_TILE, /* A device has releasing a stealable tile. 
*/ - GOT_TILE /* A device has released a stealable tile, which is now stored in stolen_tile. */ - } TileStealingState; - std::atomic<TileStealingState> tile_stealing_state_; - int stealable_tiles_; - - /* progressive refine */ - bool update_progressive_refine(bool cancel); + + TileManager tile_manager_; + BufferParams buffer_params_; + + /* Render scheduler is used to get work to be rendered with the current big tile. */ + RenderScheduler render_scheduler_; + + /* Path tracer object. + * + * Is a single full-frame path tracer for interactive viewport rendering. + * A path tracer for the current big-tile for an offline rendering. */ + unique_ptr<PathTrace> path_trace_; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 59b60904746..f6b23606e58 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -203,6 +203,7 @@ Shader::Shader() : Node(get_node_type()) has_surface = false; has_surface_transparent = false; has_surface_emission = false; + has_surface_raytrace = false; has_surface_bssrdf = false; has_volume = false; has_displacement = false; @@ -485,7 +486,7 @@ void ShaderManager::device_update(Device *device, device_update_specific(device, dscene, scene, progress); } -void ShaderManager::device_update_common(Device *device, +void ShaderManager::device_update_common(Device * /*device*/, DeviceScene *dscene, Scene *scene, Progress & /*progress*/) @@ -508,6 +509,8 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_HAS_EMISSION; if (shader->has_surface_transparent && shader->get_use_transparent_shadow()) flag |= SD_HAS_TRANSPARENT_SHADOW; + if (shader->has_surface_raytrace) + flag |= SD_HAS_RAYTRACE; if (shader->has_volume) { flag |= SD_HAS_VOLUME; has_volumes = true; @@ -528,12 +531,10 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_NEED_VOLUME_ATTRIBUTES; if (shader->has_bssrdf_bump) flag |= SD_HAS_BSSRDF_BUMP; - if 
(device->info.has_volume_decoupled) { - if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_EQUIANGULAR) - flag |= SD_VOLUME_EQUIANGULAR; - if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) - flag |= SD_VOLUME_MIS; - } + if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_EQUIANGULAR) + flag |= SD_VOLUME_EQUIANGULAR; + if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) + flag |= SD_VOLUME_MIS; if (shader->get_volume_interpolation_method() == VOLUME_INTERPOLATION_CUBIC) flag |= SD_VOLUME_CUBIC; if (shader->has_bump) @@ -682,39 +683,35 @@ void ShaderManager::add_default(Scene *scene) } } -void ShaderManager::get_requested_graph_features(ShaderGraph *graph, - DeviceRequestedFeatures *requested_features) +uint ShaderManager::get_graph_kernel_features(ShaderGraph *graph) { + uint kernel_features = 0; + foreach (ShaderNode *node, graph->nodes) { - requested_features->max_nodes_group = max(requested_features->max_nodes_group, - node->get_group()); - requested_features->nodes_features |= node->get_feature(); + kernel_features |= node->get_feature(); if (node->special_type == SHADER_SPECIAL_TYPE_CLOSURE) { BsdfBaseNode *bsdf_node = static_cast<BsdfBaseNode *>(node); if (CLOSURE_IS_VOLUME(bsdf_node->get_closure_type())) { - requested_features->nodes_features |= NODE_FEATURE_VOLUME; + kernel_features |= KERNEL_FEATURE_NODE_VOLUME; } else if (CLOSURE_IS_PRINCIPLED(bsdf_node->get_closure_type())) { - requested_features->use_principled = true; + kernel_features |= KERNEL_FEATURE_PRINCIPLED; } } if (node->has_surface_bssrdf()) { - requested_features->use_subsurface = true; + kernel_features |= KERNEL_FEATURE_SUBSURFACE; } if (node->has_surface_transparent()) { - requested_features->use_transparent = true; - } - if (node->has_raytrace()) { - requested_features->use_shader_raytrace = true; + kernel_features |= KERNEL_FEATURE_TRANSPARENT; } } + + return kernel_features; } -void 
ShaderManager::get_requested_features(Scene *scene, - DeviceRequestedFeatures *requested_features) +uint ShaderManager::get_kernel_features(Scene *scene) { - requested_features->max_nodes_group = NODE_GROUP_LEVEL_0; - requested_features->nodes_features = 0; + uint kernel_features = KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION; for (int i = 0; i < scene->shaders.size(); i++) { Shader *shader = scene->shaders[i]; if (!shader->reference_count()) { @@ -722,21 +719,22 @@ void ShaderManager::get_requested_features(Scene *scene, } /* Gather requested features from all the nodes from the graph nodes. */ - get_requested_graph_features(shader->graph, requested_features); + kernel_features |= get_graph_kernel_features(shader->graph); ShaderNode *output_node = shader->graph->output(); if (output_node->input("Displacement")->link != NULL) { - requested_features->nodes_features |= NODE_FEATURE_BUMP; + kernel_features |= KERNEL_FEATURE_NODE_BUMP; if (shader->get_displacement_method() == DISPLACE_BOTH) { - requested_features->nodes_features |= NODE_FEATURE_BUMP_STATE; - requested_features->max_nodes_group = max(requested_features->max_nodes_group, - NODE_GROUP_LEVEL_1); + kernel_features |= KERNEL_FEATURE_NODE_BUMP_STATE; } } /* On top of volume nodes, also check if we need volume sampling because - * e.g. an Emission node would slip through the NODE_FEATURE_VOLUME check */ - if (shader->has_volume) - requested_features->use_volume |= true; + * e.g. 
an Emission node would slip through the KERNEL_FEATURE_NODE_VOLUME check */ + if (shader->has_volume) { + kernel_features |= KERNEL_FEATURE_VOLUME; + } } + + return kernel_features; } void ShaderManager::free_memory() diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index c65cac351a4..5f9adea3949 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -38,7 +38,6 @@ CCL_NAMESPACE_BEGIN class Device; class DeviceScene; -class DeviceRequestedFeatures; class Mesh; class Progress; class Scene; @@ -117,6 +116,7 @@ class Shader : public Node { bool has_surface; bool has_surface_emission; bool has_surface_transparent; + bool has_surface_raytrace; bool has_volume; bool has_displacement; bool has_surface_bssrdf; @@ -216,7 +216,7 @@ class ShaderManager { static void add_default(Scene *scene); /* Selective nodes compilation. */ - void get_requested_features(Scene *scene, DeviceRequestedFeatures *requested_features); + uint get_kernel_features(Scene *scene); static void free_memory(); @@ -244,8 +244,7 @@ class ShaderManager { size_t beckmann_table_offset; - void get_requested_graph_features(ShaderGraph *graph, - DeviceRequestedFeatures *requested_features); + uint get_graph_kernel_features(ShaderGraph *graph); thread_spin_lock attribute_lock_; diff --git a/intern/cycles/render/stats.cpp b/intern/cycles/render/stats.cpp index 2c6273842e2..73eb7e21ff9 100644 --- a/intern/cycles/render/stats.cpp +++ b/intern/cycles/render/stats.cpp @@ -264,53 +264,34 @@ void RenderStats::collect_profiling(Scene *scene, Profiler &prof) has_profiling = true; kernel = NamedNestedSampleStats("Total render time", prof.get_event(PROFILING_UNKNOWN)); - kernel.add_entry("Ray setup", prof.get_event(PROFILING_RAY_SETUP)); - kernel.add_entry("Result writing", prof.get_event(PROFILING_WRITE_RESULT)); - - NamedNestedSampleStats &integrator = kernel.add_entry("Path integration", - prof.get_event(PROFILING_PATH_INTEGRATE)); - integrator.add_entry("Scene 
intersection", prof.get_event(PROFILING_SCENE_INTERSECT)); - integrator.add_entry("Indirect emission", prof.get_event(PROFILING_INDIRECT_EMISSION)); - integrator.add_entry("Volumes", prof.get_event(PROFILING_VOLUME)); - - NamedNestedSampleStats &shading = integrator.add_entry("Shading", 0); - shading.add_entry("Shader Setup", prof.get_event(PROFILING_SHADER_SETUP)); - shading.add_entry("Shader Eval", prof.get_event(PROFILING_SHADER_EVAL)); - shading.add_entry("Shader Apply", prof.get_event(PROFILING_SHADER_APPLY)); - shading.add_entry("Ambient Occlusion", prof.get_event(PROFILING_AO)); - shading.add_entry("Subsurface", prof.get_event(PROFILING_SUBSURFACE)); - - integrator.add_entry("Connect Light", prof.get_event(PROFILING_CONNECT_LIGHT)); - integrator.add_entry("Surface Bounce", prof.get_event(PROFILING_SURFACE_BOUNCE)); - - NamedNestedSampleStats &intersection = kernel.add_entry("Intersection", 0); - intersection.add_entry("Full Intersection", prof.get_event(PROFILING_INTERSECT)); - intersection.add_entry("Local Intersection", prof.get_event(PROFILING_INTERSECT_LOCAL)); - intersection.add_entry("Shadow All Intersection", - prof.get_event(PROFILING_INTERSECT_SHADOW_ALL)); - intersection.add_entry("Volume Intersection", prof.get_event(PROFILING_INTERSECT_VOLUME)); - intersection.add_entry("Volume All Intersection", - prof.get_event(PROFILING_INTERSECT_VOLUME_ALL)); - - NamedNestedSampleStats &closure = kernel.add_entry("Closures", 0); - closure.add_entry("Surface Closure Evaluation", prof.get_event(PROFILING_CLOSURE_EVAL)); - closure.add_entry("Surface Closure Sampling", prof.get_event(PROFILING_CLOSURE_SAMPLE)); - closure.add_entry("Volume Closure Evaluation", prof.get_event(PROFILING_CLOSURE_VOLUME_EVAL)); - closure.add_entry("Volume Closure Sampling", prof.get_event(PROFILING_CLOSURE_VOLUME_SAMPLE)); - - NamedNestedSampleStats &denoising = kernel.add_entry("Denoising", - prof.get_event(PROFILING_DENOISING)); - denoising.add_entry("Construct Transform", - 
prof.get_event(PROFILING_DENOISING_CONSTRUCT_TRANSFORM)); - denoising.add_entry("Reconstruct", prof.get_event(PROFILING_DENOISING_RECONSTRUCT)); - - NamedNestedSampleStats &prefilter = denoising.add_entry("Prefiltering", 0); - prefilter.add_entry("Divide Shadow", prof.get_event(PROFILING_DENOISING_DIVIDE_SHADOW)); - prefilter.add_entry("Non-Local means", prof.get_event(PROFILING_DENOISING_NON_LOCAL_MEANS)); - prefilter.add_entry("Get Feature", prof.get_event(PROFILING_DENOISING_GET_FEATURE)); - prefilter.add_entry("Detect Outliers", prof.get_event(PROFILING_DENOISING_DETECT_OUTLIERS)); - prefilter.add_entry("Combine Halves", prof.get_event(PROFILING_DENOISING_COMBINE_HALVES)); + kernel.add_entry("Intersect Closest", prof.get_event(PROFILING_INTERSECT_CLOSEST)); + kernel.add_entry("Intersect Shadow", prof.get_event(PROFILING_INTERSECT_SHADOW)); + kernel.add_entry("Intersect Subsurface", prof.get_event(PROFILING_INTERSECT_SUBSURFACE)); + kernel.add_entry("Intersect Volume Stack", prof.get_event(PROFILING_INTERSECT_VOLUME_STACK)); + + NamedNestedSampleStats &surface = kernel.add_entry("Shade Surface", 0); + surface.add_entry("Setup", prof.get_event(PROFILING_SHADE_SURFACE_SETUP)); + surface.add_entry("Shader Evaluation", prof.get_event(PROFILING_SHADE_SURFACE_EVAL)); + surface.add_entry("Render Passes", prof.get_event(PROFILING_SHADE_SURFACE_PASSES)); + surface.add_entry("Direct Light", prof.get_event(PROFILING_SHADE_SURFACE_DIRECT_LIGHT)); + surface.add_entry("Indirect Light", prof.get_event(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT)); + surface.add_entry("Ambient Occlusion", prof.get_event(PROFILING_SHADE_SURFACE_AO)); + + NamedNestedSampleStats &volume = kernel.add_entry("Shade Volume", 0); + volume.add_entry("Setup", prof.get_event(PROFILING_SHADE_VOLUME_SETUP)); + volume.add_entry("Integrate", prof.get_event(PROFILING_SHADE_VOLUME_INTEGRATE)); + volume.add_entry("Direct Light", prof.get_event(PROFILING_SHADE_VOLUME_DIRECT_LIGHT)); + volume.add_entry("Indirect 
Light", prof.get_event(PROFILING_SHADE_VOLUME_INDIRECT_LIGHT)); + + NamedNestedSampleStats &shadow = kernel.add_entry("Shade Shadow", 0); + shadow.add_entry("Setup", prof.get_event(PROFILING_SHADE_SHADOW_SETUP)); + shadow.add_entry("Surface", prof.get_event(PROFILING_SHADE_SHADOW_SURFACE)); + shadow.add_entry("Volume", prof.get_event(PROFILING_SHADE_SHADOW_VOLUME)); + + NamedNestedSampleStats &light = kernel.add_entry("Shade Light", 0); + light.add_entry("Setup", prof.get_event(PROFILING_SHADE_LIGHT_SETUP)); + light.add_entry("Shader Evaluation", prof.get_event(PROFILING_SHADE_LIGHT_EVAL)); shaders.entries.clear(); foreach (Shader *shader, scene->shaders) { diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index dcb3976e15c..2379eb775a0 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -446,6 +446,8 @@ void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet &done) if (current_type == SHADER_TYPE_SURFACE) { if (node->has_spatial_varying()) current_shader->has_surface_spatial_varying = true; + if (node->get_feature() & KERNEL_FEATURE_NODE_RAYTRACE) + current_shader->has_surface_raytrace = true; } else if (current_type == SHADER_TYPE_VOLUME) { if (node->has_spatial_varying()) @@ -492,6 +494,13 @@ void SVMCompiler::generate_svm_nodes(const ShaderNodeSet &nodes, CompilerState * void SVMCompiler::generate_closure_node(ShaderNode *node, CompilerState *state) { + /* Skip generating closure that are not supported or needed for a particular + * type of shader. For example a BSDF in a volume shader. 
*/ + const int node_feature = node->get_feature(); + if ((state->node_feature_mask & node_feature) != node_feature) { + return; + } + /* execute dependencies for closure */ foreach (ShaderInput *in, node->inputs) { if (in->link != NULL) { @@ -555,7 +564,7 @@ void SVMCompiler::find_aov_nodes_and_dependencies(ShaderNodeSet &aov_nodes, foreach (ShaderNode *node, graph->nodes) { if (node->special_type == SHADER_SPECIAL_TYPE_OUTPUT_AOV) { OutputAOVNode *aov_node = static_cast<OutputAOVNode *>(node); - if (aov_node->slot >= 0) { + if (aov_node->offset >= 0) { aov_nodes.insert(aov_node); foreach (ShaderInput *in, node->inputs) { if (in->link != NULL) { @@ -785,17 +794,21 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty case SHADER_TYPE_SURFACE: /* generate surface shader */ generate = true; shader->has_surface = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE; break; case SHADER_TYPE_VOLUME: /* generate volume shader */ generate = true; shader->has_volume = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_VOLUME; break; case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */ generate = true; shader->has_displacement = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_DISPLACEMENT; break; case SHADER_TYPE_BUMP: /* generate bump shader */ generate = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_BUMP; break; default: break; @@ -867,6 +880,7 @@ void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Sum shader->has_surface = false; shader->has_surface_emission = false; shader->has_surface_transparent = false; + shader->has_surface_raytrace = false; shader->has_surface_bssrdf = false; shader->has_bump = has_bump; shader->has_bssrdf_bump = has_bump; @@ -964,6 +978,7 @@ SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph) max_id = max(node->id, max_id); } nodes_done_flag.resize(max_id + 1, false); + node_feature_mask = 0; } CCL_NAMESPACE_END diff --git 
a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h index d23ff3e2a47..0353c393ae4 100644 --- a/intern/cycles/render/svm.h +++ b/intern/cycles/render/svm.h @@ -192,6 +192,9 @@ class SVMCompiler { * all areas to use this flags array. */ vector<bool> nodes_done_flag; + + /* Node features that can be compiled. */ + uint node_feature_mask; }; void stack_clear_temporary(ShaderNode *node); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 375c9fd8e09..28910bffa7b 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -16,601 +16,559 @@ #include "render/tile.h" +#include <atomic> + +#include "graph/node.h" +#include "render/background.h" +#include "render/film.h" +#include "render/integrator.h" +#include "render/scene.h" #include "util/util_algorithm.h" #include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_path.h" +#include "util/util_string.h" +#include "util/util_system.h" #include "util/util_types.h" CCL_NAMESPACE_BEGIN -namespace { +/* -------------------------------------------------------------------- + * Internal functions. + */ -class TileComparator { - public: - TileComparator(TileOrder order_, int2 center_, Tile *tiles_) - : order(order_), center(center_), tiles(tiles_) - { - } +static const char *ATTR_PASSES_COUNT = "cycles.passes.count"; +static const char *ATTR_PASS_SOCKET_PREFIX_FORMAT = "cycles.passes.%d."; +static const char *ATTR_BUFFER_SOCKET_PREFIX = "cycles.buffer."; +static const char *ATTR_DENOISE_SOCKET_PREFIX = "cycles.denoise."; - bool operator()(int a, int b) - { - switch (order) { - case TILE_CENTER: { - float2 dist_a = make_float2(center.x - (tiles[a].x + tiles[a].w / 2), - center.y - (tiles[a].y + tiles[a].h / 2)); - float2 dist_b = make_float2(center.x - (tiles[b].x + tiles[b].w / 2), - center.y - (tiles[b].y + tiles[b].h / 2)); - return dot(dist_a, dist_a) < dot(dist_b, dist_b); - } - case TILE_LEFT_TO_RIGHT: - return (tiles[a].x == tiles[b].x) ? 
(tiles[a].y < tiles[b].y) : (tiles[a].x < tiles[b].x); - case TILE_RIGHT_TO_LEFT: - return (tiles[a].x == tiles[b].x) ? (tiles[a].y < tiles[b].y) : (tiles[a].x > tiles[b].x); - case TILE_TOP_TO_BOTTOM: - return (tiles[a].y == tiles[b].y) ? (tiles[a].x < tiles[b].x) : (tiles[a].y > tiles[b].y); - case TILE_BOTTOM_TO_TOP: - default: - return (tiles[a].y == tiles[b].y) ? (tiles[a].x < tiles[b].x) : (tiles[a].y < tiles[b].y); +/* Global counter of ToleManager object instances. */ +static std::atomic<uint64_t> g_instance_index = 0; + +/* Construct names of EXR channels which will ensure order of all channels to match exact offsets + * in render buffers corresponding to the given passes. + * + * Returns `std` datatypes so that it can be assigned directly to the OIIO's `ImageSpec`. */ +static std::vector<std::string> exr_channel_names_for_passes(const BufferParams &buffer_params) +{ + static const char *component_suffixes[] = {"R", "G", "B", "A"}; + + int pass_index = 0; + int num_channels = 0; + std::vector<std::string> channel_names; + for (const BufferPass &pass : buffer_params.passes) { + if (pass.offset == PASS_UNUSED) { + continue; } - } - protected: - TileOrder order; - int2 center; - Tile *tiles; -}; + const PassInfo pass_info = pass.get_info(); + num_channels += pass_info.num_components; -inline int2 hilbert_index_to_pos(int n, int d) -{ - int2 r, xy = make_int2(0, 0); - for (int s = 1; s < n; s *= 2) { - r.x = (d >> 1) & 1; - r.y = (d ^ r.x) & 1; - if (!r.y) { - if (r.x) { - xy = make_int2(s - 1, s - 1) - xy; - } - swap(xy.x, xy.y); + /* EXR canonically expects first part of channel names to be sorted alphabetically, which is + * not guaranteed to be the case with passes names. Assign a prefix based on the pass index + * with a fixed width to ensure ordering. This makes it possible to dump existing render + * buffers memory to disk and read it back without doing extra mapping. 
*/ + const string prefix = string_printf("%08d", pass_index); + + const string channel_name_prefix = prefix + string(pass.name) + "."; + + for (int i = 0; i < pass_info.num_components; ++i) { + channel_names.push_back(channel_name_prefix + component_suffixes[i]); } - xy += r * make_int2(s, s); - d >>= 2; + + ++pass_index; } - return xy; + + return channel_names; } -enum SpiralDirection { - DIRECTION_UP, - DIRECTION_LEFT, - DIRECTION_DOWN, - DIRECTION_RIGHT, -}; - -} /* namespace */ - -TileManager::TileManager(bool progressive_, - int num_samples_, - int2 tile_size_, - int start_resolution_, - bool preserve_tile_device_, - bool background_, - TileOrder tile_order_, - int num_devices_, - int pixel_size_) +inline string node_socket_attribute_name(const SocketType &socket, const string &attr_name_prefix) { - progressive = progressive_; - tile_size = tile_size_; - tile_order = tile_order_; - start_resolution = start_resolution_; - pixel_size = pixel_size_; - slice_overlap = 0; - num_samples = num_samples_; - num_devices = num_devices_; - preserve_tile_device = preserve_tile_device_; - background = background_; - schedule_denoising = false; - - range_start_sample = 0; - range_num_samples = -1; - - BufferParams buffer_params; - reset(buffer_params, 0); + return attr_name_prefix + string(socket.name); } -TileManager::~TileManager() +template<typename ValidateValueFunc, typename GetValueFunc> +static bool node_socket_generic_to_image_spec_atttributes( + ImageSpec *image_spec, + const Node *node, + const SocketType &socket, + const string &attr_name_prefix, + const ValidateValueFunc &validate_value_func, + const GetValueFunc &get_value_func) { + if (!validate_value_func(node, socket)) { + return false; + } + + image_spec->attribute(node_socket_attribute_name(socket, attr_name_prefix), + get_value_func(node, socket)); + + return true; } -void TileManager::device_free() +static bool node_socket_to_image_spec_atttributes(ImageSpec *image_spec, + const Node *node, + const 
SocketType &socket, + const string &attr_name_prefix) { - if (schedule_denoising || progressive) { - for (int i = 0; i < state.tiles.size(); i++) { - delete state.tiles[i].buffers; - state.tiles[i].buffers = NULL; + const string attr_name = node_socket_attribute_name(socket, attr_name_prefix); + + switch (socket.type) { + case SocketType::ENUM: { + const ustring value = node->get_string(socket); + + /* Validate that the node is consistent with the node type definition. */ + const NodeEnum &enum_values = *socket.enum_values; + if (!enum_values.exists(value)) { + LOG(DFATAL) << "Node enum contains invalid value " << value; + return false; + } + + image_spec->attribute(attr_name, value); + + return true; } - } - state.tiles.clear(); + case SocketType::STRING: + image_spec->attribute(attr_name, node->get_string(socket)); + return true; + + case SocketType::INT: + image_spec->attribute(attr_name, node->get_int(socket)); + return true; + + case SocketType::FLOAT: + image_spec->attribute(attr_name, node->get_float(socket)); + return true; + + case SocketType::BOOLEAN: + image_spec->attribute(attr_name, node->get_bool(socket)); + return true; + + default: + LOG(DFATAL) << "Unhandled socket type " << socket.type << ", should never happen."; + return false; + } } -static int get_divider(int w, int h, int start_resolution) +static bool node_socket_from_image_spec_atttributes(Node *node, + const SocketType &socket, + const ImageSpec &image_spec, + const string &attr_name_prefix) { - int divider = 1; - if (start_resolution != INT_MAX) { - while (w * h > start_resolution * start_resolution) { - w = max(1, w / 2); - h = max(1, h / 2); + const string attr_name = node_socket_attribute_name(socket, attr_name_prefix); + + switch (socket.type) { + case SocketType::ENUM: { + /* TODO(sergey): Avoid construction of `ustring` by using `string_view` in the Node API. 
*/ + const ustring value(image_spec.get_string_attribute(attr_name, "")); + + /* Validate that the node is consistent with the node type definition. */ + const NodeEnum &enum_values = *socket.enum_values; + if (!enum_values.exists(value)) { + LOG(ERROR) << "Invalid enumerator value " << value; + return false; + } - divider <<= 1; + node->set(socket, enum_values[value]); + + return true; } + + case SocketType::STRING: + /* TODO(sergey): Avoid construction of `ustring` by using `string_view` in the Node API. */ + node->set(socket, ustring(image_spec.get_string_attribute(attr_name, ""))); + return true; + + case SocketType::INT: + node->set(socket, image_spec.get_int_attribute(attr_name, 0)); + return true; + + case SocketType::FLOAT: + node->set(socket, image_spec.get_float_attribute(attr_name, 0)); + return true; + + case SocketType::BOOLEAN: + node->set(socket, static_cast<bool>(image_spec.get_int_attribute(attr_name, 0))); + return true; + + default: + LOG(DFATAL) << "Unhandled socket type " << socket.type << ", should never happen."; + return false; } - return divider; } -void TileManager::reset(BufferParams ¶ms_, int num_samples_) +static bool node_to_image_spec_atttributes(ImageSpec *image_spec, + const Node *node, + const string &attr_name_prefix) { - params = params_; - - set_samples(num_samples_); - - state.buffer = BufferParams(); - state.sample = range_start_sample - 1; - state.num_tiles = 0; - state.num_samples = 0; - state.resolution_divider = get_divider(params.width, params.height, start_resolution); - state.render_tiles.clear(); - state.denoising_tiles.clear(); - device_free(); + for (const SocketType &socket : node->type->inputs) { + if (!node_socket_to_image_spec_atttributes(image_spec, node, socket, attr_name_prefix)) { + return false; + } + } + + return true; } -void TileManager::set_samples(int num_samples_) +static bool node_from_image_spec_atttributes(Node *node, + const ImageSpec &image_spec, + const string &attr_name_prefix) { - num_samples = 
num_samples_; + for (const SocketType &socket : node->type->inputs) { + if (!node_socket_from_image_spec_atttributes(node, socket, image_spec, attr_name_prefix)) { + return false; + } + } + + return true; +} - /* No real progress indication is possible when using unlimited samples. */ - if (num_samples == INT_MAX) { - state.total_pixel_samples = 0; +static bool buffer_params_to_image_spec_atttributes(ImageSpec *image_spec, + const BufferParams &buffer_params) +{ + if (!node_to_image_spec_atttributes(image_spec, &buffer_params, ATTR_BUFFER_SOCKET_PREFIX)) { + return false; } - else { - uint64_t pixel_samples = 0; - /* While rendering in the viewport, the initial preview resolution is increased to the native - * resolution before the actual rendering begins. Therefore, additional pixel samples will be - * rendered. */ - int divider = max(get_divider(params.width, params.height, start_resolution) / 2, pixel_size); - while (divider > pixel_size) { - int image_w = max(1, params.width / divider); - int image_h = max(1, params.height / divider); - pixel_samples += image_w * image_h; - divider >>= 1; - } - int image_w = max(1, params.width / divider); - int image_h = max(1, params.height / divider); - state.total_pixel_samples = pixel_samples + - (uint64_t)get_num_effective_samples() * image_w * image_h; - if (schedule_denoising) { - state.total_pixel_samples += params.width * params.height; + /* Passes storage is not covered by the node socket. so "expand" the loop manually. 
*/ + + const int num_passes = buffer_params.passes.size(); + image_spec->attribute(ATTR_PASSES_COUNT, num_passes); + + for (int pass_index = 0; pass_index < num_passes; ++pass_index) { + const string attr_name_prefix = string_printf(ATTR_PASS_SOCKET_PREFIX_FORMAT, pass_index); + + const BufferPass *pass = &buffer_params.passes[pass_index]; + if (!node_to_image_spec_atttributes(image_spec, pass, attr_name_prefix)) { + return false; } } + + return true; } -/* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render - * device. If sliced is true, slice image into as much pieces as how many devices are rendering - * this image. */ -int TileManager::gen_tiles(bool sliced) +static bool buffer_params_from_image_spec_atttributes(BufferParams *buffer_params, + const ImageSpec &image_spec) { - int resolution = state.resolution_divider; - int image_w = max(1, params.width / resolution); - int image_h = max(1, params.height / resolution); - int2 center = make_int2(image_w / 2, image_h / 2); - - int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1; - int slice_num = sliced ? num : 1; - int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); - - device_free(); - state.render_tiles.clear(); - state.denoising_tiles.clear(); - state.render_tiles.resize(num); - state.denoising_tiles.resize(num); - state.tile_stride = tile_w; - vector<list<int>>::iterator tile_list; - tile_list = state.render_tiles.begin(); - - if (tile_order == TILE_HILBERT_SPIRAL) { - assert(!sliced && slice_overlap == 0); - - int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); - state.tiles.resize(tile_w * tile_h); - - /* Size of blocks in tiles, must be a power of 2 */ - const int hilbert_size = (max(tile_size.x, tile_size.y) <= 12) ? 
8 : 4; - - int tiles_per_device = divide_up(tile_w * tile_h, num); - int cur_device = 0, cur_tiles = 0; - - int2 block_size = tile_size * make_int2(hilbert_size, hilbert_size); - /* Number of blocks to fill the image */ - int blocks_x = (block_size.x >= image_w) ? 1 : divide_up(image_w, block_size.x); - int blocks_y = (block_size.y >= image_h) ? 1 : divide_up(image_h, block_size.y); - int n = max(blocks_x, blocks_y) | 0x1; /* Side length of the spiral (must be odd) */ - /* Offset of spiral (to keep it centered) */ - int2 offset = make_int2((image_w - n * block_size.x) / 2, (image_h - n * block_size.y) / 2); - offset = (offset / tile_size) * tile_size; /* Round to tile border. */ - - int2 block = make_int2(0, 0); /* Current block */ - SpiralDirection prev_dir = DIRECTION_UP, dir = DIRECTION_UP; - for (int i = 0;;) { - /* Generate the tiles in the current block. */ - for (int hilbert_index = 0; hilbert_index < hilbert_size * hilbert_size; hilbert_index++) { - int2 tile, hilbert_pos = hilbert_index_to_pos(hilbert_size, hilbert_index); - /* Rotate block according to spiral direction. */ - if (prev_dir == DIRECTION_UP && dir == DIRECTION_UP) { - tile = make_int2(hilbert_pos.y, hilbert_pos.x); - } - else if (dir == DIRECTION_LEFT || prev_dir == DIRECTION_LEFT) { - tile = hilbert_pos; - } - else if (dir == DIRECTION_DOWN) { - tile = make_int2(hilbert_size - 1 - hilbert_pos.y, hilbert_size - 1 - hilbert_pos.x); - } - else { - tile = make_int2(hilbert_size - 1 - hilbert_pos.x, hilbert_size - 1 - hilbert_pos.y); - } - - int2 pos = block * block_size + tile * tile_size + offset; - /* Only add tiles which are in the image (tiles outside of the image can be generated since - * the spiral is always square). 
*/ - if (pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) { - int w = min(tile_size.x, image_w - pos.x); - int h = min(tile_size.y, image_h - pos.y); - int2 ipos = pos / tile_size; - int idx = ipos.y * tile_w + ipos.x; - state.tiles[idx] = Tile(idx, pos.x, pos.y, w, h, cur_device, Tile::RENDER); - tile_list->push_front(idx); - cur_tiles++; - - if (cur_tiles == tiles_per_device) { - tile_list++; - cur_tiles = 0; - cur_device++; - } - } - } + if (!node_from_image_spec_atttributes(buffer_params, image_spec, ATTR_BUFFER_SOCKET_PREFIX)) { + return false; + } - /* Stop as soon as the spiral has reached the center block. */ - if (block.x == (n - 1) / 2 && block.y == (n - 1) / 2) - break; - - /* Advance to next block. */ - prev_dir = dir; - switch (dir) { - case DIRECTION_UP: - block.y++; - if (block.y == (n - i - 1)) { - dir = DIRECTION_LEFT; - } - break; - case DIRECTION_LEFT: - block.x++; - if (block.x == (n - i - 1)) { - dir = DIRECTION_DOWN; - } - break; - case DIRECTION_DOWN: - block.y--; - if (block.y == i) { - dir = DIRECTION_RIGHT; - } - break; - case DIRECTION_RIGHT: - block.x--; - if (block.x == i + 1) { - dir = DIRECTION_UP; - i++; - } - break; - } - } - return tile_w * tile_h; + /* Passes storage is not covered by the node socket. so "expand" the loop manually. */ + + const int num_passes = image_spec.get_int_attribute(ATTR_PASSES_COUNT, 0); + if (num_passes == 0) { + LOG(ERROR) << "Missing passes count attribute."; + return false; } - int idx = 0; - for (int slice = 0; slice < slice_num; slice++) { - int slice_y = (image_h / slice_num) * slice; - int slice_h = (slice == slice_num - 1) ? 
image_h - slice * (image_h / slice_num) : - image_h / slice_num; + for (int pass_index = 0; pass_index < num_passes; ++pass_index) { + const string attr_name_prefix = string_printf(ATTR_PASS_SOCKET_PREFIX_FORMAT, pass_index); - if (slice_overlap != 0) { - int slice_y_offset = max(slice_y - slice_overlap, 0); - slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset; - slice_y = slice_y_offset; - } + BufferPass pass; - int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); - - int tiles_per_device = divide_up(tile_w * tile_h, num); - int cur_device = 0, cur_tiles = 0; - - for (int tile_y = 0; tile_y < tile_h; tile_y++) { - for (int tile_x = 0; tile_x < tile_w; tile_x++, idx++) { - int x = tile_x * tile_size.x; - int y = tile_y * tile_size.y; - int w = (tile_x == tile_w - 1) ? image_w - x : tile_size.x; - int h = (tile_y == tile_h - 1) ? slice_h - y : tile_size.y; - - state.tiles.push_back( - Tile(idx, x, y + slice_y, w, h, sliced ? slice : cur_device, Tile::RENDER)); - tile_list->push_back(idx); - - if (!sliced) { - cur_tiles++; - - if (cur_tiles == tiles_per_device) { - /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that - * case. */ - if (tile_order != TILE_BOTTOM_TO_TOP) { - tile_list->sort(TileComparator(tile_order, center, &state.tiles[0])); - } - tile_list++; - cur_tiles = 0; - cur_device++; - } - } - } - } - if (sliced) { - tile_list++; + if (!node_from_image_spec_atttributes(&pass, image_spec, attr_name_prefix)) { + return false; } + + buffer_params->passes.emplace_back(std::move(pass)); } - return idx; + buffer_params->update_passes(); + + return true; } -void TileManager::gen_render_tiles() +/* Configure image specification for the given buffer parameters and passes. + * + * Image channels will be strictly ordered to match content of corresponding buffer, and the + * metadata will be set so that the render buffers and passes can be reconstructed from it. 
+ * + * If the tile size different from (0, 0) the image specification will be configured to use the + * given tile size for tiled IO. */ +static bool configure_image_spec_from_buffer(ImageSpec *image_spec, + const BufferParams &buffer_params, + const int2 tile_size = make_int2(0, 0)) { - /* Regenerate just the render tiles for progressive render. */ - foreach (Tile &tile, state.tiles) { - tile.state = Tile::RENDER; - state.render_tiles[tile.device].push_back(tile.index); + const std::vector<std::string> channel_names = exr_channel_names_for_passes(buffer_params); + const int num_channels = channel_names.size(); + + *image_spec = ImageSpec( + buffer_params.width, buffer_params.height, num_channels, TypeDesc::FLOAT); + + image_spec->channelnames = move(channel_names); + + if (!buffer_params_to_image_spec_atttributes(image_spec, buffer_params)) { + return false; + } + + if (tile_size.x != 0 || tile_size.y != 0) { + DCHECK_GT(tile_size.x, 0); + DCHECK_GT(tile_size.y, 0); + + image_spec->tile_width = tile_size.x; + image_spec->tile_height = tile_size.y; } + + return true; } -void TileManager::set_tiles() +/* -------------------------------------------------------------------- + * Tile Manager. + */ + +TileManager::TileManager() { - int resolution = state.resolution_divider; - int image_w = max(1, params.width / resolution); - int image_h = max(1, params.height / resolution); + /* Use process ID to separate different processes. + * To ensure uniqueness from within a process use combination of object address and instance + * index. This solves problem of possible object re-allocation at the same time, and solves + * possible conflict when the counter overflows while there are still active instances of the + * class. 
*/ + const int tile_manager_id = g_instance_index.fetch_add(1, std::memory_order_relaxed); + tile_file_unique_part_ = to_string(system_self_process_id()) + "-" + + to_string(reinterpret_cast<uintptr_t>(this)) + "-" + + to_string(tile_manager_id); +} - state.num_tiles = gen_tiles(!background); +TileManager::~TileManager() +{ +} + +void TileManager::reset_scheduling(const BufferParams ¶ms, int2 tile_size) +{ + VLOG(3) << "Using tile size of " << tile_size; + + close_tile_output(); + + tile_size_ = tile_size; + + tile_state_.num_tiles_x = divide_up(params.width, tile_size_.x); + tile_state_.num_tiles_y = divide_up(params.height, tile_size_.y); + tile_state_.num_tiles = tile_state_.num_tiles_x * tile_state_.num_tiles_y; + + tile_state_.next_tile_index = 0; + + tile_state_.current_tile = Tile(); +} + +void TileManager::update(const BufferParams ¶ms, const Scene *scene) +{ + DCHECK_NE(params.pass_stride, -1); + + buffer_params_ = params; - state.buffer.width = image_w; - state.buffer.height = image_h; + /* TODO(sergey): Proper Error handling, so that if configuration has failed we don't attempt to + * write to a partially configured file. 
*/ + configure_image_spec_from_buffer(&write_state_.image_spec, buffer_params_, tile_size_); - state.buffer.full_x = params.full_x / resolution; - state.buffer.full_y = params.full_y / resolution; - state.buffer.full_width = max(1, params.full_width / resolution); - state.buffer.full_height = max(1, params.full_height / resolution); + const DenoiseParams denoise_params = scene->integrator->get_denoise_params(); + node_to_image_spec_atttributes( + &write_state_.image_spec, &denoise_params, ATTR_DENOISE_SOCKET_PREFIX); } -int TileManager::get_neighbor_index(int index, int neighbor) +bool TileManager::done() { - /* Neighbor indices: - * 0 1 2 - * 3 4 5 - * 6 7 8 - */ - static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1}; - static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1}; - - int resolution = state.resolution_divider; - int image_w = max(1, params.width / resolution); - int image_h = max(1, params.height / resolution); - - int num = min(image_h, num_devices); - int slice_num = !background ? num : 1; - int slice_h = image_h / slice_num; - - int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); - int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); - - /* Tiles in the state tile list are always indexed from left to right, top to bottom. */ - int nx = (index % tile_w) + dx[neighbor]; - int ny = (index / tile_w) + dy[neighbor]; - if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num) - return -1; - - return ny * state.tile_stride + nx; + return tile_state_.next_tile_index == tile_state_.num_tiles; } -/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state - * min_state. 
*/ -bool TileManager::check_neighbor_state(int index, Tile::State min_state) +bool TileManager::next() { - if (index < 0 || state.tiles[index].state < min_state) { + if (done()) { return false; } - for (int neighbor = 0; neighbor < 9; neighbor++) { - int nindex = get_neighbor_index(index, neighbor); - /* Out-of-bounds tiles don't matter. */ - if (nindex >= 0 && state.tiles[nindex].state < min_state) { - return false; - } - } + + tile_state_.current_tile = get_tile_for_index(tile_state_.next_tile_index); + + ++tile_state_.next_tile_index; return true; } -/* Returns whether the tile should be written (and freed if no denoising is used) instead of - * updating. */ -bool TileManager::finish_tile(const int index, const bool need_denoise, bool &delete_tile) +Tile TileManager::get_tile_for_index(int index) const { - delete_tile = false; - - switch (state.tiles[index].state) { - case Tile::RENDER: { - if (!(schedule_denoising && need_denoise)) { - state.tiles[index].state = Tile::DONE; - delete_tile = !progressive; - return true; - } - state.tiles[index].state = Tile::RENDERED; - /* For each neighbor and the tile itself, check whether all of its neighbors have been - * rendered. If yes, it can be denoised. */ - for (int neighbor = 0; neighbor < 9; neighbor++) { - int nindex = get_neighbor_index(index, neighbor); - if (check_neighbor_state(nindex, Tile::RENDERED)) { - state.tiles[nindex].state = Tile::DENOISE; - state.denoising_tiles[state.tiles[nindex].device].push_back(nindex); - } - } - return false; - } - case Tile::DENOISE: { - state.tiles[index].state = Tile::DENOISED; - /* For each neighbor and the tile itself, check whether all of its neighbors have been - * denoised. If yes, it can be freed. */ - for (int neighbor = 0; neighbor < 9; neighbor++) { - int nindex = get_neighbor_index(index, neighbor); - if (check_neighbor_state(nindex, Tile::DENOISED)) { - state.tiles[nindex].state = Tile::DONE; - /* Do not delete finished tiles in progressive mode. 
*/ - if (!progressive) { - /* It can happen that the tile just finished denoising and already can be freed here. - * However, in that case it still has to be written before deleting, so we can't delete - * it yet. */ - if (neighbor == 4) { - delete_tile = true; - } - else { - delete state.tiles[nindex].buffers; - state.tiles[nindex].buffers = NULL; - } - } - } - } - return true; - } - default: - assert(false); - return true; + /* TODO(sergey): Consider using hilbert spiral, or. maybe, even configurable. Not sure this + * brings a lot of value since this is only applicable to BIG tiles. */ + + const int tile_y = index / tile_state_.num_tiles_x; + const int tile_x = index - tile_y * tile_state_.num_tiles_x; + + Tile tile; + + tile.x = tile_x * tile_size_.x; + tile.y = tile_y * tile_size_.y; + tile.width = tile_size_.x; + tile.height = tile_size_.y; + + tile.width = min(tile.width, buffer_params_.width - tile.x); + tile.height = min(tile.height, buffer_params_.height - tile.y); + + return tile; +} + +const Tile &TileManager::get_current_tile() const +{ + return tile_state_.current_tile; +} + +bool TileManager::open_tile_output() +{ + write_state_.filename = path_temp_get("cycles-tile-buffer-" + tile_file_unique_part_ + "-" + + to_string(write_state_.tile_file_index) + ".exr"); + + write_state_.tile_out = ImageOutput::create(write_state_.filename); + if (!write_state_.tile_out) { + LOG(ERROR) << "Error creating image output for " << write_state_.filename; + return false; + } + + if (!write_state_.tile_out->supports("tiles")) { + LOG(ERROR) << "Progress tile file format does not support tiling."; + return false; } + + write_state_.tile_out->open(write_state_.filename, write_state_.image_spec); + write_state_.num_tiles_written = 0; + + VLOG(3) << "Opened tile file " << write_state_.filename; + + return true; } -bool TileManager::next_tile(Tile *&tile, int device, uint tile_types) +bool TileManager::close_tile_output() { - /* Preserve device if requested, unless this is a 
separate denoising device that just wants to - * grab any available tile. */ - const bool preserve_device = preserve_tile_device && device < num_devices; - - if (tile_types & RenderTile::DENOISE) { - int tile_index = -1; - int logical_device = preserve_device ? device : 0; - - while (logical_device < state.denoising_tiles.size()) { - if (state.denoising_tiles[logical_device].empty()) { - if (preserve_device) { - break; - } - else { - logical_device++; - continue; - } - } + if (!write_state_.tile_out) { + return true; + } - tile_index = state.denoising_tiles[logical_device].front(); - state.denoising_tiles[logical_device].pop_front(); - break; - } + const bool success = write_state_.tile_out->close(); + write_state_.tile_out = nullptr; - if (tile_index >= 0) { - tile = &state.tiles[tile_index]; - return true; - } + if (!success) { + LOG(ERROR) << "Error closing tile file."; + return false; } - if (tile_types & RenderTile::PATH_TRACE) { - int tile_index = -1; - int logical_device = preserve_device ? device : 0; - - while (logical_device < state.render_tiles.size()) { - if (state.render_tiles[logical_device].empty()) { - if (preserve_device) { - break; - } - else { - logical_device++; - continue; - } - } + VLOG(3) << "Tile output is closed."; - tile_index = state.render_tiles[logical_device].front(); - state.render_tiles[logical_device].pop_front(); - break; + return true; +} + +bool TileManager::write_tile(const RenderBuffers &tile_buffers) +{ + if (!write_state_.tile_out) { + if (!open_tile_output()) { + return false; } + } - if (tile_index >= 0) { - tile = &state.tiles[tile_index]; - return true; + DCHECK_EQ(tile_buffers.params.pass_stride, buffer_params_.pass_stride); + + const BufferParams &tile_params = tile_buffers.params; + + vector<float> pixel_storage; + const float *pixels = tile_buffers.buffer.data(); + + /* Tiled writing expects pixels to contain data for an entire tile. 
Pad the render buffers with + * empty pixels for tiles which are on the image boundary. */ + if (tile_params.width != tile_size_.x || tile_params.height != tile_size_.y) { + const int64_t pass_stride = tile_params.pass_stride; + const int64_t src_row_stride = tile_params.width * pass_stride; + + const int64_t dst_row_stride = tile_size_.x * pass_stride; + pixel_storage.resize(dst_row_stride * tile_size_.y); + + const float *src = tile_buffers.buffer.data(); + float *dst = pixel_storage.data(); + pixels = dst; + + for (int y = 0; y < tile_params.height; ++y, src += src_row_stride, dst += dst_row_stride) { + memcpy(dst, src, src_row_stride * sizeof(float)); } } - return false; -} + const int tile_x = tile_params.full_x - buffer_params_.full_x; + const int tile_y = tile_params.full_y - buffer_params_.full_y; -bool TileManager::done() -{ - int end_sample = (range_num_samples == -1) ? num_samples : - range_start_sample + range_num_samples; - return (state.resolution_divider == pixel_size) && - (state.sample + state.num_samples >= end_sample); + VLOG(3) << "Write tile at " << tile_x << ", " << tile_y; + if (!write_state_.tile_out->write_tile(tile_x, tile_y, 0, TypeDesc::FLOAT, pixels)) { + LOG(ERROR) << "Error writing tile " << write_state_.tile_out->geterror(); + } + + ++write_state_.num_tiles_written; + + return true; } -bool TileManager::has_tiles() +void TileManager::finish_write_tiles() { - foreach (Tile &tile, state.tiles) { - if (tile.state != Tile::DONE) { - return true; + if (!write_state_.tile_out) { + /* None of the tiles were written hence the file was not created. + * Avoid creation of fully empty file since it is redundant. */ + return; + } + + /* EXR expects all tiles to present in file. So explicitly write missing tiles as all-zero. 
*/ + if (write_state_.num_tiles_written < tile_state_.num_tiles) { + vector<float> pixel_storage(tile_size_.x * tile_size_.y * buffer_params_.pass_stride); + + for (int tile_index = write_state_.num_tiles_written; tile_index < tile_state_.num_tiles; + ++tile_index) { + const Tile tile = get_tile_for_index(tile_index); + + VLOG(3) << "Write dummy tile at " << tile.x << ", " << tile.y; + + write_state_.tile_out->write_tile(tile.x, tile.y, 0, TypeDesc::FLOAT, pixel_storage.data()); } } - return false; + + close_tile_output(); + + if (full_buffer_written_cb) { + full_buffer_written_cb(write_state_.filename); + } + + /* Advance the counter upon explicit finish of the file. + * Makes it possible to re-use tile manager for another scene, and avoids unnecessary increments + * of the tile-file-within-session index. */ + ++write_state_.tile_file_index; + + write_state_.filename = ""; } -bool TileManager::next() +bool TileManager::read_full_buffer_from_disk(const string_view filename, + RenderBuffers *buffers, + DenoiseParams *denoise_params) { - if (done()) + unique_ptr<ImageInput> in(ImageInput::open(filename)); + if (!in) { + LOG(ERROR) << "Error opening tile file " << filename; return false; + } + + const ImageSpec &image_spec = in->spec(); - if (progressive && state.resolution_divider > pixel_size) { - state.sample = 0; - state.resolution_divider = max(state.resolution_divider / 2, pixel_size); - state.num_samples = 1; - set_tiles(); + BufferParams buffer_params; + if (!buffer_params_from_image_spec_atttributes(&buffer_params, image_spec)) { + return false; } - else { - state.sample++; + buffers->reset(buffer_params); - if (progressive) - state.num_samples = 1; - else if (range_num_samples == -1) - state.num_samples = num_samples; - else - state.num_samples = range_num_samples; + if (!node_from_image_spec_atttributes(denoise_params, image_spec, ATTR_DENOISE_SOCKET_PREFIX)) { + return false; + } - state.resolution_divider = pixel_size; + if 
(!in->read_image(TypeDesc::FLOAT, buffers->buffer.data())) { + LOG(ERROR) << "Error reading pixels from the tile file " << in->geterror(); + return false; + } - if (state.sample == range_start_sample) { - set_tiles(); - } - else { - gen_render_tiles(); - } + if (!in->close()) { + LOG(ERROR) << "Error closing tile file " << in->geterror(); + return false; } return true; } -int TileManager::get_num_effective_samples() -{ - return (range_num_samples == -1) ? num_samples : range_num_samples; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 790a56f9445..71b9e966278 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -14,159 +14,151 @@ * limitations under the License. */ -#ifndef __TILE_H__ -#define __TILE_H__ - -#include <limits.h> +#pragma once #include "render/buffers.h" -#include "util/util_list.h" +#include "util/util_image.h" +#include "util/util_string.h" +#include "util/util_unique_ptr.h" CCL_NAMESPACE_BEGIN -/* Tile */ +class DenoiseParams; +class Scene; + +/* -------------------------------------------------------------------- + * Tile. + */ class Tile { public: - int index; - int x, y, w, h; - int device; - /* RENDER: The tile has to be rendered. - * RENDERED: The tile has been rendered, but can't be denoised yet (waiting for neighbors). - * DENOISE: The tile can be denoised now. - * DENOISED: The tile has been denoised, but can't be freed yet (waiting for neighbors). - * DONE: The tile is finished and has been freed. 
*/ - typedef enum { RENDER = 0, RENDERED, DENOISE, DENOISED, DONE } State; - State state; - RenderBuffers *buffers; + int x = 0, y = 0; + int width = 0, height = 0; Tile() { } - - Tile(int index_, int x_, int y_, int w_, int h_, int device_, State state_ = RENDER) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), state(state_), buffers(NULL) - { - } }; -/* Tile order */ - -/* Note: this should match enum_tile_order in properties.py */ -enum TileOrder { - TILE_CENTER = 0, - TILE_RIGHT_TO_LEFT = 1, - TILE_LEFT_TO_RIGHT = 2, - TILE_TOP_TO_BOTTOM = 3, - TILE_BOTTOM_TO_TOP = 4, - TILE_HILBERT_SPIRAL = 5, -}; - -/* Tile Manager */ +/* -------------------------------------------------------------------- + * Tile Manager. + */ class TileManager { public: - BufferParams params; - - struct State { - vector<Tile> tiles; - int tile_stride; - BufferParams buffer; - int sample; - int num_samples; - int resolution_divider; - int num_tiles; - - /* Total samples over all pixels: Generally num_samples*num_pixels, - * but can be higher due to the initial resolution division for previews. */ - uint64_t total_pixel_samples; - - /* These lists contain the indices of the tiles to be rendered/denoised and are used - * when acquiring a new tile for the device. - * Each list in each vector is for one logical device. */ - vector<list<int>> render_tiles; - vector<list<int>> denoising_tiles; - } state; - - int num_samples; - int slice_overlap; - - TileManager(bool progressive, - int num_samples, - int2 tile_size, - int start_resolution, - bool preserve_tile_device, - bool background, - TileOrder tile_order, - int num_devices = 1, - int pixel_size = 1); + /* This callback is invoked by whenever on-dist tiles storage file is closed after writing. 
*/ + function<void(string_view)> full_buffer_written_cb; + + TileManager(); ~TileManager(); - void device_free(); - void reset(BufferParams ¶ms, int num_samples); - void set_samples(int num_samples); + TileManager(const TileManager &other) = delete; + TileManager(TileManager &&other) noexcept = delete; + TileManager &operator=(const TileManager &other) = delete; + TileManager &operator=(TileManager &&other) = delete; + + /* Reset current progress and start new rendering of the full-frame parameters in tiles of the + * given size. + * Only touches scheduling-related state of the tile manager. */ + /* TODO(sergey): Consider using tile area instead of exact size to help dealing with extreme + * cases of stretched renders. */ + void reset_scheduling(const BufferParams ¶ms, int2 tile_size); + + /* Update for the known buffer passes and scene parameters. + * Will store all parameters needed for buffers access outside of the scene graph. */ + void update(const BufferParams ¶ms, const Scene *scene); + + inline int get_num_tiles() const + { + return tile_state_.num_tiles; + } + + inline bool has_multiple_tiles() const + { + return tile_state_.num_tiles > 1; + } + bool next(); - bool next_tile(Tile *&tile, int device, uint tile_types); - bool finish_tile(const int index, const bool need_denoise, bool &delete_tile); bool done(); - bool has_tiles(); - void set_tile_order(TileOrder tile_order_) + const Tile &get_current_tile() const; + + /* Write render buffer of a tile to a file on disk. + * + * Opens file for write when first tile is written. + * + * Returns true on success. */ + bool write_tile(const RenderBuffers &tile_buffers); + + /* Inform the tile manager that no more tiles will be written to disk. + * The file will be considered final, all handles to it will be closed. */ + void finish_write_tiles(); + + /* Check whether any tile has been written to disk. 
*/ + inline bool has_written_tiles() const { - tile_order = tile_order_; + return write_state_.num_tiles_written != 0; } - int get_neighbor_index(int index, int neighbor); - bool check_neighbor_state(int index, Tile::State state); + /* Read full frame render buffer from tiles file on disk. + * + * Returns true on success. */ + bool read_full_buffer_from_disk(string_view filename, + RenderBuffers *buffers, + DenoiseParams *denoise_params); - /* ** Sample range rendering. ** */ + protected: + /* Get tile configuration for its index. + * The tile index must be within [0, state_.tile_state_). */ + Tile get_tile_for_index(int index) const; - /* Start sample in the range. */ - int range_start_sample; + bool open_tile_output(); + bool close_tile_output(); - /* Number to samples in the rendering range. */ - int range_num_samples; + /* Part of an on-disk tile file name which avoids conflicts between several Cycles instances or + * several sessions. */ + string tile_file_unique_part_; - /* Get number of actual samples to render. */ - int get_num_effective_samples(); + int2 tile_size_ = make_int2(0, 0); - /* Schedule tiles for denoising after they've been rendered. */ - bool schedule_denoising; + BufferParams buffer_params_; - protected: - void set_tiles(); - - bool progressive; - int2 tile_size; - TileOrder tile_order; - int start_resolution; - int pixel_size; - int num_devices; - - /* in some cases it is important that the same tile will be returned for the same - * device it was originally generated for (i.e. viewport rendering when buffer is - * allocating once for tile and then always used by it) - * - * in other cases any tile could be handled by any device (i.e. 
final rendering - * without progressive refine) - */ - bool preserve_tile_device; - - /* for background render tiles should exactly match render parts generated from - * blender side, which means image first gets split into tiles and then tiles are - * assigning to render devices - * - * however viewport rendering expects tiles to be allocated in a special way, - * meaning image is being sliced horizontally first and every device handles - * its own slice - */ - bool background; - - /* Generate tile list, return number of tiles. */ - int gen_tiles(bool sliced); - void gen_render_tiles(); + /* Tile scheduling state. */ + struct { + int num_tiles_x = 0; + int num_tiles_y = 0; + int num_tiles = 0; + + int next_tile_index; + + Tile current_tile; + } tile_state_; + + /* State of tiles writing to a file on disk. */ + struct { + /* Index of a tile file used during the current session. + * This number is used for the file name construction, making it possible to render several + * scenes throughout duration of the session and keep all results available for later read + * access. */ + int tile_file_index = 0; + + string filename; + + /* Specification of the tile image which corresponds to the buffer parameters. + * Contains channels configured according to the passes configuration in the path traces. + * + * Output images are saved using this specification, input images are expected to have matched + * specification. */ + ImageSpec image_spec; + + /* Output handle for the tile file. + * + * This file can not be closed until all tiles has been provided, so the handle is stored in + * the state and is created whenever writing is requested. */ + unique_ptr<ImageOutput> tile_out; + + int num_tiles_written = 0; + } write_state_; }; CCL_NAMESPACE_END - -#endif /* __TILE_H__ */ |