git.blender.org/blender.git
author    Jacques Lucke <jacques@blender.org>  2020-04-09 12:59:24 +0300
committer Jacques Lucke <jacques@blender.org>  2020-04-09 12:59:24 +0300
commit    dabd59ba23f877f68aaf73e79f0d58118723d9b7 (patch)
tree      e5a161e9bfe31645e711350fa071ea470673c506 /intern/cycles
parent    43f895a59247ea4058cb3f019cd4dabd9ad9b0e4 (diff)
parent    80255e67e30c4d77be64a5fa8dc68c0a55f39ebc (diff)
Merge branch 'master' into functions
Diffstat (limited to 'intern/cycles')
-rw-r--r--  intern/cycles/device/device.cpp                 |  2
-rw-r--r--  intern/cycles/device/device.h                   | 14
-rw-r--r--  intern/cycles/device/device_cpu.cpp             | 22
-rw-r--r--  intern/cycles/device/device_cuda.cpp            |  1
-rw-r--r--  intern/cycles/device/device_network.cpp         |  1
-rw-r--r--  intern/cycles/device/device_opencl.cpp          |  1
-rw-r--r--  intern/cycles/device/device_task.cpp            |  3
-rw-r--r--  intern/cycles/kernel/kernel_adaptive_sampling.h |  9
-rw-r--r--  intern/cycles/kernel/kernel_passes.h            | 10
-rw-r--r--  intern/cycles/kernel/kernel_types.h             | 15
-rw-r--r--  intern/cycles/render/buffers.cpp                | 89
-rw-r--r--  intern/cycles/render/film.cpp                   | 15
-rw-r--r--  intern/cycles/render/film.h                     |  1
-rw-r--r--  intern/cycles/render/image.cpp                  |  5
-rw-r--r--  intern/cycles/render/image.h                    |  1
-rw-r--r--  intern/cycles/render/integrator.cpp             |  7
-rw-r--r--  intern/cycles/render/mesh_volume.cpp            | 51
-rw-r--r--  intern/cycles/render/session.cpp                |  1
18 files changed, 139 insertions(+), 109 deletions(-)
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index d94d409175b..dfbf57e8b88 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -597,6 +597,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_half_images = true;
info.has_volume_decoupled = true;
+ info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_profiling = true;
@@ -639,6 +640,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
+ info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
}
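
The aggregation above starts from an all-true DeviceInfo and ANDs in each sub-device, so a multi-device only advertises per-sample adaptive stopping when every sub-device supports it. A minimal sketch of that intersection pattern, with illustrative types rather than the real DeviceInfo API:

    // Sketch: intersecting capability flags across sub-devices, mirroring the
    // accumulation loop in Device::get_multi_device(). Illustrative types only.
    #include <vector>

    struct Caps {
      bool has_half_images = true;
      bool has_adaptive_stop_per_sample = true;
    };

    Caps intersect(const std::vector<Caps> &subdevices)
    {
      Caps info; /* starts with every capability enabled */
      for (const Caps &device : subdevices) {
        info.has_half_images &= device.has_half_images;
        info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
      }
      return info; /* e.g. CPU (true) + CUDA (false) yields false */
    }
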
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index a98ac171709..c55dfb3a83b 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -75,12 +75,13 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
- bool display_device; /* GPU is used as a display device. */
- bool has_half_images; /* Support half-float textures. */
- bool has_volume_decoupled; /* Decoupled volume shading. */
- bool has_osl; /* Support Open Shading Language. */
- bool use_split_kernel; /* Use split or mega kernel. */
- bool has_profiling; /* Supports runtime collection of profiling info. */
+ bool display_device; /* GPU is used as a display device. */
+ bool has_half_images; /* Support half-float textures. */
+ bool has_volume_decoupled; /* Decoupled volume shading. */
+ bool has_adaptive_stop_per_sample; /* Per-sample adaptive sampling stopping. */
+ bool has_osl; /* Support Open Shading Language. */
+ bool use_split_kernel; /* Use split or mega kernel. */
+ bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
vector<DeviceInfo> denoising_devices;
@@ -94,6 +95,7 @@ class DeviceInfo {
display_device = false;
has_half_images = false;
has_volume_decoupled = false;
+ has_adaptive_stop_per_sample = false;
has_osl = false;
use_split_kernel = false;
has_profiling = false;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 57e8523e02a..c701c14318f 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -839,7 +839,7 @@ class CPUDevice : public Device {
return true;
}
- bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile)
+ bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample)
{
WorkTile wtile;
wtile.x = tile.x;
@@ -850,11 +850,24 @@ class CPUDevice : public Device {
wtile.stride = tile.stride;
wtile.buffer = (float *)tile.buffer;
+ /* For CPU we do adaptive stopping per sample so we can stop earlier, but
+ * for combined CPU + GPU rendering we match the GPU and do it per tile
+ * after a given number of sample steps. */
+ if (!kernel_data.integrator.adaptive_stop_per_sample) {
+ for (int y = wtile.y; y < wtile.y + wtile.h; ++y) {
+ for (int x = wtile.x; x < wtile.x + wtile.w; ++x) {
+ const int index = wtile.offset + x + y * wtile.stride;
+ float *buffer = wtile.buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+ }
+ }
+
bool any = false;
- for (int y = tile.y; y < tile.y + tile.h; ++y) {
+ for (int y = wtile.y; y < wtile.y + wtile.h; ++y) {
any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
}
- for (int x = tile.x; x < tile.x + tile.w; ++x) {
+ for (int x = wtile.x; x < wtile.x + wtile.w; ++x) {
any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
}
return (!any);
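
The comment above describes the two modes: on the CPU the stopping test runs inside the kernel for every sample, while in combined CPU + GPU rendering the flag is cleared and stopping happens here, per tile, before the X/Y filter passes. The fallback loop uses the usual tile addressing; a small sketch with an illustrative helper (not part of the Cycles API):

    // Each pixel owns pass_stride floats inside the tile buffer.
    float *pixel_buffer(float *buffer, int offset, int stride, int pass_stride, int x, int y)
    {
      const int index = offset + x + y * stride; /* linear pixel index in the tile */
      return buffer + index * pass_stride;       /* first pass value of this pixel */
    }
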
@@ -917,7 +930,7 @@ class CPUDevice : public Device {
tile.sample = sample + 1;
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
- const bool stop = adaptive_sampling_filter(kg, tile);
+ const bool stop = adaptive_sampling_filter(kg, tile, sample);
if (stop) {
const int num_progress_samples = end_sample - sample;
tile.sample = end_sample;
@@ -1327,6 +1340,7 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.id = "CPU";
info.num = 0;
info.has_volume_decoupled = true;
+ info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_half_images = true;
info.has_profiling = true;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 9a703b45c0a..4a53fcd151d 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -129,6 +129,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 2742cbf53aa..0933d51f321 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -311,6 +311,7 @@ void device_network_info(vector<DeviceInfo> &devices)
/* todo: get this info from device */
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
info.has_osl = false;
devices.push_back(info);
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 891b73351a0..8a0b128697f 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -119,6 +119,7 @@ void device_opencl_info(vector<DeviceInfo> &devices)
info.display_device = true;
info.use_split_kernel = true;
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
info.id = id;
/* Check OpenCL extensions */
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index c36b1344c3b..d2447eae867 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -138,8 +138,7 @@ void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
/* Adaptive Sampling */
-AdaptiveSampling::AdaptiveSampling()
- : use(true), adaptive_step(ADAPTIVE_SAMPLE_STEP), min_samples(0)
+AdaptiveSampling::AdaptiveSampling() : use(true), adaptive_step(0), min_samples(0)
{
}
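
With this change the step is no longer hard-coded: it defaults to 0 here and is copied from kernel data by the session (see the session.cpp hunk at the end of this diff). A hypothetical sketch of a step-based filter check — not the actual AdaptiveSampling::need_filter() body, which is outside this diff:

    // Hypothetical sketch only; assumes adaptive_step is a power of two, as
    // the integrator asserts later in this diff.
    bool need_filter_sketch(int sample, int adaptive_step)
    {
      if (adaptive_step <= 0) {
        return false; /* adaptive sampling not configured */
      }
      return (sample & (adaptive_step - 1)) == (adaptive_step - 1);
    }
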
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
index 047fe8c92ec..ee4d1507ef1 100644
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -150,6 +150,7 @@ ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
}
#endif /* __DENOISING_FEATURES__ */
+ /* Cryptomatte. */
if (kernel_data.film.cryptomatte_passes) {
int num_slots = 0;
num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
@@ -162,6 +163,14 @@ ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
id_buffer[slot].y *= sample_multiplier;
}
}
+
+ /* AOVs. */
+ for (int i = 0; i < kernel_data.film.pass_aov_value_num; i++) {
+ *(buffer + kernel_data.film.pass_aov_value + i) *= sample_multiplier;
+ }
+ for (int i = 0; i < kernel_data.film.pass_aov_color_num; i++) {
+ *((ccl_global float4 *)(buffer + kernel_data.film.pass_aov_color) + i) *= sample_multiplier;
+ }
}
/* This is a simple box filter in two passes.
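
The new loops apply the same sample multiplier to the AOV passes as to the other passes: value AOVs are single floats starting at pass_aov_value, colour AOVs are float4s starting at pass_aov_color. The same adjustment, re-expressed with plain types as a sketch:

    // One float per value AOV, four floats per colour AOV, all scaled by the
    // sample multiplier computed for the pixel.
    void scale_aovs(float *buffer,
                    int aov_value_ofs, int aov_value_num,
                    int aov_color_ofs, int aov_color_num,
                    float sample_multiplier)
    {
      for (int i = 0; i < aov_value_num; i++) {
        buffer[aov_value_ofs + i] *= sample_multiplier;
      }
      for (int i = 0; i < aov_color_num; i++) {
        for (int c = 0; c < 4; c++) {
          buffer[aov_color_ofs + 4 * i + c] *= sample_multiplier;
        }
      }
    }
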
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 98136bc7047..7437e540a1f 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -403,9 +403,13 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
}
#ifdef __KERNEL_CPU__
- if (sample > kernel_data.integrator.adaptive_min_samples &&
- (sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (ADAPTIVE_SAMPLE_STEP - 1)) {
- kernel_do_adaptive_stopping(kg, buffer, sample);
+ if ((sample > kernel_data.integrator.adaptive_min_samples) &&
+ kernel_data.integrator.adaptive_stop_per_sample) {
+ const int step = kernel_data.integrator.adaptive_step;
+
+ if ((sample & (step - 1)) == (step - 1)) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
}
#endif
}
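
Because adaptive_step is a power of two, the mask in the new condition is an exact substitute for a modulo: (sample & (step - 1)) == (step - 1) fires on the last sample of each step-sized window. A tiny check of that equivalence:

    // For a power-of-two step, (sample & (step - 1)) equals sample % step,
    // so with step = 4 the stopping test runs on samples 3, 7, 11, ...
    #include <cassert>

    int main()
    {
      const int step = 4;
      for (int sample = 0; sample < 64; sample++) {
        assert((sample & (step - 1)) == sample % step);
      }
      return 0;
    }
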
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index b6d319311a1..a1f8c35348d 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,11 +63,6 @@ CCL_NAMESPACE_BEGIN
#define VOLUME_STACK_SIZE 32
-/* Adaptive sampling constants */
-#define ADAPTIVE_SAMPLE_STEP 4
-static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0,
- "ADAPTIVE_SAMPLE_STEP must be power of two for bitwise operations to work");
-
/* Split kernel constants */
#define WORK_POOL_SIZE_GPU 64
#define WORK_POOL_SIZE_CPU 1
@@ -1242,7 +1237,9 @@ typedef struct KernelFilm {
int pass_aov_color;
int pass_aov_value;
- int pad1;
+ int pass_aov_color_num;
+ int pass_aov_value_num;
+ int pad1, pad2, pad3;
/* XYZ to rendering color space transform. float4 instead of float3 to
* ensure consistent padding/alignment across devices. */
@@ -1265,7 +1262,7 @@ typedef struct KernelFilm {
int use_display_exposure;
int use_display_pass_alpha;
- int pad3, pad4, pad5;
+ int pad4, pad5, pad6;
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1348,6 +1345,8 @@ typedef struct KernelIntegrator {
int sampling_pattern;
int aa_samples;
int adaptive_min_samples;
+ int adaptive_step;
+ int adaptive_stop_per_sample;
float adaptive_threshold;
/* volume render */
@@ -1360,7 +1359,7 @@ typedef struct KernelIntegrator {
int max_closures;
- int pad1, pad2, pad3;
+ int pad1;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
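
The pad changes in both structs exist to keep these 16-byte alignment asserts satisfied: KernelFilm swaps one pad for the two new counters plus three pads (a net +16 bytes, with the later pads renamed so pad3 is not declared twice), while KernelIntegrator gains two ints and gives up two of its three pads. An illustrative reconstruction, not the real struct (which has many more members):

    // Eight 4-byte members keep sizeof a multiple of 16, mirroring how the two
    // new ints replace two pads in KernelIntegrator.
    struct ExampleIntegrator {
      int sampling_pattern;
      int aa_samples;
      int adaptive_min_samples;
      int adaptive_step;            /* new in this diff */
      int adaptive_stop_per_sample; /* new in this diff */
      float adaptive_threshold;
      int max_closures;
      int pad1;                     /* was pad1, pad2, pad3 */
    };
    static_assert(sizeof(ExampleIntegrator) % 16 == 0, "must stay 16-byte aligned");
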
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 22db8e875dc..2d89fb9ffba 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -165,35 +165,6 @@ bool RenderBuffers::copy_from_device()
return true;
}
-static const float *get_sample_count_pass(const vector<Pass> &passes, device_vector<float> &buffer)
-{
- int sample_offset = 0;
-
- for (const Pass &pass : passes) {
- if (pass.type != PASS_SAMPLE_COUNT) {
- sample_offset += pass.components;
- }
- else {
- return buffer.data() + sample_offset;
- }
- }
-
- return NULL;
-}
-
-static float get_pixel_pass_scale(const float rcp_sample,
- const float *sample_count,
- const int i,
- const int pass_stride)
-{
- if (sample_count) {
- return 1.0f / fabsf(sample_count[i * pass_stride]);
- }
- else {
- return rcp_sample;
- }
-}
-
bool RenderBuffers::get_denoising_pass_rect(
int type, float exposure, int sample, int components, float *pixels)
{
@@ -289,7 +260,22 @@ bool RenderBuffers::get_pass_rect(
return false;
}
- const float *sample_count = get_sample_count_pass(params.passes, buffer);
+ float *sample_count = NULL;
+ if (name == "Combined") {
+ int sample_offset = 0;
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+ if (pass.type != PASS_SAMPLE_COUNT) {
+ sample_offset += pass.components;
+ continue;
+ }
+ else {
+ sample_count = buffer.data() + sample_offset;
+ break;
+ }
+ }
+ }
+
int pass_offset = 0;
for (size_t j = 0; j < params.passes.size(); j++) {
@@ -307,8 +293,8 @@ bool RenderBuffers::get_pass_rect(
float *in = buffer.data() + pass_offset;
int pass_stride = params.get_passes_size();
- const float rcp_sample = 1.0f / (float)sample;
- const float pass_exposure = (pass.exposure) ? exposure : 1.0f;
+ float scale = (pass.filter) ? 1.0f / (float)sample : 1.0f;
+ float scale_exposure = (pass.exposure) ? scale * exposure : scale;
int size = params.width * params.height;
@@ -326,36 +312,28 @@ bool RenderBuffers::get_pass_rect(
if (type == PASS_DEPTH) {
for (int i = 0; i < size; i++, in += pass_stride, pixels++) {
float f = *in;
- pixels[0] = (f == 0.0f) ? 1e10f : f;
- }
- }
- else if (type == PASS_OBJECT_ID || type == PASS_MATERIAL_ID) {
- for (int i = 0; i < size; i++, in += pass_stride, pixels++) {
- pixels[0] = *in;
+ pixels[0] = (f == 0.0f) ? 1e10f : f * scale_exposure;
}
}
else if (type == PASS_MIST) {
for (int i = 0; i < size; i++, in += pass_stride, pixels++) {
- const float scale = get_pixel_pass_scale(rcp_sample, sample_count, i, pass_stride);
- const float f = *in;
- pixels[0] = saturate(f * scale);
+ float f = *in;
+ pixels[0] = saturate(f * scale_exposure);
}
}
#ifdef WITH_CYCLES_DEBUG
else if (type == PASS_BVH_TRAVERSED_NODES || type == PASS_BVH_TRAVERSED_INSTANCES ||
type == PASS_BVH_INTERSECTIONS || type == PASS_RAY_BOUNCES) {
for (int i = 0; i < size; i++, in += pass_stride, pixels++) {
- const float scale = get_pixel_pass_scale(rcp_sample, sample_count, i, pass_stride);
- const float f = *in;
+ float f = *in;
pixels[0] = f * scale;
}
}
#endif
else {
for (int i = 0; i < size; i++, in += pass_stride, pixels++) {
- const float scale = get_pixel_pass_scale(rcp_sample, sample_count, i, pass_stride);
- const float f = *in;
- pixels[0] = f * scale * pass_exposure;
+ float f = *in;
+ pixels[0] = f * scale_exposure;
}
}
}
@@ -389,7 +367,7 @@ bool RenderBuffers::get_pass_rect(
float3 f = make_float3(in[0], in[1], in[2]);
float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]);
- f = safe_divide_even_color(f * pass_exposure, f_divide);
+ f = safe_divide_even_color(f * exposure, f_divide);
pixels[0] = f.x;
pixels[1] = f.y;
@@ -399,9 +377,7 @@ bool RenderBuffers::get_pass_rect(
else {
/* RGB/vector */
for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) {
- const float scale = get_pixel_pass_scale(rcp_sample, sample_count, i, pass_stride);
- const float scale_exposure = scale * pass_exposure;
- const float3 f = make_float3(in[0], in[1], in[2]);
+ float3 f = make_float3(in[0], in[1], in[2]);
pixels[0] = f.x * scale_exposure;
pixels[1] = f.y * scale_exposure;
@@ -449,9 +425,7 @@ bool RenderBuffers::get_pass_rect(
}
else if (type == PASS_CRYPTOMATTE) {
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
- const float scale = get_pixel_pass_scale(rcp_sample, sample_count, i, pass_stride);
- const float4 f = make_float4(in[0], in[1], in[2], in[3]);
-
+ float4 f = make_float4(in[0], in[1], in[2], in[3]);
/* x and z contain integer IDs, don't rescale them.
y and w contain matte weights, they get scaled. */
pixels[0] = f.x;
@@ -462,9 +436,12 @@ bool RenderBuffers::get_pass_rect(
}
else {
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
- const float scale = get_pixel_pass_scale(rcp_sample, sample_count, i, pass_stride);
- const float scale_exposure = scale * pass_exposure;
- const float4 f = make_float4(in[0], in[1], in[2], in[3]);
+ if (sample_count && sample_count[i * pass_stride] < 0.0f) {
+ scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f;
+ scale_exposure = (pass.exposure) ? scale * exposure : scale;
+ }
+
+ float4 f = make_float4(in[0], in[1], in[2], in[3]);
pixels[0] = f.x * scale_exposure;
pixels[1] = f.y * scale_exposure;
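
The per-pixel override above is how adaptive sampling feeds back into pass reading: for the Combined pass the sample-count pass is located, and a negative entry marks a pixel that stopped early while storing (negated) how many samples it actually accumulated, so that pixel is rescaled by 1/|count| instead of 1/sample. A sketch of that scale selection:

    // Pick the scale for one pixel of a filtered pass.
    float combined_scale(float sample_count_value, int sample, bool filter)
    {
      if (!filter) {
        return 1.0f;                        /* unfiltered passes are not divided */
      }
      if (sample_count_value < 0.0f) {
        return -1.0f / sample_count_value;  /* 1 / |samples this pixel received| */
      }
      return 1.0f / (float)sample;          /* pixel ran for the full sample count */
    }
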
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index c29810d1494..26eda93fadd 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -76,6 +76,7 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
Pass pass;
pass.type = type;
+ pass.filter = true;
pass.exposure = false;
pass.divide_type = PASS_NONE;
if (name) {
@@ -92,6 +93,7 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
break;
case PASS_DEPTH:
pass.components = 1;
+ pass.filter = false;
break;
case PASS_MIST:
pass.components = 1;
@@ -112,6 +114,7 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_OBJECT_ID:
case PASS_MATERIAL_ID:
pass.components = 1;
+ pass.filter = false;
break;
case PASS_EMISSION:
@@ -359,8 +362,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->light_pass_flag = 0;
kfilm->pass_stride = 0;
kfilm->use_light_pass = use_light_visibility;
+ kfilm->pass_aov_value_num = 0;
+ kfilm->pass_aov_color_num = 0;
- bool have_cryptomatte = false, have_aov_color = false, have_aov_value = false;
+ bool have_cryptomatte = false;
for (size_t i = 0; i < passes.size(); i++) {
Pass &pass = passes[i];
@@ -495,16 +500,16 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_sample_count = kfilm->pass_stride;
break;
case PASS_AOV_COLOR:
- if (!have_aov_color) {
+ if (kfilm->pass_aov_color_num == 0) {
kfilm->pass_aov_color = kfilm->pass_stride;
- have_aov_color = true;
}
+ kfilm->pass_aov_color_num++;
break;
case PASS_AOV_VALUE:
- if (!have_aov_value) {
+ if (kfilm->pass_aov_value_num == 0) {
kfilm->pass_aov_value = kfilm->pass_stride;
- have_aov_value = true;
}
+ kfilm->pass_aov_value_num++;
break;
default:
assert(false);
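
The counters replace the old have_aov_color/have_aov_value booleans: the first AOV pass of each kind pins the base offset in the film buffer, and every further one only bumps the count that the kernel loops over (see the kernel_adaptive_sampling.h hunk above). A sketch of that bookkeeping with illustrative names:

    // The first value AOV records where the block starts; later ones are
    // assumed to follow at consecutive pass offsets.
    struct AovLayout {
      int pass_aov_value = 0, pass_aov_value_num = 0;
      int pass_aov_color = 0, pass_aov_color_num = 0;
    };

    void register_aov_value(AovLayout &kfilm, int pass_stride_so_far)
    {
      if (kfilm.pass_aov_value_num == 0) {
        kfilm.pass_aov_value = pass_stride_so_far; /* offset of the first value AOV */
      }
      kfilm.pass_aov_value_num++;
    }
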
diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h
index 0fe4fe89d5e..aae8fb404b0 100644
--- a/intern/cycles/render/film.h
+++ b/intern/cycles/render/film.h
@@ -42,6 +42,7 @@ class Pass {
public:
PassType type;
int components;
+ bool filter;
bool exposure;
PassType divide_type;
string name;
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 67ed1176171..75050b66bf2 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -283,6 +283,7 @@ void ImageManager::set_osl_texture_system(void *texture_system)
bool ImageManager::set_animation_frame_update(int frame)
{
if (frame != animation_frame) {
+ thread_scoped_lock device_lock(images_mutex);
animation_frame = frame;
for (size_t slot = 0; slot < images.size(); slot++) {
@@ -377,7 +378,7 @@ int ImageManager::add_image_slot(ImageLoader *loader,
Image *img;
size_t slot;
- thread_scoped_lock device_lock(device_mutex);
+ thread_scoped_lock device_lock(images_mutex);
/* Find existing image. */
for (slot = 0; slot < images.size(); slot++) {
@@ -418,6 +419,7 @@ int ImageManager::add_image_slot(ImageLoader *loader,
void ImageManager::add_image_user(int slot)
{
+ thread_scoped_lock device_lock(images_mutex);
Image *image = images[slot];
assert(image && image->users >= 1);
@@ -426,6 +428,7 @@ void ImageManager::add_image_user(int slot)
void ImageManager::remove_image_user(int slot)
{
+ thread_scoped_lock device_lock(images_mutex);
Image *image = images[slot];
assert(image && image->users >= 1);
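
Slot lookup, user counting and animation-frame updates now take a dedicated images_mutex instead of device_mutex, so bookkeeping on the images vector no longer serializes against device texture uploads. A sketch of the same split using standard C++ primitives (thread_scoped_lock and thread_mutex are Cycles utility types, not shown here):

    #include <mutex>
    #include <vector>

    struct Image { int users = 1; };

    class ImageRegistry {
      std::mutex images_mutex;       /* guards `images` and the user counts */
      std::mutex device_mutex;       /* guards device-side uploads only */
      std::vector<Image *> images;

     public:
      void add_image_user(int slot)
      {
        std::lock_guard<std::mutex> lock(images_mutex);
        images[slot]->users++;
      }
    };
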
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 00ab12afd7a..2000582ce70 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -206,6 +206,7 @@ class ImageManager {
bool has_half_images;
thread_mutex device_mutex;
+ thread_mutex images_mutex;
int animation_frame;
vector<Image *> images;
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 2f9d088899e..d4beb06e57b 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -190,6 +190,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
else {
kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
}
+
+ kintegrator->adaptive_step = 4;
+ kintegrator->adaptive_stop_per_sample = device->info.has_adaptive_stop_per_sample;
+
+ /* Adaptive step must be a power of two for bitwise operations to work. */
+ assert((kintegrator->adaptive_step & (kintegrator->adaptive_step - 1)) == 0);
+
if (aa_samples > 0 && adaptive_threshold == 0.0f) {
kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
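
The assert added above is the standard bit test for a power of two; it replaces the compile-time static_assert that used to guard ADAPTIVE_SAMPLE_STEP in kernel_types.h. For reference:

    // A positive power of two has exactly one set bit, so x & (x - 1) == 0.
    bool is_power_of_two(int x)
    {
      return x > 0 && (x & (x - 1)) == 0;
    }
    /* is_power_of_two(4) -> true (the adaptive_step chosen above),
       is_power_of_two(6) -> false. */
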
diff --git a/intern/cycles/render/mesh_volume.cpp b/intern/cycles/render/mesh_volume.cpp
index d73ba3b06dd..74b8fc9e5ba 100644
--- a/intern/cycles/render/mesh_volume.cpp
+++ b/intern/cycles/render/mesh_volume.cpp
@@ -25,18 +25,18 @@
CCL_NAMESPACE_BEGIN
-static size_t compute_voxel_index(const int3 &resolution, size_t x, size_t y, size_t z)
+const int64_t VOXEL_INDEX_NONE = -1;
+
+static int64_t compute_voxel_index(const int3 &resolution, int64_t x, int64_t y, int64_t z)
{
- if (x == -1 || x >= resolution.x) {
- return -1;
+ if (x < 0 || x >= resolution.x) {
+ return VOXEL_INDEX_NONE;
}
-
- if (y == -1 || y >= resolution.y) {
- return -1;
+ else if (y < 0 || y >= resolution.y) {
+ return VOXEL_INDEX_NONE;
}
-
- if (z == -1 || z >= resolution.z) {
- return -1;
+ else if (z < 0 || z >= resolution.z) {
+ return VOXEL_INDEX_NONE;
}
return x + y * resolution.x + z * resolution.x * resolution.y;
@@ -184,15 +184,15 @@ VolumeMeshBuilder::VolumeMeshBuilder(VolumeParams *volume_params)
params = volume_params;
number_of_nodes = 0;
- const size_t x = divide_up(params->resolution.x, CUBE_SIZE);
- const size_t y = divide_up(params->resolution.y, CUBE_SIZE);
- const size_t z = divide_up(params->resolution.z, CUBE_SIZE);
+ const int64_t x = divide_up(params->resolution.x, CUBE_SIZE);
+ const int64_t y = divide_up(params->resolution.y, CUBE_SIZE);
+ const int64_t z = divide_up(params->resolution.z, CUBE_SIZE);
/* Adding 2*pad_size since we pad in both positive and negative directions
* along the axis. */
- const size_t px = divide_up(params->resolution.x + 2 * params->pad_size, CUBE_SIZE);
- const size_t py = divide_up(params->resolution.y + 2 * params->pad_size, CUBE_SIZE);
- const size_t pz = divide_up(params->resolution.z + 2 * params->pad_size, CUBE_SIZE);
+ const int64_t px = divide_up(params->resolution.x + 2 * params->pad_size, CUBE_SIZE);
+ const int64_t py = divide_up(params->resolution.y + 2 * params->pad_size, CUBE_SIZE);
+ const int64_t pz = divide_up(params->resolution.z + 2 * params->pad_size, CUBE_SIZE);
res = make_int3(px, py, pz);
pad_offset = make_int3(px - x, py - y, pz - z);
@@ -209,7 +209,10 @@ void VolumeMeshBuilder::add_node(int x, int y, int z)
assert((index_x >= 0) && (index_y >= 0) && (index_z >= 0));
- const size_t index = compute_voxel_index(res, index_x, index_y, index_z);
+ const int64_t index = compute_voxel_index(res, index_x, index_y, index_z);
+ if (index == VOXEL_INDEX_NONE) {
+ return;
+ }
/* We already have a node here. */
if (grid[index] == 1) {
@@ -256,7 +259,7 @@ void VolumeMeshBuilder::generate_vertices_and_quads(vector<ccl::int3> &vertices_
for (int z = 0; z < res.z; ++z) {
for (int y = 0; y < res.y; ++y) {
for (int x = 0; x < res.x; ++x) {
- size_t voxel_index = compute_voxel_index(res, x, y, z);
+ int64_t voxel_index = compute_voxel_index(res, x, y, z);
if (grid[voxel_index] == 0) {
continue;
}
@@ -285,32 +288,32 @@ void VolumeMeshBuilder::generate_vertices_and_quads(vector<ccl::int3> &vertices_
*/
voxel_index = compute_voxel_index(res, x - 1, y, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_X_MIN);
}
voxel_index = compute_voxel_index(res, x + 1, y, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_X_MAX);
}
voxel_index = compute_voxel_index(res, x, y - 1, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Y_MIN);
}
voxel_index = compute_voxel_index(res, x, y + 1, z);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Y_MAX);
}
voxel_index = compute_voxel_index(res, x, y, z - 1);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Z_MIN);
}
voxel_index = compute_voxel_index(res, x, y, z + 1);
- if (voxel_index == -1 || grid[voxel_index] == 0) {
+ if (voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0) {
create_quad(corners, vertices_is, quads, res, used_verts, QUAD_Z_MAX);
}
}
@@ -455,7 +458,7 @@ void GeometryManager::create_volume_mesh(Mesh *mesh, Progress &progress)
for (int z = 0; z < resolution.z; ++z) {
for (int y = 0; y < resolution.y; ++y) {
for (int x = 0; x < resolution.x; ++x) {
- size_t voxel_index = compute_voxel_index(resolution, x, y, z);
+ int64_t voxel_index = compute_voxel_index(resolution, x, y, z);
for (size_t i = 0; i < voxel_grids.size(); ++i) {
const VoxelAttributeGrid &voxel_grid = voxel_grids[i];
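
Switching from size_t to int64_t gives compute_voxel_index a real signed sentinel: out-of-range coordinates return VOXEL_INDEX_NONE instead of wrapping -1 into a huge unsigned value, and every neighbour lookup checks that sentinel before indexing the grid. A sketch of the caller-side pattern:

    // An out-of-bounds neighbour is treated the same as an empty voxel.
    #include <cstdint>
    #include <vector>

    const int64_t VOXEL_INDEX_NONE = -1;

    bool neighbour_empty(const std::vector<unsigned char> &grid, int64_t voxel_index)
    {
      return voxel_index == VOXEL_INDEX_NONE || grid[voxel_index] == 0;
    }
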
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index b1b30979b0e..58bcc7ccdfb 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -1110,6 +1110,7 @@ void Session::render(bool with_denoising)
task.adaptive_sampling.use = (scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ) &&
scene->dscene.data.film.pass_adaptive_aux_buffer;
task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples;
+ task.adaptive_sampling.adaptive_step = scene->dscene.data.integrator.adaptive_step;
/* Acquire render tiles by default. */
task.tile_types = RenderTile::PATH_TRACE;