Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Werner <stefan.werner@tangent-animation.com>2020-03-05 14:05:42 +0300
committerStefan Werner <stefan.werner@tangent-animation.com>2020-03-05 14:21:38 +0300
commit51e898324de30c0985a80e5bc067358b5ccedbfc (patch)
tree5efddead1b7ca5655f1d6d2422b59e7da51fe271 /intern/cycles/kernel
parent4ccbbd308060f0330472828b317c59e054c9ee7b (diff)
Adaptive Sampling for Cycles.
This feature takes some inspiration from "RenderMan: An Advanced Path Tracing Architecture for Movie Rendering" and "A Hierarchical Automatic Stopping Condition for Monte Carlo Global Illumination" The basic principle is as follows: While samples are being added to a pixel, the adaptive sampler writes half of the samples to a separate buffer. This gives it two separate estimates of the same pixel, and by comparing their difference it estimates convergence. Once convergence drops below a given threshold, the pixel is considered done. When a pixel has not converged yet and needs more samples than the minimum, its immediate neighbors are also set to take more samples. This is done in order to more reliably detect sharp features such as caustics. A 3x3 box filter that is run periodically over the tile buffer is used for that purpose. After a tile has finished rendering, the values of all passes are scaled as if they were rendered with the full number of samples. This way, any code operating on these buffers, for example the denoiser, does not need to be changed for per-pixel sample counts. Reviewed By: brecht, #cycles Differential Revision: https://developer.blender.org/D4686
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt9
-rw-r--r--intern/cycles/kernel/kernel_adaptive_sampling.h231
-rw-r--r--intern/cycles/kernel/kernel_passes.h39
-rw-r--r--intern/cycles/kernel/kernel_path.h9
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h8
-rw-r--r--intern/cycles/kernel/kernel_types.h18
-rw-r--r--intern/cycles/kernel/kernel_work_stealing.h84
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h4
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h8
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu70
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu8
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl4
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h44
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_filter_x.h30
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_filter_y.h29
-rw-r--r--intern/cycles/kernel/split/kernel_adaptive_stopping.h37
20 files changed, 694 insertions, 30 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 566b6e3d191..0dd0da65f82 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -36,6 +36,10 @@ set(SRC_CUDA_KERNELS
)
set(SRC_OPENCL_KERNELS
+ kernels/opencl/kernel_adaptive_stopping.cl
+ kernels/opencl/kernel_adaptive_filter_x.cl
+ kernels/opencl/kernel_adaptive_filter_y.cl
+ kernels/opencl/kernel_adaptive_adjust_samples.cl
kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_base.cl
kernels/opencl/kernel_displace.cl
@@ -94,6 +98,7 @@ set(SRC_BVH_HEADERS
set(SRC_HEADERS
kernel_accumulate.h
+ kernel_adaptive_sampling.h
kernel_bake.h
kernel_camera.h
kernel_color.h
@@ -324,6 +329,10 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
+ split/kernel_adaptive_adjust_samples.h
+ split/kernel_adaptive_filter_x.h
+ split/kernel_adaptive_filter_y.h
+ split/kernel_adaptive_stopping.h
split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
new file mode 100644
index 00000000000..502b69e4f7f
--- /dev/null
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__
+#define __KERNEL_ADAPTIVE_SAMPLING_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
+
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg,
+ ccl_global float *buffer,
+ int sample)
+{
+ /* TODO Stefan: Is this better in linear, sRGB or something else? */
+ float4 I = *((ccl_global float4 *)buffer);
+ float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ /* The per pixel error as seen in section 2.1 of
+ * "A hierarchical automatic stopping condition for Monte Carlo global illumination"
+ * A small epsilon is added to the divisor to prevent division by zero. */
+ float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
+ (sample * 0.0001f + sqrtf(I.x + I.y + I.z));
+ if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
+ /* Set the fourth component to non-zero value to indicate that this pixel has converged. */
+ buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f;
+ }
+}
+
+/* Adjust the values of an adaptively sampled pixel. */
+
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
+ ccl_global float *buffer,
+ float sample_multiplier)
+{
+ *(ccl_global float4 *)(buffer) *= sample_multiplier;
+
+ /* Scale the aux pass too, this is necessary for progressive rendering to work properly. */
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer);
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier;
+
+#ifdef __PASSES__
+ int flag = kernel_data.film.pass_flag;
+
+ if (flag & PASSMASK(SHADOW))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_shadow) *= sample_multiplier;
+
+ if (flag & PASSMASK(MIST))
+ *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier;
+
+ if (flag & PASSMASK(NORMAL))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier;
+
+ if (flag & PASSMASK(UV))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier;
+
+ if (flag & PASSMASK(MOTION)) {
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier;
+ *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier;
+ }
+
+ if (kernel_data.film.use_light_pass) {
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(EMISSION))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier;
+ if (light_flag & PASSMASK(BACKGROUND))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier;
+ if (light_flag & PASSMASK(AO))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(DIFFUSE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_color) *= sample_multiplier;
+ }
+#endif
+
+#ifdef __DENOISING_FEATURES__
+
+# define scale_float3_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale; \
+ *(buffer + offset + 3) *= scale * scale; \
+ *(buffer + offset + 4) *= scale * scale; \
+ *(buffer + offset + 5) *= scale * scale;
+
+# define scale_shadow_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale * scale;
+
+ if (kernel_data.film.pass_denoising_data) {
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier);
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier);
+ if (kernel_data.film.pass_denoising_clean) {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier;
+ }
+ else {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ }
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier);
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH +
+ 1) *= sample_multiplier * sample_multiplier;
+ }
+#endif /* __DENOISING_FEATURES__ */
+
+ if (kernel_data.film.cryptomatte_passes) {
+ int num_slots = 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0;
+ num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth;
+ ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer +
+ kernel_data.film.pass_cryptomatte);
+ for (int slot = 0; slot < num_slots; slot++) {
+ id_buffer[slot].y *= sample_multiplier;
+ }
+ }
+}
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
+{
+ bool any = false;
+ bool prev = false;
+ for (int x = tile->x; x < tile->x + tile->w; ++x) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (x > tile->x && !prev) {
+ index = index - 1;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
+{
+ bool prev = false;
+ bool any = false;
+ for (int y = tile->y; y < tile->y + tile->h; ++y) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (y > tile->y && !prev) {
+ index = index - tile->stride;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 187e8340c82..33ec05c6048 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -29,7 +29,9 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
if (kernel_data.film.pass_denoising_data == 0)
return;
- buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+ buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ?
+ DENOISING_PASS_SHADOW_B :
+ DENOISING_PASS_SHADOW_A;
path_total = ensure_finite(path_total);
path_total_shaded = ensure_finite(path_total_shaded);
@@ -386,6 +388,41 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, L);
#endif
+
+ /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+ criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+ Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+ tiles. */
+ if (kernel_data.film.pass_adaptive_aux_buffer &&
+ kernel_data.integrator.adaptive_threshold > 0.0f) {
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
+ }
+#ifdef __KERNEL_CPU__
+ if (sample > kernel_data.integrator.adaptive_min_samples &&
+ (sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (ADAPTIVE_SAMPLE_STEP - 1)) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+#endif
+ }
+
+ /* Write the sample count as negative numbers initially to mark the samples as in progress.
+ * Once the tile has finished rendering, the sign gets flipped and all the pixel values
+ * are scaled as if they were taken at a uniform sample count. */
+ if (kernel_data.film.pass_sample_count) {
+ /* Make sure it's a negative number. In progressive refine mode, this bit gets flipped between
+ * passes. */
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count),
+ 0x80000000);
+#else
+ if (buffer[kernel_data.film.pass_sample_count] > 0) {
+ buffer[kernel_data.film.pass_sample_count] *= -1.0f;
+ }
+#endif
+ kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1a0b67275a7..bdd2703a894 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -31,6 +31,7 @@
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
#include "kernel/kernel_light.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/kernel_passes.h"
#if defined(__VOLUME__) || defined(__SUBSURFACE__)
@@ -656,6 +657,14 @@ ccl_device void kernel_path_trace(
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* Initialize random numbers and sample ray. */
uint rng_hash;
Ray ray;
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index f75e4ab4c97..0d5781fe3d1 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -523,6 +523,14 @@ ccl_device void kernel_branched_path_trace(
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* initialize random numbers and ray */
uint rng_hash;
Ray ray;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 88c2d0d3196..c5be93e2cda 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,6 +63,11 @@ CCL_NAMESPACE_BEGIN
#define VOLUME_STACK_SIZE 32
+/* Adaptive sampling constants */
+#define ADAPTIVE_SAMPLE_STEP 4
+static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0,
+ "ADAPTIVE_SAMPLE_STEP must be power of two for bitwise operations to work");
+
/* Split kernel constants */
#define WORK_POOL_SIZE_GPU 64
#define WORK_POOL_SIZE_CPU 1
@@ -374,6 +379,8 @@ typedef enum PassType {
PASS_CRYPTOMATTE,
PASS_AOV_COLOR,
PASS_AOV_VALUE,
+ PASS_ADAPTIVE_AUX_BUFFER,
+ PASS_SAMPLE_COUNT,
PASS_CATEGORY_MAIN_END = 31,
PASS_MIST = 32,
@@ -1223,6 +1230,9 @@ typedef struct KernelFilm {
int cryptomatte_depth;
int pass_cryptomatte;
+ int pass_adaptive_aux_buffer;
+ int pass_sample_count;
+
int pass_mist;
float mist_start;
float mist_inv_depth;
@@ -1256,6 +1266,8 @@ typedef struct KernelFilm {
int display_divide_pass_stride;
int use_display_exposure;
int use_display_pass_alpha;
+
+ int pad3, pad4, pad5;
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1337,6 +1349,8 @@ typedef struct KernelIntegrator {
/* sampler */
int sampling_pattern;
int aa_samples;
+ int adaptive_min_samples;
+ float adaptive_threshold;
/* volume render */
int use_volumes;
@@ -1348,7 +1362,7 @@ typedef struct KernelIntegrator {
int max_closures;
- int pad1;
+ int pad1, pad2, pad3;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
@@ -1662,7 +1676,7 @@ typedef struct WorkTile {
uint start_sample;
uint num_samples;
- uint offset;
+ int offset;
uint stride;
ccl_global float *buffer;
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 799561a7466..c642d227e4b 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -23,17 +23,41 @@ CCL_NAMESPACE_BEGIN
* Utility functions for work stealing
*/
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ uint global_work_index,
+ ccl_private uint *x,
+ ccl_private uint *y,
+ ccl_private uint *sample)
+{
+#ifdef __KERNEL_CUDA__
+ /* Keeping threads for the same pixel together improves performance on CUDA. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#else /* __KERNEL_CUDA__ */
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+#endif /* __KERNEL_CUDA__ */
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
+
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
+}
+
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef __SPLIT_KERNEL__
/* Returns true if there is work */
-ccl_device bool get_next_work(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
+ccl_device bool get_next_work_item(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
/* With a small amount of work there may be more threads than work due to
* rounding up of global size, stop such threads immediately. */
@@ -56,31 +80,37 @@ ccl_device bool get_next_work(KernelGlobals *kg,
/* Test if all work for this pool is done. */
return (*global_work_index < total_work_size);
}
-#endif
-/* Map global work index to tile, pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
- uint global_work_index,
- ccl_private uint *x,
- ccl_private uint *y,
- ccl_private uint *sample)
+ccl_device bool get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
-#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#endif /* __KERNEL_CUDA__ */
- uint y_offset = pixel_offset / tile->w;
- uint x_offset = pixel_offset - y_offset * tile->w;
-
- *x = tile->x + x_offset;
- *y = tile->y + y_offset;
- *sample = tile->start_sample + sample_offset;
+ bool got_work = false;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ do {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ if (got_work) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, *global_work_index, &x, &y, &sample);
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ break;
+ }
+ }
+ } while (got_work);
+ }
+ else {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ }
+ return got_work;
}
+#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index f5d981fb71a..683f4b88d79 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -89,5 +89,9 @@ DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9ca3f46b5b6..96b2bf11132 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -58,6 +58,10 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
+# include "kernel/split/kernel_adaptive_stopping.h"
+# include "kernel/split/kernel_adaptive_filter_x.h"
+# include "kernel/split/kernel_adaptive_filter_y.h"
+# include "kernel/split/kernel_adaptive_adjust_samples.h"
# endif /* __SPLIT_KERNEL__ */
#else
# define STUB_ASSERT(arch, name) \
@@ -204,6 +208,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index af311027f78..c4c810c6a82 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -33,6 +33,7 @@
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
#include "kernel/kernel_work_stealing.h"
+#include "kernel/kernel_adaptive_sampling.h"
/* kernels */
extern "C" __global__ void
@@ -83,6 +84,75 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
+{
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+ if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(&kg, buffer, sample);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+ if(ccl_global_id(0) < tile->h) {
+ int y = tile->y + ccl_global_id(0);
+ kernel_do_adaptive_filter_x(&kg, y, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+ if(ccl_global_id(0) < tile->w) {
+ int x = tile->x + ccl_global_id(0);
+ kernel_do_adaptive_filter_y(&kg, x, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
+{
+ if(kernel_data.film.pass_adaptive_aux_buffer) {
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+ if(thread_is_active) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ if(buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]);
+ if(sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 43b3d0aa0e6..95ad7599cf1 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -43,6 +43,10 @@
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
#include "kernel/kernel_film.h"
@@ -121,6 +125,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
new file mode 100644
index 00000000000..ebdb99d4730
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
+
+#define KERNEL_NAME adaptive_adjust_samples
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
new file mode 100644
index 00000000000..76d82d4184e
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+
+#define KERNEL_NAME adaptive_filter_x
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
new file mode 100644
index 00000000000..1e6d15ba0f2
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+
+#define KERNEL_NAME adaptive_filter_y
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
new file mode 100644
index 00000000000..51de0059667
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+
+#define KERNEL_NAME adaptive_stopping
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
index 6041f13b52b..c3b7b09460a 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
@@ -28,3 +28,7 @@
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
diff --git a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
new file mode 100644
index 00000000000..60ebf415970
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
new file mode 100644
index 00000000000..93f41f7ced4
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int y = kernel_split_params.tile.y + pixel_index;
+ kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
new file mode 100644
index 00000000000..eca53d079ec
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index;
+ kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile);
+ }
+}
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
new file mode 100644
index 00000000000..c8eb1ebd705
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ kernel_do_adaptive_stopping(kg,
+ buffer,
+ kernel_split_params.tile.start_sample +
+ kernel_split_params.tile.num_samples - 1);
+ }
+}
+CCL_NAMESPACE_END