Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2020-04-07 20:43:51 +0300
committerBrecht Van Lommel <brecht@blender.org>2020-04-07 21:29:48 +0300
commit53981c7fb6fdd9973e40f81f867f25d10540c1d1 (patch)
tree84572c33af5bb0c343f6a2928b135041bb8cb3b3 /intern
parent7b4b07a7ddc874efd11b3932f152c7136d720f9c (diff)
Cleanup: refactor adaptive sampling to more easily change some parameters
No functional changes yet, this is work towards making CPU and GPU results match more closely.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/device.cpp2
-rw-r--r--intern/cycles/device/device.h14
-rw-r--r--intern/cycles/device/device_cpu.cpp22
-rw-r--r--intern/cycles/device/device_cuda.cpp1
-rw-r--r--intern/cycles/device/device_network.cpp1
-rw-r--r--intern/cycles/device/device_opencl.cpp1
-rw-r--r--intern/cycles/device/device_task.cpp3
-rw-r--r--intern/cycles/kernel/kernel_passes.h10
-rw-r--r--intern/cycles/kernel/kernel_types.h9
-rw-r--r--intern/cycles/render/integrator.cpp7
-rw-r--r--intern/cycles/render/session.cpp1
11 files changed, 50 insertions, 21 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index d94d409175b..dfbf57e8b88 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -597,6 +597,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_half_images = true;
info.has_volume_decoupled = true;
+ info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_profiling = true;
@@ -639,6 +640,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
+ info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index a98ac171709..c55dfb3a83b 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -75,12 +75,13 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
- bool display_device; /* GPU is used as a display device. */
- bool has_half_images; /* Support half-float textures. */
- bool has_volume_decoupled; /* Decoupled volume shading. */
- bool has_osl; /* Support Open Shading Language. */
- bool use_split_kernel; /* Use split or mega kernel. */
- bool has_profiling; /* Supports runtime collection of profiling info. */
+ bool display_device; /* GPU is used as a display device. */
+ bool has_half_images; /* Support half-float textures. */
+ bool has_volume_decoupled; /* Decoupled volume shading. */
+ bool has_adaptive_stop_per_sample; /* Per-sample adaptive sampling stopping. */
+ bool has_osl; /* Support Open Shading Language. */
+ bool use_split_kernel; /* Use split or mega kernel. */
+ bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
vector<DeviceInfo> denoising_devices;
@@ -94,6 +95,7 @@ class DeviceInfo {
display_device = false;
has_half_images = false;
has_volume_decoupled = false;
+ has_adaptive_stop_per_sample = false;
has_osl = false;
use_split_kernel = false;
has_profiling = false;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 57e8523e02a..c701c14318f 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -839,7 +839,7 @@ class CPUDevice : public Device {
return true;
}
- bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile)
+ bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample)
{
WorkTile wtile;
wtile.x = tile.x;
@@ -850,11 +850,24 @@ class CPUDevice : public Device {
wtile.stride = tile.stride;
wtile.buffer = (float *)tile.buffer;
+ /* For CPU we do adaptive stopping per sample so we can stop earlier, but
+ * for combined CPU + GPU rendering we match the GPU and do it per tile
+ * after a given number of sample steps. */
+ if (!kernel_data.integrator.adaptive_stop_per_sample) {
+ for (int y = wtile.y; y < wtile.y + wtile.h; ++y) {
+ for (int x = wtile.x; x < wtile.x + wtile.w; ++x) {
+ const int index = wtile.offset + x + y * wtile.stride;
+ float *buffer = wtile.buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+ }
+ }
+
bool any = false;
- for (int y = tile.y; y < tile.y + tile.h; ++y) {
+ for (int y = wtile.y; y < wtile.y + wtile.h; ++y) {
any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
}
- for (int x = tile.x; x < tile.x + tile.w; ++x) {
+ for (int x = wtile.x; x < wtile.x + wtile.w; ++x) {
any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
}
return (!any);
@@ -917,7 +930,7 @@ class CPUDevice : public Device {
tile.sample = sample + 1;
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
- const bool stop = adaptive_sampling_filter(kg, tile);
+ const bool stop = adaptive_sampling_filter(kg, tile, sample);
if (stop) {
const int num_progress_samples = end_sample - sample;
tile.sample = end_sample;
@@ -1327,6 +1340,7 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.id = "CPU";
info.num = 0;
info.has_volume_decoupled = true;
+ info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_half_images = true;
info.has_profiling = true;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 9a703b45c0a..4a53fcd151d 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -129,6 +129,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 2742cbf53aa..0933d51f321 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -311,6 +311,7 @@ void device_network_info(vector<DeviceInfo> &devices)
/* todo: get this info from device */
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
info.has_osl = false;
devices.push_back(info);
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 891b73351a0..8a0b128697f 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -119,6 +119,7 @@ void device_opencl_info(vector<DeviceInfo> &devices)
info.display_device = true;
info.use_split_kernel = true;
info.has_volume_decoupled = false;
+ info.has_adaptive_stop_per_sample = false;
info.id = id;
/* Check OpenCL extensions */
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index c36b1344c3b..d2447eae867 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -138,8 +138,7 @@ void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
/* Adaptive Sampling */
-AdaptiveSampling::AdaptiveSampling()
- : use(true), adaptive_step(ADAPTIVE_SAMPLE_STEP), min_samples(0)
+AdaptiveSampling::AdaptiveSampling() : use(true), adaptive_step(0), min_samples(0)
{
}
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 98136bc7047..7437e540a1f 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -403,9 +403,13 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
}
#ifdef __KERNEL_CPU__
- if (sample > kernel_data.integrator.adaptive_min_samples &&
- (sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (ADAPTIVE_SAMPLE_STEP - 1)) {
- kernel_do_adaptive_stopping(kg, buffer, sample);
+ if ((sample > kernel_data.integrator.adaptive_min_samples) &&
+ kernel_data.integrator.adaptive_stop_per_sample) {
+ const int step = kernel_data.integrator.adaptive_step;
+
+ if ((sample & (step - 1)) == (step - 1)) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
}
#endif
}
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 44c936da626..a1f8c35348d 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,11 +63,6 @@ CCL_NAMESPACE_BEGIN
#define VOLUME_STACK_SIZE 32
-/* Adaptive sampling constants */
-#define ADAPTIVE_SAMPLE_STEP 4
-static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0,
- "ADAPTIVE_SAMPLE_STEP must be power of two for bitwise operations to work");
-
/* Split kernel constants */
#define WORK_POOL_SIZE_GPU 64
#define WORK_POOL_SIZE_CPU 1
@@ -1350,6 +1345,8 @@ typedef struct KernelIntegrator {
int sampling_pattern;
int aa_samples;
int adaptive_min_samples;
+ int adaptive_step;
+ int adaptive_stop_per_sample;
float adaptive_threshold;
/* volume render */
@@ -1362,7 +1359,7 @@ typedef struct KernelIntegrator {
int max_closures;
- int pad1, pad2, pad3;
+ int pad1;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 2f9d088899e..d4beb06e57b 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -190,6 +190,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
else {
kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
}
+
+ kintegrator->adaptive_step = 4;
+ kintegrator->adaptive_stop_per_sample = device->info.has_adaptive_stop_per_sample;
+
+ /* Adaptive step must be a power of two for bitwise operations to work. */
+ assert((kintegrator->adaptive_step & (kintegrator->adaptive_step - 1)) == 0);
+
if (aa_samples > 0 && adaptive_threshold == 0.0f) {
kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index b1b30979b0e..58bcc7ccdfb 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -1110,6 +1110,7 @@ void Session::render(bool with_denoising)
task.adaptive_sampling.use = (scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ) &&
scene->dscene.data.film.pass_adaptive_aux_buffer;
task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples;
+ task.adaptive_sampling.adaptive_step = scene->dscene.data.integrator.adaptive_step;
/* Acquire render tiles by default. */
task.tile_types = RenderTile::PATH_TRACE;