Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Stockner <lukasstockner97>2021-01-11 22:42:47 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2021-01-11 23:04:49 +0300
commit688e5c6d3895a8f6b5aec06977c9dcb4b00e0a76 (patch)
tree85fcba0cb6f02e427c97e7af77616925970f8544 /intern/cycles/device
parentb70f4a265a22ceb2c31dd54002a318d0f949e294 (diff)
Fix T82351: Cycles: Tile stealing glitches with adaptive sampling
In my testing this works, but it requires me to remove the min(start_sample...) part in the adaptive sampling kernel, and I assume there's a reason why it was there?

Reviewed By: brecht

Maniphest Tasks: T82351

Differential Revision: https://developer.blender.org/D9445
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/cuda/device_cuda_impl.cpp14
-rw-r--r--intern/cycles/device/device_cpu.cpp5
-rw-r--r--intern/cycles/device/device_optix.cpp14
-rw-r--r--intern/cycles/device/device_split_kernel.cpp4
-rw-r--r--intern/cycles/device/device_task.cpp41
-rw-r--r--intern/cycles/device/device_task.h3
6 files changed, 31 insertions, 50 deletions
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 48151365e5d..307d1469ba5 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -1929,18 +1929,19 @@ void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkT
}
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
- if (task.adaptive_sampling.use) {
- step_samples = task.adaptive_sampling.align_static_samples(step_samples);
- }
/* Render all samples. */
int start_sample = rtile.start_sample;
int end_sample = rtile.start_sample + rtile.num_samples;
- for (int sample = start_sample; sample < end_sample; sample += step_samples) {
+ for (int sample = start_sample; sample < end_sample;) {
/* Setup and copy work tile to device. */
wtile->start_sample = sample;
- wtile->num_samples = min(step_samples, end_sample - sample);
+ wtile->num_samples = step_samples;
+ if (task.adaptive_sampling.use) {
+ wtile->num_samples = task.adaptive_sampling.align_samples(sample, step_samples);
+ }
+ wtile->num_samples = min(wtile->num_samples, end_sample - sample);
work_tiles.copy_to_device();
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
@@ -1962,7 +1963,8 @@ void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkT
cuda_assert(cuCtxSynchronize());
/* Update progress. */
- rtile.sample = sample + wtile->num_samples;
+ sample += wtile->num_samples;
+ rtile.sample = sample;
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
if (task.get_cancel()) {
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index fea4fc53d1f..bd00d4db775 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -920,8 +920,7 @@ class CPUDevice : public Device {
ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
- float sample_multiplier = tile.sample / max((float)tile.start_sample + 1.0f,
- buffer[kernel_data.film.pass_sample_count]);
+ float sample_multiplier = tile.sample / buffer[kernel_data.film.pass_sample_count];
if (sample_multiplier != 1.0f) {
kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
}
@@ -997,7 +996,7 @@ class CPUDevice : public Device {
coverage.finalize();
}
- if (task.adaptive_sampling.use) {
+ if (task.adaptive_sampling.use && (tile.stealing_state != RenderTile::WAS_STOLEN)) {
adaptive_sampling_post(tile, kg);
}
}
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index f19289f966e..f04113635f3 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -760,9 +760,6 @@ class OptiXDevice : public CUDADevice {
const int end_sample = rtile.start_sample + rtile.num_samples;
// Keep this number reasonable to avoid running into TDRs
int step_samples = (info.display_device ? 8 : 32);
- if (task.adaptive_sampling.use) {
- step_samples = task.adaptive_sampling.align_static_samples(step_samples);
- }
// Offset into launch params buffer so that streams use separate data
device_ptr launch_params_ptr = launch_params.device_pointer +
@@ -770,10 +767,14 @@ class OptiXDevice : public CUDADevice {
const CUDAContextScope scope(cuContext);
- for (int sample = rtile.start_sample; sample < end_sample; sample += step_samples) {
+ for (int sample = rtile.start_sample; sample < end_sample;) {
// Copy work tile information to device
- wtile.num_samples = min(step_samples, end_sample - sample);
wtile.start_sample = sample;
+ wtile.num_samples = step_samples;
+ if (task.adaptive_sampling.use) {
+ wtile.num_samples = task.adaptive_sampling.align_samples(sample, step_samples);
+ }
+ wtile.num_samples = min(wtile.num_samples, end_sample - sample);
device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
check_result_cuda(
cuMemcpyHtoDAsync(d_wtile_ptr, &wtile, sizeof(wtile), cuda_stream[thread_index]));
@@ -815,7 +816,8 @@ class OptiXDevice : public CUDADevice {
check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
// Update current sample, so it is displayed correctly
- rtile.sample = wtile.start_sample + wtile.num_samples;
+ sample += wtile.num_samples;
+ rtile.sample = sample;
// Update task progress after the kernel completed rendering
task.update_progress(&rtile, wtile.w * wtile.h * wtile.num_samples);
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 4c288f60c16..9889f688aaa 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -223,8 +223,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask &task,
subtile.num_samples = samples_per_second;
if (task.adaptive_sampling.use) {
- subtile.num_samples = task.adaptive_sampling.align_dynamic_samples(subtile.start_sample,
- subtile.num_samples);
+ subtile.num_samples = task.adaptive_sampling.align_samples(subtile.start_sample,
+ subtile.num_samples);
}
/* Don't go beyond requested number of samples. */
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 6e7c184c6c9..2cc9c6a985f 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -144,41 +144,20 @@ AdaptiveSampling::AdaptiveSampling() : use(true), adaptive_step(0), min_samples(
}
/* Render samples in steps that align with the adaptive filtering. */
-int AdaptiveSampling::align_static_samples(int samples) const
+int AdaptiveSampling::align_samples(int sample, int num_samples) const
{
- if (samples > adaptive_step) {
- /* Make multiple of adaptive_step. */
- while (samples % adaptive_step != 0) {
- samples--;
- }
- }
- else if (samples < adaptive_step) {
- /* Make divisor of adaptive_step. */
- while (adaptive_step % samples != 0) {
- samples--;
- }
- }
-
- return max(samples, 1);
-}
+ int end_sample = sample + num_samples;
-/* Render samples in steps that align with the adaptive filtering, with the
- * suggested number of samples dynamically changing. */
-int AdaptiveSampling::align_dynamic_samples(int offset, int samples) const
-{
- /* Round so that we end up on multiples of adaptive_samples. */
- samples += offset;
+ /* Round down end sample to the nearest sample that needs filtering. */
+ end_sample &= ~(adaptive_step - 1);
- if (samples > adaptive_step) {
- /* Make multiple of adaptive_step. */
- while (samples % adaptive_step != 0) {
- samples--;
- }
+ if (end_sample <= sample) {
+ /* In order to reach the next sample that needs filtering, we'd need
+ * to increase num_samples. We don't do that in this function, so
+ * just keep it as is and don't filter this time around. */
+ return num_samples;
}
-
- samples -= offset;
-
- return max(samples, 1);
+ return end_sample - sample;
}
bool AdaptiveSampling::need_filter(int sample) const
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index f819f84eb43..f9b47c59e95 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -117,8 +117,7 @@ class AdaptiveSampling {
public:
AdaptiveSampling();
- int align_static_samples(int samples) const;
- int align_dynamic_samples(int offset, int samples) const;
+ int align_samples(int sample, int num_samples) const;
bool need_filter(int sample) const;
bool use;