Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_cpu.cpp14
-rw-r--r--intern/cycles/device/device_cuda.cpp8
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp8
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h25
-rw-r--r--intern/cycles/kernel/kernel_passes.h34
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h6
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h12
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu12
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl12
9 files changed, 35 insertions, 96 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 72330b02a28..ff34f4f9ce4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -176,10 +176,10 @@ public:
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
- KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
- KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
+ KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
+ KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
@@ -563,8 +563,7 @@ public:
(float*) buffer_variance_ptr,
&task->rect.x,
task->render_buffer.pass_stride,
- task->render_buffer.denoising_data_offset,
- use_split_kernel);
+ task->render_buffer.denoising_data_offset);
}
}
return true;
@@ -587,8 +586,7 @@ public:
(float*) variance_ptr,
&task->rect.x,
task->render_buffer.pass_stride,
- task->render_buffer.denoising_data_offset,
- use_split_kernel);
+ task->render_buffer.denoising_data_offset);
}
}
return true;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index e5464dcf34e..54e012191ae 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1173,7 +1173,6 @@ public:
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
- bool use_split_variance = use_split_kernel();
void *args[] = {&task->render_buffer.samples,
&task->tiles_mem.device_pointer,
&a_ptr,
@@ -1183,8 +1182,7 @@ public:
&buffer_variance_ptr,
&task->rect,
&task->render_buffer.pass_stride,
- &task->render_buffer.denoising_data_offset,
- &use_split_variance};
+ &task->render_buffer.denoising_data_offset};
CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
cuda_assert(cuCtxSynchronize());
@@ -1209,7 +1207,6 @@ public:
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
- bool use_split_variance = use_split_kernel();
void *args[] = {&task->render_buffer.samples,
&task->tiles_mem.device_pointer,
&mean_offset,
@@ -1218,8 +1215,7 @@ public:
&variance_ptr,
&task->rect,
&task->render_buffer.pass_stride,
- &task->render_buffer.denoising_data_offset,
- &use_split_variance};
+ &task->render_buffer.denoising_data_offset};
CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
cuda_assert(cuCtxSynchronize());
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 7bdf81462b8..8095611f099 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -982,7 +982,6 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow"));
- char split_kernel = is_split_kernel()? 1 : 0;
kernel_set_args(ckFilterDivideShadow, 0,
task->render_buffer.samples,
tiles_mem,
@@ -993,8 +992,7 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
buffer_variance_mem,
task->rect,
task->render_buffer.pass_stride,
- task->render_buffer.denoising_data_offset,
- split_kernel);
+ task->render_buffer.denoising_data_offset);
enqueue_kernel(ckFilterDivideShadow,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
@@ -1015,7 +1013,6 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature"));
- char split_kernel = is_split_kernel()? 1 : 0;
kernel_set_args(ckFilterGetFeature, 0,
task->render_buffer.samples,
tiles_mem,
@@ -1025,8 +1022,7 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
variance_mem,
task->rect,
task->render_buffer.pass_stride,
- task->render_buffer.denoising_data_offset,
- split_kernel);
+ task->render_buffer.denoising_data_offset);
enqueue_kernel(ckFilterGetFeature,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 2aeb54a62be..eefcbfea230 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -35,8 +35,7 @@ ccl_device void kernel_filter_divide_shadow(int sample,
ccl_global float *bufferVariance,
int4 rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
@@ -57,10 +56,12 @@ ccl_device void kernel_filter_divide_shadow(int sample,
float varB = center_buffer[5];
int odd_sample = (sample+1)/2;
int even_sample = sample/2;
- if(use_split_variance) {
- varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
- varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
- }
+
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
+ varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
+
varA /= max(odd_sample - 1, 1);
varB /= max(even_sample - 1, 1);
@@ -84,8 +85,7 @@ ccl_device void kernel_filter_get_feature(int sample,
ccl_global float *mean,
ccl_global float *variance,
int4 rect, int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
@@ -97,12 +97,9 @@ ccl_device void kernel_filter_get_feature(int sample,
mean[idx] = center_buffer[m_offset] / sample;
if(sample > 1) {
- if(use_split_variance) {
- variance[idx] = max(0.0f, (center_buffer[v_offset] - mean[idx]*mean[idx]*sample) / (sample * (sample-1)));
- }
- else {
- variance[idx] = center_buffer[v_offset] / (sample * (sample-1));
- }
+ /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
+ * update does not work efficiently with atomics in the kernel. */
+ variance[idx] = max(0.0f, (center_buffer[v_offset] - mean[idx]*mean[idx]*sample) / (sample * (sample-1)));
}
else {
/* Can't compute variance with single sample, just set it very high. */
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index fff7f4cfdb7..bd756185e78 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -67,18 +67,7 @@ ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer
/* The online one-pass variance update that's used for the megakernel can't easily be implemented
* with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
-# ifdef __SPLIT_KERNEL__
kernel_write_pass_float(buffer+1, sample, value*value);
-# else
- if(sample == 0) {
- kernel_write_pass_float(buffer+1, sample, 0.0f);
- }
- else {
- float new_mean = buffer[0] * (1.0f / (sample + 1));
- float old_mean = (buffer[0] - value) * (1.0f / sample);
- kernel_write_pass_float(buffer+1, sample, (value - new_mean) * (value - old_mean));
- }
-# endif
}
# if defined(__SPLIT_KERNEL__)
@@ -95,19 +84,7 @@ ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buff
ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, int sample, float3 value)
{
kernel_write_pass_float3_unaligned(buffer, sample, value);
-# ifdef __SPLIT_KERNEL__
kernel_write_pass_float3_unaligned(buffer+3, sample, value*value);
-# else
- if(sample == 0) {
- kernel_write_pass_float3_unaligned(buffer+3, sample, make_float3(0.0f, 0.0f, 0.0f));
- }
- else {
- float3 sum = make_float3(buffer[0], buffer[1], buffer[2]);
- float3 new_mean = sum * (1.0f / (sample + 1));
- float3 old_mean = (sum - value) * (1.0f / sample);
- kernel_write_pass_float3_unaligned(buffer+3, sample, (value - new_mean) * (value - old_mean));
- }
-# endif
}
ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
@@ -125,18 +102,7 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob
kernel_write_pass_float(buffer+1, sample/2, path_total_shaded);
float value = path_total_shaded / max(path_total, 1e-7f);
-# ifdef __SPLIT_KERNEL__
kernel_write_pass_float(buffer+2, sample/2, value*value);
-# else
- if(sample < 2) {
- kernel_write_pass_float(buffer+2, sample/2, 0.0f);
- }
- else {
- float old_value = (buffer[1] - path_total_shaded) / max(buffer[0] - path_total, 1e-7f);
- float new_value = buffer[1] / max(buffer[0], 1e-7f);
- kernel_write_pass_float(buffer+2, sample, (value - new_value) * (value - old_value));
- }
-# endif
}
#endif /* __DENOISING_FEATURES__ */
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 2ed713299fd..bf13ba62806 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -27,8 +27,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *bufferV,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance);
+ int buffer_denoising_offset);
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
TilesInfo *tiles,
@@ -40,8 +39,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
float *variance,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance);
+ int buffer_denoising_offset);
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
ccl_global float *image,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 8dc1a8d583c..2fbb0ea2bdb 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -45,8 +45,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *bufferVariance,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
@@ -60,8 +59,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
bufferVariance,
load_int4(prefilter_rect),
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
#endif
}
@@ -74,8 +72,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
float *mean, float *variance,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
@@ -86,8 +83,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
mean, variance,
load_int4(prefilter_rect),
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 009c3fde9d5..c8172355a7f 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -37,8 +37,7 @@ kernel_cuda_filter_divide_shadow(int sample,
float *bufferVariance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
@@ -53,8 +52,7 @@ kernel_cuda_filter_divide_shadow(int sample,
bufferVariance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
@@ -68,8 +66,7 @@ kernel_cuda_filter_get_feature(int sample,
float *variance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
@@ -81,8 +78,7 @@ kernel_cuda_filter_get_feature(int sample,
mean, variance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index f015ac47d8a..7a7b596a350 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -31,8 +31,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
ccl_global float *bufferVariance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- char use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + get_global_id(0);
int y = prefilter_rect.y + get_global_id(1);
@@ -47,8 +46,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
bufferVariance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
@@ -60,8 +58,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
ccl_global float *variance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- char use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + get_global_id(0);
int y = prefilter_rect.y + get_global_id(1);
@@ -73,8 +70,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
mean, variance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}