Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/filter')
-rw-r--r--intern/cycles/filter/filter_features.h8
-rw-r--r--intern/cycles/filter/filter_nlm_cpu.h12
-rw-r--r--intern/cycles/filter/filter_nlm_gpu.h9
-rw-r--r--intern/cycles/filter/filter_prefilter.h22
-rw-r--r--intern/cycles/filter/filter_reconstruction.h28
-rw-r--r--intern/cycles/filter/kernels/cpu/filter_cpu.h8
-rw-r--r--intern/cycles/filter/kernels/cpu/filter_cpu_impl.h23
-rw-r--r--intern/cycles/filter/kernels/cuda/filter.cu33
8 files changed, 88 insertions, 55 deletions
diff --git a/intern/cycles/filter/filter_features.h b/intern/cycles/filter/filter_features.h
index 07c69ed7081..96b2db960d1 100644
--- a/intern/cycles/filter/filter_features.h
+++ b/intern/cycles/filter/filter_features.h
@@ -117,14 +117,14 @@ ccl_device_inline void filter_calculate_scale(float *scale)
scale[7] = 1.0f/max(sqrtf(scale[7]), 0.01f); //AlbedoB
}
-ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int channel, int pass_stride)
+ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int pass_stride)
{
- return make_float3(ccl_get_feature(channel), ccl_get_feature(channel+1), ccl_get_feature(channel+2));
+ return make_float3(ccl_get_feature(0), ccl_get_feature(1), ccl_get_feature(2));
}
-ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int channel, int pass_stride)
+ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int pass_stride)
{
- return average(make_float3(ccl_get_feature(channel), ccl_get_feature(channel+1), ccl_get_feature(channel+2)));
+ return average(make_float3(ccl_get_feature(0), ccl_get_feature(1), ccl_get_feature(2)));
}
ccl_device_inline bool filter_firefly_rejection(float3 pixel_color, float pixel_variance, float3 center_color, float sqrt_center_variance)
diff --git a/intern/cycles/filter/filter_nlm_cpu.h b/intern/cycles/filter/filter_nlm_cpu.h
index d66a743a437..3b03865a7f5 100644
--- a/intern/cycles/filter/filter_nlm_cpu.h
+++ b/intern/cycles/filter/filter_nlm_cpu.h
@@ -112,10 +112,14 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, float ccl
ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
float ccl_readonly_ptr differenceImage,
float ccl_readonly_ptr buffer,
- int color_pass, int variance_pass,
- float *transform, int *rank,
- float *XtWX, float3 *XtWY,
- int4 rect, int4 filter_rect,
+ float *color_pass,
+ float *variance_pass,
+ float *transform,
+ int *rank,
+ float *XtWX,
+ float3 *XtWY,
+ int4 rect,
+ int4 filter_rect,
int w, int h, int f)
{
/* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
diff --git a/intern/cycles/filter/filter_nlm_gpu.h b/intern/cycles/filter/filter_nlm_gpu.h
index c904032a7cc..195fa15ed9c 100644
--- a/intern/cycles/filter/filter_nlm_gpu.h
+++ b/intern/cycles/filter/filter_nlm_gpu.h
@@ -84,11 +84,14 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
int dx, int dy,
float ccl_readonly_ptr differenceImage,
float ccl_readonly_ptr buffer,
- int color_pass, int variance_pass,
+ float *color_pass,
+ float *variance_pass,
float ccl_readonly_ptr transform,
int *rank,
- float *XtWX, float3 *XtWY,
- int4 rect, int4 filter_rect,
+ float *XtWX,
+ float3 *XtWY,
+ int4 rect,
+ int4 filter_rect,
int w, int h, int f)
{
int y = fy + filter_rect.y;
diff --git a/intern/cycles/filter/filter_prefilter.h b/intern/cycles/filter/filter_prefilter.h
index 58f58a86e7b..b2eeea28fd8 100644
--- a/intern/cycles/filter/filter_prefilter.h
+++ b/intern/cycles/filter/filter_prefilter.h
@@ -25,14 +25,19 @@ CCL_NAMESPACE_BEGIN
* sampleVarianceV: Variance of the sample variance estimation, quite noisy (since it's essentially the buffer variance of the two variance halves)
* bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
*/
-ccl_device void kernel_filter_divide_shadow(int sample, float **buffers,
+ccl_device void kernel_filter_divide_shadow(int sample,
+ float **buffers,
int x, int y,
int *tile_x, int *tile_y,
int *offset, int *stride,
- float *unfiltered, float *sampleVariance,
- float *sampleVarianceV, float *bufferVariance,
- int4 rect, int buffer_pass_stride,
- int buffer_denoising_offset, int num_frames,
+ float *unfilteredA,
+ float *unfilteredB,
+ float *sampleVariance,
+ float *sampleVarianceV,
+ float *bufferVariance,
+ int4 rect,
+ int buffer_pass_stride,
+ int buffer_denoising_offset,
bool use_gradients)
{
int xtile = (x < tile_x[1])? 0: ((x < tile_x[2])? 1: 2);
@@ -47,13 +52,12 @@ ccl_device void kernel_filter_divide_shadow(int sample, float **buffers,
int buffer_w = align_up(rect.z - rect.x, 4);
int idx = (y-rect.y)*buffer_w + (x - rect.x);
- int Bofs = (rect.w - rect.y)*buffer_w*num_frames;
- unfiltered[idx] = center_buffer[15] / max(center_buffer[14], 1e-7f);
- unfiltered[idx+Bofs] = center_buffer[18] / max(center_buffer[17], 1e-7f);
+ unfilteredA[idx] = center_buffer[15] / max(center_buffer[14], 1e-7f);
+ unfilteredB[idx] = center_buffer[18] / max(center_buffer[17], 1e-7f);
float varFac = 1.0f / (sample * (sample-1));
sampleVariance[idx] = (center_buffer[16] + center_buffer[19]) * varFac;
sampleVarianceV[idx] = 0.5f * (center_buffer[16] - center_buffer[19]) * (center_buffer[16] - center_buffer[19]) * varFac * varFac;
- bufferVariance[idx] = 0.5f * (unfiltered[idx] - unfiltered[idx+Bofs]) * (unfiltered[idx] - unfiltered[idx+Bofs]);
+ bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]);
}
/* Load a regular feature from the render buffers into the denoise buffer.
diff --git a/intern/cycles/filter/filter_reconstruction.h b/intern/cycles/filter/filter_reconstruction.h
index 15e84b5d054..a308964fb4f 100644
--- a/intern/cycles/filter/filter_reconstruction.h
+++ b/intern/cycles/filter/filter_reconstruction.h
@@ -18,17 +18,21 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
int storage_stride,
- int dx, int dy, int w, int h,
+ int dx, int dy,
+ int w, int h,
float ccl_readonly_ptr buffer,
- int color_pass, int variance_pass,
+ float *color_pass,
+ float *variance_pass,
float ccl_readonly_ptr transform,
- int *rank, float weight,
- float *XtWX, float3 *XtWY)
+ int *rank,
+ float weight,
+ float *XtWX,
+ float3 *XtWY)
{
const int pass_stride = w*h;
- float ccl_readonly_ptr p_buffer = buffer + y*w + x;
- float ccl_readonly_ptr q_buffer = buffer + (y+dy)*w + (x+dx);
+ int p_offset = y *w + x;
+ int q_offset = (y+dy)*w + (x+dx);
#ifdef __KERNEL_CPU__
const int stride = 1;
@@ -37,21 +41,21 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
const int stride = storage_stride;
#endif
- float3 p_color = filter_get_pixel_color(p_buffer, color_pass, pass_stride);
- float3 q_color = filter_get_pixel_color(q_buffer, color_pass, pass_stride);
+ float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride);
+ float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride);
- float p_std_dev = sqrtf(filter_get_pixel_variance(p_buffer, variance_pass, pass_stride));
- float q_std_dev = sqrtf(filter_get_pixel_variance(q_buffer, variance_pass, pass_stride));
+ float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
+ float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
if(average(fabs(p_color - q_color)) > 3.0f*(p_std_dev + q_std_dev + 1e-3f)) {
return;
}
float feature_means[DENOISE_FEATURES], features[DENOISE_FEATURES];
- filter_get_features(make_int3(x, y, 0), p_buffer, feature_means, NULL, pass_stride);
+ filter_get_features(make_int3(x, y, 0), buffer + p_offset, feature_means, NULL, pass_stride);
float design_row[DENOISE_FEATURES+1];
- filter_get_design_row_transform(make_int3(x+dx, y+dy, 0), q_buffer, feature_means, pass_stride, features, *rank, design_row, transform, stride);
+ filter_get_design_row_transform(make_int3(x+dx, y+dy, 0), buffer + q_offset, feature_means, pass_stride, features, *rank, design_row, transform, stride);
math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride);
math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
diff --git a/intern/cycles/filter/kernels/cpu/filter_cpu.h b/intern/cycles/filter/kernels/cpu/filter_cpu.h
index b26e12c1245..6a0b58b214c 100644
--- a/intern/cycles/filter/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/filter/kernels/cpu/filter_cpu.h
@@ -24,14 +24,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
int *tile_y,
int *offset,
int *stride,
- float *unfiltered,
+ float *unfilteredA,
+ float *unfilteredB,
float *sampleV,
float *sampleVV,
float *bufferV,
int* prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset,
- int num_frames,
bool use_gradients);
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
@@ -119,8 +119,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
float *differenceImage,
float *buffer,
- int color_pass,
- int variance_pass,
+ float *color_pass,
+ float *variance_pass,
float *transform,
int *rank,
float *XtWX,
diff --git a/intern/cycles/filter/kernels/cpu/filter_cpu_impl.h b/intern/cycles/filter/kernels/cpu/filter_cpu_impl.h
index 79697e4d48f..586c30cfa69 100644
--- a/intern/cycles/filter/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/filter/kernels/cpu/filter_cpu_impl.h
@@ -42,11 +42,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
int *tile_y,
int *offset,
int *stride,
- float *unfiltered, float *sampleVariance, float *sampleVarianceV, float *bufferVariance,
+ float *unfilteredA,
+ float *unfilteredB,
+ float *sampleVariance,
+ float *sampleVarianceV,
+ float *bufferVariance,
int* prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset,
- int num_frames,
bool use_gradients)
{
#ifdef KERNEL_STUB
@@ -55,10 +58,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
kernel_filter_divide_shadow(sample, buffers,
x, y, tile_x, tile_y,
offset, stride,
- unfiltered, sampleVariance,
- sampleVarianceV, bufferVariance,
- load_int4(prefilter_rect), buffer_pass_stride,
- buffer_denoising_offset, num_frames,
+ unfilteredA,
+ unfilteredB,
+ sampleVariance,
+ sampleVarianceV,
+ bufferVariance,
+ load_int4(prefilter_rect),
+ buffer_pass_stride,
+ buffer_denoising_offset,
use_gradients);
#endif
}
@@ -213,8 +220,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
float *differenceImage,
float *buffer,
- int color_pass,
- int variance_pass,
+ float *color_pass,
+ float *variance_pass,
float *transform,
int *rank,
float *XtWX,
diff --git a/intern/cycles/filter/kernels/cuda/filter.cu b/intern/cycles/filter/kernels/cuda/filter.cu
index 3dabafddee8..c62953c1fcb 100644
--- a/intern/cycles/filter/kernels/cuda/filter.cu
+++ b/intern/cycles/filter/kernels/cuda/filter.cu
@@ -31,10 +31,14 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_divide_shadow(int sample, float* buffers,
int4 buffer_rect,
int offset, int stride,
- float *unfiltered, float *sampleVariance,
- float *sampleVarianceV, float *bufferVariance,
- int4 prefilter_rect, int buffer_pass_stride,
- int buffer_denoising_offset, int num_frames,
+ float *unfilteredA,
+ float *unfilteredB,
+ float *sampleVariance,
+ float *sampleVarianceV,
+ float *bufferVariance,
+ int4 prefilter_rect,
+ int buffer_pass_stride,
+ int buffer_denoising_offset,
bool use_gradients)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
@@ -48,10 +52,14 @@ kernel_cuda_filter_divide_shadow(int sample, float* buffers,
kernel_filter_divide_shadow(sample, tile_buffers,
x, y, tile_x, tile_y,
tile_offset, tile_stride,
- unfiltered, sampleVariance,
- sampleVarianceV, bufferVariance,
- prefilter_rect, buffer_pass_stride,
- buffer_denoising_offset, num_frames,
+ unfilteredA,
+ unfilteredB,
+ sampleVariance,
+ sampleVarianceV,
+ bufferVariance,
+ prefilter_rect,
+ buffer_pass_stride,
+ buffer_denoising_offset,
use_gradients);
}
}
@@ -198,11 +206,14 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_nlm_construct_gramian(int dx, int dy,
float ccl_readonly_ptr differenceImage,
float ccl_readonly_ptr buffer,
- int color_pass, int variance_pass,
+ float *color_pass,
+ float *variance_pass,
float const* __restrict__ transform,
int *rank,
- float *XtWX, float3 *XtWY,
- int4 rect, int4 filter_rect,
+ float *XtWX,
+ float3 *XtWY,
+ int4 rect,
+ int4 filter_rect,
int w, int h, int f) {
int x = blockDim.x*blockIdx.x + threadIdx.x + max(0, rect.x-filter_rect.x);
int y = blockDim.y*blockIdx.y + threadIdx.y + max(0, rect.y-filter_rect.y);