Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h9
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h19
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu25
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl20
4 files changed, 60 insertions, 13 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 08333c7a455..02c85562db8 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -68,6 +68,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
int r);
void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+ TileInfo *tiles,
int x,
int y,
int storage_ofs,
@@ -75,6 +76,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
int *rank,
int* rect,
int pass_stride,
+ int frame_stride,
+ bool use_time,
int radius,
float pca_threshold);
@@ -87,6 +90,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
int* rect,
int stride,
int channel_offset,
+ int frame_offset,
float a,
float k_2);
@@ -116,6 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
+ int t,
float *difference_image,
float *buffer,
float *transform,
@@ -126,7 +131,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int *filter_window,
int stride,
int f,
- int pass_stride);
+ int pass_stride,
+ int frame_offset,
+ bool use_time);
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
float *accum_image,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index b792367e3ab..c29505880cb 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -135,6 +135,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
}
void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+ TileInfo *tile_info,
int x,
int y,
int storage_ofs,
@@ -142,6 +143,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
int *rank,
int* prefilter_rect,
int pass_stride,
+ int frame_stride,
+ bool use_time,
int radius,
float pca_threshold)
{
@@ -151,9 +154,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
rank += storage_ofs;
transform += storage_ofs*TRANSFORM_SIZE;
kernel_filter_construct_transform(buffer,
+ tile_info,
x, y,
load_int4(prefilter_rect),
pass_stride,
+ frame_stride,
+ use_time,
transform,
rank,
radius,
@@ -170,6 +176,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
int *rect,
int stride,
int channel_offset,
+ int frame_offset,
float a,
float k_2)
{
@@ -184,6 +191,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
load_int4(rect),
stride,
channel_offset,
+ frame_offset,
a, k_2);
#endif
}
@@ -243,6 +251,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
+ int t,
float *difference_image,
float *buffer,
float *transform,
@@ -253,12 +262,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int *filter_window,
int stride,
int f,
- int pass_stride)
+ int pass_stride,
+ int frame_offset,
+ bool use_time)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
#else
- kernel_filter_nlm_construct_gramian(dx, dy,
+ kernel_filter_nlm_construct_gramian(dx, dy, t,
difference_image,
buffer,
transform, rank,
@@ -266,7 +277,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
load_int4(rect),
load_int4(filter_window),
stride, f,
- pass_stride);
+ pass_stride,
+ frame_offset,
+ use_time);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 3b51bb41aed..5b552b01413 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -29,7 +29,7 @@
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_divide_shadow(int sample,
- TileInfo *tile_info,
+ CCL_FILTER_TILE_INFO,
float *unfilteredA,
float *unfilteredB,
float *sampleVariance,
@@ -59,7 +59,7 @@ kernel_cuda_filter_divide_shadow(int sample,
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_get_feature(int sample,
- TileInfo *tile_info,
+ CCL_FILTER_TILE_INFO,
int m_offset,
int v_offset,
float *mean,
@@ -138,10 +138,12 @@ kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_construct_transform(float const* __restrict__ buffer,
+ CCL_FILTER_TILE_INFO,
float *transform, int *rank,
int4 filter_area, int4 rect,
int radius, float pca_threshold,
- int pass_stride)
+ int pass_stride, int frame_stride,
+ bool use_time)
{
int x = blockDim.x*blockIdx.x + threadIdx.x;
int y = blockDim.y*blockIdx.y + threadIdx.y;
@@ -149,8 +151,11 @@ kernel_cuda_filter_construct_transform(float const* __restrict__ buffer,
int *l_rank = rank + y*filter_area.z + x;
float *l_transform = transform + y*filter_area.z + x;
kernel_filter_construct_transform(buffer,
+ tile_info,
x + filter_area.x, y + filter_area.y,
- rect, pass_stride,
+ rect,
+ pass_stride, frame_stride,
+ use_time,
l_transform, l_rank,
radius, pca_threshold,
filter_area.z*filter_area.w,
@@ -170,6 +175,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
int pass_stride,
int r,
int channel_offset,
+ int frame_offset,
float a,
float k_2)
{
@@ -183,6 +189,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
difference_image + ofs,
rect, stride,
channel_offset,
+ frame_offset,
a, k_2);
}
}
@@ -274,7 +281,8 @@ kernel_cuda_filter_nlm_normalize(float *out_image,
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_image,
+kernel_cuda_filter_nlm_construct_gramian(int t,
+ const float *ccl_restrict difference_image,
const float *ccl_restrict buffer,
float const* __restrict__ transform,
int *rank,
@@ -286,13 +294,16 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im
int stride,
int pass_stride,
int r,
- int f)
+ int f,
+ int frame_offset,
+ bool use_time)
{
int4 co, rect;
int ofs;
if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
+ t,
difference_image + ofs,
buffer,
transform, rank,
@@ -300,6 +311,8 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im
rect, filter_window,
stride, f,
pass_stride,
+ frame_offset,
+ use_time,
threadIdx.y*blockDim.x + threadIdx.x);
}
}
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 8a821ee281d..996bc27f71b 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -127,11 +127,14 @@ __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
}
__kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
+ CCL_FILTER_TILE_INFO,
ccl_global float *transform,
ccl_global int *rank,
int4 filter_area,
int4 rect,
int pass_stride,
+ int frame_stride,
+ char use_time,
int radius,
float pca_threshold)
{
@@ -141,8 +144,11 @@ __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_
ccl_global int *l_rank = rank + y*filter_area.z + x;
ccl_global float *l_transform = transform + y*filter_area.z + x;
kernel_filter_construct_transform(buffer,
+ CCL_FILTER_TILE_INFO_ARG,
x + filter_area.x, y + filter_area.y,
- rect, pass_stride,
+ rect,
+ pass_stride, frame_stride,
+ use_time,
l_transform, l_rank,
radius, pca_threshold,
filter_area.z*filter_area.w,
@@ -160,6 +166,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
int pass_stride,
int r,
int channel_offset,
+ int frame_offset,
float a,
float k_2)
{
@@ -173,6 +180,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
difference_image + ofs,
rect, stride,
channel_offset,
+ frame_offset,
a, k_2);
}
}
@@ -254,7 +262,8 @@ __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *out_image,
}
}
-__kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *ccl_restrict difference_image,
+__kernel void kernel_ocl_filter_nlm_construct_gramian(int t,
+ const ccl_global float *ccl_restrict difference_image,
const ccl_global float *ccl_restrict buffer,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
@@ -266,13 +275,16 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc
int stride,
int pass_stride,
int r,
- int f)
+ int f,
+ int frame_offset,
+ char use_time)
{
int4 co, rect;
int ofs;
if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
+ t,
difference_image + ofs,
buffer,
transform, rank,
@@ -280,6 +292,8 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc
rect, filter_window,
stride, f,
pass_stride,
+ frame_offset,
+ use_time,
get_local_id(1)*get_local_size(0) + get_local_id(0));
}
}