Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h1
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h3
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h4
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h10
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu23
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu30
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl12
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h2
9 files changed, 65 insertions, 43 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index b62aa9663ec..e036b53b810 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -95,6 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int dy,
float *difference_image,
float *image,
+ float *temp_image,
float *out_image,
float *accum_image,
int* rect,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 26777fdabb2..4c758711481 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -191,6 +191,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int dy,
float *difference_image,
float *image,
+ float *temp_image,
float *out_image,
float *accum_image,
int *rect,
@@ -200,7 +201,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
#else
- kernel_filter_nlm_update_output(dx, dy, difference_image, image, out_image, accum_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_update_output(dx, dy, difference_image, image, temp_image, out_image, accum_image, load_int4(rect), stride, f);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index b77b7350d86..ae4fd85780d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -26,7 +26,7 @@ template<typename T> struct TextureInterpolator {
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void)0
+ } (void) 0
static ccl_always_inline float4 read(float4 r)
{
@@ -540,4 +540,4 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
CCL_NAMESPACE_END
-#endif // __KERNEL_CPU_IMAGE_H__
+#endif // __KERNEL_CPU_IMAGE_H__
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 5ec1655ab05..759b7e4c20d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -97,7 +97,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
{
kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
}
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
/* Film */
@@ -120,7 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
x, y,
offset,
stride);
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
@@ -141,7 +141,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
x, y,
offset,
stride);
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
/* Shader Evaluate */
@@ -176,7 +176,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
else {
kernel_background_evaluate(kg, input, output, i);
}
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
#else /* __SPLIT_KERNEL__ */
@@ -208,7 +208,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
ccl_local type locals; \
kernel_##name(kg, &locals); \
}
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 0561c40e6b1..b856cbde45c 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -140,7 +140,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int channel_offset,
float a,
@@ -148,7 +148,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
@@ -165,13 +165,13 @@ kernel_cuda_filter_nlm_blur(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_blur(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -186,13 +186,13 @@ kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_weight(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -209,13 +209,13 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
difference_image + ofs,
image,
@@ -252,14 +252,13 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
- int f,
- int pass_stride)
+ int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) {
+ if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
difference_image + ofs,
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 8a180a509e8..af311027f78 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -40,14 +40,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
{
int work_index = ccl_global_id(0);
-
- if(work_index < total_work_size) {
- uint x, y, sample;
+ bool thread_is_active = work_index < total_work_size;
+ uint x, y, sample;
+ KernelGlobals kg;
+ if(thread_is_active) {
get_work_pixel(tile, work_index, &x, &y, &sample);
- KernelGlobals kg;
kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
+
+ if(kernel_data.film.cryptomatte_passes) {
+ __syncthreads();
+ if(thread_is_active) {
+ kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
+ }
+ }
}
#ifdef __BRANCHED_PATH__
@@ -56,14 +63,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
{
int work_index = ccl_global_id(0);
-
- if(work_index < total_work_size) {
- uint x, y, sample;
+ bool thread_is_active = work_index < total_work_size;
+ uint x, y, sample;
+ KernelGlobals kg;
+ if(thread_is_active) {
get_work_pixel(tile, work_index, &x, &y, &sample);
- KernelGlobals kg;
kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
+
+ if(kernel_data.film.cryptomatte_passes) {
+ __syncthreads();
+ if(thread_is_active) {
+ kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
+ }
+ }
}
#endif
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 3c75754fb39..a550f97f4eb 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -132,7 +132,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int channel_offset,
float a,
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
@@ -155,13 +155,13 @@ __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict di
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_blur(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -174,13 +174,13 @@ __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_rest
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_weight(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -195,13 +195,13 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
difference_image + ofs,
image,
@@ -234,14 +234,13 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
- int f,
- int pass_stride)
+ int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) {
+ if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
difference_image + ofs,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 63128d0aecf..de1f5088629 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -66,9 +66,17 @@ __kernel void kernel_ocl_path_trace(
int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1);
-
- if(x < sx + sw && y < sy + sh)
+ bool thread_is_active = x < sx + sw && y < sy + sh;
+ if(thread_is_active) {
kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
+ }
+ if(kernel_data.film.cryptomatte_passes) {
+ /* Make sure no thread is writing to the buffers. */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(thread_is_active) {
+ kernel_cryptomatte_post(kg, buffer, sample, x, y, offset, stride);
+ }
+ }
}
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index dd9d683e030..79af831c2fb 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -142,7 +142,7 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void)0
+ } (void) 0
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{