diff options
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/filter.cu | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel.cu | 30 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/filter.cl | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel.cl | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h | 2 |
9 files changed, 65 insertions, 43 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index b62aa9663ec..e036b53b810 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -95,6 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, int dy, float *difference_image, float *image, + float *temp_image, float *out_image, float *accum_image, int* rect, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 26777fdabb2..4c758711481 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -191,6 +191,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, int dy, float *difference_image, float *image, + float *temp_image, float *out_image, float *accum_image, int *rect, @@ -200,7 +201,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); #else - kernel_filter_nlm_update_output(dx, dy, difference_image, image, out_image, accum_image, load_int4(rect), stride, f); + kernel_filter_nlm_update_output(dx, dy, difference_image, image, temp_image, out_image, accum_image, load_int4(rect), stride, f); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index b77b7350d86..ae4fd85780d 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -26,7 +26,7 @@ template<typename T> struct TextureInterpolator { u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void)0 + } (void) 0 static ccl_always_inline float4 read(float4 r) { @@ -540,4 +540,4 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, CCL_NAMESPACE_END -#endif // __KERNEL_CPU_IMAGE_H__ +#endif // __KERNEL_CPU_IMAGE_H__ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 5ec1655ab05..759b7e4c20d 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -97,7 +97,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, { kernel_path_trace(kg, buffer, sample, x, y, offset, stride); } -#endif /* KERNEL_STUB */ +#endif /* KERNEL_STUB */ } /* Film */ @@ -120,7 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, x, y, offset, stride); -#endif /* KERNEL_STUB */ +#endif /* KERNEL_STUB */ } void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, @@ -141,7 +141,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, x, y, offset, stride); -#endif /* KERNEL_STUB */ +#endif /* KERNEL_STUB */ } /* Shader Evaluate */ @@ -176,7 +176,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, else { kernel_background_evaluate(kg, input, output, i); } -#endif /* KERNEL_STUB */ +#endif /* KERNEL_STUB */ } #else /* __SPLIT_KERNEL__ */ @@ -208,7 +208,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, ccl_local type locals; \ kernel_##name(kg, &locals); \ } -#endif /* KERNEL_STUB */ +#endif /* KERNEL_STUB */ DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 0561c40e6b1..b856cbde45c 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -140,7 +140,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int channel_offset, float a, @@ -148,7 +148,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w, weight_image, variance_image, @@ -165,13 +165,13 @@ kernel_cuda_filter_nlm_blur(const float *ccl_restrict difference_image, int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int f) { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_blur(co.x, co.y, difference_image + ofs, out_image + ofs, @@ -186,13 +186,13 @@ kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict difference_image, int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int f) { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_calc_weight(co.x, co.y, difference_image + ofs, out_image + ofs, @@ -209,13 +209,13 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image, int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int f) { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w, difference_image + ofs, image, @@ -252,14 +252,13 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im int w, int h, int stride, - int shift_stride, + int pass_stride, int r, - int f, - int pass_stride) + int f) { int4 co, rect; int ofs; - if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) { + if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) { kernel_filter_nlm_construct_gramian(co.x, co.y, co.z, co.w, difference_image + ofs, diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 8a180a509e8..af311027f78 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -40,14 +40,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_path_trace(WorkTile *tile, uint total_work_size) { int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; + bool thread_is_active = work_index < total_work_size; + uint x, y, sample; + KernelGlobals kg; + if(thread_is_active) { get_work_pixel(tile, work_index, &x, &y, &sample); - KernelGlobals kg; kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } + + if(kernel_data.film.cryptomatte_passes) { + __syncthreads(); + if(thread_is_active) { + kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); + } + } } #ifdef __BRANCHED_PATH__ @@ -56,14 +63,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size) { int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; + bool thread_is_active = work_index < total_work_size; + uint x, y, sample; + KernelGlobals kg; + if(thread_is_active) { get_work_pixel(tile, work_index, &x, &y, &sample); - KernelGlobals kg; kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } + + if(kernel_data.film.cryptomatte_passes) { + __syncthreads(); + if(thread_is_active) { + kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); + } + } } #endif diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 3c75754fb39..a550f97f4eb 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -132,7 +132,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_ int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int channel_offset, float a, @@ -140,7 +140,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_ { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w, weight_image, variance_image, @@ -155,13 +155,13 @@ __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict di int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int f) { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_blur(co.x, co.y, difference_image + ofs, out_image + ofs, @@ -174,13 +174,13 @@ __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_rest int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int f) { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_calc_weight(co.x, co.y, difference_image + ofs, out_image + ofs, @@ -195,13 +195,13 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re int w, int h, int stride, - int shift_stride, + int pass_stride, int r, int f) { int4 co, rect; int ofs; - if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) { + if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w, difference_image + ofs, image, @@ -234,14 +234,13 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc int w, int h, int stride, - int shift_stride, + int pass_stride, int r, - int f, - int pass_stride) + int f) { int4 co, rect; int ofs; - if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) { + if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) { kernel_filter_nlm_construct_gramian(co.x, co.y, co.z, co.w, difference_image + ofs, diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 63128d0aecf..de1f5088629 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -66,9 +66,17 @@ __kernel void kernel_ocl_path_trace( int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); - - if(x < sx + sw && y < sy + sh) + bool thread_is_active = x < sx + sw && y < sy + sh; + if(thread_is_active) { kernel_path_trace(kg, buffer, sample, x, y, offset, stride); + } + if(kernel_data.film.cryptomatte_passes) { + /* Make sure no thread is writing to the buffers. */ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if(thread_is_active) { + kernel_cryptomatte_post(kg, buffer, sample, x, y, offset, stride); + } + } } #else /* __COMPILE_ONLY_MEGAKERNEL__ */ diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index dd9d683e030..79af831c2fb 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -142,7 +142,7 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix) u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void)0 + } (void) 0 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { |