From b4a8b813993138cc831d4c04f0f94bdafc51e7fe Mon Sep 17 00:00:00 2001
From: Lukas Stockner
Date: Sun, 1 Apr 2018 02:10:27 +0200
Subject: Cycles Denoising: Don't use atomics in the accumulation kernel on CPUs

The GPU kernel needs to use atomics for accumulation since all offsets are
processed in parallel, but on CPUs that's not the case, so we can disable them
there for a considerable speedup.
---
 intern/cycles/kernel/filter/filter_reconstruction.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'intern/cycles/kernel')

diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index b7bf322f9ce..58740d5b06a 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -61,8 +61,13 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
 	                                make_int2(x+dx, y+dy), buffer + q_offset,
 	                                pass_stride, *rank, design_row, transform, stride);
 
+#ifdef __KERNEL_GPU__
 	math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride);
 	math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
+#else
+	math_trimatrix_add_gramian(XtWX, (*rank)+1, design_row, weight);
+	math_vec3_add(XtWY, (*rank)+1, design_row, weight * q_color);
+#endif
 }
 
 ccl_device_inline void kernel_filter_finalize(int x, int y,
-- 
cgit v1.2.3