From c9608047472ae2b08d2d4b188fca5211a6c0b925 Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Wed, 4 Jul 2018 14:02:38 +0200 Subject: Cycles Denoising: Pass tile buffers to every OpenCL kernel to conform to standard and get rid of set_tile_info --- intern/cycles/device/device_cpu.cpp | 13 ------- intern/cycles/device/device_cuda.cpp | 13 ------- intern/cycles/device/device_denoising.cpp | 5 ++- intern/cycles/device/device_denoising.h | 1 - intern/cycles/device/opencl/opencl.h | 2 -- intern/cycles/device/opencl/opencl_base.cpp | 48 +++++++++++--------------- intern/cycles/kernel/filter/filter_defines.h | 25 ++++++++++++++ intern/cycles/kernel/filter/filter_prefilter.h | 8 ++--- intern/cycles/kernel/kernels/opencl/filter.cl | 32 +++-------------- 9 files changed, 55 insertions(+), 92 deletions(-) diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c54ac1e5933..be0dcc20755 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -459,18 +459,6 @@ public: } }; - bool denoising_set_tile_info(device_ptr *buffers, DenoisingTask *task) - { - TileInfo *tile_info = (TileInfo*) task->tile_info_mem.host_pointer; - for(int i = 0; i < 9; i++) { - tile_info->buffers[i] = buffers[i]; - } - - task->tile_info_mem.copy_to_device(); - - return true; - } - bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr, DenoisingTask *task) { @@ -722,7 +710,6 @@ public: denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising); denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); - denoising.functions.set_tile_info = function_bind(&CPUDevice::denoising_set_tile_info, this, _1, &denoising); denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); denoising.render_buffer.samples = tile.sample; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index c8e0ea4d896..8294af340e8 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1251,18 +1251,6 @@ public: } } - bool denoising_set_tile_info(device_ptr *buffers, DenoisingTask *task) - { - TileInfo *tile_info = (TileInfo*) task->tile_info_mem.host_pointer; - for(int i = 0; i < 9; i++) { - tile_info->buffers[i] = buffers[i]; - } - - task->tile_info_mem.copy_to_device(); - - return !have_error(); - } - #define CUDA_GET_BLOCKSIZE(func, w, h) \ int threads_per_block; \ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ @@ -1622,7 +1610,6 @@ public: denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising); denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); - denoising.functions.set_tile_info = function_bind(&CUDADevice::denoising_set_tile_info, this, _1, &denoising); denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); denoising.render_buffer.samples = rtile.sample; diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 318b14ab499..c0d4634262d 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -62,11 +62,10 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles) { tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int)); - device_ptr buffers[9]; for(int i = 0; i < 9; i++) { - buffers[i] = rtiles[i].buffer; tile_info->offsets[i] = rtiles[i].offset; tile_info->strides[i] = rtiles[i].stride; + tile_info->buffers[i] = rtiles[i].buffer; } tile_info->x[0] = rtiles[3].x; tile_info->x[1] = rtiles[4].x; @@ -81,7 +80,7 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles) target_buffer.stride = rtiles[9].stride; target_buffer.ptr = rtiles[9].buffer; - functions.set_tile_info(buffers); + tile_info_mem.copy_to_device(); } void DenoisingTask::setup_denoising_buffer() diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 1063d43d039..e0da52867f1 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -89,7 +89,6 @@ public: device_ptr depth_ptr, device_ptr output_ptr )> detect_outliers; - function set_tile_info; function map_neighbor_tiles; function unmap_neighbor_tiles; } functions; diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index c550d738bd5..22e0503365c 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -436,8 +436,6 @@ protected: device_ptr depth_ptr, device_ptr output_ptr, DenoisingTask *task); - bool denoising_set_tile_info(device_ptr *buffers, - DenoisingTask *task); device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size); void mem_free_sub_ptr(device_ptr ptr); diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 50d371e4c55..9a50d217321 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -246,7 +246,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea denoising_program.add_kernel(ustring("filter_nlm_normalize")); denoising_program.add_kernel(ustring("filter_nlm_construct_gramian")); denoising_program.add_kernel(ustring("filter_finalize")); - denoising_program.add_kernel(ustring("filter_set_tile_info")); vector programs; programs.push_back(&base_program); @@ -981,9 +980,16 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr, cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow")); - kernel_set_args(ckFilterDivideShadow, 0, - task->render_buffer.samples, - tile_info_mem, + int arg_ofs = kernel_set_args(ckFilterDivideShadow, 0, + task->render_buffer.samples, + tile_info_mem); + cl_mem buffers[9]; + for(int i = 0; i < 9; i++) { + buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]); + arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs, + buffers[i]); + } + kernel_set_args(ckFilterDivideShadow, arg_ofs, a_mem, b_mem, sample_variance_mem, @@ -1012,9 +1018,16 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset, cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature")); - kernel_set_args(ckFilterGetFeature, 0, - task->render_buffer.samples, - tile_info_mem, + int arg_ofs = kernel_set_args(ckFilterGetFeature, 0, + task->render_buffer.samples, + tile_info_mem); + cl_mem buffers[9]; + for(int i = 0; i < 9; i++) { + buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]); + arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs, + buffers[i]); + } + kernel_set_args(ckFilterGetFeature, arg_ofs, mean_offset, variance_offset, mean_mem, @@ -1056,29 +1069,8 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr, return true; } -bool OpenCLDeviceBase::denoising_set_tile_info(device_ptr *buffers, - DenoisingTask *task) -{ - task->tile_info_mem.copy_to_device(); - - cl_mem tile_info_mem = CL_MEM_PTR(task->tile_info_mem.device_pointer); - - cl_kernel ckFilterSetTileInfo = denoising_program(ustring("filter_set_tile_info")); - - kernel_set_args(ckFilterSetTileInfo, 0, tile_info_mem); - for(int i = 0; i < 9; i++) { - cl_mem buffer_mem = CL_MEM_PTR(buffers[i]); - kernel_set_args(ckFilterSetTileInfo, i+1, buffer_mem); - } - - enqueue_kernel(ckFilterSetTileInfo, 1, 1); - - return true; -} - void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising) { - denoising.functions.set_tile_info = function_bind(&OpenCLDeviceBase::denoising_set_tile_info, this, _1, &denoising); denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising); denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h index 57d3d90594f..d48ea3ac1d6 100644 --- a/intern/cycles/kernel/filter/filter_defines.h +++ b/intern/cycles/kernel/filter/filter_defines.h @@ -35,4 +35,29 @@ typedef struct TileInfo { #endif } TileInfo; +#ifdef __KERNEL_OPENCL__ +# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info, \ + ccl_global float *tile_buffer_1, \ + ccl_global float *tile_buffer_2, \ + ccl_global float *tile_buffer_3, \ + ccl_global float *tile_buffer_4, \ + ccl_global float *tile_buffer_5, \ + ccl_global float *tile_buffer_6, \ + ccl_global float *tile_buffer_7, \ + ccl_global float *tile_buffer_8, \ + ccl_global float *tile_buffer_9 +# define CCL_FILTER_TILE_INFO_ARG tile_info, \ + tile_buffer_1, tile_buffer_2, tile_buffer_3, \ + tile_buffer_4, tile_buffer_5, tile_buffer_6, \ + tile_buffer_7, tile_buffer_8, tile_buffer_9 +# define ccl_get_tile_buffer(id) (tile_buffer_ ## id) +#else +# ifdef __KERNEL_CUDA__ +# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info +# else +# define CCL_FILTER_TILE_INFO TileInfo* tile_info +# endif +# define ccl_get_tile_buffer(id) (tile_info->buffers[id]) +#endif + #endif /* __FILTER_DEFINES_H__*/ diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index 9513bf46bd7..3507f80df46 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -26,7 +26,7 @@ CCL_NAMESPACE_BEGIN * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy. */ ccl_device void kernel_filter_divide_shadow(int sample, - ccl_global TileInfo *tile_info, + CCL_FILTER_TILE_INFO, int x, int y, ccl_global float *unfilteredA, ccl_global float *unfilteredB, @@ -43,7 +43,7 @@ ccl_device void kernel_filter_divide_shadow(int sample, int offset = tile_info->offsets[tile]; int stride = tile_info->strides[tile]; - const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) tile_info->buffers[tile]; + const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile); center_buffer += (y*stride + x + offset)*buffer_pass_stride; center_buffer += buffer_denoising_offset + 14; @@ -79,7 +79,7 @@ ccl_device void kernel_filter_divide_shadow(int sample, * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive). */ ccl_device void kernel_filter_get_feature(int sample, - ccl_global TileInfo *tile_info, + CCL_FILTER_TILE_INFO, int m_offset, int v_offset, int x, int y, ccl_global float *mean, @@ -90,7 +90,7 @@ ccl_device void kernel_filter_get_feature(int sample, int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2); int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2); int tile = ytile*3+xtile; - ccl_global float *center_buffer = ((ccl_global float*) tile_info->buffers[tile]) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset; + ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset; int buffer_w = align_up(rect.z - rect.x, 4); int idx = (y-rect.y)*buffer_w + (x - rect.x); diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index d553ee6833c..3c75754fb39 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -23,7 +23,7 @@ /* kernels */ __kernel void kernel_ocl_filter_divide_shadow(int sample, - ccl_global TileInfo *tile_info, + CCL_FILTER_TILE_INFO, ccl_global float *unfilteredA, ccl_global float *unfilteredB, ccl_global float *sampleVariance, @@ -37,7 +37,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample, int y = prefilter_rect.y + get_global_id(1); if(x < prefilter_rect.z && y < prefilter_rect.w) { kernel_filter_divide_shadow(sample, - tiles, + CCL_FILTER_TILE_INFO_ARG, x, y, unfilteredA, unfilteredB, @@ -51,7 +51,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample, } __kernel void kernel_ocl_filter_get_feature(int sample, - ccl_global TileInfo *tile_info, + CCL_FILTER_TILE_INFO, int m_offset, int v_offset, ccl_global float *mean, @@ -64,7 +64,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample, int y = prefilter_rect.y + get_global_id(1); if(x < prefilter_rect.z && y < prefilter_rect.w) { kernel_filter_get_feature(sample, - tiles, + CCL_FILTER_TILE_INFO_ARG, m_offset, v_offset, x, y, mean, variance, @@ -276,27 +276,3 @@ __kernel void kernel_ocl_filter_finalize(ccl_global float *buffer, buffer_params, sample); } } - -__kernel void kernel_ocl_filter_set_tile_info(ccl_global TileInfo* tile_info, - ccl_global float *buffer_1, - ccl_global float *buffer_2, - ccl_global float *buffer_3, - ccl_global float *buffer_4, - ccl_global float *buffer_5, - ccl_global float *buffer_6, - ccl_global float *buffer_7, - ccl_global float *buffer_8, - ccl_global float *buffer_9) -{ - if((get_global_id(0) == 0) && (get_global_id(1) == 0)) { - tile_info->buffers[0] = buffer_1; - tile_info->buffers[1] = buffer_2; - tile_info->buffers[2] = buffer_3; - tile_info->buffers[3] = buffer_4; - tile_info->buffers[4] = buffer_5; - tile_info->buffers[5] = buffer_6; - tile_info->buffers[6] = buffer_7; - tile_info->buffers[7] = buffer_8; - tile_info->buffers[8] = buffer_9; - } -} -- cgit v1.2.3