diff options
author | Campbell Barton <ideasman42@gmail.com> | 2018-10-09 00:46:00 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2018-10-09 00:46:00 +0300 |
commit | 733e6c0b1d2433166b7326c3654c5ece7ce17b88 (patch) | |
tree | 265b6f10e6eb1e826743a02f9b12703038304142 /intern/cycles/device | |
parent | 11877e5bec4c590e51565c7e6191b33957dddc1c (diff) | |
parent | 3bc885e5f46ba103ed25f7e1c3bd897157ef768e (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 11 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 23 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 48 |
4 files changed, 43 insertions, 49 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 7c72ab1a009..7eb73dea3ef 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -184,11 +184,11 @@ public: KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel; - KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel; - KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel; - KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel; - KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel; - KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel; + KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel; + KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel; + KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel; + KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel; + KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel; KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel; KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel; @@ -499,6 +499,7 @@ public: filter_nlm_update_output_kernel()(dx, dy, blurDifference, (float*) image_ptr, + difference, (float*) out_ptr, weightAccum, local_rect, diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 2af8a9f48c8..22b1bc493c8 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1397,18 +1397,14 @@ public: int h = task->reconstruction_state.source_h; int stride = task->buffer.stride; - int shift_stride = stride*h; + int pass_stride = task->buffer.pass_stride; int num_shifts = (2*r+1)*(2*r+1); - int mem_size = sizeof(float)*shift_stride*num_shifts; - - device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem"); - temporary_mem.alloc_to_device(2*mem_size); if(have_error()) return false; - CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); - CUdeviceptr blurDifference = difference + mem_size; + CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + sizeof(float)*pass_stride*num_shifts; { CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; @@ -1426,9 +1422,9 @@ public: task->reconstruction_state.source_w * task->reconstruction_state.source_h, num_shifts); - void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &shift_stride, &r, &task->buffer.pass_stride, &a, &k_2}; - void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &shift_stride, &r, &f}; - void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &shift_stride, &r, &f}; + void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &pass_stride, &a, &k_2}; + void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; + void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; void *construct_gramian_args[] = {&blurDifference, &task->buffer.mem.device_pointer, &task->storage.transform.device_pointer, @@ -1437,9 +1433,8 @@ public: &task->storage.XtWY.device_pointer, &task->reconstruction_state.filter_window, &w, &h, &stride, - &shift_stride, &r, - &f, - &task->buffer.pass_stride}; + &pass_stride, &r, + &f}; CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); @@ -1448,8 +1443,6 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); } - temporary_mem.free(); - { CUfunction cuFinalize; cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize")); diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 23c18fa15b2..78c65a3d22d 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -99,14 +99,18 @@ void DenoisingTask::setup_denoising_buffer() buffer.mem.alloc_to_device(mem_size, false); /* CPUs process shifts sequentially while GPUs process them in parallel. */ - int num_shifts = 1; + int num_layers; if(buffer.gpu_temporary_mem) { /* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */ int max_radius = max(radius, 6); - num_shifts = (2*max_radius + 1) * (2*max_radius + 1); + int num_shifts = (2*max_radius + 1) * (2*max_radius + 1); + num_layers = 2*num_shifts + 1; + } + else { + num_layers = 3; } /* Allocate two layers per shift as well as one for the weight accumulation. */ - buffer.temporary_mem.alloc_to_device((2*num_shifts + 1) * buffer.pass_stride); + buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride); } void DenoisingTask::prefilter_shadowing() diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index cc887134bb0..ffd3b7d7625 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -865,38 +865,38 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, int h = task->reconstruction_state.source_h; int stride = task->buffer.stride; - int shift_stride = stride*h; - int num_shifts = (2*task->radius + 1)*(2*task->radius + 1); - int mem_size = sizeof(float)*shift_stride*num_shifts; + int r = task->radius; + int pass_stride = task->buffer.pass_stride; + int num_shifts = (2*r+1)*(2*r+1); - cl_mem difference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr); - opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct"); - cl_mem blurDifference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr); - opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct"); + device_sub_ptr difference(task->buffer.temporary_mem, 0, pass_stride*num_shifts); + device_sub_ptr blurDifference(task->buffer.temporary_mem, pass_stride*num_shifts, pass_stride*num_shifts); + cl_mem difference_mem = CL_MEM_PTR(*difference); + cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference); kernel_set_args(ckNLMCalcDifference, 0, color_mem, color_variance_mem, - difference, + difference_mem, w, h, stride, - shift_stride, - task->radius, - task->buffer.pass_stride, + pass_stride, + r, + pass_stride, 1.0f, task->nlm_k_2); kernel_set_args(ckNLMBlur, 0, - difference, - blurDifference, + difference_mem, + blurDifference_mem, w, h, stride, - shift_stride, - task->radius, 4); + pass_stride, + r, 4); kernel_set_args(ckNLMCalcWeight, 0, - blurDifference, - difference, + blurDifference_mem, + difference_mem, w, h, stride, - shift_stride, - task->radius, 4); + pass_stride, + r, 4); kernel_set_args(ckNLMConstructGramian, 0, - blurDifference, + blurDifference_mem, buffer_mem, transform_mem, rank_mem, @@ -904,9 +904,8 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, XtWY_mem, task->reconstruction_state.filter_window, w, h, stride, - shift_stride, - task->radius, 4, - task->buffer.pass_stride); + pass_stride, + r, 4); enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true); enqueue_kernel(ckNLMBlur, w*h, num_shifts, true); @@ -914,9 +913,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, enqueue_kernel(ckNLMBlur, w*h, num_shifts, true); enqueue_kernel(ckNLMConstructGramian, w*h, num_shifts, true, 256); - opencl_assert(clReleaseMemObject(difference)); - opencl_assert(clReleaseMemObject(blurDifference)); - kernel_set_args(ckFinalize, 0, output_mem, rank_mem, |