Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Stockner <lukas.stockner@freenet.de>2018-10-08 23:13:40 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2018-10-08 23:13:40 +0300
commit15e9d80375797dd7ba9779daf6d1a7da5cd6de8e (patch)
tree0e065f6e43c404f75ba141b7bf1c868c9ba84efd
parent9756475ed632d868b16352f389fc276a6879b867 (diff)
Cycles: Use existing shared temporary memory in reconstruction step of the denoiser
Previously the code allocated its own temporary memory, but it's possible to just use the existing shared one instead.
-rw-r--r--intern/cycles/device/device_cuda.cpp23
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp48
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu23
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl23
4 files changed, 52 insertions, 65 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 7d9a13ecc88..5b46d5a507d 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1397,18 +1397,14 @@ public:
int h = task->reconstruction_state.source_h;
int stride = task->buffer.stride;
- int shift_stride = stride*h;
+ int pass_stride = task->buffer.pass_stride;
int num_shifts = (2*r+1)*(2*r+1);
- int mem_size = sizeof(float)*shift_stride*num_shifts;
-
- device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
- temporary_mem.alloc_to_device(2*mem_size);
if(have_error())
return false;
- CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
- CUdeviceptr blurDifference = difference + mem_size;
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + sizeof(float)*pass_stride*num_shifts;
{
CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1426,9 +1422,9 @@ public:
task->reconstruction_state.source_w * task->reconstruction_state.source_h,
num_shifts);
- void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &shift_stride, &r, &task->buffer.pass_stride, &a, &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &shift_stride, &r, &f};
- void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &shift_stride, &r, &f};
+ void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &pass_stride, &a, &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
void *construct_gramian_args[] = {&blurDifference,
&task->buffer.mem.device_pointer,
&task->storage.transform.device_pointer,
@@ -1437,9 +1433,8 @@ public:
&task->storage.XtWY.device_pointer,
&task->reconstruction_state.filter_window,
&w, &h, &stride,
- &shift_stride, &r,
- &f,
- &task->buffer.pass_stride};
+ &pass_stride, &r,
+ &f};
CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
@@ -1448,8 +1443,6 @@ public:
CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
}
- temporary_mem.free();
-
{
CUfunction cuFinalize;
cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index cc887134bb0..75418dad1cc 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -865,38 +865,38 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
int h = task->reconstruction_state.source_h;
int stride = task->buffer.stride;
- int shift_stride = stride*h;
- int num_shifts = (2*task->radius + 1)*(2*task->radius + 1);
- int mem_size = sizeof(float)*shift_stride*num_shifts;
+ int r = task->radius;
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2*r-+1)*(2*r+1);
- cl_mem difference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
- opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct");
- cl_mem blurDifference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
- opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct");
+ device_sub_ptr difference(task->buffer.temporary_mem, 0, pass_stride*num_shifts);
+ device_sub_ptr blurDifference(task->buffer.temporary_mem, pass_stride*num_shifts, pass_stride*num_shifts);
+ cl_mem difference_mem = CL_MEM_PTR(*difference);
+ cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference);
kernel_set_args(ckNLMCalcDifference, 0,
color_mem,
color_variance_mem,
- difference,
+ difference_mem,
w, h, stride,
- shift_stride,
- task->radius,
- task->buffer.pass_stride,
+ pass_stride,
+ r,
+ pass_stride,
1.0f, task->nlm_k_2);
kernel_set_args(ckNLMBlur, 0,
- difference,
- blurDifference,
+ difference_mem,
+ blurDifference_mem,
w, h, stride,
- shift_stride,
- task->radius, 4);
+ pass_stride,
+ r, 4);
kernel_set_args(ckNLMCalcWeight, 0,
- blurDifference,
- difference,
+ blurDifference_mem,
+ difference_mem,
w, h, stride,
- shift_stride,
- task->radius, 4);
+ pass_stride,
+ r, 4);
kernel_set_args(ckNLMConstructGramian, 0,
- blurDifference,
+ blurDifference_mem,
buffer_mem,
transform_mem,
rank_mem,
@@ -904,9 +904,8 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
XtWY_mem,
task->reconstruction_state.filter_window,
w, h, stride,
- shift_stride,
- task->radius, 4,
- task->buffer.pass_stride);
+ pass_stride,
+ r, 4);
enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true);
enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
@@ -914,9 +913,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
enqueue_kernel(ckNLMConstructGramian, w*h, num_shifts, true, 256);
- opencl_assert(clReleaseMemObject(difference));
- opencl_assert(clReleaseMemObject(blurDifference));
-
kernel_set_args(ckFinalize, 0,
output_mem,
rank_mem,
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 0561c40e6b1..b856cbde45c 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -140,7 +140,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int channel_offset,
float a,
@@ -148,7 +148,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
@@ -165,13 +165,13 @@ kernel_cuda_filter_nlm_blur(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_blur(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -186,13 +186,13 @@ kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_weight(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -209,13 +209,13 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
difference_image + ofs,
image,
@@ -252,14 +252,13 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
- int f,
- int pass_stride)
+ int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) {
+ if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
difference_image + ofs,
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 3c75754fb39..a550f97f4eb 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -132,7 +132,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int channel_offset,
float a,
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
@@ -155,13 +155,13 @@ __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict di
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_blur(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -174,13 +174,13 @@ __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_rest
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_weight(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -195,13 +195,13 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
difference_image + ofs,
image,
@@ -234,14 +234,13 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
- int f,
- int pass_stride)
+ int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) {
+ if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
difference_image + ofs,