Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/opencl/opencl_base.cpp')
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp225
1 files changed, 117 insertions, 108 deletions
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index f43177247ef..fe084edc90e 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -560,7 +560,7 @@ size_t OpenCLDeviceBase::global_size_round_up(int group_size, int global_size)
return global_size + ((r == 0)? 0: group_size - r);
}
-void OpenCLDeviceBase::enqueue_kernel(cl_kernel kernel, size_t w, size_t h, size_t max_workgroup_size)
+void OpenCLDeviceBase::enqueue_kernel(cl_kernel kernel, size_t w, size_t h, bool x_workgroups, size_t max_workgroup_size)
{
size_t workgroup_size, max_work_items[3];
@@ -574,8 +574,15 @@ void OpenCLDeviceBase::enqueue_kernel(cl_kernel kernel, size_t w, size_t h, size
}
/* Try to divide evenly over 2 dimensions. */
- size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
- size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
+ size_t local_size[2];
+ if(x_workgroups) {
+ local_size[0] = workgroup_size;
+ local_size[1] = 1;
+ }
+ else {
+ size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
+ local_size[0] = local_size[1] = sqrt_workgroup_size;
+ }
/* Some implementations have max size 1 on 2nd dimension. */
if(local_size[1] > max_work_items[1]) {
@@ -731,17 +738,25 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr,
device_ptr out_ptr,
DenoisingTask *task)
{
- int4 rect = task->rect;
- int w = rect.z-rect.x;
- int h = rect.w-rect.y;
+
+ int stride = task->buffer.stride;
+ int w = task->buffer.width;
+ int h = task->buffer.h;
int r = task->nlm_state.r;
int f = task->nlm_state.f;
float a = task->nlm_state.a;
float k_2 = task->nlm_state.k_2;
- cl_mem difference = CL_MEM_PTR(task->nlm_state.temporary_1_ptr);
- cl_mem blurDifference = CL_MEM_PTR(task->nlm_state.temporary_2_ptr);
- cl_mem weightAccum = CL_MEM_PTR(task->nlm_state.temporary_3_ptr);
+ int shift_stride = stride*h;
+ int num_shifts = (2*r+1)*(2*r+1);
+ int mem_size = sizeof(float)*shift_stride*num_shifts;
+
+ cl_mem weightAccum = CL_MEM_PTR(task->nlm_state.temporary_3_ptr);
+
+ cl_mem difference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer denoising_non_local_means");
+ cl_mem blurDifference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer denoising_non_local_means");
cl_mem image_mem = CL_MEM_PTR(image_ptr);
cl_mem guide_mem = CL_MEM_PTR(guide_ptr);
@@ -757,31 +772,45 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr,
cl_kernel ckNLMUpdateOutput = denoising_program(ustring("filter_nlm_update_output"));
cl_kernel ckNLMNormalize = denoising_program(ustring("filter_nlm_normalize"));
- for(int i = 0; i < (2*r+1)*(2*r+1); i++) {
- int dy = i / (2*r+1) - r;
- int dx = i % (2*r+1) - r;
- int4 local_rect = make_int4(max(0, -dx), max(0, -dy), rect.z-rect.x - max(0, dx), rect.w-rect.y - max(0, dy));
- kernel_set_args(ckNLMCalcDifference, 0,
- dx, dy, guide_mem, variance_mem,
- difference, local_rect, w, 0, a, k_2);
- kernel_set_args(ckNLMBlur, 0,
- difference, blurDifference, local_rect, w, f);
- kernel_set_args(ckNLMCalcWeight, 0,
- blurDifference, difference, local_rect, w, f);
- kernel_set_args(ckNLMUpdateOutput, 0,
- dx, dy, blurDifference, image_mem,
- out_mem, weightAccum, local_rect, w, f);
-
- enqueue_kernel(ckNLMCalcDifference, w, h);
- enqueue_kernel(ckNLMBlur, w, h);
- enqueue_kernel(ckNLMCalcWeight, w, h);
- enqueue_kernel(ckNLMBlur, w, h);
- enqueue_kernel(ckNLMUpdateOutput, w, h);
- }
+ kernel_set_args(ckNLMCalcDifference, 0,
+ guide_mem,
+ variance_mem,
+ difference,
+ w, h, stride,
+ shift_stride,
+ r, 0, a, k_2);
+ kernel_set_args(ckNLMBlur, 0,
+ difference,
+ blurDifference,
+ w, h, stride,
+ shift_stride,
+ r, f);
+ kernel_set_args(ckNLMCalcWeight, 0,
+ blurDifference,
+ difference,
+ w, h, stride,
+ shift_stride,
+ r, f);
+ kernel_set_args(ckNLMUpdateOutput, 0,
+ blurDifference,
+ image_mem,
+ out_mem,
+ weightAccum,
+ w, h, stride,
+ shift_stride,
+ r, f);
+
+ enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMCalcWeight, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMUpdateOutput, w*h, num_shifts, true);
+
+ opencl_assert(clReleaseMemObject(difference));
+ opencl_assert(clReleaseMemObject(blurDifference));
- int4 local_rect = make_int4(0, 0, w, h);
kernel_set_args(ckNLMNormalize, 0,
- out_mem, weightAccum, local_rect, w);
+ out_mem, weightAccum, w, h, stride);
enqueue_kernel(ckNLMNormalize, w, h);
return true;
@@ -837,81 +866,63 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
cl_kernel ckNLMConstructGramian = denoising_program(ustring("filter_nlm_construct_gramian"));
cl_kernel ckFinalize = denoising_program(ustring("filter_finalize"));
- cl_mem difference = CL_MEM_PTR(task->reconstruction_state.temporary_1_ptr);
- cl_mem blurDifference = CL_MEM_PTR(task->reconstruction_state.temporary_2_ptr);
-
- int r = task->radius;
- int f = 4;
- float a = 1.0f;
- for(int i = 0; i < (2*r+1)*(2*r+1); i++) {
- int dy = i / (2*r+1) - r;
- int dx = i % (2*r+1) - r;
-
- int local_rect[4] = {max(0, -dx), max(0, -dy),
- task->reconstruction_state.source_w - max(0, dx),
- task->reconstruction_state.source_h - max(0, dy)};
-
- kernel_set_args(ckNLMCalcDifference, 0,
- dx, dy,
- color_mem,
- color_variance_mem,
- difference,
- local_rect,
- task->buffer.w,
- task->buffer.pass_stride,
- a, task->nlm_k_2);
- enqueue_kernel(ckNLMCalcDifference,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h);
-
- kernel_set_args(ckNLMBlur, 0,
- difference,
- blurDifference,
- local_rect,
- task->buffer.w,
- f);
- enqueue_kernel(ckNLMBlur,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h);
-
- kernel_set_args(ckNLMCalcWeight, 0,
- blurDifference,
- difference,
- local_rect,
- task->buffer.w,
- f);
- enqueue_kernel(ckNLMCalcWeight,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h);
-
- /* Reuse previous arguments. */
- enqueue_kernel(ckNLMBlur,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h);
-
- kernel_set_args(ckNLMConstructGramian, 0,
- dx, dy,
- blurDifference,
- buffer_mem,
- transform_mem,
- rank_mem,
- XtWX_mem,
- XtWY_mem,
- local_rect,
- task->reconstruction_state.filter_rect,
- task->buffer.w,
- task->buffer.h,
- f,
- task->buffer.pass_stride);
- enqueue_kernel(ckNLMConstructGramian,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h,
- 256);
- }
+ int w = task->reconstruction_state.source_w;
+ int h = task->reconstruction_state.source_h;
+ int stride = task->buffer.stride;
+
+ int shift_stride = stride*h;
+ int num_shifts = (2*task->radius + 1)*(2*task->radius + 1);
+ int mem_size = sizeof(float)*shift_stride*num_shifts;
+
+ cl_mem difference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct");
+ cl_mem blurDifference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct");
+
+ kernel_set_args(ckNLMCalcDifference, 0,
+ color_mem,
+ color_variance_mem,
+ difference,
+ w, h, stride,
+ shift_stride,
+ task->radius,
+ task->buffer.pass_stride,
+ 1.0f, task->nlm_k_2);
+ kernel_set_args(ckNLMBlur, 0,
+ difference,
+ blurDifference,
+ w, h, stride,
+ shift_stride,
+ task->radius, 4);
+ kernel_set_args(ckNLMCalcWeight, 0,
+ blurDifference,
+ difference,
+ w, h, stride,
+ shift_stride,
+ task->radius, 4);
+ kernel_set_args(ckNLMConstructGramian, 0,
+ blurDifference,
+ buffer_mem,
+ transform_mem,
+ rank_mem,
+ XtWX_mem,
+ XtWY_mem,
+ task->reconstruction_state.filter_window,
+ w, h, stride,
+ shift_stride,
+ task->radius, 4,
+ task->buffer.pass_stride);
+
+ enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMCalcWeight, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
+ enqueue_kernel(ckNLMConstructGramian, w*h, num_shifts, true, 256);
+
+ opencl_assert(clReleaseMemObject(difference));
+ opencl_assert(clReleaseMemObject(blurDifference));
kernel_set_args(ckFinalize, 0,
- task->buffer.w,
- task->buffer.h,
output_mem,
rank_mem,
XtWX_mem,
@@ -919,9 +930,7 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
task->filter_area,
task->reconstruction_state.buffer_params,
task->render_buffer.samples);
- enqueue_kernel(ckFinalize,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h);
+ enqueue_kernel(ckFinalize, w, h);
return true;
}