Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Stockner <lukas.stockner@freenet.de>2019-02-06 14:42:10 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2019-02-06 17:18:29 +0300
commit405cacd4cd955552e1f7b50a176ddcdd9baf8d3b (patch)
treee54e2bf0c79bcc04d669088393b1d16df554bffd /intern/cycles/device/device_cuda.cpp
parent81159e99b819910b72cb3caba6b3cd4f35184ea9 (diff)
Cycles: prefilter feature passes separate from denoising.
Prefiltering of feature passes will happen during rendering, which can then be used for denoising immediately or written as a render pass for later (animation) denoising. The number of denoising data passes written is reduced because of this, leaving out the feature variance passes. The passes are now Normal, Albedo, Depth, Shadowing, Variance and Intensity. Ref D3889.
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r--intern/cycles/device/device_cuda.cpp169
1 files changed, 105 insertions, 64 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 7b3c25a86d5..cb7d8bbb224 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1300,7 +1300,7 @@ public:
int pass_stride = task->buffer.pass_stride;
int num_shifts = (2*r+1)*(2*r+1);
- int channel_offset = 0;
+ int channel_offset = task->nlm_state.is_color? task->buffer.pass_stride : 0;
if(have_error())
return false;
@@ -1308,6 +1308,7 @@ public:
CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
CUdeviceptr blurDifference = difference + sizeof(float)*pass_stride*num_shifts;
CUdeviceptr weightAccum = difference + 2*sizeof(float)*pass_stride*num_shifts;
+ CUdeviceptr scale_ptr = 0;
cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*pass_stride));
cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float)*pass_stride));
@@ -1326,10 +1327,10 @@ public:
CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w*h, num_shifts);
- void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &a, &k_2};
+ void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &scale_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &a, &k_2};
void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *update_output_args[] = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, &r, &f};
+ void *update_output_args[] = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, &channel_offset, &r, &f};
CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
@@ -1379,19 +1380,16 @@ public:
return !have_error();
}
- bool denoising_reconstruct(device_ptr color_ptr,
- device_ptr color_variance_ptr,
- device_ptr output_ptr,
- DenoisingTask *task)
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ DenoisingTask *task)
{
if(have_error())
return false;
CUDAContextScope scope(this);
- mem_zero(task->storage.XtWX);
- mem_zero(task->storage.XtWY);
-
int r = task->radius;
int f = 4;
float a = 1.0f;
@@ -1410,60 +1408,69 @@ public:
CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
CUdeviceptr blurDifference = difference + sizeof(float)*pass_stride*num_shifts;
- {
- CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
- cuda_assert(cuModuleGetFunction(&cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
- cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
- cuda_assert(cuModuleGetFunction(&cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
- cuda_assert(cuModuleGetFunction(&cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
-
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
-
- CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
- task->reconstruction_state.source_w * task->reconstruction_state.source_h,
- num_shifts);
-
- void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &pass_stride, &a, &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
- void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
- void *construct_gramian_args[] = {&blurDifference,
- &task->buffer.mem.device_pointer,
- &task->storage.transform.device_pointer,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->reconstruction_state.filter_window,
- &w, &h, &stride,
- &pass_stride, &r,
- &f};
-
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
- CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
- }
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
+ cuda_assert(cuModuleGetFunction(&cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(&cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(&cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
+ task->reconstruction_state.source_w * task->reconstruction_state.source_h,
+ num_shifts);
+
+ void *calc_difference_args[] = {&color_ptr,
+ &color_variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w, &h,
+ &stride, &pass_stride,
+ &r, &pass_stride,
+ &a, &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *construct_gramian_args[] = {&blurDifference,
+ &task->buffer.mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.filter_window,
+ &w, &h, &stride,
+ &pass_stride, &r,
+ &f};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
+ cuda_assert(cuCtxSynchronize());
- {
- CUfunction cuFinalize;
- cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
- cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
- void *finalize_args[] = {&output_ptr,
- &task->storage.rank.device_pointer,
- &task->storage.XtWX.device_pointer,
- &task->storage.XtWY.device_pointer,
- &task->filter_area,
- &task->reconstruction_state.buffer_params.x,
- &task->render_buffer.samples};
- CUDA_GET_BLOCKSIZE(cuFinalize,
- task->reconstruction_state.source_w,
- task->reconstruction_state.source_h);
- CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
- }
+ return !have_error();
+ }
+ bool denoising_solve(device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ CUfunction cuFinalize;
+ cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
+ void *finalize_args[] = {&output_ptr,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->filter_area,
+ &task->reconstruction_state.buffer_params.x,
+ &task->render_buffer.samples};
+ CUDA_GET_BLOCKSIZE(cuFinalize,
+ task->reconstruction_state.source_w,
+ task->reconstruction_state.source_h);
+ CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
cuda_assert(cuCtxSynchronize());
return !have_error();
@@ -1533,6 +1540,7 @@ public:
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
+ float scale,
DenoisingTask *task)
{
if(have_error())
@@ -1553,6 +1561,7 @@ public:
&variance_offset,
&mean_ptr,
&variance_ptr,
+ &scale,
&task->rect,
&task->render_buffer.pass_stride,
&task->render_buffer.offset};
@@ -1562,6 +1571,36 @@ public:
return !have_error();
}
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+ {
+ if(have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterWriteFeature;
+ cuda_assert(cuModuleGetFunction(&cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterWriteFeature,
+ task->filter_area.z,
+ task->filter_area.w);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->reconstruction_state.buffer_params,
+ &task->filter_area,
+ &from_ptr,
+ &buffer_ptr,
+ &out_offset,
+ &task->rect};
+ CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
bool denoising_detect_outliers(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
@@ -1596,11 +1635,13 @@ public:
void denoise(RenderTile &rtile, DenoisingTask& denoising)
{
denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
- denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
+ denoising.functions.accumulate = function_bind(&CUDADevice::denoising_accumulate, this, _1, _2, _3, &denoising);
+ denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
- denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(&CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);