From 94efc651d40fe62417b605e2f400fd364fb0d8ef Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Sat, 25 Aug 2018 12:19:44 -0700 Subject: Cycles Denoiser: Allocate a single temporary buffer for the entire denoising process With small tiles, the repeated allocations on GPUs can actually slow down the denoising quite a lot. Allocating the buffer just once reduces render time for the default cube with 16x16 tiles and denoising on a mobile 1050 from 22.7 sec to 14.0 sec. --- intern/cycles/device/device_cpu.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'intern/cycles/device/device_cpu.cpp') diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 6867dffc727..e92bbbfa6e6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -471,9 +471,10 @@ public: int w = align_up(rect.z-rect.x, 4); int h = rect.w-rect.y; - float *blurDifference = (float*) task->nlm_state.temporary_1_ptr; - float *difference = (float*) task->nlm_state.temporary_2_ptr; - float *weightAccum = (float*) task->nlm_state.temporary_3_ptr; + float *temporary_mem = (float*) task->buffer.temporary_mem.device_pointer; + float *blurDifference = temporary_mem; + float *difference = temporary_mem + task->buffer.pass_stride; + float *weightAccum = temporary_mem + 2*task->buffer.pass_stride; memset(weightAccum, 0, sizeof(float)*w*h); memset((float*) out_ptr, 0, sizeof(float)*w*h); @@ -537,8 +538,9 @@ public: mem_zero(task->storage.XtWX); mem_zero(task->storage.XtWY); - float *difference = (float*) task->reconstruction_state.temporary_1_ptr; - float *blurDifference = (float*) task->reconstruction_state.temporary_2_ptr; + float *temporary_mem = (float*) task->buffer.temporary_mem.device_pointer; + float *difference = temporary_mem; + float *blurDifference = temporary_mem + task->buffer.pass_stride; int r = task->radius; for(int i = 0; i < (2*r+1)*(2*r+1); i++) { @@ -713,6 +715,7 @@ public: 
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); denoising.render_buffer.samples = tile.sample; + denoising.buffer.gpu_temporary_mem = false; denoising.run_denoising(&tile); } -- cgit v1.2.3