diff options
author | Lukas Stockner <lukas.stockner@freenet.de> | 2018-08-25 22:19:44 +0300 |
---|---|---|
committer | Lukas Stockner <lukas.stockner@freenet.de> | 2018-08-25 22:23:52 +0300 |
commit | 94efc651d40fe62417b605e2f400fd364fb0d8ef (patch) | |
tree | 27e02bd43309f6f4a213ee94fcbfb75828cf1d50 /intern/cycles/device/device_cpu.cpp | |
parent | 60a5ba265cbda5293a21eaeab1d65ba155d66e03 (diff) |
Cycles Denoiser: Allocate a single temporary buffer for the entire denoising process
With small tiles, the repeated allocations on GPUs can actually slow down the denoising quite a lot.
Allocating the buffer just once reduces render time for the default cube with 16x16 tiles and denoising on a mobile 1050 from 22.7 sec to 14.0 sec.
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 13 |
1 file changed, 8 insertions, 5 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 6867dffc727..e92bbbfa6e6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -471,9 +471,10 @@ public: int w = align_up(rect.z-rect.x, 4); int h = rect.w-rect.y; - float *blurDifference = (float*) task->nlm_state.temporary_1_ptr; - float *difference = (float*) task->nlm_state.temporary_2_ptr; - float *weightAccum = (float*) task->nlm_state.temporary_3_ptr; + float *temporary_mem = (float*) task->buffer.temporary_mem.device_pointer; + float *blurDifference = temporary_mem; + float *difference = temporary_mem + task->buffer.pass_stride; + float *weightAccum = temporary_mem + 2*task->buffer.pass_stride; memset(weightAccum, 0, sizeof(float)*w*h); memset((float*) out_ptr, 0, sizeof(float)*w*h); @@ -537,8 +538,9 @@ public: mem_zero(task->storage.XtWX); mem_zero(task->storage.XtWY); - float *difference = (float*) task->reconstruction_state.temporary_1_ptr; - float *blurDifference = (float*) task->reconstruction_state.temporary_2_ptr; + float *temporary_mem = (float*) task->buffer.temporary_mem.device_pointer; + float *difference = temporary_mem; + float *blurDifference = temporary_mem + task->buffer.pass_stride; int r = task->radius; for(int i = 0; i < (2*r+1)*(2*r+1); i++) { @@ -713,6 +715,7 @@ public: denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); denoising.render_buffer.samples = tile.sample; + denoising.buffer.gpu_temporary_mem = false; denoising.run_denoising(&tile); } |