diff options
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 116 |
1 files changed, 81 insertions, 35 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index af8db5b75b8..ee3a3ddea64 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -951,12 +951,13 @@ class CPUDevice : public Device { void denoise_openimagedenoise_buffer(DeviceTask &task, float *buffer, - size_t offset, - size_t stride, - size_t x, - size_t y, - size_t w, - size_t h) + const size_t offset, + const size_t stride, + const size_t x, + const size_t y, + const size_t w, + const size_t h, + const float scale) { #ifdef WITH_OPENIMAGEDENOISE assert(openimagedenoise_supported()); @@ -982,31 +983,65 @@ class CPUDevice : public Device { } /* Set images with appropriate stride for our interleaved pass storage. */ - const struct { + struct { const char *name; - int offset; - } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR}, - {"normal", task.pass_denoising_data + DENOISING_PASS_NORMAL}, - {"albedo", task.pass_denoising_data + DENOISING_PASS_ALBEDO}, - {"output", 0}, + const int offset; + const bool scale; + const bool use; + array<float> scaled_buffer; + } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR, false, true}, + {"albedo", + task.pass_denoising_data + DENOISING_PASS_ALBEDO, + true, + task.denoising.input_passes >= DENOISER_INPUT_RGB_ALBEDO}, + {"normal", + task.pass_denoising_data + DENOISING_PASS_NORMAL, + true, + task.denoising.input_passes >= DENOISER_INPUT_RGB_ALBEDO_NORMAL}, + {"output", 0, false, true}, { NULL, 0 }}; for (int i = 0; passes[i].name; i++) { + if (!passes[i].use) { + continue; + } + const int64_t pixel_offset = offset + x + y * stride; - const int64_t buffer_offset = (pixel_offset * task.pass_stride + passes[i].offset) * - sizeof(float); - const int64_t pixel_stride = task.pass_stride * sizeof(float); + const int64_t buffer_offset = (pixel_offset * task.pass_stride + passes[i].offset); + const int64_t pixel_stride = task.pass_stride; const int64_t row_stride = stride * pixel_stride; - oidn_filter.setImage(passes[i].name, - (char *)buffer + buffer_offset, - oidn::Format::Float3, - w, - h, - 0, - pixel_stride, - row_stride); + if (passes[i].scale && scale != 1.0f) { + /* Normalize albedo and normal passes as they are scaled by the number of samples. + * For the color passes OIDN will perform auto-exposure making it unnecessary. */ + array<float> &scaled_buffer = passes[i].scaled_buffer; + scaled_buffer.resize(w * h * 3); + + for (int y = 0; y < h; y++) { + const float *pass_row = buffer + buffer_offset + y * row_stride; + float *scaled_row = scaled_buffer.data() + y * w * 3; + + for (int x = 0; x < w; x++) { + scaled_row[x * 3 + 0] = pass_row[x * pixel_stride + 0] * scale; + scaled_row[x * 3 + 1] = pass_row[x * pixel_stride + 1] * scale; + scaled_row[x * 3 + 2] = pass_row[x * pixel_stride + 2] * scale; + } + } + + oidn_filter.setImage( + passes[i].name, scaled_buffer.data(), oidn::Format::Float3, w, h, 0, 0, 0); + } + else { + oidn_filter.setImage(passes[i].name, + buffer + buffer_offset, + oidn::Format::Float3, + w, + h, + 0, + pixel_stride * sizeof(float), + row_stride * sizeof(float)); + } } /* Execute filter. */ @@ -1021,6 +1056,7 @@ class CPUDevice : public Device { (void)y; (void)w; (void)h; + (void)scale; #endif } @@ -1037,7 +1073,8 @@ class CPUDevice : public Device { rtile.x, rtile.y, rtile.w, - rtile.h); + rtile.h, + 1.0f / rtile.sample); /* todo: it may be possible to avoid this copy, but we have to ensure that * when other code copies data from the device it doesn't overwrite the @@ -1047,6 +1084,9 @@ class CPUDevice : public Device { else { /* Per-tile denoising. */ rtile.sample = rtile.start_sample + rtile.num_samples; + const float scale = 1.0f / rtile.sample; + const float invscale = rtile.sample; + const size_t pass_stride = task.pass_stride; /* Map neighboring tiles into one buffer for denoising. */ RenderTileNeighbors neighbors(rtile); @@ -1075,22 +1115,24 @@ class CPUDevice : public Device { const int ymax = min(ntile.y + ntile.h, rect.w); const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride; - const float *tile_buffer = (float *)ntile.buffer + tile_offset * task.pass_stride; + const float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride; const size_t merged_stride = rect_size.x; const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride; - float *merged_buffer = merged.data() + merged_offset * task.pass_stride; + float *merged_buffer = merged.data() + merged_offset * pass_stride; for (int y = ymin; y < ymax; y++) { - memcpy(merged_buffer, tile_buffer, sizeof(float) * task.pass_stride * (xmax - xmin)); - tile_buffer += ntile.stride * task.pass_stride; - merged_buffer += merged_stride * task.pass_stride; + for (int x = 0; x < pass_stride * (xmax - xmin); x++) { + merged_buffer[x] = tile_buffer[x] * scale; + } + tile_buffer += ntile.stride * pass_stride; + merged_buffer += merged_stride * pass_stride; } } /* Denoise */ denoise_openimagedenoise_buffer( - task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y); + task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y, 1.0f); /* Copy back result from merged buffer. */ RenderTile &ntile = neighbors.target; @@ -1101,16 +1143,20 @@ class CPUDevice : public Device { const int ymax = min(ntile.y + ntile.h, rect.w); const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride; - float *tile_buffer = (float *)ntile.buffer + tile_offset * task.pass_stride; + float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride; const size_t merged_stride = rect_size.x; const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride; - const float *merged_buffer = merged.data() + merged_offset * task.pass_stride; + const float *merged_buffer = merged.data() + merged_offset * pass_stride; for (int y = ymin; y < ymax; y++) { - memcpy(tile_buffer, merged_buffer, sizeof(float) * task.pass_stride * (xmax - xmin)); - tile_buffer += ntile.stride * task.pass_stride; - merged_buffer += merged_stride * task.pass_stride; + for (int x = 0; x < pass_stride * (xmax - xmin); x += pass_stride) { + tile_buffer[x + 0] = merged_buffer[x + 0] * invscale; + tile_buffer[x + 1] = merged_buffer[x + 1] * invscale; + tile_buffer[x + 2] = merged_buffer[x + 2] * invscale; + } + tile_buffer += ntile.stride * pass_stride; + merged_buffer += merged_stride * pass_stride; } } |