diff options
author | Brecht Van Lommel <brecht@blender.org> | 2020-07-09 21:01:22 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2020-07-10 18:10:05 +0300 |
commit | 93791381fec898e6f74a189e4eeb25f66029f131 (patch) | |
tree | 768406a0020de662a82983cc37c54b03da714f2a /intern/cycles/device | |
parent | e65c78cd43aa8d50612365f6b506de53d1bbfd86 (diff) |
Cleanup: reduce hardcoded numbers in denoising neighbor tiles code
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/cuda/device_cuda_impl.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 52 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.h | 8 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 42 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 81 | ||||
-rw-r--r-- | intern/cycles/device/device_task.h | 5 | ||||
-rw-r--r-- | intern/cycles/device/opencl/device_opencl_impl.cpp | 2 |
9 files changed, 101 insertions, 97 deletions
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index b9bbeb9a25b..0be2c322dfa 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -1760,7 +1760,7 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising) denoising.render_buffer.samples = rtile.sample; denoising.buffer.gpu_temporary_mem = true; - denoising.run_denoising(&rtile); + denoising.run_denoising(rtile); } void CUDADevice::adaptive_sampling_filter(uint filter_sample, diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index a5833369a17..115b05e3911 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -439,10 +439,10 @@ class Device { { return 0; } - virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) + virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/) { } - virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) + virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/) { } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 8f68e66a1b4..2e4761562a5 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -1040,7 +1040,7 @@ class CPUDevice : public Device { denoising.render_buffer.samples = tile.sample; denoising.buffer.gpu_temporary_mem = false; - denoising.run_denoising(&tile); + denoising.run_denoising(tile); } void thread_render(DeviceTask &task) diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 89de80a5bcd..38c42d15cab 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -71,29 +71,30 @@ DenoisingTask::~DenoisingTask() tile_info_mem.free(); } -void DenoisingTask::set_render_buffer(RenderTile 
*rtiles) +void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors) { - for (int i = 0; i < 9; i++) { - tile_info->offsets[i] = rtiles[i].offset; - tile_info->strides[i] = rtiles[i].stride; - tile_info->buffers[i] = rtiles[i].buffer; + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + RenderTile &rtile = neighbors.tiles[i]; + tile_info->offsets[i] = rtile.offset; + tile_info->strides[i] = rtile.stride; + tile_info->buffers[i] = rtile.buffer; } - tile_info->x[0] = rtiles[3].x; - tile_info->x[1] = rtiles[4].x; - tile_info->x[2] = rtiles[5].x; - tile_info->x[3] = rtiles[5].x + rtiles[5].w; - tile_info->y[0] = rtiles[1].y; - tile_info->y[1] = rtiles[4].y; - tile_info->y[2] = rtiles[7].y; - tile_info->y[3] = rtiles[7].y + rtiles[7].h; - - target_buffer.offset = rtiles[9].offset; - target_buffer.stride = rtiles[9].stride; - target_buffer.ptr = rtiles[9].buffer; - - if (do_prefilter && rtiles[9].buffers) { + tile_info->x[0] = neighbors.tiles[3].x; + tile_info->x[1] = neighbors.tiles[4].x; + tile_info->x[2] = neighbors.tiles[5].x; + tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w; + tile_info->y[0] = neighbors.tiles[1].y; + tile_info->y[1] = neighbors.tiles[4].y; + tile_info->y[2] = neighbors.tiles[7].y; + tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h; + + target_buffer.offset = neighbors.target.offset; + target_buffer.stride = neighbors.target.stride; + target_buffer.ptr = neighbors.target.buffer; + + if (do_prefilter && neighbors.target.buffers) { target_buffer.denoising_output_offset = - rtiles[9].buffers->params.get_denoising_prefiltered_offset(); + neighbors.target.buffers->params.get_denoising_prefiltered_offset(); } else { target_buffer.denoising_output_offset = 0; @@ -320,12 +321,11 @@ void DenoisingTask::reconstruct() functions.solve(target_buffer.ptr); } -void DenoisingTask::run_denoising(RenderTile *tile) +void DenoisingTask::run_denoising(RenderTile &tile) { - RenderTile rtiles[10]; - rtiles[4] = *tile; - 
functions.map_neighbor_tiles(rtiles); - set_render_buffer(rtiles); + RenderTileNeighbors neighbors(tile); + functions.map_neighbor_tiles(neighbors); + set_render_buffer(neighbors); setup_denoising_buffer(); @@ -347,7 +347,7 @@ void DenoisingTask::run_denoising(RenderTile *tile) write_buffer(); } - functions.unmap_neighbor_tiles(rtiles); + functions.unmap_neighbor_tiles(neighbors); } CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 4c122e981eb..2c0dc23b44a 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -102,8 +102,8 @@ class DenoisingTask { device_ptr output_ptr)> detect_outliers; function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature; - function<void(RenderTile *rtiles)> map_neighbor_tiles; - function<void(RenderTile *rtiles)> unmap_neighbor_tiles; + function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles; + function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles; } functions; /* Stores state of the current Reconstruction operation, @@ -154,7 +154,7 @@ class DenoisingTask { DenoisingTask(Device *device, const DeviceTask &task); ~DenoisingTask(); - void run_denoising(RenderTile *tile); + void run_denoising(RenderTile &tile); struct DenoiseBuffers { int pass_stride; @@ -179,7 +179,7 @@ class DenoisingTask { protected: Device *device; - void set_render_buffer(RenderTile *rtiles); + void set_render_buffer(RenderTileNeighbors &neighbors); void setup_denoising_buffer(); void prefilter_shadowing(); void prefilter_features(); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index d38cbfccb6f..9ea8782d0f0 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -584,20 +584,22 @@ class MultiDevice : public Device { return -1; } - void map_neighbor_tiles(Device *sub_device, RenderTile *tiles) + void 
map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) { - for (int i = 0; i < 9; i++) { - if (!tiles[i].buffers) { + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + RenderTile &tile = neighbors.tiles[i]; + + if (!tile.buffers) { continue; } - device_vector<float> &mem = tiles[i].buffers->buffer; - tiles[i].buffer = mem.device_pointer; + device_vector<float> &mem = tile.buffers->buffer; + tile.buffer = mem.device_pointer; if (mem.device == this && matching_rendering_and_denoising_devices) { /* Skip unnecessary copies in viewport mode (buffer covers the * whole image), but still need to fix up the tile device pointer. */ - map_tile(sub_device, tiles[i]); + map_tile(sub_device, tile); continue; } @@ -610,15 +612,15 @@ class MultiDevice : public Device { * also required for the case where a CPU thread is denoising * a tile rendered on the GPU. In that case we have to avoid * overwriting the buffer being de-noised by the CPU thread. */ - if (!tiles[i].buffers->map_neighbor_copied) { - tiles[i].buffers->map_neighbor_copied = true; + if (!tile.buffers->map_neighbor_copied) { + tile.buffers->map_neighbor_copied = true; mem.copy_from_device(); } if (mem.device == this) { /* Can re-use memory if tile is already allocated on the sub device. 
*/ - map_tile(sub_device, tiles[i]); - mem.swap_device(sub_device, mem.device_size, tiles[i].buffer); + map_tile(sub_device, tile); + mem.swap_device(sub_device, mem.device_size, tile.buffer); } else { mem.swap_device(sub_device, 0, 0); @@ -626,40 +628,42 @@ class MultiDevice : public Device { mem.copy_to_device(); - tiles[i].buffer = mem.device_pointer; - tiles[i].device_size = mem.device_size; + tile.buffer = mem.device_pointer; + tile.device_size = mem.device_size; mem.restore_device(); } } } - void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles) + void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) { - device_vector<float> &mem = tiles[9].buffers->buffer; + RenderTile &target_tile = neighbors.target; + device_vector<float> &mem = target_tile.buffers->buffer; if (mem.device == this && matching_rendering_and_denoising_devices) { return; } /* Copy denoised result back to the host. */ - mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer); + mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer); mem.copy_from_device(); mem.restore_device(); /* Copy denoised result to the original device. */ mem.copy_to_device(); - for (int i = 0; i < 9; i++) { - if (!tiles[i].buffers) { + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + RenderTile &tile = neighbors.tiles[i]; + if (!tile.buffers) { continue; } - device_vector<float> &mem = tiles[i].buffers->buffer; + device_vector<float> &mem = tile.buffers->buffer; if (mem.device != sub_device && mem.device != this) { /* Free up memory again if it was allocated for the copy above. 
*/ - mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer); + mem.swap_device(sub_device, tile.device_size, tile.buffer); sub_device->mem_free(mem); mem.restore_device(); } diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 266222c74c5..35856f48213 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -801,19 +801,18 @@ class OptiXDevice : public CUDADevice { // 0 1 2 // 3 4 5 // 6 7 8 9 - RenderTile rtiles[10]; - rtiles[4] = rtile; - task.map_neighbor_tiles(rtiles, this); - rtile = rtiles[4]; // Tile may have been modified by mapping code + RenderTileNeighbors neighbors(rtile); + task.map_neighbor_tiles(neighbors, this); + RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + RenderTile &target_tile = neighbors.target; + rtile = center_tile; // Tile may have been modified by mapping code // Calculate size of the tile to denoise (including overlap) - int4 rect = make_int4( - rtiles[4].x, rtiles[4].y, rtiles[4].x + rtiles[4].w, rtiles[4].y + rtiles[4].h); + int4 rect = center_tile.bounds(); // Overlap between tiles has to be at least 64 pixels // TODO(pmours): Query this value from OptiX rect = rect_expand(rect, 64); - int4 clip_rect = make_int4( - rtiles[3].x, rtiles[1].y, rtiles[5].x + rtiles[5].w, rtiles[7].y + rtiles[7].h); + int4 clip_rect = neighbors.bounds(); rect = rect_clip(rect, clip_rect); int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y); int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y); @@ -834,14 +833,14 @@ class OptiXDevice : public CUDADevice { device_only_memory<float> input(this, "denoiser input"); device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE); - if ((!rtiles[0].buffer || rtiles[0].buffer == rtile.buffer) && - (!rtiles[1].buffer || rtiles[1].buffer == rtile.buffer) && - (!rtiles[2].buffer || rtiles[2].buffer == rtile.buffer) && - (!rtiles[3].buffer || 
rtiles[3].buffer == rtile.buffer) && - (!rtiles[5].buffer || rtiles[5].buffer == rtile.buffer) && - (!rtiles[6].buffer || rtiles[6].buffer == rtile.buffer) && - (!rtiles[7].buffer || rtiles[7].buffer == rtile.buffer) && - (!rtiles[8].buffer || rtiles[8].buffer == rtile.buffer)) { + bool contiguous_memory = true; + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) { + contiguous_memory = false; + } + } + + if (contiguous_memory) { // Tiles are in continous memory, so can just subtract overlap offset input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride; // Stride covers the whole width of the image and not just a single tile @@ -856,19 +855,19 @@ class OptiXDevice : public CUDADevice { input_stride *= rect_size.x; TileInfo *tile_info = tile_info_mem.alloc(1); - for (int i = 0; i < 9; i++) { - tile_info->offsets[i] = rtiles[i].offset; - tile_info->strides[i] = rtiles[i].stride; - tile_info->buffers[i] = rtiles[i].buffer; + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + tile_info->offsets[i] = neighbors.tiles[i].offset; + tile_info->strides[i] = neighbors.tiles[i].stride; + tile_info->buffers[i] = neighbors.tiles[i].buffer; } - tile_info->x[0] = rtiles[3].x; - tile_info->x[1] = rtiles[4].x; - tile_info->x[2] = rtiles[5].x; - tile_info->x[3] = rtiles[5].x + rtiles[5].w; - tile_info->y[0] = rtiles[1].y; - tile_info->y[1] = rtiles[4].y; - tile_info->y[2] = rtiles[7].y; - tile_info->y[3] = rtiles[7].y + rtiles[7].h; + tile_info->x[0] = neighbors.tiles[3].x; + tile_info->x[1] = neighbors.tiles[4].x; + tile_info->x[2] = neighbors.tiles[5].x; + tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w; + tile_info->y[0] = neighbors.tiles[1].y; + tile_info->y[1] = neighbors.tiles[4].y; + tile_info->y[2] = neighbors.tiles[7].y; + tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h; tile_info_mem.copy_to_device(); void *args[] = { @@ -977,10 
+976,10 @@ class OptiXDevice : public CUDADevice { int2 output_offset = overlap_offset; overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually # else - output_layers[0].data = rtiles[9].buffer + pixel_offset; - output_layers[0].width = rtiles[9].w; - output_layers[0].height = rtiles[9].h; - output_layers[0].rowStrideInBytes = rtiles[9].stride * pixel_stride; + output_layers[0].data = target_tile.buffer + pixel_offset; + output_layers[0].width = target_tile.w; + output_layers[0].height = target_tile.h; + output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride; output_layers[0].pixelStrideInBytes = pixel_stride; # endif output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3; @@ -1002,26 +1001,26 @@ class OptiXDevice : public CUDADevice { # if OPTIX_DENOISER_NO_PIXEL_STRIDE void *output_args[] = {&input_ptr, - &rtiles[9].buffer, + &target_tile.buffer, &output_offset.x, &output_offset.y, &rect_size.x, &rect_size.y, - &rtiles[9].x, - &rtiles[9].y, - &rtiles[9].w, - &rtiles[9].h, - &rtiles[9].offset, - &rtiles[9].stride, + &target_tile.x, + &target_tile.y, + &target_tile.w, + &target_tile.h, + &target_tile.offset, + &target_tile.stride, &task.pass_stride, &rtile.sample}; launch_filter_kernel( - "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args); + "kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args); # endif check_result_cuda_ret(cuStreamSynchronize(0)); - task.unmap_neighbor_tiles(rtiles, this); + task.unmap_neighbor_tiles(neighbors, this); } else { // Run CUDA denoising kernels diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 600973b8100..21da55d50d4 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -29,6 +29,7 @@ CCL_NAMESPACE_BEGIN class Device; class RenderBuffers; class RenderTile; +class RenderTileNeighbors; class Tile; enum DenoiserType { @@ -150,8 +151,8 @@ class DeviceTask { 
function<void(RenderTile &)> update_tile_sample; function<void(RenderTile &)> release_tile; function<bool()> get_cancel; - function<void(RenderTile *, Device *)> map_neighbor_tiles; - function<void(RenderTile *, Device *)> unmap_neighbor_tiles; + function<void(RenderTileNeighbors &, Device *)> map_neighbor_tiles; + function<void(RenderTileNeighbors &, Device *)> unmap_neighbor_tiles; uint tile_types; DenoiseParams denoising; diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 8c94815b193..e851749949d 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -1850,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising) denoising.render_buffer.samples = rtile.sample; denoising.buffer.gpu_temporary_mem = true; - denoising.run_denoising(&rtile); + denoising.run_denoising(rtile); } void OpenCLDevice::shader(DeviceTask &task) |