Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2020-07-09 21:01:22 +0300
committerBrecht Van Lommel <brecht@blender.org>2020-07-10 18:10:05 +0300
commit93791381fec898e6f74a189e4eeb25f66029f131 (patch)
tree768406a0020de662a82983cc37c54b03da714f2a /intern/cycles/device
parente65c78cd43aa8d50612365f6b506de53d1bbfd86 (diff)
Cleanup: reduce hardcoded numbers in denoising neighbor tiles code
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/cuda/device_cuda_impl.cpp2
-rw-r--r--intern/cycles/device/device.h4
-rw-r--r--intern/cycles/device/device_cpu.cpp2
-rw-r--r--intern/cycles/device/device_denoising.cpp52
-rw-r--r--intern/cycles/device/device_denoising.h8
-rw-r--r--intern/cycles/device/device_multi.cpp42
-rw-r--r--intern/cycles/device/device_optix.cpp81
-rw-r--r--intern/cycles/device/device_task.h5
-rw-r--r--intern/cycles/device/opencl/device_opencl_impl.cpp2
9 files changed, 101 insertions, 97 deletions
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index b9bbeb9a25b..0be2c322dfa 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -1760,7 +1760,7 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
- denoising.run_denoising(&rtile);
+ denoising.run_denoising(rtile);
}
void CUDADevice::adaptive_sampling_filter(uint filter_sample,
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index a5833369a17..115b05e3911 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -439,10 +439,10 @@ class Device {
{
return 0;
}
- virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
+ virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}
- virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
+ virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 8f68e66a1b4..2e4761562a5 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -1040,7 +1040,7 @@ class CPUDevice : public Device {
denoising.render_buffer.samples = tile.sample;
denoising.buffer.gpu_temporary_mem = false;
- denoising.run_denoising(&tile);
+ denoising.run_denoising(tile);
}
void thread_render(DeviceTask &task)
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 89de80a5bcd..38c42d15cab 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -71,29 +71,30 @@ DenoisingTask::~DenoisingTask()
tile_info_mem.free();
}
-void DenoisingTask::set_render_buffer(RenderTile *rtiles)
+void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors)
{
- for (int i = 0; i < 9; i++) {
- tile_info->offsets[i] = rtiles[i].offset;
- tile_info->strides[i] = rtiles[i].stride;
- tile_info->buffers[i] = rtiles[i].buffer;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &rtile = neighbors.tiles[i];
+ tile_info->offsets[i] = rtile.offset;
+ tile_info->strides[i] = rtile.stride;
+ tile_info->buffers[i] = rtile.buffer;
}
- tile_info->x[0] = rtiles[3].x;
- tile_info->x[1] = rtiles[4].x;
- tile_info->x[2] = rtiles[5].x;
- tile_info->x[3] = rtiles[5].x + rtiles[5].w;
- tile_info->y[0] = rtiles[1].y;
- tile_info->y[1] = rtiles[4].y;
- tile_info->y[2] = rtiles[7].y;
- tile_info->y[3] = rtiles[7].y + rtiles[7].h;
-
- target_buffer.offset = rtiles[9].offset;
- target_buffer.stride = rtiles[9].stride;
- target_buffer.ptr = rtiles[9].buffer;
-
- if (do_prefilter && rtiles[9].buffers) {
+ tile_info->x[0] = neighbors.tiles[3].x;
+ tile_info->x[1] = neighbors.tiles[4].x;
+ tile_info->x[2] = neighbors.tiles[5].x;
+ tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
+ tile_info->y[0] = neighbors.tiles[1].y;
+ tile_info->y[1] = neighbors.tiles[4].y;
+ tile_info->y[2] = neighbors.tiles[7].y;
+ tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
+
+ target_buffer.offset = neighbors.target.offset;
+ target_buffer.stride = neighbors.target.stride;
+ target_buffer.ptr = neighbors.target.buffer;
+
+ if (do_prefilter && neighbors.target.buffers) {
target_buffer.denoising_output_offset =
- rtiles[9].buffers->params.get_denoising_prefiltered_offset();
+ neighbors.target.buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
@@ -320,12 +321,11 @@ void DenoisingTask::reconstruct()
functions.solve(target_buffer.ptr);
}
-void DenoisingTask::run_denoising(RenderTile *tile)
+void DenoisingTask::run_denoising(RenderTile &tile)
{
- RenderTile rtiles[10];
- rtiles[4] = *tile;
- functions.map_neighbor_tiles(rtiles);
- set_render_buffer(rtiles);
+ RenderTileNeighbors neighbors(tile);
+ functions.map_neighbor_tiles(neighbors);
+ set_render_buffer(neighbors);
setup_denoising_buffer();
@@ -347,7 +347,7 @@ void DenoisingTask::run_denoising(RenderTile *tile)
write_buffer();
}
- functions.unmap_neighbor_tiles(rtiles);
+ functions.unmap_neighbor_tiles(neighbors);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 4c122e981eb..2c0dc23b44a 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -102,8 +102,8 @@ class DenoisingTask {
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
- function<void(RenderTile *rtiles)> map_neighbor_tiles;
- function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
+ function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles;
+ function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles;
} functions;
/* Stores state of the current Reconstruction operation,
@@ -154,7 +154,7 @@ class DenoisingTask {
DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask();
- void run_denoising(RenderTile *tile);
+ void run_denoising(RenderTile &tile);
struct DenoiseBuffers {
int pass_stride;
@@ -179,7 +179,7 @@ class DenoisingTask {
protected:
Device *device;
- void set_render_buffer(RenderTile *rtiles);
+ void set_render_buffer(RenderTileNeighbors &neighbors);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index d38cbfccb6f..9ea8782d0f0 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -584,20 +584,22 @@ class MultiDevice : public Device {
return -1;
}
- void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
+ void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
{
- for (int i = 0; i < 9; i++) {
- if (!tiles[i].buffers) {
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &tile = neighbors.tiles[i];
+
+ if (!tile.buffers) {
continue;
}
- device_vector<float> &mem = tiles[i].buffers->buffer;
- tiles[i].buffer = mem.device_pointer;
+ device_vector<float> &mem = tile.buffers->buffer;
+ tile.buffer = mem.device_pointer;
if (mem.device == this && matching_rendering_and_denoising_devices) {
/* Skip unnecessary copies in viewport mode (buffer covers the
* whole image), but still need to fix up the tile device pointer. */
- map_tile(sub_device, tiles[i]);
+ map_tile(sub_device, tile);
continue;
}
@@ -610,15 +612,15 @@ class MultiDevice : public Device {
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being de-noised by the CPU thread. */
- if (!tiles[i].buffers->map_neighbor_copied) {
- tiles[i].buffers->map_neighbor_copied = true;
+ if (!tile.buffers->map_neighbor_copied) {
+ tile.buffers->map_neighbor_copied = true;
mem.copy_from_device();
}
if (mem.device == this) {
/* Can re-use memory if tile is already allocated on the sub device. */
- map_tile(sub_device, tiles[i]);
- mem.swap_device(sub_device, mem.device_size, tiles[i].buffer);
+ map_tile(sub_device, tile);
+ mem.swap_device(sub_device, mem.device_size, tile.buffer);
}
else {
mem.swap_device(sub_device, 0, 0);
@@ -626,40 +628,42 @@ class MultiDevice : public Device {
mem.copy_to_device();
- tiles[i].buffer = mem.device_pointer;
- tiles[i].device_size = mem.device_size;
+ tile.buffer = mem.device_pointer;
+ tile.device_size = mem.device_size;
mem.restore_device();
}
}
}
- void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
+ void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
{
- device_vector<float> &mem = tiles[9].buffers->buffer;
+ RenderTile &target_tile = neighbors.target;
+ device_vector<float> &mem = target_tile.buffers->buffer;
if (mem.device == this && matching_rendering_and_denoising_devices) {
return;
}
/* Copy denoised result back to the host. */
- mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
+ mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer);
mem.copy_from_device();
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
- for (int i = 0; i < 9; i++) {
- if (!tiles[i].buffers) {
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ RenderTile &tile = neighbors.tiles[i];
+ if (!tile.buffers) {
continue;
}
- device_vector<float> &mem = tiles[i].buffers->buffer;
+ device_vector<float> &mem = tile.buffers->buffer;
if (mem.device != sub_device && mem.device != this) {
/* Free up memory again if it was allocated for the copy above. */
- mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
+ mem.swap_device(sub_device, tile.device_size, tile.buffer);
sub_device->mem_free(mem);
mem.restore_device();
}
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 266222c74c5..35856f48213 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -801,19 +801,18 @@ class OptiXDevice : public CUDADevice {
// 0 1 2
// 3 4 5
// 6 7 8 9
- RenderTile rtiles[10];
- rtiles[4] = rtile;
- task.map_neighbor_tiles(rtiles, this);
- rtile = rtiles[4]; // Tile may have been modified by mapping code
+ RenderTileNeighbors neighbors(rtile);
+ task.map_neighbor_tiles(neighbors, this);
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+ RenderTile &target_tile = neighbors.target;
+ rtile = center_tile; // Tile may have been modified by mapping code
// Calculate size of the tile to denoise (including overlap)
- int4 rect = make_int4(
- rtiles[4].x, rtiles[4].y, rtiles[4].x + rtiles[4].w, rtiles[4].y + rtiles[4].h);
+ int4 rect = center_tile.bounds();
// Overlap between tiles has to be at least 64 pixels
// TODO(pmours): Query this value from OptiX
rect = rect_expand(rect, 64);
- int4 clip_rect = make_int4(
- rtiles[3].x, rtiles[1].y, rtiles[5].x + rtiles[5].w, rtiles[7].y + rtiles[7].h);
+ int4 clip_rect = neighbors.bounds();
rect = rect_clip(rect, clip_rect);
int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y);
int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y);
@@ -834,14 +833,14 @@ class OptiXDevice : public CUDADevice {
device_only_memory<float> input(this, "denoiser input");
device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE);
- if ((!rtiles[0].buffer || rtiles[0].buffer == rtile.buffer) &&
- (!rtiles[1].buffer || rtiles[1].buffer == rtile.buffer) &&
- (!rtiles[2].buffer || rtiles[2].buffer == rtile.buffer) &&
- (!rtiles[3].buffer || rtiles[3].buffer == rtile.buffer) &&
- (!rtiles[5].buffer || rtiles[5].buffer == rtile.buffer) &&
- (!rtiles[6].buffer || rtiles[6].buffer == rtile.buffer) &&
- (!rtiles[7].buffer || rtiles[7].buffer == rtile.buffer) &&
- (!rtiles[8].buffer || rtiles[8].buffer == rtile.buffer)) {
+ bool contiguous_memory = true;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) {
+ contiguous_memory = false;
+ }
+ }
+
+ if (contiguous_memory) {
// Tiles are in continous memory, so can just subtract overlap offset
input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride;
// Stride covers the whole width of the image and not just a single tile
@@ -856,19 +855,19 @@ class OptiXDevice : public CUDADevice {
input_stride *= rect_size.x;
TileInfo *tile_info = tile_info_mem.alloc(1);
- for (int i = 0; i < 9; i++) {
- tile_info->offsets[i] = rtiles[i].offset;
- tile_info->strides[i] = rtiles[i].stride;
- tile_info->buffers[i] = rtiles[i].buffer;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ tile_info->offsets[i] = neighbors.tiles[i].offset;
+ tile_info->strides[i] = neighbors.tiles[i].stride;
+ tile_info->buffers[i] = neighbors.tiles[i].buffer;
}
- tile_info->x[0] = rtiles[3].x;
- tile_info->x[1] = rtiles[4].x;
- tile_info->x[2] = rtiles[5].x;
- tile_info->x[3] = rtiles[5].x + rtiles[5].w;
- tile_info->y[0] = rtiles[1].y;
- tile_info->y[1] = rtiles[4].y;
- tile_info->y[2] = rtiles[7].y;
- tile_info->y[3] = rtiles[7].y + rtiles[7].h;
+ tile_info->x[0] = neighbors.tiles[3].x;
+ tile_info->x[1] = neighbors.tiles[4].x;
+ tile_info->x[2] = neighbors.tiles[5].x;
+ tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
+ tile_info->y[0] = neighbors.tiles[1].y;
+ tile_info->y[1] = neighbors.tiles[4].y;
+ tile_info->y[2] = neighbors.tiles[7].y;
+ tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
tile_info_mem.copy_to_device();
void *args[] = {
@@ -977,10 +976,10 @@ class OptiXDevice : public CUDADevice {
int2 output_offset = overlap_offset;
overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually
# else
- output_layers[0].data = rtiles[9].buffer + pixel_offset;
- output_layers[0].width = rtiles[9].w;
- output_layers[0].height = rtiles[9].h;
- output_layers[0].rowStrideInBytes = rtiles[9].stride * pixel_stride;
+ output_layers[0].data = target_tile.buffer + pixel_offset;
+ output_layers[0].width = target_tile.w;
+ output_layers[0].height = target_tile.h;
+ output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride;
output_layers[0].pixelStrideInBytes = pixel_stride;
# endif
output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
@@ -1002,26 +1001,26 @@ class OptiXDevice : public CUDADevice {
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
void *output_args[] = {&input_ptr,
- &rtiles[9].buffer,
+ &target_tile.buffer,
&output_offset.x,
&output_offset.y,
&rect_size.x,
&rect_size.y,
- &rtiles[9].x,
- &rtiles[9].y,
- &rtiles[9].w,
- &rtiles[9].h,
- &rtiles[9].offset,
- &rtiles[9].stride,
+ &target_tile.x,
+ &target_tile.y,
+ &target_tile.w,
+ &target_tile.h,
+ &target_tile.offset,
+ &target_tile.stride,
&task.pass_stride,
&rtile.sample};
launch_filter_kernel(
- "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
+ "kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args);
# endif
check_result_cuda_ret(cuStreamSynchronize(0));
- task.unmap_neighbor_tiles(rtiles, this);
+ task.unmap_neighbor_tiles(neighbors, this);
}
else {
// Run CUDA denoising kernels
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 600973b8100..21da55d50d4 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -29,6 +29,7 @@ CCL_NAMESPACE_BEGIN
class Device;
class RenderBuffers;
class RenderTile;
+class RenderTileNeighbors;
class Tile;
enum DenoiserType {
@@ -150,8 +151,8 @@ class DeviceTask {
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
function<bool()> get_cancel;
- function<void(RenderTile *, Device *)> map_neighbor_tiles;
- function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
+ function<void(RenderTileNeighbors &, Device *)> map_neighbor_tiles;
+ function<void(RenderTileNeighbors &, Device *)> unmap_neighbor_tiles;
uint tile_types;
DenoiseParams denoising;
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index 8c94815b193..e851749949d 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -1850,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
- denoising.run_denoising(&rtile);
+ denoising.run_denoising(rtile);
}
void OpenCLDevice::shader(DeviceTask &task)