Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorLukas Stockner <lukas.stockner@freenet.de>2020-09-24 01:37:23 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2020-10-31 03:57:39 +0300
commit517ff40b124bc9d1324ccf7561a59ac51bf86602 (patch)
tree96295b1b6a11a597f7927cc61ce9371077bc7e54 /intern
parent523414dda2bf81b69b1c04e1145ac21758fa4268 (diff)
Cycles: Implement tile stealing to improve CPU+GPU rendering performance
While Cycles already supports using both CPU and GPU at the same time, there currently is a large problem with it: Since the CPU grabs one tile per thread, at the end of the render the GPU runs out of new work but the CPU still needs quite some time to finish its current tiles. Having smaller tiles helps somewhat, but especially OpenCL rendering tends to lose performance with smaller tiles. Therefore, this commit adds support for tile stealing: When a GPU device runs out of new tiles, it can signal the CPU to release one of its tiles. This way, at the end of the render, the GPU quickly finishes the remaining tiles instead of having to wait for the CPU. Thanks to AMD for sponsoring this work! Differential Revision: https://developer.blender.org/D9324
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/device_cpu.cpp5
-rw-r--r--intern/cycles/device/device_memory.h8
-rw-r--r--intern/cycles/device/device_task.h1
-rw-r--r--intern/cycles/render/buffers.cpp1
-rw-r--r--intern/cycles/render/buffers.h3
-rw-r--r--intern/cycles/render/session.cpp98
-rw-r--r--intern/cycles/render/session.h13
7 files changed, 124 insertions, 5 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 23aedcd0c48..6912ac1e638 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -932,6 +932,11 @@ class CPUDevice : public Device {
break;
}
+ if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) {
+ tile.stealing_state = RenderTile::WAS_STOLEN;
+ break;
+ }
+
if (tile.task == RenderTile::PATH_TRACE) {
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 32654e62a6f..00b2aa864aa 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -450,6 +450,14 @@ template<typename T> class device_vector : public device_memory {
device_zero();
}
+ /* Re-home this vector on new_device.
+ * Calls copy_from_device(), then device_free(), then points `device` at new_device and
+ * finally calls copy_to_device(). The order matters: `device` is only reassigned after the
+ * first two calls, so they still operate on the previous device.
+ * NOTE(review): presumably this round-trips the data through host memory - confirm against
+ * copy_from_device()/copy_to_device(). */
+ void move_device(Device *new_device)
+ {
+ copy_from_device();
+ device_free();
+ device = new_device;
+ copy_to_device();
+ }
+
protected:
size_t size(size_t width, size_t height, size_t depth)
{
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index fd380788282..f819f84eb43 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -159,6 +159,7 @@ class DeviceTask {
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
function<bool()> get_cancel;
+ function<bool()> get_tile_stolen;
function<void(RenderTileNeighbors &, Device *)> map_neighbor_tiles;
function<void(RenderTileNeighbors &, Device *)> unmap_neighbor_tiles;
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 3607300cee6..045931ffdac 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -125,6 +125,7 @@ RenderTile::RenderTile()
buffer = 0;
buffers = NULL;
+ stealing_state = NO_STEALING;
}
/* Render Buffers */
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 425400a2c08..4ffc628bb52 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -146,6 +146,9 @@ class RenderTile {
device_ptr buffer;
int device_size;
+ /* Per-tile stealing state.
+ * NO_STEALING: default assigned by the RenderTile constructor.
+ * CAN_BE_STOLEN: assigned by Session::acquire_tile() to non-denoise tiles handed to a CPU device.
+ * WAS_STOLEN: assigned by the CPU device when it stops working on the tile in response to a
+ * steal request; Session::release_tile() then hands the tile over instead of finishing it. */
+ typedef enum { NO_STEALING = 0, CAN_BE_STOLEN = 1, WAS_STOLEN = 2 } StealingState;
+ StealingState stealing_state;
+
RenderBuffers *buffers;
RenderTile();
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index fa3fd06ab27..1da3dd53445 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -382,6 +382,63 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
return false;
}
+/* Try to take over a tile that another device is currently rendering.
+ *
+ * rtile: filled with the stolen tile on success, left untouched otherwise.
+ * tile_device: the device that wants more work; CPU devices never steal.
+ * tile_lock: lock used for waiting on tile_steal_cond; since it is waited on here, it is
+ * expected to be held by the caller (acquire_tile() passes its own lock).
+ *
+ * Returns true if a tile was handed over and its buffer moved to tile_device, false if there
+ * was nothing to steal or the last stealable tile finished before it could be handed over. */
+bool Session::steal_tile(RenderTile &rtile, Device *tile_device, thread_scoped_lock &tile_lock)
+{
+ /* Devices that can get their tiles stolen don't steal tiles themselves.
+ * Additionally, if there are no stealable tiles in flight, give up here. */
+ if (tile_device->info.type == DEVICE_CPU || stealable_tiles == 0) {
+ return false;
+ }
+
+ /* Wait until no other thread is trying to steal a tile. */
+ while (tile_stealing_state != NOT_STEALING && stealable_tiles > 0) {
+ /* Someone else is currently trying to get a tile.
+ * Wait on the condition variable and try later. */
+ tile_steal_cond.wait(tile_lock);
+ }
+ /* If another thread stole the last stealable tile in the meantime, give up. */
+ if (stealable_tiles == 0) {
+ return false;
+ }
+
+ /* There are stealable tiles in flight, so signal that one should be released.
+ * get_tile_stolen() picks this up and switches the state to RELEASING_TILE. */
+ tile_stealing_state = WAITING_FOR_TILE;
+
+ /* Wait until a device notices the signal and releases its tile. */
+ while (tile_stealing_state != GOT_TILE && stealable_tiles > 0) {
+ tile_steal_cond.wait(tile_lock);
+ }
+ /* If the last stealable tile finished on its own, give up. */
+ if (tile_stealing_state != GOT_TILE) {
+ tile_stealing_state = NOT_STEALING;
+ return false;
+ }
+
+ /* Successfully stole a tile, now move it to the new device. */
+ rtile = stolen_tile;
+ rtile.buffers->buffer.move_device(tile_device);
+ /* Refresh the tile's buffer pointer from the buffer after the move. */
+ rtile.buffer = rtile.buffers->buffer.device_pointer;
+ rtile.stealing_state = RenderTile::NO_STEALING;
+ /* Only the samples that were not rendered yet remain, starting at the current sample. */
+ rtile.num_samples -= (rtile.sample - rtile.start_sample);
+ rtile.start_sample = rtile.sample;
+
+ tile_stealing_state = NOT_STEALING;
+
+ /* Poke any threads which might be waiting for NOT_STEALING above. */
+ tile_steal_cond.notify_one();
+
+ return true;
+}
+
+/* Report whether a steal request is pending and, if so, claim it.
+ *
+ * Returns true only when tile_stealing_state was switched from WAITING_FOR_TILE to
+ * RELEASING_TILE by this call. No lock is taken here; the state is a std::atomic.
+ * Session::render() binds this function to DeviceTask::get_tile_stolen. */
+bool Session::get_tile_stolen()
+{
+ /* If tile_stealing_state is WAITING_FOR_TILE, atomically set it to RELEASING_TILE
+ * and return true. */
+ TileStealingState expected = WAITING_FOR_TILE;
+ /* NOTE(review): compare_exchange_weak may fail spuriously, in which case this returns false
+ * although a steal was requested; presumably fine if the caller checks again later - confirm. */
+ return tile_stealing_state.compare_exchange_weak(expected, RELEASING_TILE);
+}
+
bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types)
{
if (progress.get_cancel()) {
@@ -403,7 +460,8 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
denoising_cond.wait(tile_lock);
continue;
}
- return false;
+
+ return steal_tile(rtile, tile_device, tile_lock);
}
/* fill render tile */
@@ -419,11 +477,18 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
if (tile->state == Tile::DENOISE) {
rtile.task = RenderTile::DENOISE;
}
- else if (read_bake_tile_cb) {
- rtile.task = RenderTile::BAKE;
- }
else {
- rtile.task = RenderTile::PATH_TRACE;
+ if (tile_device->info.type == DEVICE_CPU) {
+ stealable_tiles++;
+ rtile.stealing_state = RenderTile::CAN_BE_STOLEN;
+ }
+
+ if (read_bake_tile_cb) {
+ rtile.task = RenderTile::BAKE;
+ }
+ else {
+ rtile.task = RenderTile::PATH_TRACE;
+ }
}
tile_lock.unlock();
@@ -508,6 +573,26 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
{
thread_scoped_lock tile_lock(tile_mutex);
+ if (rtile.stealing_state != RenderTile::NO_STEALING) {
+ stealable_tiles--;
+ if (rtile.stealing_state == RenderTile::WAS_STOLEN) {
+ /* If the tile is being stolen, don't release it here - the new device will pick up where
+ * the old one left off. */
+
+ assert(tile_stealing_state == RELEASING_TILE);
+ assert(rtile.sample < rtile.start_sample + rtile.num_samples);
+
+ tile_stealing_state = GOT_TILE;
+ stolen_tile = rtile;
+ tile_steal_cond.notify_all();
+ return;
+ }
+ else if (stealable_tiles == 0) {
+ /* If this was the last stealable tile, wake up any threads still waiting for one. */
+ tile_steal_cond.notify_all();
+ }
+ }
+
progress.add_finished_tile(rtile.task == RenderTile::DENOISE);
bool delete_tile;
@@ -815,6 +900,8 @@ void Session::reset_(BufferParams &buffer_params, int samples)
}
tile_manager.reset(buffer_params, samples);
+ stealable_tiles = 0;
+ tile_stealing_state = NOT_STEALING;
progress.reset_sample();
bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX;
@@ -1075,6 +1162,7 @@ void Session::render(bool need_denoise)
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
+ task.get_tile_stolen = function_bind(&Session::get_tile_stolen, this);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index a22bf7731ae..770d3124db7 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -193,6 +193,8 @@ class Session {
bool render_need_denoise(bool &delayed);
+ bool steal_tile(RenderTile &tile, Device *tile_device, thread_scoped_lock &tile_lock);
+ bool get_tile_stolen();
bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types);
void update_tile_sample(RenderTile &tile);
void release_tile(RenderTile &tile, const bool need_denoise);
@@ -217,11 +219,22 @@ class Session {
thread_mutex buffers_mutex;
thread_mutex display_mutex;
thread_condition_variable denoising_cond;
+ thread_condition_variable tile_steal_cond;
double reset_time;
double last_update_time;
double last_display_time;
+ RenderTile stolen_tile;
+ /* Session-wide state of the tile stealing handshake. Transitions visible in session.cpp:
+ * NOT_STEALING -> WAITING_FOR_TILE in steal_tile(), WAITING_FOR_TILE -> RELEASING_TILE in
+ * get_tile_stolen(), RELEASING_TILE -> GOT_TILE in release_tile(), and back to NOT_STEALING
+ * in steal_tile() or reset_(). */
+ typedef enum {
+ NOT_STEALING, /* There currently is no tile stealing in progress. */
+ WAITING_FOR_TILE, /* A device is waiting for another device to release a tile. */
+ RELEASING_TILE, /* A device is releasing a stealable tile. */
+ GOT_TILE /* A device has released a stealable tile, which is now stored in stolen_tile. */
+ } TileStealingState;
+ std::atomic<TileStealingState> tile_stealing_state;
+ int stealable_tiles;
+
/* progressive refine */
bool update_progressive_refine(bool cancel);
};