diff options
author | Mai Lavelle <mai.lavelle@gmail.com> | 2017-06-06 22:54:06 +0300 |
---|---|---|
committer | Mai Lavelle <mai.lavelle@gmail.com> | 2017-06-08 12:35:27 +0300 |
commit | cf086d6273fa613939a43b38fcb5f03b4c81d18f (patch) | |
tree | a99f3ac0fe89429e8b9b80f7801ceabae111f304 | |
parent | e1805299d2f43f8ab473a979bafc7a8b57d9270b (diff) |
Cycles: Adjust split kernel tile updating logic to make rendering a bit faster [branch: temp-cycles-opencl-staging]
This makes tiles update less frequently and puts more samples in each
batch, which makes rendering faster. This helps a bit with the
slowdown seen from D2703.
I don't really like tiles not updating as much; it feels much less
responsive. Maybe there's another way to go about it?
Timings by nirved: https://hastebin.com/ifanihewum.css
-rw-r--r-- | intern/cycles/app/cycles_server.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/app/cycles_standalone.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/blender/blender_session.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 36 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/session.h | 2 |
6 files changed, 37 insertions, 9 deletions
diff --git a/intern/cycles/app/cycles_server.cpp b/intern/cycles/app/cycles_server.cpp index e65b9d769e4..042550cf393 100644 --- a/intern/cycles/app/cycles_server.cpp +++ b/intern/cycles/app/cycles_server.cpp @@ -39,6 +39,8 @@ int main(int argc, const char **argv) bool list = false, debug = false; int threads = 0, verbosity = 1; + Session::headless = true; + vector<DeviceType>& types = Device::available_types(); foreach(DeviceType type, types) { diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 0cd249f0d84..a0c49f8b1d5 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -482,6 +482,8 @@ int main(int argc, const char **argv) path_init(); options_parse(argc, argv); + Session::headless = options.session_params.background; + #ifdef WITH_CYCLES_STANDALONE_GUI if(options.session_params.background) { #endif diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index c6a59577507..74829128f12 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -133,6 +133,8 @@ void BlenderSession::create_session() scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4); scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4); + Session::headless = headless; + /* create session */ session = new Session(session_params); session->scene = scene; diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index d2b3a89fa98..c785fd0cf98 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -19,12 +19,14 @@ #include "kernel/kernel_types.h" #include "kernel/split/kernel_split_data_types.h" +#include "render/session.h" + #include "util/util_logging.h" 
#include "util/util_time.h" CCL_NAMESPACE_BEGIN -static const double alpha = 0.1; /* alpha for rolling average */ +static const double alpha = 0.4; /* alpha for rolling average */ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) { @@ -195,8 +197,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, tile.sample = tile.start_sample; - /* for exponential increase between tile updates */ - int time_multiplier = 1; + /* time between tile updates */ + double time_between_updates = 1.5; while(tile.sample < tile.start_sample + tile.num_samples) { /* to keep track of how long it takes to run a number of samples */ @@ -206,12 +208,21 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, const int initial_num_samples = 1; /* approx number of samples per second */ int samples_per_second = (avg_time_per_sample > 0.0) ? - int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples; + int(time_between_updates / avg_time_per_sample) + 1 : initial_num_samples; RenderTile subtile = tile; subtile.start_sample = tile.sample; subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample); + /* if running headless render all samples at once */ + if(Session::headless) { + VLOG(3) << "Headless render, rendering all samples in tile"; + subtile.num_samples = tile.start_sample + tile.num_samples - tile.sample; + } + + VLOG(3) << "Starting batch of " << subtile.num_samples << " samples"; + VLOG(3) << "Target completion time: " << time_between_updates; + if(device->have_error()) { return false; } @@ -244,7 +255,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, while(activeRaysAvailable) { /* Do path-iteration in host [Enqueue Path-iteration kernels. 
*/ - for(int PathIter = 0; PathIter < 16; PathIter++) { + for(int PathIter = 0; PathIter < 1; PathIter++) { ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size); ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size); @@ -269,7 +280,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, /* Wait up to twice as many seconds for current samples to finish * to avoid artifacts in render result from ending too soon. */ - cancel_time = time_dt() + 2.0 * time_multiplier; + cancel_time = time_dt() + 1.5 * time_between_updates; } if(time_dt() > cancel_time) { @@ -301,9 +312,16 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, } } - double time_per_sample = ((time_dt()-start_time) / subtile.num_samples); + double time_for_batch = time_dt() - start_time; + double time_per_sample = time_for_batch / subtile.num_samples; + + VLOG(3) << "Time rendering batch: " << time_for_batch; + VLOG(3) << "Time per sample: " << time_per_sample; - if(avg_time_per_sample == 0.0) { + if((tile.sample == tile.start_sample) && (subtile.num_samples != tile.start_sample + tile.num_samples - tile.sample)) { + /* dont update avg_time_per_sample for first sample batch in tile as doing so may skew the result */ + } + else if(avg_time_per_sample == 0.0) { /* start rolling average */ avg_time_per_sample = time_per_sample; } @@ -316,7 +334,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, tile.sample += subtile.num_samples; task->update_progress(&tile, tile.w*tile.h*subtile.num_samples); - time_multiplier = min(time_multiplier << 1, 10); + time_between_updates = 10; if(task->get_cancel()) { return true; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 08909943c49..c38843540e6 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -38,6 +38,8 @@ CCL_NAMESPACE_BEGIN +bool Session::headless = false; + /* Note about preserve_tile_device option for tile 
manager: * progressive refine and viewport rendering does requires tiles to * always be allocated for the same device diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 9f8bb8c42fa..6f1cbf2789c 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -161,6 +161,8 @@ public: * (for example, when rendering with unlimited samples). */ float get_progress(); + static bool headless; + protected: struct DelayedReset { thread_mutex mutex; |