Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2019-05-10 22:39:58 +0300
committerLukas Stockner <lukas.stockner@freenet.de>2020-05-15 21:25:24 +0300
commitd9773edaa394f61393f9c8b80275e62f74306097 (patch)
tree232b771b341e98a5403af16791bdcca133cb1edd /intern/cycles
parent3ff8ca60e94db2584ca76e323a54c738e677d5f8 (diff)
Cycles: code refactor to bake using regular render session and tiles
There should be no user visible change from this, except that tile size now affects performance. The goal here is to simplify bake denoising in D3099, letting it reuse more denoising tiles and pass code. A lot of code is now shared with regular rendering, with the two main differences being that we read some render result passes from the bake API when starting to render a tile, and call the bake kernel instead of the path trace kernel. With this kind of design where Cycles asks for tiles from the bake API, it should eventually be easier to reduce memory usage, show tiles as they are baked, or bake multiple passes at once, though there's still quite some work needed for that. Reviewers: #cycles Subscribers: monio, wmatyjewicz, lukasstockner97, michaelknubben Differential Revision: https://developer.blender.org/D3108
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/blender/addon/__init__.py4
-rw-r--r--intern/cycles/blender/addon/engine.py4
-rw-r--r--intern/cycles/blender/blender_python.cpp28
-rw-r--r--intern/cycles/blender/blender_session.cpp168
-rw-r--r--intern/cycles/blender/blender_session.h13
-rw-r--r--intern/cycles/blender/blender_sync.cpp3
-rw-r--r--intern/cycles/device/cuda/device_cuda.h2
-rw-r--r--intern/cycles/device/cuda/device_cuda_impl.cpp52
-rw-r--r--intern/cycles/device/device_cpu.cpp28
-rw-r--r--intern/cycles/device/opencl/device_opencl.h1
-rw-r--r--intern/cycles/device/opencl/device_opencl_impl.cpp53
-rw-r--r--intern/cycles/kernel/kernel_bake.h152
-rw-r--r--intern/cycles/kernel/kernel_types.h17
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h3
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h19
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu11
-rw-r--r--intern/cycles/render/bake.cpp305
-rw-r--r--intern/cycles/render/bake.h52
-rw-r--r--intern/cycles/render/buffers.cpp34
-rw-r--r--intern/cycles/render/buffers.h3
-rw-r--r--intern/cycles/render/film.cpp17
-rw-r--r--intern/cycles/render/session.cpp32
-rw-r--r--intern/cycles/render/session.h1
23 files changed, 456 insertions, 546 deletions
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index 3d2a52d0cf6..3ab352e52a2 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -82,8 +82,8 @@ class CyclesRender(bpy.types.RenderEngine):
def render(self, depsgraph):
engine.render(self, depsgraph)
- def bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
- engine.bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result)
+ def bake(self, depsgraph, obj, pass_type, pass_filter, width, height):
+ engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height)
# viewport render
def view_update(self, context, depsgraph):
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index a1b063430f5..e7ea5e7a1f6 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -168,11 +168,11 @@ def render(engine, depsgraph):
_cycles.render(engine.session, depsgraph.as_pointer())
-def bake(engine, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
+def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height):
import _cycles
session = getattr(engine, "session", None)
if session is not None:
- _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, object_id, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer())
+ _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, width, height)
def reset(engine, data, depsgraph):
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 8c7c0bc1daa..79c16856462 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -298,22 +298,18 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pydepsgraph, *pyobject;
- PyObject *pypixel_array, *pyresult;
const char *pass_type;
- int num_pixels, depth, object_id, pass_filter;
+ int pass_filter, width, height;
if (!PyArg_ParseTuple(args,
- "OOOsiiOiiO",
+ "OOOsiii",
&pysession,
&pydepsgraph,
&pyobject,
&pass_type,
&pass_filter,
- &object_id,
- &pypixel_array,
- &num_pixels,
- &depth,
- &pyresult))
+ &width,
+ &height))
return NULL;
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
@@ -326,23 +322,9 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr);
BL::Object b_object(objectptr);
- void *b_result = PyLong_AsVoidPtr(pyresult);
-
- PointerRNA bakepixelptr;
- RNA_pointer_create(NULL, &RNA_BakePixel, PyLong_AsVoidPtr(pypixel_array), &bakepixelptr);
- BL::BakePixel b_bake_pixel(bakepixelptr);
-
python_thread_state_save(&session->python_thread_state);
- session->bake(b_depsgraph,
- b_object,
- pass_type,
- pass_filter,
- object_id,
- b_bake_pixel,
- (size_t)num_pixels,
- depth,
- (float *)b_result);
+ session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height);
python_thread_state_restore(&session->python_thread_state);
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 5ea96d6bdfd..31b09695632 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -247,9 +247,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
void BlenderSession::free_session()
{
- if (sync)
- delete sync;
-
+ delete sync;
delete session;
}
@@ -317,6 +315,7 @@ static void end_render_result(BL::RenderEngine &b_engine,
void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
+ bool do_read_only,
bool highlight)
{
int x = rtile.x - session->tile_manager.params.full_x;
@@ -342,7 +341,23 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
BL::RenderLayer b_rlay = *b_single_rlay;
- if (do_update_only) {
+ if (do_read_only) {
+ /* copy each pass */
+ BL::RenderLayer::passes_iterator b_iter;
+
+ for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
+ BL::RenderPass b_pass(*b_iter);
+
+ /* find matching pass type */
+ PassType pass_type = BlenderSync::get_pass_type(b_pass);
+ int components = b_pass.channels();
+
+ rtile.buffers->set_pass_rect(pass_type, components, (float *)b_pass.rect());
+ }
+
+ end_render_result(b_engine, b_rr, false, false, false);
+ }
+ else if (do_update_only) {
/* Sample would be zero at initial tile update, which is only needed
* to tag tile form blender side as IN PROGRESS for proper highlight
* no buffers should be sent to blender yet. For denoise we also
@@ -362,9 +377,14 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
}
}
+void BlenderSession::read_render_tile(RenderTile &rtile)
+{
+ do_write_update_render_tile(rtile, false, true, false);
+}
+
void BlenderSession::write_render_tile(RenderTile &rtile)
{
- do_write_update_render_tile(rtile, false, false);
+ do_write_update_render_tile(rtile, false, false, false);
}
void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
@@ -374,9 +394,9 @@ void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
* would need to be investigated a bit further, but for now shall be fine
*/
if (!b_engine.is_preview())
- do_write_update_render_tile(rtile, true, highlight);
+ do_write_update_render_tile(rtile, true, false, highlight);
else
- do_write_update_render_tile(rtile, false, false);
+ do_write_update_render_tile(rtile, false, false, false);
}
static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
@@ -593,25 +613,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
#endif
}
-static void populate_bake_data(BakeData *data,
- const int object_id,
- BL::BakePixel &pixel_array,
- const int num_pixels)
-{
- BL::BakePixel bp = pixel_array;
-
- int i;
- for (i = 0; i < num_pixels; i++) {
- if (bp.object_id() == object_id) {
- data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
- }
- else {
- data->set_null(i);
- }
- bp = bp.next();
- }
-}
-
static int bake_pass_filter_get(const int pass_filter)
{
int flag = BAKE_FILTER_NONE;
@@ -642,43 +643,26 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
BL::Object &b_object,
const string &pass_type,
const int pass_filter,
- const int object_id,
- BL::BakePixel &pixel_array,
- const size_t num_pixels,
- const int /*depth*/,
- float result[])
+ const int bake_width,
+ const int bake_height)
{
b_depsgraph = b_depsgraph_;
ShaderEvalType shader_type = get_shader_type(pass_type);
-
- /* Set baking flag in advance, so kernel loading can check if we need
- * any baking capabilities.
- */
- scene->bake_manager->set_baking(true);
-
- /* ensure kernels are loaded before we do any scene updates */
- session->load_kernels();
-
- if (shader_type == SHADER_EVAL_UV) {
- /* force UV to be available */
- Pass::add(PASS_UV, scene->film->passes);
- }
-
int bake_pass_filter = bake_pass_filter_get(pass_filter);
- bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter);
- /* force use_light_pass to be true if we bake more than just colors */
- if (bake_pass_filter & ~BAKE_FILTER_COLOR) {
- Pass::add(PASS_LIGHT, scene->film->passes);
- }
+ /* Initialize bake manager, before we load the baking kernels. */
+ scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter);
- /* create device and update scene */
- scene->film->tag_update(scene);
- scene->integrator->tag_update(scene);
+ /* Passes are identified by name, so in order to return the combined pass we need to set the
+ * name. */
+ Pass::add(PASS_COMBINED, scene->film->passes, "Combined");
+
+ session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1);
+ session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
if (!session->progress.get_cancel()) {
- /* update scene */
+ /* Sync scene. */
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_camera(b_render, b_camera_override, width, height, "");
sync->sync_data(
@@ -686,75 +670,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
builtin_images_load();
}
- BakeData *bake_data = NULL;
+ /* Object might have been disabled for rendering or excluded in some
+ * other way, in that case Blender will report a warning afterwards. */
+ bool object_found = false;
+ foreach (Object *ob, scene->objects) {
+ if (ob->name == b_object.name()) {
+ object_found = true;
+ break;
+ }
+ }
- if (!session->progress.get_cancel()) {
- /* get buffer parameters */
+ if (object_found && !session->progress.get_cancel()) {
+ /* Get session and buffer parameters. */
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ session_params.progressive_refine = false;
- scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
+ BufferParams buffer_params;
+ buffer_params.width = bake_width;
+ buffer_params.height = bake_height;
+ buffer_params.passes = scene->film->passes;
- /* set number of samples */
+ /* Update session. */
session->tile_manager.set_samples(session_params.samples);
session->reset(buffer_params, session_params.samples);
- session->update_scene();
-
- /* find object index. todo: is arbitrary - copied from mesh_displace.cpp */
- size_t object_index = OBJECT_NONE;
- int tri_offset = 0;
-
- for (size_t i = 0; i < scene->objects.size(); i++) {
- const Object *object = scene->objects[i];
- const Geometry *geom = object->geometry;
- if (object->name == b_object.name() && geom->type == Geometry::MESH) {
- const Mesh *mesh = static_cast<const Mesh *>(geom);
- object_index = i;
- tri_offset = mesh->prim_offset;
- break;
- }
- }
-
- /* Object might have been disabled for rendering or excluded in some
- * other way, in that case Blender will report a warning afterwards. */
- if (object_index != OBJECT_NONE) {
- int object = object_index;
-
- bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
- populate_bake_data(bake_data, object_id, pixel_array, num_pixels);
- }
-
- /* set number of samples */
- session->tile_manager.set_samples(session_params.samples);
- session->reset(buffer_params, session_params.samples);
- session->update_scene();
session->progress.set_update_callback(
function_bind(&BlenderSession::update_bake_progress, this));
}
/* Perform bake. Check cancel to avoid crash with incomplete scene data. */
- if (!session->progress.get_cancel() && bake_data) {
- scene->bake_manager->bake(scene->device,
- &scene->dscene,
- scene,
- session->progress,
- shader_type,
- bake_pass_filter,
- bake_data,
- result);
+ if (object_found && !session->progress.get_cancel()) {
+ session->start();
+ session->wait();
}
- /* free all memory used (host and device), so we wouldn't leave render
- * engine with extra memory allocated
- */
-
- session->device_free();
-
- delete sync;
- sync = NULL;
+ session->read_bake_tile_cb = function_null;
+ session->write_render_tile_cb = function_null;
}
void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 3e6498bb655..34e952e312b 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -66,14 +66,12 @@ class BlenderSession {
BL::Object &b_object,
const string &pass_type,
const int custom_flag,
- const int object_id,
- BL::BakePixel &pixel_array,
- const size_t num_pixels,
- const int depth,
- float pixels[]);
+ const int bake_width,
+ const int bake_height);
void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
+ void read_render_tile(RenderTile &rtile);
/* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */
@@ -155,7 +153,10 @@ class BlenderSession {
void do_write_update_render_result(BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only);
- void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
+ void do_write_update_render_tile(RenderTile &rtile,
+ bool do_update_only,
+ bool do_read_only,
+ bool highlight);
void builtin_images_load();
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index e8031be7dd1..f16305e737d 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -481,6 +481,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("AO", PASS_AO);
MAP_PASS("Shadow", PASS_SHADOW);
+ MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
+ MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);
+
#ifdef __KERNEL_DEBUG__
MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES);
MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES);
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 3e397da895b..3f23f0fe4c5 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -223,7 +223,7 @@ class CUDADevice : public Device {
CUdeviceptr d_wtile,
CUstream stream = 0);
- void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+ void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
void film_convert(DeviceTask &task,
device_ptr buffer,
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index ba5d479e0e7..acf53c3eb1b 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -586,20 +586,23 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
cuMemGetInfo(&free_before, &total);
/* Get kernel function. */
- CUfunction cuPathTrace;
+ CUfunction cuRender;
- if (requested_features.use_integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ if (requested_features.use_baking) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
/* Launch kernel, using just 1 block appears sufficient to reserve
* memory for all multiprocessors. It would be good to do this in
@@ -609,7 +612,7 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
void *args[] = {&d_work_tiles, &total_work_size};
- cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
@@ -1780,9 +1783,7 @@ void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
0));
}
-void CUDADevice::path_trace(DeviceTask &task,
- RenderTile &rtile,
- device_vector<WorkTile> &work_tiles)
+void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
{
scoped_timer timer(&rtile.buffers->render_time);
@@ -1790,21 +1791,24 @@ void CUDADevice::path_trace(DeviceTask &task,
return;
CUDAContextScope scope(this);
- CUfunction cuPathTrace;
+ CUfunction cuRender;
/* Get kernel function. */
- if (task.integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ if (rtile.task == RenderTile::BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
if (have_error()) {
return;
}
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
work_tiles.alloc(1);
@@ -1822,8 +1826,8 @@ void CUDADevice::path_trace(DeviceTask &task,
* remain conservative for GPUs connected to a display to avoid driver
* timeouts and display freezing. */
int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
if (!info.display_device) {
min_blocks *= 8;
}
@@ -1851,7 +1855,7 @@ void CUDADevice::path_trace(DeviceTask &task,
void *args[] = {&d_work_tiles, &total_work_size};
cuda_assert(
- cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
uint filter_sample = sample + wtile->num_samples - 1;
@@ -1957,10 +1961,7 @@ void CUDADevice::shader(DeviceTask &task)
CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
/* get kernel function */
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
}
else {
@@ -2297,9 +2298,12 @@ void CUDADevice::thread_run(DeviceTask *task)
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- path_trace(*task, tile, work_tiles);
+ render(*task, tile, work_tiles);
}
}
+ else if (tile.task == RenderTile::BAKE) {
+ render(*task, tile, work_tiles);
+ }
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c701c14318f..fc6febd8cee 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -188,6 +188,7 @@ class CPUDevice : public Device {
convert_to_byte_kernel;
KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
shader_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel;
KernelFunctions<void (*)(
int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
@@ -270,6 +271,7 @@ class CPUDevice : public Device {
REGISTER_KERNEL(convert_to_half_float),
REGISTER_KERNEL(convert_to_byte),
REGISTER_KERNEL(shader),
+ REGISTER_KERNEL(bake),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
REGISTER_KERNEL(filter_write_feature),
@@ -895,7 +897,7 @@ class CPUDevice : public Device {
}
}
- void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+ void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
@@ -919,12 +921,21 @@ class CPUDevice : public Device {
break;
}
- for (int y = tile.y; y < tile.y + tile.h; y++) {
- for (int x = tile.x; x < tile.x + tile.w; x++) {
- if (use_coverage) {
- coverage.init_pixel(x, y);
+ if (tile.task == RenderTile::PATH_TRACE) {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ if (use_coverage) {
+ coverage.init_pixel(x, y);
+ }
+ path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+ }
+ else {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
- path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
@@ -1019,9 +1030,12 @@ class CPUDevice : public Device {
split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
}
else {
- path_trace(task, tile, kg);
+ render(task, tile, kg);
}
}
+ else if (tile.task == RenderTile::BAKE) {
+ render(task, tile, kg);
+ }
else if (tile.task == RenderTile::DENOISE) {
denoise(denoising, tile);
task.update_progress(&tile, tile.w * tile.h);
diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h
index d6f4fb43061..389268e1c2a 100644
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -451,6 +451,7 @@ class OpenCLDevice : public Device {
device_ptr rgba_half);
void shader(DeviceTask &task);
void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
+ void bake(DeviceTask &task, RenderTile &tile);
void denoise(RenderTile &tile, DenoisingTask &denoising);
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index 2766f85d17c..beb3174b111 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -1367,6 +1367,9 @@ void OpenCLDevice::thread_run(DeviceTask *task)
*/
clFinish(cqCommandQueue);
}
+ else if (tile.task == RenderTile::BAKE) {
+ bake(*task, tile);
+ }
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
@@ -1858,10 +1861,7 @@ void OpenCLDevice::shader(DeviceTask &task)
cl_int d_offset = task.offset;
OpenCLDevice::OpenCLProgram *program = &background_program;
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- program = &bake_program;
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
program = &displace_program;
}
program->wait_for_availability();
@@ -1892,6 +1892,51 @@ void OpenCLDevice::shader(DeviceTask &task)
}
}
+void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
+{
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ /* Cast arguments to cl types. */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+ cl_int d_x = rtile.x;
+ cl_int d_y = rtile.y;
+ cl_int d_w = rtile.w;
+ cl_int d_h = rtile.h;
+ cl_int d_offset = rtile.offset;
+ cl_int d_stride = rtile.stride;
+
+ bake_program.wait_for_availability();
+ cl_kernel kernel = bake_program();
+
+ cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer);
+
+ set_kernel_arg_buffers(kernel, &start_arg_index);
+
+ start_arg_index += kernel_set_args(
+ kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride);
+
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+
+ kernel_set_args(kernel, start_arg_index, sample);
+
+ enqueue_kernel(kernel, d_w, d_h);
+
+ rtile.sample = sample + 1;
+
+ task.update_progress(&rtile, rtile.w * rtile.h);
+ }
+
+ clFinish(cqCommandQueue);
+}
+
string OpenCLDevice::kernel_build_options(const string *debug_src)
{
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index f1fc697553a..2709a9da734 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -18,38 +18,40 @@ CCL_NAMESPACE_BEGIN
#ifdef __BAKING__
-ccl_device_inline void compute_light_pass(
+ccl_device_noinline void compute_light_pass(
KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample)
{
kernel_assert(kernel_data.film.use_light_pass);
- PathRadiance L_sample;
- PathState state;
- Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- /* emission and indirect shader data memory used by various functions */
- ShaderData emission_sd, indirect_sd;
-
- ray.P = sd->P + sd->Ng;
- ray.D = -sd->Ng;
- ray.t = FLT_MAX;
-# ifdef __CAMERA_MOTION__
- ray.time = 0.5f;
-# endif
+ /* Emission and indirect shader data memory used by various functions. */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ShaderData indirect_sd;
- /* init radiance */
- path_radiance_init(kg, &L_sample);
+ /* Init radiance. */
+ path_radiance_init(kg, L);
- /* init path state */
- path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
+ /* Init path state. */
+ PathState state;
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, NULL);
- /* evaluate surface shader */
+ /* Evaluate surface shader. */
shader_eval_surface(kg, sd, &state, NULL, state.flag);
/* TODO, disable more closures we don't need besides transparent */
shader_bsdf_disable_transparency(kg, sd);
+ /* Init ray. */
+ Ray ray;
+ ray.P = sd->P + sd->Ng;
+ ray.D = -sd->Ng;
+ ray.t = FLT_MAX;
+# ifdef __CAMERA_MOTION__
+ ray.time = 0.5f;
+# endif
+
# ifdef __BRANCHED_PATH__
if (!kernel_data.integrator.branched) {
/* regular path tracer */
@@ -57,14 +59,13 @@ ccl_device_inline void compute_light_pass(
/* sample ambient occlusion */
if (pass_filter & BAKE_FILTER_AO) {
- kernel_path_ao(
- kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
+ kernel_path_ao(kg, sd, emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, sd));
}
/* sample emission */
if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission);
+ path_radiance_accum_emission(kg, L, &state, throughput, emission);
}
bool is_sss_sample = false;
@@ -77,12 +78,10 @@ ccl_device_inline void compute_light_pass(
SubsurfaceIndirectRays ss_indirect;
kernel_path_subsurface_init_indirect(&ss_indirect);
if (kernel_path_subsurface_scatter(
- kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) {
+ kg, sd, emission_sd, L, &state, &ray, &throughput, &ss_indirect)) {
while (ss_indirect.num_rays) {
- kernel_path_subsurface_setup_indirect(
- kg, &ss_indirect, &state, &ray, &L_sample, &throughput);
- kernel_path_indirect(
- kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+ kernel_path_subsurface_setup_indirect(kg, &ss_indirect, &state, &ray, L, &throughput);
+ kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
}
is_sss_sample = true;
}
@@ -91,18 +90,18 @@ ccl_device_inline void compute_light_pass(
/* sample light and BSDF */
if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
- kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
+ kernel_path_surface_connect_light(kg, sd, emission_sd, throughput, &state, L);
- if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
+ if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L->state, &ray)) {
# ifdef __LAMP_MIS__
state.ray_t = 0.0f;
# endif
/* compute indirect light */
- kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+ kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
/* sum and reset indirect light pass variables for the next samples */
- path_radiance_sum_indirect(&L_sample);
- path_radiance_reset_indirect(&L_sample);
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
}
}
# ifdef __BRANCHED_PATH__
@@ -112,13 +111,13 @@ ccl_device_inline void compute_light_pass(
/* sample ambient occlusion */
if (pass_filter & BAKE_FILTER_AO) {
- kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
+ kernel_branched_path_ao(kg, sd, emission_sd, L, &state, throughput);
}
/* sample emission */
if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
- path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission);
+ path_radiance_accum_emission(kg, L, &state, throughput, emission);
}
# ifdef __SUBSURFACE__
@@ -127,7 +126,7 @@ ccl_device_inline void compute_light_pass(
/* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
* if scattering was successful. */
kernel_branched_path_subsurface_scatter(
- kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
+ kg, sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
}
# endif
@@ -138,19 +137,16 @@ ccl_device_inline void compute_light_pass(
if (kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(
- kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
+ kg, sd, emission_sd, &state, throughput, 1.0f, L, all);
}
# endif
/* indirect light */
kernel_branched_path_surface_indirect_light(
- kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
+ kg, sd, &indirect_sd, emission_sd, throughput, 1.0f, &state, L);
}
}
# endif
-
- /* accumulate into master L */
- path_radiance_accum_sample(L, &L_sample);
}
/* this helps with AA but it's not the real solution as it does not AA the geometry
@@ -225,41 +221,28 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
return out;
}
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
- ccl_global uint4 *input,
- ccl_global float4 *output,
- ShaderEvalType type,
- int pass_filter,
- int i,
- int offset,
- int sample)
+ccl_device void kernel_bake_evaluate(
+ KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
{
- ShaderData sd;
- PathState state = {0};
- uint4 in = input[i * 2];
- uint4 diff = input[i * 2 + 1];
-
- float3 out = make_float3(0.0f, 0.0f, 0.0f);
+ /* Setup render buffers. */
+ const int index = offset + x + y * stride;
+ const int pass_stride = kernel_data.film.pass_stride;
+ buffer += index * pass_stride;
- int object = in.x;
- int prim = in.y;
+ ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
+ ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
+ ccl_global float *output = buffer + kernel_data.film.pass_combined;
+ int prim = __float_as_uint(primitive[1]);
if (prim == -1)
return;
- float u = __uint_as_float(in.z);
- float v = __uint_as_float(in.w);
-
- float dudx = __uint_as_float(diff.x);
- float dudy = __uint_as_float(diff.y);
- float dvdx = __uint_as_float(diff.z);
- float dvdy = __uint_as_float(diff.w);
+ prim += kernel_data.bake.tri_offset;
+ /* Random number generator. */
+ uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed;
int num_samples = kernel_data.integrator.aa_samples;
- /* random number generator */
- uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
-
float filter_x, filter_y;
if (sample == 0) {
filter_x = filter_y = 0.5f;
@@ -268,23 +251,29 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
}
- /* subpixel u/v offset */
+ /* Barycentric UV with subpixel offset. */
+ float u = primitive[2];
+ float v = primitive[3];
+
+ float dudx = differential[0];
+ float dudy = differential[1];
+ float dvdx = differential[2];
+ float dvdy = differential[3];
+
if (sample > 0) {
u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
1.0f - u);
}
- /* triangle */
+ /* Shader data setup. */
+ int object = kernel_data.bake.object_index;
int shader;
float3 P, Ng;
triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
- /* light passes */
- PathRadiance L;
- path_radiance_init(kg, &L);
-
+ ShaderData sd;
shader_setup_from_sample(
kg,
&sd,
@@ -302,7 +291,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
LAMP_NONE);
sd.I = sd.N;
- /* update differentials */
+ /* Setup differentials. */
sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
sd.du.dx = dudx;
@@ -310,17 +299,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
sd.dv.dx = dvdx;
sd.dv.dy = dvdy;
- /* set RNG state for shaders that use sampling */
+ /* Set RNG state for shaders that use sampling. */
+ PathState state = {0};
state.rng_hash = rng_hash;
state.rng_offset = 0;
state.sample = sample;
state.num_samples = num_samples;
state.min_ray_pdf = FLT_MAX;
- /* light passes if we need more than color */
- if (pass_filter & ~BAKE_FILTER_COLOR)
+ /* Light passes if we need more than color. */
+ PathRadiance L;
+ int pass_filter = kernel_data.bake.pass_filter;
+
+ if (kernel_data.bake.pass_filter & ~BAKE_FILTER_COLOR)
compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
+ float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+ ShaderEvalType type = (ShaderEvalType)kernel_data.bake.type;
switch (type) {
/* data passes */
case SHADER_EVAL_NORMAL:
@@ -441,10 +437,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
}
/* write output */
- const float output_fac = 1.0f / num_samples;
- const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
-
- output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result;
+ const float4 result = make_float4(out.x, out.y, out.z, 1.0f);
+ kernel_write_pass_float4(output, result);
}
#endif /* __BAKING__ */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index a1f8c35348d..304835a1685 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -395,6 +395,10 @@ typedef enum PassType {
PASS_VOLUME_INDIRECT,
/* No Scatter color since it's tricky to define what it would even mean. */
PASS_CATEGORY_LIGHT_END = 63,
+
+ PASS_BAKE_PRIMITIVE,
+ PASS_BAKE_DIFFERENTIAL,
+ PASS_CATEGORY_BAKE_END = 95
} PassType;
#define PASS_ANY (~0)
@@ -1248,6 +1252,10 @@ typedef struct KernelFilm {
float4 xyz_to_b;
float4 rgb_to_y;
+ int pass_bake_primitive;
+ int pass_bake_differential;
+ int pad;
+
#ifdef __KERNEL_DEBUG__
int pass_bvh_traversed_nodes;
int pass_bvh_traversed_instances;
@@ -1427,6 +1435,14 @@ typedef struct KernelTables {
} KernelTables;
static_assert_align(KernelTables, 16);
+typedef struct KernelBake {
+ int object_index;
+ int tri_offset;
+ int type;
+ int pass_filter;
+} KernelBake;
+static_assert_align(KernelBake, 16);
+
typedef struct KernelData {
KernelCamera cam;
KernelFilm film;
@@ -1435,6 +1451,7 @@ typedef struct KernelData {
KernelBVH bvh;
KernelCurves curve;
KernelTables tables;
+ KernelBake bake;
} KernelData;
static_assert_align(KernelData, 16);
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 683f4b88d79..ea3103f12c3 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -46,6 +46,9 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
int offset,
int sample);
+void KERNEL_FUNCTION_FULL_NAME(bake)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
+
/* Split kernels */
void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 091e53cfd83..5aa3fb14318 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -132,6 +132,18 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
# endif /* KERNEL_STUB */
}
+/* Bake */
+
+void KERNEL_FUNCTION_FULL_NAME(bake)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
+{
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, bake);
+# else
+ kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
+# endif /* KERNEL_STUB */
+}
+
/* Shader Evaluate */
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
@@ -146,12 +158,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
# ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, shader);
# else
- if (type >= SHADER_EVAL_BAKE) {
-# ifdef __BAKING__
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
-# endif
- }
- else if (type == SHADER_EVAL_DISPLACE) {
+ if (type == SHADER_EVAL_DISPLACE) {
kernel_displace_evaluate(kg, input, output, i);
}
else {
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index c4c810c6a82..d4f41132a11 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -214,13 +214,16 @@ kernel_cuda_background(uint4 *input,
#ifdef __BAKING__
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample)
+kernel_cuda_bake(WorkTile *tile, uint total_work_size)
{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+ int work_index = ccl_global_id(0);
+
+ if(work_index < total_work_size) {
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- if(x < sx + sw) {
KernelGlobals kg;
- kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
+ kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
#endif
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 35f942b3e9b..6044182a51a 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -15,6 +15,7 @@
*/
#include "render/bake.h"
+#include "render/buffers.h"
#include "render/integrator.h"
#include "render/mesh.h"
#include "render/object.h"
@@ -24,272 +25,130 @@
CCL_NAMESPACE_BEGIN
-BakeData::BakeData(const int object, const size_t tri_offset, const size_t num_pixels)
- : m_object(object), m_tri_offset(tri_offset), m_num_pixels(num_pixels)
+static int aa_samples(Scene *scene, Object *object, ShaderEvalType type)
{
- m_primitive.resize(num_pixels);
- m_u.resize(num_pixels);
- m_v.resize(num_pixels);
- m_dudx.resize(num_pixels);
- m_dudy.resize(num_pixels);
- m_dvdx.resize(num_pixels);
- m_dvdy.resize(num_pixels);
-}
-
-BakeData::~BakeData()
-{
- m_primitive.clear();
- m_u.clear();
- m_v.clear();
- m_dudx.clear();
- m_dudy.clear();
- m_dvdx.clear();
- m_dvdy.clear();
-}
-
-void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy)
-{
- m_primitive[i] = (prim == -1 ? -1 : m_tri_offset + prim);
- m_u[i] = uv[0];
- m_v[i] = uv[1];
- m_dudx[i] = dudx;
- m_dudy[i] = dudy;
- m_dvdx[i] = dvdx;
- m_dvdy[i] = dvdy;
-}
-
-void BakeData::set_null(int i)
-{
- m_primitive[i] = -1;
-}
-
-int BakeData::object()
-{
- return m_object;
-}
-
-size_t BakeData::size()
-{
- return m_num_pixels;
-}
+ if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
+ return 1;
+ }
+ else if (type == SHADER_EVAL_NORMAL) {
+ /* Only antialias normal if mesh has bump mapping. */
+ if (object->geometry) {
+ foreach (Shader *shader, object->geometry->used_shaders) {
+ if (shader->has_bump) {
+ return scene->integrator->aa_samples;
+ }
+ }
+ }
-bool BakeData::is_valid(int i)
-{
- return m_primitive[i] != -1;
+ return 1;
+ }
+ else {
+ return scene->integrator->aa_samples;
+ }
}
-uint4 BakeData::data(int i)
+/* Keep it synced with kernel_bake.h logic */
+static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter)
{
- return make_uint4(m_object, m_primitive[i], __float_as_int(m_u[i]), __float_as_int(m_v[i]));
-}
+ const int component_flags = pass_filter &
+ (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
-uint4 BakeData::differentials(int i)
-{
- return make_uint4(__float_as_int(m_dudx[i]),
- __float_as_int(m_dudy[i]),
- __float_as_int(m_dvdx[i]),
- __float_as_int(m_dvdy[i]));
+ switch (type) {
+ case SHADER_EVAL_AO:
+ return BAKE_FILTER_AO;
+ case SHADER_EVAL_SHADOW:
+ return BAKE_FILTER_DIRECT;
+ case SHADER_EVAL_DIFFUSE:
+ return BAKE_FILTER_DIFFUSE | component_flags;
+ case SHADER_EVAL_GLOSSY:
+ return BAKE_FILTER_GLOSSY | component_flags;
+ case SHADER_EVAL_TRANSMISSION:
+ return BAKE_FILTER_TRANSMISSION | component_flags;
+ case SHADER_EVAL_COMBINED:
+ return pass_filter;
+ default:
+ return 0;
+ }
}
BakeManager::BakeManager()
{
- m_bake_data = NULL;
- m_is_baking = false;
+ type = SHADER_EVAL_BAKE;
+ pass_filter = 0;
+
need_update = true;
- m_shader_limit = 512 * 512;
}
BakeManager::~BakeManager()
{
- if (m_bake_data)
- delete m_bake_data;
}
bool BakeManager::get_baking()
{
- return m_is_baking;
-}
-
-void BakeManager::set_baking(const bool value)
-{
- m_is_baking = value;
+ return !object_name.empty();
}
-BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels)
+void BakeManager::set(Scene *scene,
+ const std::string &object_name_,
+ ShaderEvalType type_,
+ int pass_filter_)
{
- m_bake_data = new BakeData(object, tri_offset, num_pixels);
- return m_bake_data;
-}
-
-void BakeManager::set_shader_limit(const size_t x, const size_t y)
-{
- m_shader_limit = x * y;
- m_shader_limit = (size_t)pow(2, std::ceil(log(m_shader_limit) / log(2)));
-}
+ object_name = object_name_;
+ type = type_;
+ pass_filter = shader_type_to_pass_filter(type_, pass_filter_);
-bool BakeManager::bake(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress,
- ShaderEvalType shader_type,
- const int pass_filter,
- BakeData *bake_data,
- float result[])
-{
- size_t num_pixels = bake_data->size();
-
- int num_samples = aa_samples(scene, bake_data, shader_type);
+ Pass::add(PASS_BAKE_PRIMITIVE, scene->film->passes);
+ Pass::add(PASS_BAKE_DIFFERENTIAL, scene->film->passes);
- /* calculate the total pixel samples for the progress bar */
- total_pixel_samples = 0;
- for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
- size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
- total_pixel_samples += shader_size * num_samples;
+ if (type == SHADER_EVAL_UV) {
+ /* force UV to be available */
+ Pass::add(PASS_UV, scene->film->passes);
}
- progress.reset_sample();
- progress.set_total_pixel_samples(total_pixel_samples);
-
- /* needs to be up to date for baking specific AA samples */
- dscene->data.integrator.aa_samples = num_samples;
- device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
-
- for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
- size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
- /* setup input for device task */
- device_vector<uint4> d_input(device, "bake_input", MEM_READ_ONLY);
- uint4 *d_input_data = d_input.alloc(shader_size * 2);
- size_t d_input_size = 0;
-
- for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
- d_input_data[d_input_size++] = bake_data->data(i);
- d_input_data[d_input_size++] = bake_data->differentials(i);
- }
-
- if (d_input_size == 0) {
- m_is_baking = false;
- return false;
- }
-
- /* run device task */
- device_vector<float4> d_output(device, "bake_output", MEM_READ_WRITE);
- d_output.alloc(shader_size);
- d_output.zero_to_device();
- d_input.copy_to_device();
-
- DeviceTask task(DeviceTask::SHADER);
- task.shader_input = d_input.device_pointer;
- task.shader_output = d_output.device_pointer;
- task.shader_eval_type = shader_type;
- task.shader_filter = pass_filter;
- task.shader_x = 0;
- task.offset = shader_offset;
- task.shader_w = d_output.size();
- task.num_samples = num_samples;
- task.get_cancel = function_bind(&Progress::get_cancel, &progress);
- task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
-
- device->task_add(task);
- device->task_wait();
-
- if (progress.get_cancel()) {
- d_input.free();
- d_output.free();
- m_is_baking = false;
- return false;
- }
-
- d_output.copy_from_device(0, 1, d_output.size());
- d_input.free();
-
- /* read result */
- int k = 0;
-
- float4 *offset = d_output.data();
-
- size_t depth = 4;
- for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
- size_t index = i * depth;
- float4 out = offset[k++];
-
- if (bake_data->is_valid(i)) {
- for (size_t j = 0; j < 4; j++) {
- result[index + j] = out[j];
- }
- }
- }
-
- d_output.free();
+ /* force use_light_pass to be true if we bake more than just colors */
+ if (pass_filter & ~BAKE_FILTER_COLOR) {
+ Pass::add(PASS_LIGHT, scene->film->passes);
}
- m_is_baking = false;
- return true;
+ /* create device and update scene */
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ need_update = true;
}
void BakeManager::device_update(Device * /*device*/,
- DeviceScene * /*dscene*/,
- Scene * /*scene*/,
- Progress &progress)
+ DeviceScene *dscene,
+ Scene *scene,
+ Progress & /* progress */)
{
if (!need_update)
return;
- if (progress.get_cancel())
- return;
+ KernelIntegrator *kintegrator = &dscene->data.integrator;
+ KernelBake *kbake = &dscene->data.bake;
- need_update = false;
-}
-
-void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
-{
-}
-
-int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type)
-{
- if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
- return 1;
- }
- else if (type == SHADER_EVAL_NORMAL) {
- /* Only antialias normal if mesh has bump mapping. */
- Object *object = scene->objects[bake_data->object()];
+ kbake->type = type;
+ kbake->pass_filter = pass_filter;
- if (object->geometry) {
- foreach (Shader *shader, object->geometry->used_shaders) {
- if (shader->has_bump) {
- return scene->integrator->aa_samples;
- }
- }
+ int object_index = 0;
+ foreach (Object *object, scene->objects) {
+ const Geometry *geom = object->geometry;
+ if (object->name == object_name && geom->type == Geometry::MESH) {
+ kbake->object_index = object_index;
+ kbake->tri_offset = geom->prim_offset;
+ kintegrator->aa_samples = aa_samples(scene, object, type);
+ break;
}
- return 1;
- }
- else {
- return scene->integrator->aa_samples;
+ object_index++;
}
+
+ need_update = false;
}
-/* Keep it synced with kernel_bake.h logic */
-int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter)
+void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
{
- const int component_flags = pass_filter &
- (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
-
- switch (type) {
- case SHADER_EVAL_AO:
- return BAKE_FILTER_AO;
- case SHADER_EVAL_SHADOW:
- return BAKE_FILTER_DIRECT;
- case SHADER_EVAL_DIFFUSE:
- return BAKE_FILTER_DIFFUSE | component_flags;
- case SHADER_EVAL_GLOSSY:
- return BAKE_FILTER_GLOSSY | component_flags;
- case SHADER_EVAL_TRANSMISSION:
- return BAKE_FILTER_TRANSMISSION | component_flags;
- case SHADER_EVAL_COMBINED:
- return pass_filter;
- default:
- return 0;
- }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index 88537623efb..93e664c2ab1 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -25,67 +25,23 @@
CCL_NAMESPACE_BEGIN
-class BakeData {
- public:
- BakeData(const int object, const size_t tri_offset, const size_t num_pixels);
- ~BakeData();
-
- void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy);
- void set_null(int i);
- int object();
- size_t size();
- uint4 data(int i);
- uint4 differentials(int i);
- bool is_valid(int i);
-
- private:
- int m_object;
- size_t m_tri_offset;
- size_t m_num_pixels;
- vector<int> m_primitive;
- vector<float> m_u;
- vector<float> m_v;
- vector<float> m_dudx;
- vector<float> m_dudy;
- vector<float> m_dvdx;
- vector<float> m_dvdy;
-};
-
class BakeManager {
public:
BakeManager();
~BakeManager();
+ void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter);
bool get_baking();
- void set_baking(const bool value);
-
- BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels);
-
- void set_shader_limit(const size_t x, const size_t y);
-
- bool bake(Device *device,
- DeviceScene *dscene,
- Scene *scene,
- Progress &progress,
- ShaderEvalType shader_type,
- const int pass_filter,
- BakeData *bake_data,
- float result[]);
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void device_free(Device *device, DeviceScene *dscene);
- static int shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter);
- static int aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type);
-
bool need_update;
- size_t total_pixel_samples;
-
private:
- BakeData *m_bake_data;
- bool m_is_baking;
- size_t m_shader_limit;
+ ShaderEvalType type;
+ int pass_filter;
+ std::string object_name;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 2d89fb9ffba..b26366af852 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -459,6 +459,40 @@ bool RenderBuffers::get_pass_rect(
return false;
}
+bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels)
+{
+ if (buffer.data() == NULL) {
+ return false;
+ }
+
+ int pass_offset = 0;
+
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+
+ if (pass.type != type) {
+ pass_offset += pass.components;
+ continue;
+ }
+
+ float *out = buffer.data() + pass_offset;
+ int pass_stride = params.get_passes_size();
+ int size = params.width * params.height;
+
+ assert(pass.components == components);
+
+ for (int i = 0; i < size; i++, out += pass_stride, pixels += components) {
+ for (int j = 0; j < components; j++) {
+ out[j] = pixels[j];
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
/* Display Buffer */
DisplayBuffer::DisplayBuffer(Device *device, bool linear)
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 42efb031843..975bae2239c 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -92,6 +92,7 @@ class RenderBuffers {
const string &name, float exposure, int sample, int components, float *pixels);
bool get_denoising_pass_rect(
int offset, float exposure, int sample, int components, float *pixels);
+ bool set_pass_rect(PassType type, int components, float *pixels);
};
/* Display Buffer
@@ -130,7 +131,7 @@ class DisplayBuffer {
class RenderTile {
public:
- typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;
+ typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task;
Task task;
int x, y, w, h;
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index 26eda93fadd..d7cbf4a3581 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -196,6 +196,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_AOV_VALUE:
pass.components = 1;
break;
+ case PASS_BAKE_PRIMITIVE:
+ case PASS_BAKE_DIFFERENTIAL:
+ pass.components = 4;
+ break;
default:
assert(false);
break;
@@ -386,11 +390,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
if (pass.type <= PASS_CATEGORY_MAIN_END) {
kfilm->pass_flag |= pass_flag;
}
- else {
- assert(pass.type <= PASS_CATEGORY_LIGHT_END);
+ else if (pass.type <= PASS_CATEGORY_LIGHT_END) {
kfilm->use_light_pass = 1;
kfilm->light_pass_flag |= pass_flag;
}
+ else {
+ assert(pass.type <= PASS_CATEGORY_BAKE_END);
+ }
switch (pass.type) {
case PASS_COMBINED:
@@ -471,6 +477,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_volume_direct = kfilm->pass_stride;
break;
+ case PASS_BAKE_PRIMITIVE:
+ kfilm->pass_bake_primitive = kfilm->pass_stride;
+ break;
+ case PASS_BAKE_DIFFERENTIAL:
+ kfilm->pass_bake_differential = kfilm->pass_stride;
+ break;
+
#ifdef WITH_CYCLES_DEBUG
case PASS_BVH_TRAVERSED_NODES:
kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index f7df81a0601..361a1465aac 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -410,7 +410,16 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
- rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+
+ if (tile->state == Tile::DENOISE) {
+ rtile.task = RenderTile::DENOISE;
+ }
+ else if (read_bake_tile_cb) {
+ rtile.task = RenderTile::BAKE;
+ }
+ else {
+ rtile.task = RenderTile::PATH_TRACE;
+ }
tile_lock.unlock();
@@ -451,11 +460,20 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
rtile.buffers = tile->buffers;
rtile.sample = tile_manager.state.sample;
- /* this will tag tile as IN PROGRESS in blender-side render pipeline,
- * which is needed to highlight currently rendering tile before first
- * sample was processed for it
- */
- update_tile_sample(rtile);
+ if (read_bake_tile_cb) {
+ /* This will read any passes needed as input for baking. */
+ {
+ thread_scoped_lock tile_lock(tile_mutex);
+ read_bake_tile_cb(rtile);
+ }
+ rtile.buffers->buffer.copy_to_device();
+ }
+ else {
+ /* This will tag tile as IN PROGRESS in blender-side render pipeline,
+ * which is needed to highlight currently rendering tile before first
+ * sample was processed for it. */
+ update_tile_sample(rtile);
+ }
return true;
}
@@ -484,6 +502,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
bool delete_tile;
if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) {
+ /* Finished tile pixels write. */
if (write_render_tile_cb && params.progressive_refine == false) {
write_render_tile_cb(rtile);
}
@@ -494,6 +513,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
}
}
else {
+ /* In progress tile pixels update. */
if (update_render_tile_cb && params.progressive_refine == false) {
update_render_tile_cb(rtile, false);
}
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index f06952e8020..2707eed5531 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -148,6 +148,7 @@ class Session {
function<void(RenderTile &)> write_render_tile_cb;
function<void(RenderTile &, bool)> update_render_tile_cb;
+ function<void(RenderTile &)> read_bake_tile_cb;
explicit Session(const SessionParams &params);
~Session();