diff options
Diffstat (limited to 'intern')
23 files changed, 456 insertions, 546 deletions
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py index 3d2a52d0cf6..3ab352e52a2 100644 --- a/intern/cycles/blender/addon/__init__.py +++ b/intern/cycles/blender/addon/__init__.py @@ -82,8 +82,8 @@ class CyclesRender(bpy.types.RenderEngine): def render(self, depsgraph): engine.render(self, depsgraph) - def bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result): - engine.bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result) + def bake(self, depsgraph, obj, pass_type, pass_filter, width, height): + engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height) # viewport render def view_update(self, context, depsgraph): diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index a1b063430f5..e7ea5e7a1f6 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -168,11 +168,11 @@ def render(engine, depsgraph): _cycles.render(engine.session, depsgraph.as_pointer()) -def bake(engine, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result): +def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height): import _cycles session = getattr(engine, "session", None) if session is not None: - _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, object_id, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer()) + _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, width, height) def reset(engine, data, depsgraph): diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 8c7c0bc1daa..79c16856462 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -298,22 +298,18 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args) static PyObject *bake_func(PyObject * /*self*/, PyObject *args) { PyObject *pysession, *pydepsgraph, *pyobject; - PyObject *pypixel_array, *pyresult; const char *pass_type; - int num_pixels, depth, object_id, pass_filter; + int pass_filter, width, height; if (!PyArg_ParseTuple(args, - "OOOsiiOiiO", + "OOOsiii", &pysession, &pydepsgraph, &pyobject, &pass_type, &pass_filter, - &object_id, - &pypixel_array, - &num_pixels, - &depth, - &pyresult)) + &width, + &height)) return NULL; BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); @@ -326,23 +322,9 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args) RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr); BL::Object b_object(objectptr); - void *b_result = PyLong_AsVoidPtr(pyresult); - - PointerRNA bakepixelptr; - RNA_pointer_create(NULL, &RNA_BakePixel, PyLong_AsVoidPtr(pypixel_array), &bakepixelptr); - BL::BakePixel b_bake_pixel(bakepixelptr); - python_thread_state_save(&session->python_thread_state); - session->bake(b_depsgraph, - b_object, - pass_type, - pass_filter, - object_id, - b_bake_pixel, - (size_t)num_pixels, - depth, - (float *)b_result); + session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height); python_thread_state_restore(&session->python_thread_state); diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 5ea96d6bdfd..31b09695632 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -247,9 +247,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg void BlenderSession::free_session() { - if (sync) - delete sync; - + delete sync; delete session; } @@ -317,6 +315,7 @@ static void end_render_result(BL::RenderEngine &b_engine, void BlenderSession::do_write_update_render_tile(RenderTile &rtile, bool do_update_only, + bool do_read_only, bool highlight) { int x = rtile.x - session->tile_manager.params.full_x; @@ -342,7 +341,23 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile, BL::RenderLayer b_rlay = *b_single_rlay; - if (do_update_only) { + if (do_read_only) { + /* copy each pass */ + BL::RenderLayer::passes_iterator b_iter; + + for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) { + BL::RenderPass b_pass(*b_iter); + + /* find matching pass type */ + PassType pass_type = BlenderSync::get_pass_type(b_pass); + int components = b_pass.channels(); + + rtile.buffers->set_pass_rect(pass_type, components, (float *)b_pass.rect()); + } + + end_render_result(b_engine, b_rr, false, false, false); + } + else if (do_update_only) { /* Sample would be zero at initial tile update, which is only needed * to tag tile form blender side as IN PROGRESS for proper highlight * no buffers should be sent to blender yet. For denoise we also @@ -362,9 +377,14 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile, } } +void BlenderSession::read_render_tile(RenderTile &rtile) +{ + do_write_update_render_tile(rtile, false, true, false); +} + void BlenderSession::write_render_tile(RenderTile &rtile) { - do_write_update_render_tile(rtile, false, false); + do_write_update_render_tile(rtile, false, false, false); } void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight) @@ -374,9 +394,9 @@ void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight) * would need to be investigated a bit further, but for now shall be fine */ if (!b_engine.is_preview()) - do_write_update_render_tile(rtile, true, highlight); + do_write_update_render_tile(rtile, true, false, highlight); else - do_write_update_render_tile(rtile, false, false); + do_write_update_render_tile(rtile, false, false, false); } static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest) @@ -593,25 +613,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) #endif } -static void populate_bake_data(BakeData *data, - const int object_id, - BL::BakePixel &pixel_array, - const int num_pixels) -{ - BL::BakePixel bp = pixel_array; - - int i; - for (i = 0; i < num_pixels; i++) { - if (bp.object_id() == object_id) { - data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy()); - } - else { - data->set_null(i); - } - bp = bp.next(); - } -} - static int bake_pass_filter_get(const int pass_filter) { int flag = BAKE_FILTER_NONE; @@ -642,43 +643,26 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, BL::Object &b_object, const string &pass_type, const int pass_filter, - const int object_id, - BL::BakePixel &pixel_array, - const size_t num_pixels, - const int /*depth*/, - float result[]) + const int bake_width, + const int bake_height) { b_depsgraph = b_depsgraph_; ShaderEvalType shader_type = get_shader_type(pass_type); - - /* Set baking flag in advance, so kernel loading can check if we need - * any baking capabilities. - */ - scene->bake_manager->set_baking(true); - - /* ensure kernels are loaded before we do any scene updates */ - session->load_kernels(); - - if (shader_type == SHADER_EVAL_UV) { - /* force UV to be available */ - Pass::add(PASS_UV, scene->film->passes); - } - int bake_pass_filter = bake_pass_filter_get(pass_filter); - bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter); - /* force use_light_pass to be true if we bake more than just colors */ - if (bake_pass_filter & ~BAKE_FILTER_COLOR) { - Pass::add(PASS_LIGHT, scene->film->passes); - } + /* Initialize bake manager, before we load the baking kernels. */ + scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter); - /* create device and update scene */ - scene->film->tag_update(scene); - scene->integrator->tag_update(scene); + /* Passes are identified by name, so in order to return the combined pass we need to set the + * name. */ + Pass::add(PASS_COMBINED, scene->film->passes, "Combined"); + + session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1); + session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1); if (!session->progress.get_cancel()) { - /* update scene */ + /* Sync scene. */ BL::Object b_camera_override(b_engine.camera_override()); sync->sync_camera(b_render, b_camera_override, width, height, ""); sync->sync_data( @@ -686,75 +670,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, builtin_images_load(); } - BakeData *bake_data = NULL; + /* Object might have been disabled for rendering or excluded in some + * other way, in that case Blender will report a warning afterwards. */ + bool object_found = false; + foreach (Object *ob, scene->objects) { + if (ob->name == b_object.name()) { + object_found = true; + break; + } + } - if (!session->progress.get_cancel()) { - /* get buffer parameters */ + if (object_found && !session->progress.get_cancel()) { + /* Get session and buffer parameters. */ SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); - BufferParams buffer_params = BlenderSync::get_buffer_params( - b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); + session_params.progressive_refine = false; - scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y()); + BufferParams buffer_params; + buffer_params.width = bake_width; + buffer_params.height = bake_height; + buffer_params.passes = scene->film->passes; - /* set number of samples */ + /* Update session. */ session->tile_manager.set_samples(session_params.samples); session->reset(buffer_params, session_params.samples); - session->update_scene(); - - /* find object index. todo: is arbitrary - copied from mesh_displace.cpp */ - size_t object_index = OBJECT_NONE; - int tri_offset = 0; - - for (size_t i = 0; i < scene->objects.size(); i++) { - const Object *object = scene->objects[i]; - const Geometry *geom = object->geometry; - if (object->name == b_object.name() && geom->type == Geometry::MESH) { - const Mesh *mesh = static_cast<const Mesh *>(geom); - object_index = i; - tri_offset = mesh->prim_offset; - break; - } - } - - /* Object might have been disabled for rendering or excluded in some - * other way, in that case Blender will report a warning afterwards. */ - if (object_index != OBJECT_NONE) { - int object = object_index; - - bake_data = scene->bake_manager->init(object, tri_offset, num_pixels); - populate_bake_data(bake_data, object_id, pixel_array, num_pixels); - } - - /* set number of samples */ - session->tile_manager.set_samples(session_params.samples); - session->reset(buffer_params, session_params.samples); - session->update_scene(); session->progress.set_update_callback( function_bind(&BlenderSession::update_bake_progress, this)); } /* Perform bake. Check cancel to avoid crash with incomplete scene data. */ - if (!session->progress.get_cancel() && bake_data) { - scene->bake_manager->bake(scene->device, - &scene->dscene, - scene, - session->progress, - shader_type, - bake_pass_filter, - bake_data, - result); + if (object_found && !session->progress.get_cancel()) { + session->start(); + session->wait(); } - /* free all memory used (host and device), so we wouldn't leave render - * engine with extra memory allocated - */ - - session->device_free(); - - delete sync; - sync = NULL; + session->read_bake_tile_cb = function_null; + session->write_render_tile_cb = function_null; } void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay, diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index 3e6498bb655..34e952e312b 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -66,14 +66,12 @@ class BlenderSession { BL::Object &b_object, const string &pass_type, const int custom_flag, - const int object_id, - BL::BakePixel &pixel_array, - const size_t num_pixels, - const int depth, - float pixels[]); + const int bake_width, + const int bake_height); void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile); void write_render_tile(RenderTile &rtile); + void read_render_tile(RenderTile &rtile); /* update functions are used to update display buffer only after sample was rendered * only needed for better visual feedback */ @@ -155,7 +153,10 @@ class BlenderSession { void do_write_update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile, bool do_update_only); - void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight); + void do_write_update_render_tile(RenderTile &rtile, + bool do_update_only, + bool do_read_only, + bool highlight); void builtin_images_load(); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index e8031be7dd1..f16305e737d 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -481,6 +481,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass) MAP_PASS("AO", PASS_AO); MAP_PASS("Shadow", PASS_SHADOW); + MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE); + MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL); + #ifdef __KERNEL_DEBUG__ MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES); MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES); diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h index 3e397da895b..3f23f0fe4c5 100644 --- a/intern/cycles/device/cuda/device_cuda.h +++ b/intern/cycles/device/cuda/device_cuda.h @@ -223,7 +223,7 @@ class CUDADevice : public Device { CUdeviceptr d_wtile, CUstream stream = 0); - void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles); + void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles); void film_convert(DeviceTask &task, device_ptr buffer, diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp index ba5d479e0e7..acf53c3eb1b 100644 --- a/intern/cycles/device/cuda/device_cuda_impl.cpp +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -586,20 +586,23 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f cuMemGetInfo(&free_before, &total); /* Get kernel function. */ - CUfunction cuPathTrace; + CUfunction cuRender; - if (requested_features.use_integrator_branched) { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); + if (requested_features.use_baking) { + cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake")); + } + else if (requested_features.use_integrator_branched) { + cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace")); } else { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); + cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace")); } - cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1)); int min_blocks, num_threads_per_block; - cuda_assert(cuOccupancyMaxPotentialBlockSize( - &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); + cuda_assert( + cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0)); /* Launch kernel, using just 1 block appears sufficient to reserve * memory for all multiprocessors. It would be good to do this in @@ -609,7 +612,7 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f void *args[] = {&d_work_tiles, &total_work_size}; - cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); + cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); cuda_assert(cuCtxSynchronize()); @@ -1780,9 +1783,7 @@ void CUDADevice::adaptive_sampling_post(RenderTile &rtile, 0)); } -void CUDADevice::path_trace(DeviceTask &task, - RenderTile &rtile, - device_vector<WorkTile> &work_tiles) +void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles) { scoped_timer timer(&rtile.buffers->render_time); @@ -1790,21 +1791,24 @@ void CUDADevice::path_trace(DeviceTask &task, return; CUDAContextScope scope(this); - CUfunction cuPathTrace; + CUfunction cuRender; /* Get kernel function. */ - if (task.integrator_branched) { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); + if (rtile.task == RenderTile::BAKE) { + cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake")); + } + else if (task.integrator_branched) { + cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace")); } else { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); + cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace")); } if (have_error()) { return; } - cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1)); /* Allocate work tile. */ work_tiles.alloc(1); @@ -1822,8 +1826,8 @@ void CUDADevice::path_trace(DeviceTask &task, * remain conservative for GPUs connected to a display to avoid driver * timeouts and display freezing. */ int min_blocks, num_threads_per_block; - cuda_assert(cuOccupancyMaxPotentialBlockSize( - &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); + cuda_assert( + cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0)); if (!info.display_device) { min_blocks *= 8; } @@ -1851,7 +1855,7 @@ void CUDADevice::path_trace(DeviceTask &task, void *args[] = {&d_work_tiles, &total_work_size}; cuda_assert( - cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); + cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); /* Run the adaptive sampling kernels at selected samples aligned to step samples. */ uint filter_sample = sample + wtile->num_samples - 1; @@ -1957,10 +1961,7 @@ void CUDADevice::shader(DeviceTask &task) CUdeviceptr d_output = (CUdeviceptr)task.shader_output; /* get kernel function */ - if (task.shader_eval_type >= SHADER_EVAL_BAKE) { - cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake")); - } - else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { + if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace")); } else { @@ -2297,9 +2298,12 @@ void CUDADevice::thread_run(DeviceTask *task) split_kernel->path_trace(task, tile, void_buffer, void_buffer); } else { - path_trace(*task, tile, work_tiles); + render(*task, tile, work_tiles); } } + else if (tile.task == RenderTile::BAKE) { + render(*task, tile, work_tiles); + } else if (tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c701c14318f..fc6febd8cee 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -188,6 +188,7 @@ class CPUDevice : public Device { convert_to_byte_kernel; KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel; + KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel; KernelFunctions<void (*)( int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)> @@ -270,6 +271,7 @@ class CPUDevice : public Device { REGISTER_KERNEL(convert_to_half_float), REGISTER_KERNEL(convert_to_byte), REGISTER_KERNEL(shader), + REGISTER_KERNEL(bake), REGISTER_KERNEL(filter_divide_shadow), REGISTER_KERNEL(filter_get_feature), REGISTER_KERNEL(filter_write_feature), @@ -895,7 +897,7 @@ class CPUDevice : public Device { } } - void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) + void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) { const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; @@ -919,12 +921,21 @@ class CPUDevice : public Device { break; } - for (int y = tile.y; y < tile.y + tile.h; y++) { - for (int x = tile.x; x < tile.x + tile.w; x++) { - if (use_coverage) { - coverage.init_pixel(x, y); + if (tile.task == RenderTile::PATH_TRACE) { + for (int y = tile.y; y < tile.y + tile.h; y++) { + for (int x = tile.x; x < tile.x + tile.w; x++) { + if (use_coverage) { + coverage.init_pixel(x, y); + } + path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); + } + } + } + else { + for (int y = tile.y; y < tile.y + tile.h; y++) { + for (int x = tile.x; x < tile.x + tile.w; x++) { + bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); } - path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); } } tile.sample = sample + 1; @@ -1019,9 +1030,12 @@ class CPUDevice : public Device { split_kernel->path_trace(&task, tile, kgbuffer, void_buffer); } else { - path_trace(task, tile, kg); + render(task, tile, kg); } } + else if (tile.task == RenderTile::BAKE) { + render(task, tile, kg); + } else if (tile.task == RenderTile::DENOISE) { denoise(denoising, tile); task.update_progress(&tile, tile.w * tile.h); diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h index d6f4fb43061..389268e1c2a 100644 --- a/intern/cycles/device/opencl/device_opencl.h +++ b/intern/cycles/device/opencl/device_opencl.h @@ -451,6 +451,7 @@ class OpenCLDevice : public Device { device_ptr rgba_half); void shader(DeviceTask &task); void update_adaptive(DeviceTask &task, RenderTile &tile, int sample); + void bake(DeviceTask &task, RenderTile &tile); void denoise(RenderTile &tile, DenoisingTask &denoising); diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 2766f85d17c..beb3174b111 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -1367,6 +1367,9 @@ void OpenCLDevice::thread_run(DeviceTask *task) */ clFinish(cqCommandQueue); } + else if (tile.task == RenderTile::BAKE) { + bake(*task, tile); + } else if (tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; denoise(tile, denoising); @@ -1858,10 +1861,7 @@ void OpenCLDevice::shader(DeviceTask &task) cl_int d_offset = task.offset; OpenCLDevice::OpenCLProgram *program = &background_program; - if (task.shader_eval_type >= SHADER_EVAL_BAKE) { - program = &bake_program; - } - else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { + if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { program = &displace_program; } program->wait_for_availability(); @@ -1892,6 +1892,51 @@ void OpenCLDevice::shader(DeviceTask &task) } } +void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile) +{ + scoped_timer timer(&rtile.buffers->render_time); + + /* Cast arguments to cl types. */ + cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); + cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); + cl_int d_x = rtile.x; + cl_int d_y = rtile.y; + cl_int d_w = rtile.w; + cl_int d_h = rtile.h; + cl_int d_offset = rtile.offset; + cl_int d_stride = rtile.stride; + + bake_program.wait_for_availability(); + cl_kernel kernel = bake_program(); + + cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer); + + set_kernel_arg_buffers(kernel, &start_arg_index); + + start_arg_index += kernel_set_args( + kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride); + + int start_sample = rtile.start_sample; + int end_sample = rtile.start_sample + rtile.num_samples; + + for (int sample = start_sample; sample < end_sample; sample++) { + if (task.get_cancel()) { + if (task.need_finish_queue == false) + break; + } + + kernel_set_args(kernel, start_arg_index, sample); + + enqueue_kernel(kernel, d_w, d_h); + + rtile.sample = sample + 1; + + task.update_progress(&rtile, rtile.w * rtile.h); + } + + clFinish(cqCommandQueue); +} + string OpenCLDevice::kernel_build_options(const string *debug_src) { string build_options = "-cl-no-signed-zeros -cl-mad-enable "; diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index f1fc697553a..2709a9da734 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -18,38 +18,40 @@ CCL_NAMESPACE_BEGIN #ifdef __BAKING__ -ccl_device_inline void compute_light_pass( +ccl_device_noinline void compute_light_pass( KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample) { kernel_assert(kernel_data.film.use_light_pass); - PathRadiance L_sample; - PathState state; - Ray ray; float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - /* emission and indirect shader data memory used by various functions */ - ShaderData emission_sd, indirect_sd; - - ray.P = sd->P + sd->Ng; - ray.D = -sd->Ng; - ray.t = FLT_MAX; -# ifdef __CAMERA_MOTION__ - ray.time = 0.5f; -# endif + /* Emission and indirect shader data memory used by various functions. */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + ShaderData indirect_sd; - /* init radiance */ - path_radiance_init(kg, &L_sample); + /* Init radiance. */ + path_radiance_init(kg, L); - /* init path state */ - path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL); + /* Init path state. */ + PathState state; + path_state_init(kg, emission_sd, &state, rng_hash, sample, NULL); - /* evaluate surface shader */ + /* Evaluate surface shader. */ shader_eval_surface(kg, sd, &state, NULL, state.flag); /* TODO, disable more closures we don't need besides transparent */ shader_bsdf_disable_transparency(kg, sd); + /* Init ray. */ + Ray ray; + ray.P = sd->P + sd->Ng; + ray.D = -sd->Ng; + ray.t = FLT_MAX; +# ifdef __CAMERA_MOTION__ + ray.time = 0.5f; +# endif + # ifdef __BRANCHED_PATH__ if (!kernel_data.integrator.branched) { /* regular path tracer */ @@ -57,14 +59,13 @@ ccl_device_inline void compute_light_pass( /* sample ambient occlusion */ if (pass_filter & BAKE_FILTER_AO) { - kernel_path_ao( - kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd)); + kernel_path_ao(kg, sd, emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, sd)); } /* sample emission */ if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission); + path_radiance_accum_emission(kg, L, &state, throughput, emission); } bool is_sss_sample = false; @@ -77,12 +78,10 @@ ccl_device_inline void compute_light_pass( SubsurfaceIndirectRays ss_indirect; kernel_path_subsurface_init_indirect(&ss_indirect); if (kernel_path_subsurface_scatter( - kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) { + kg, sd, emission_sd, L, &state, &ray, &throughput, &ss_indirect)) { while (ss_indirect.num_rays) { - kernel_path_subsurface_setup_indirect( - kg, &ss_indirect, &state, &ray, &L_sample, &throughput); - kernel_path_indirect( - kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); + kernel_path_subsurface_setup_indirect(kg, &ss_indirect, &state, &ray, L, &throughput); + kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L); } is_sss_sample = true; } @@ -91,18 +90,18 @@ ccl_device_inline void compute_light_pass( /* sample light and BSDF */ if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) { - kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample); + kernel_path_surface_connect_light(kg, sd, emission_sd, throughput, &state, L); - if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) { + if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L->state, &ray)) { # ifdef __LAMP_MIS__ state.ray_t = 0.0f; # endif /* compute indirect light */ - kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); + kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L); /* sum and reset indirect light pass variables for the next samples */ - path_radiance_sum_indirect(&L_sample); - path_radiance_reset_indirect(&L_sample); + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); } } # ifdef __BRANCHED_PATH__ @@ -112,13 +111,13 @@ ccl_device_inline void compute_light_pass( /* sample ambient occlusion */ if (pass_filter & BAKE_FILTER_AO) { - kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput); + kernel_branched_path_ao(kg, sd, emission_sd, L, &state, throughput); } /* sample emission */ if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission); + path_radiance_accum_emission(kg, L, &state, throughput, emission); } # ifdef __SUBSURFACE__ @@ -127,7 +126,7 @@ ccl_device_inline void compute_light_pass( /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting * if scattering was successful. */ kernel_branched_path_subsurface_scatter( - kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput); + kg, sd, &indirect_sd, emission_sd, L, &state, &ray, throughput); } # endif @@ -138,19 +137,16 @@ ccl_device_inline void compute_light_pass( if (kernel_data.integrator.use_direct_light) { int all = kernel_data.integrator.sample_all_lights_direct; kernel_branched_path_surface_connect_light( - kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all); + kg, sd, emission_sd, &state, throughput, 1.0f, L, all); } # endif /* indirect light */ kernel_branched_path_surface_indirect_light( - kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample); + kg, sd, &indirect_sd, emission_sd, throughput, 1.0f, &state, L); } } # endif - - /* accumulate into master L */ - path_radiance_accum_sample(L, &L_sample); } /* this helps with AA but it's not the real solution as it does not AA the geometry @@ -225,41 +221,28 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg, return out; } -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, - ccl_global uint4 *input, - ccl_global float4 *output, - ShaderEvalType type, - int pass_filter, - int i, - int offset, - int sample) +ccl_device void kernel_bake_evaluate( + KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) { - ShaderData sd; - PathState state = {0}; - uint4 in = input[i * 2]; - uint4 diff = input[i * 2 + 1]; - - float3 out = make_float3(0.0f, 0.0f, 0.0f); + /* Setup render buffers. */ + const int index = offset + x + y * stride; + const int pass_stride = kernel_data.film.pass_stride; + buffer += index * pass_stride; - int object = in.x; - int prim = in.y; + ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive; + ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential; + ccl_global float *output = buffer + kernel_data.film.pass_combined; + int prim = __float_as_uint(primitive[1]); if (prim == -1) return; - float u = __uint_as_float(in.z); - float v = __uint_as_float(in.w); - - float dudx = __uint_as_float(diff.x); - float dudy = __uint_as_float(diff.y); - float dvdx = __uint_as_float(diff.z); - float dvdy = __uint_as_float(diff.w); + prim += kernel_data.bake.tri_offset; + /* Random number generator. */ + uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed; int num_samples = kernel_data.integrator.aa_samples; - /* random number generator */ - uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed); - float filter_x, filter_y; if (sample == 0) { filter_x = filter_y = 0.5f; @@ -268,23 +251,29 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y); } - /* subpixel u/v offset */ + /* Barycentric UV with subpixel offset. */ + float u = primitive[2]; + float v = primitive[3]; + + float dudx = differential[0]; + float dudy = differential[1]; + float dvdx = differential[2]; + float dvdy = differential[3]; + if (sample > 0) { u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), 1.0f - u); } - /* triangle */ + /* Shader data setup. */ + int object = kernel_data.bake.object_index; int shader; float3 P, Ng; triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - /* light passes */ - PathRadiance L; - path_radiance_init(kg, &L); - + ShaderData sd; shader_setup_from_sample( kg, &sd, @@ -302,7 +291,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, LAMP_NONE); sd.I = sd.N; - /* update differentials */ + /* Setup differentials. */ sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx; sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy; sd.du.dx = dudx; @@ -310,17 +299,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, sd.dv.dx = dvdx; sd.dv.dy = dvdy; - /* set RNG state for shaders that use sampling */ + /* Set RNG state for shaders that use sampling. */ + PathState state = {0}; state.rng_hash = rng_hash; state.rng_offset = 0; state.sample = sample; state.num_samples = num_samples; state.min_ray_pdf = FLT_MAX; - /* light passes if we need more than color */ - if (pass_filter & ~BAKE_FILTER_COLOR) + /* Light passes if we need more than color. */ + PathRadiance L; + int pass_filter = kernel_data.bake.pass_filter; + + if (kernel_data.bake.pass_filter & ~BAKE_FILTER_COLOR) compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample); + float3 out = make_float3(0.0f, 0.0f, 0.0f); + + ShaderEvalType type = (ShaderEvalType)kernel_data.bake.type; switch (type) { /* data passes */ case SHADER_EVAL_NORMAL: @@ -441,10 +437,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, } /* write output */ - const float output_fac = 1.0f / num_samples; - const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac; - - output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result; + const float4 result = make_float4(out.x, out.y, out.z, 1.0f); + kernel_write_pass_float4(output, result); } #endif /* __BAKING__ */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a1f8c35348d..304835a1685 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -395,6 +395,10 @@ typedef enum PassType { PASS_VOLUME_INDIRECT, /* No Scatter color since it's tricky to define what it would even mean. */ PASS_CATEGORY_LIGHT_END = 63, + + PASS_BAKE_PRIMITIVE, + PASS_BAKE_DIFFERENTIAL, + PASS_CATEGORY_BAKE_END = 95 } PassType; #define PASS_ANY (~0) @@ -1248,6 +1252,10 @@ typedef struct KernelFilm { float4 xyz_to_b; float4 rgb_to_y; + int pass_bake_primitive; + int pass_bake_differential; + int pad; + #ifdef __KERNEL_DEBUG__ int pass_bvh_traversed_nodes; int pass_bvh_traversed_instances; @@ -1427,6 +1435,14 @@ typedef struct KernelTables { } KernelTables; static_assert_align(KernelTables, 16); +typedef struct KernelBake { + int object_index; + int tri_offset; + int type; + int pass_filter; +} KernelBake; +static_assert_align(KernelBake, 16); + typedef struct KernelData { KernelCamera cam; KernelFilm film; @@ -1435,6 +1451,7 @@ typedef struct KernelData { KernelBVH bvh; KernelCurves curve; KernelTables tables; + KernelBake bake; } KernelData; static_assert_align(KernelData, 16); diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 683f4b88d79..ea3103f12c3 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -46,6 +46,9 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample); +void KERNEL_FUNCTION_FULL_NAME(bake)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); + /* Split kernels */ void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg, diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 091e53cfd83..5aa3fb14318 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -132,6 +132,18 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, # endif /* KERNEL_STUB */ } +/* Bake */ + +void KERNEL_FUNCTION_FULL_NAME(bake)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) +{ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, bake); +# else + kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride); +# endif /* KERNEL_STUB */ +} + /* Shader Evaluate */ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, @@ -146,12 +158,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, # ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, shader); # else - if (type >= SHADER_EVAL_BAKE) { -# ifdef __BAKING__ - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample); -# endif - } - else if (type == SHADER_EVAL_DISPLACE) { + if (type == SHADER_EVAL_DISPLACE) { kernel_displace_evaluate(kg, input, output, i); } else { diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index c4c810c6a82..d4f41132a11 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -214,13 +214,16 @@ kernel_cuda_background(uint4 *input, #ifdef __BAKING__ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample) +kernel_cuda_bake(WorkTile *tile, uint total_work_size) { - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int work_index = ccl_global_id(0); + + if(work_index < total_work_size) { + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - if(x < sx + sw) { KernelGlobals kg; - kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); + kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } } #endif diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 35f942b3e9b..6044182a51a 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -15,6 +15,7 @@ */ #include "render/bake.h" +#include "render/buffers.h" #include "render/integrator.h" #include "render/mesh.h" #include "render/object.h" @@ -24,272 +25,130 @@ CCL_NAMESPACE_BEGIN -BakeData::BakeData(const int object, const size_t tri_offset, const size_t num_pixels) - : m_object(object), m_tri_offset(tri_offset), m_num_pixels(num_pixels) +static int aa_samples(Scene *scene, Object *object, ShaderEvalType type) { - m_primitive.resize(num_pixels); - m_u.resize(num_pixels); - m_v.resize(num_pixels); - m_dudx.resize(num_pixels); - m_dudy.resize(num_pixels); - m_dvdx.resize(num_pixels); - m_dvdy.resize(num_pixels); -} - -BakeData::~BakeData() -{ - m_primitive.clear(); - m_u.clear(); - m_v.clear(); - m_dudx.clear(); - m_dudy.clear(); - m_dvdx.clear(); - m_dvdy.clear(); -} - -void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy) -{ - m_primitive[i] = (prim == -1 ? -1 : m_tri_offset + prim); - m_u[i] = uv[0]; - m_v[i] = uv[1]; - m_dudx[i] = dudx; - m_dudy[i] = dudy; - m_dvdx[i] = dvdx; - m_dvdy[i] = dvdy; -} - -void BakeData::set_null(int i) -{ - m_primitive[i] = -1; -} - -int BakeData::object() -{ - return m_object; -} - -size_t BakeData::size() -{ - return m_num_pixels; -} + if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) { + return 1; + } + else if (type == SHADER_EVAL_NORMAL) { + /* Only antialias normal if mesh has bump mapping. */ + if (object->geometry) { + foreach (Shader *shader, object->geometry->used_shaders) { + if (shader->has_bump) { + return scene->integrator->aa_samples; + } + } + } -bool BakeData::is_valid(int i) -{ - return m_primitive[i] != -1; + return 1; + } + else { + return scene->integrator->aa_samples; + } } -uint4 BakeData::data(int i) +/* Keep it synced with kernel_bake.h logic */ +static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter) { - return make_uint4(m_object, m_primitive[i], __float_as_int(m_u[i]), __float_as_int(m_v[i])); -} + const int component_flags = pass_filter & + (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR); -uint4 BakeData::differentials(int i) -{ - return make_uint4(__float_as_int(m_dudx[i]), - __float_as_int(m_dudy[i]), - __float_as_int(m_dvdx[i]), - __float_as_int(m_dvdy[i])); + switch (type) { + case SHADER_EVAL_AO: + return BAKE_FILTER_AO; + case SHADER_EVAL_SHADOW: + return BAKE_FILTER_DIRECT; + case SHADER_EVAL_DIFFUSE: + return BAKE_FILTER_DIFFUSE | component_flags; + case SHADER_EVAL_GLOSSY: + return BAKE_FILTER_GLOSSY | component_flags; + case SHADER_EVAL_TRANSMISSION: + return BAKE_FILTER_TRANSMISSION | component_flags; + case SHADER_EVAL_COMBINED: + return pass_filter; + default: + return 0; + } } BakeManager::BakeManager() { - m_bake_data = NULL; - m_is_baking = false; + type = SHADER_EVAL_BAKE; + pass_filter = 0; + need_update = true; - m_shader_limit = 512 * 512; } BakeManager::~BakeManager() { - if (m_bake_data) - delete m_bake_data; } bool BakeManager::get_baking() { - return m_is_baking; -} - -void BakeManager::set_baking(const bool value) -{ - m_is_baking = value; + return !object_name.empty(); } -BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels) +void BakeManager::set(Scene *scene, + const std::string &object_name_, + ShaderEvalType type_, + int pass_filter_) { - m_bake_data = new BakeData(object, tri_offset, num_pixels); - return m_bake_data; -} - -void BakeManager::set_shader_limit(const size_t x, const size_t y) -{ - m_shader_limit = x * y; - m_shader_limit = (size_t)pow(2, std::ceil(log(m_shader_limit) / log(2))); -} + object_name = object_name_; + type = type_; + pass_filter = shader_type_to_pass_filter(type_, pass_filter_); -bool BakeManager::bake(Device *device, - DeviceScene *dscene, - Scene *scene, - Progress &progress, - ShaderEvalType shader_type, - const int pass_filter, - BakeData *bake_data, - float result[]) -{ - size_t num_pixels = bake_data->size(); - - int num_samples = aa_samples(scene, bake_data, shader_type); + Pass::add(PASS_BAKE_PRIMITIVE, scene->film->passes); + Pass::add(PASS_BAKE_DIFFERENTIAL, scene->film->passes); - /* calculate the total pixel samples for the progress bar */ - total_pixel_samples = 0; - for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { - size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - total_pixel_samples += shader_size * num_samples; + if (type == SHADER_EVAL_UV) { + /* force UV to be available */ + Pass::add(PASS_UV, scene->film->passes); } - progress.reset_sample(); - progress.set_total_pixel_samples(total_pixel_samples); - - /* needs to be up to date for baking specific AA samples */ - dscene->data.integrator.aa_samples = num_samples; - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - - for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { - size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - /* setup input for device task */ - device_vector<uint4> d_input(device, "bake_input", MEM_READ_ONLY); - uint4 *d_input_data = d_input.alloc(shader_size * 2); - size_t d_input_size = 0; - - for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) { - d_input_data[d_input_size++] = bake_data->data(i); - d_input_data[d_input_size++] = bake_data->differentials(i); - } - - if (d_input_size == 0) { - m_is_baking = false; - return false; - } - - /* run device task */ - device_vector<float4> d_output(device, "bake_output", MEM_READ_WRITE); - d_output.alloc(shader_size); - d_output.zero_to_device(); - d_input.copy_to_device(); - - DeviceTask task(DeviceTask::SHADER); - task.shader_input = d_input.device_pointer; - task.shader_output = d_output.device_pointer; - task.shader_eval_type = shader_type; - task.shader_filter = pass_filter; - task.shader_x = 0; - task.offset = shader_offset; - task.shader_w = d_output.size(); - task.num_samples = num_samples; - task.get_cancel = function_bind(&Progress::get_cancel, &progress); - task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2); - - device->task_add(task); - device->task_wait(); - - if (progress.get_cancel()) { - d_input.free(); - d_output.free(); - m_is_baking = false; - return false; - } - - d_output.copy_from_device(0, 1, d_output.size()); - d_input.free(); - - /* read result */ - int k = 0; - - float4 *offset = d_output.data(); - - size_t depth = 4; - for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) { - size_t index = i * depth; - float4 out = offset[k++]; - - if (bake_data->is_valid(i)) { - for (size_t j = 0; j < 4; j++) { - result[index + j] = out[j]; - } - } - } - - d_output.free(); + /* force use_light_pass to be true if we bake more than just colors */ + if (pass_filter & ~BAKE_FILTER_COLOR) { + Pass::add(PASS_LIGHT, scene->film->passes); } - m_is_baking = false; - return true; + /* create device and update scene */ + scene->film->tag_update(scene); + scene->integrator->tag_update(scene); + + need_update = true; } void BakeManager::device_update(Device * /*device*/, - DeviceScene * /*dscene*/, - Scene * /*scene*/, - Progress &progress) + DeviceScene *dscene, + Scene *scene, + Progress & /* progress */) { if (!need_update) return; - if (progress.get_cancel()) - return; + KernelIntegrator *kintegrator = &dscene->data.integrator; + KernelBake *kbake = &dscene->data.bake; - need_update = false; -} - -void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/) -{ -} - -int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type) -{ - if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) { - return 1; - } - else if (type == SHADER_EVAL_NORMAL) { - /* Only antialias normal if mesh has bump mapping. */ - Object *object = scene->objects[bake_data->object()]; + kbake->type = type; + kbake->pass_filter = pass_filter; - if (object->geometry) { - foreach (Shader *shader, object->geometry->used_shaders) { - if (shader->has_bump) { - return scene->integrator->aa_samples; - } - } + int object_index = 0; + foreach (Object *object, scene->objects) { + const Geometry *geom = object->geometry; + if (object->name == object_name && geom->type == Geometry::MESH) { + kbake->object_index = object_index; + kbake->tri_offset = geom->prim_offset; + kintegrator->aa_samples = aa_samples(scene, object, type); + break; } - return 1; - } - else { - return scene->integrator->aa_samples; + object_index++; } + + need_update = false; } -/* Keep it synced with kernel_bake.h logic */ -int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter) +void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/) { - const int component_flags = pass_filter & - (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR); - - switch (type) { - case SHADER_EVAL_AO: - return BAKE_FILTER_AO; - case SHADER_EVAL_SHADOW: - return BAKE_FILTER_DIRECT; - case SHADER_EVAL_DIFFUSE: - return BAKE_FILTER_DIFFUSE | component_flags; - case SHADER_EVAL_GLOSSY: - return BAKE_FILTER_GLOSSY | component_flags; - case SHADER_EVAL_TRANSMISSION: - return BAKE_FILTER_TRANSMISSION | component_flags; - case SHADER_EVAL_COMBINED: - return pass_filter; - default: - return 0; - } } CCL_NAMESPACE_END diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 88537623efb..93e664c2ab1 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -25,67 +25,23 @@ CCL_NAMESPACE_BEGIN -class BakeData { - public: - BakeData(const int object, const size_t tri_offset, const size_t num_pixels); - ~BakeData(); - - void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy); - void set_null(int i); - int object(); - size_t size(); - uint4 data(int i); - uint4 differentials(int i); - bool is_valid(int i); - - private: - int m_object; - size_t m_tri_offset; - size_t m_num_pixels; - vector<int> m_primitive; - vector<float> m_u; - vector<float> m_v; - vector<float> m_dudx; - vector<float> m_dudy; - vector<float> m_dvdx; - vector<float> m_dvdy; -}; - class BakeManager { public: BakeManager(); ~BakeManager(); + void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter); bool get_baking(); - void set_baking(const bool value); - - BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels); - - void set_shader_limit(const size_t x, const size_t y); - - bool bake(Device *device, - DeviceScene *dscene, - Scene *scene, - Progress &progress, - ShaderEvalType shader_type, - const int pass_filter, - BakeData *bake_data, - float result[]); void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); void device_free(Device *device, DeviceScene *dscene); - static int shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter); - static int aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type); - bool need_update; - size_t total_pixel_samples; - private: - BakeData *m_bake_data; - bool m_is_baking; - size_t m_shader_limit; + ShaderEvalType type; + int pass_filter; + std::string object_name; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 2d89fb9ffba..b26366af852 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -459,6 +459,40 @@ bool RenderBuffers::get_pass_rect( return false; } +bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels) +{ + if (buffer.data() == NULL) { + return false; + } + + int pass_offset = 0; + + for (size_t j = 0; j < params.passes.size(); j++) { + Pass &pass = params.passes[j]; + + if (pass.type != type) { + pass_offset += pass.components; + continue; + } + + float *out = buffer.data() + pass_offset; + int pass_stride = params.get_passes_size(); + int size = params.width * params.height; + + assert(pass.components == components); + + for (int i = 0; i < size; i++, out += pass_stride, pixels += components) { + for (int j = 0; j < components; j++) { + out[j] = pixels[j]; + } + } + + return true; + } + + return false; +} + /* Display Buffer */ DisplayBuffer::DisplayBuffer(Device *device, bool linear) diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 42efb031843..975bae2239c 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -92,6 +92,7 @@ class RenderBuffers { const string &name, float exposure, int sample, int components, float *pixels); bool get_denoising_pass_rect( int offset, float exposure, int sample, int components, float *pixels); + bool set_pass_rect(PassType type, int components, float *pixels); }; /* Display Buffer @@ -130,7 +131,7 @@ class DisplayBuffer { class RenderTile { public: - typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task; + typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task; Task task; int x, y, w, h; diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 26eda93fadd..d7cbf4a3581 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -196,6 +196,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name) case PASS_AOV_VALUE: pass.components = 1; break; + case PASS_BAKE_PRIMITIVE: + case PASS_BAKE_DIFFERENTIAL: + pass.components = 4; + break; default: assert(false); break; @@ -386,11 +390,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) if (pass.type <= PASS_CATEGORY_MAIN_END) { kfilm->pass_flag |= pass_flag; } - else { - assert(pass.type <= PASS_CATEGORY_LIGHT_END); + else if (pass.type <= PASS_CATEGORY_LIGHT_END) { kfilm->use_light_pass = 1; kfilm->light_pass_flag |= pass_flag; } + else { + assert(pass.type <= PASS_CATEGORY_BAKE_END); + } switch (pass.type) { case PASS_COMBINED: @@ -471,6 +477,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_volume_direct = kfilm->pass_stride; break; + case PASS_BAKE_PRIMITIVE: + kfilm->pass_bake_primitive = kfilm->pass_stride; + break; + case PASS_BAKE_DIFFERENTIAL: + kfilm->pass_bake_differential = kfilm->pass_stride; + break; + #ifdef WITH_CYCLES_DEBUG case PASS_BVH_TRAVERSED_NODES: kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index f7df81a0601..361a1465aac 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -410,7 +410,16 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; rtile.tile_index = tile->index; - rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE; + + if (tile->state == Tile::DENOISE) { + rtile.task = RenderTile::DENOISE; + } + else if (read_bake_tile_cb) { + rtile.task = RenderTile::BAKE; + } + else { + rtile.task = RenderTile::PATH_TRACE; + } tile_lock.unlock(); @@ -451,11 +460,20 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ rtile.buffers = tile->buffers; rtile.sample = tile_manager.state.sample; - /* this will tag tile as IN PROGRESS in blender-side render pipeline, - * which is needed to highlight currently rendering tile before first - * sample was processed for it - */ - update_tile_sample(rtile); + if (read_bake_tile_cb) { + /* This will read any passes needed as input for baking. */ + { + thread_scoped_lock tile_lock(tile_mutex); + read_bake_tile_cb(rtile); + } + rtile.buffers->buffer.copy_to_device(); + } + else { + /* This will tag tile as IN PROGRESS in blender-side render pipeline, + * which is needed to highlight currently rendering tile before first + * sample was processed for it. */ + update_tile_sample(rtile); + } return true; } @@ -484,6 +502,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise) bool delete_tile; if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) { + /* Finished tile pixels write. */ if (write_render_tile_cb && params.progressive_refine == false) { write_render_tile_cb(rtile); } @@ -494,6 +513,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise) } } else { + /* In progress tile pixels update. */ if (update_render_tile_cb && params.progressive_refine == false) { update_render_tile_cb(rtile, false); } diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index f06952e8020..2707eed5531 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -148,6 +148,7 @@ class Session { function<void(RenderTile &)> write_render_tile_cb; function<void(RenderTile &, bool)> update_render_tile_cb; + function<void(RenderTile &)> read_bake_tile_cb; explicit Session(const SessionParams ¶ms); ~Session(); |