diff options
Diffstat (limited to 'intern/cycles/render')
54 files changed, 2704 insertions, 1375 deletions
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 2bb3515cb2b..ace1bd33fdf 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -1,14 +1,6 @@ set(INC - . - ../device - ../graph - ../kernel - ../kernel/svm - ../kernel/osl - ../bvh - ../subd - ../util + .. ../../glew-mx ) diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp index 12fa58a84e2..ea685c6f6e1 100644 --- a/intern/cycles/render/attribute.cpp +++ b/intern/cycles/render/attribute.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "image.h" -#include "mesh.h" -#include "attribute.h" +#include "render/image.h" +#include "render/mesh.h" +#include "render/attribute.h" -#include "util_debug.h" -#include "util_foreach.h" -#include "util_transform.h" +#include "util/util_debug.h" +#include "util/util_foreach.h" +#include "util/util_transform.h" CCL_NAMESPACE_BEGIN @@ -506,6 +506,16 @@ Attribute *AttributeSet::find(AttributeRequest& req) return find(req.std); } +void AttributeSet::remove(Attribute *attribute) +{ + if(attribute->std == ATTR_STD_NONE) { + remove(attribute->name); + } + else { + remove(attribute->std); + } +} + void AttributeSet::resize(bool reserve_only) { foreach(Attribute& attr, attributes) { diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h index f4538c76369..d15ee401a72 100644 --- a/intern/cycles/render/attribute.h +++ b/intern/cycles/render/attribute.h @@ -17,12 +17,12 @@ #ifndef __ATTRIBUTE_H__ #define __ATTRIBUTE_H__ -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "util_list.h" -#include "util_param.h" -#include "util_types.h" -#include "util_vector.h" +#include "util/util_list.h" +#include "util/util_param.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -120,6 +120,8 @@ public: Attribute *find(AttributeRequest& req); + void remove(Attribute *attribute); + void resize(bool reserve_only = false); void clear(); }; diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp index 8d7d7b847fd..930debe1e33 100644 --- a/intern/cycles/render/background.cpp +++ b/intern/cycles/render/background.cpp @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "background.h" -#include "device.h" -#include "integrator.h" -#include "graph.h" -#include "nodes.h" -#include "scene.h" -#include "shader.h" - -#include "util_foreach.h" -#include "util_math.h" -#include "util_types.h" +#include "render/background.h" +#include "device/device.h" +#include "render/integrator.h" +#include "render/graph.h" +#include "render/nodes.h" +#include "render/scene.h" +#include "render/shader.h" + +#include "util/util_foreach.h" +#include "util/util_math.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h index 8029c6a9e80..db20b6ebf87 100644 --- a/intern/cycles/render/background.h +++ b/intern/cycles/render/background.h @@ -17,9 +17,9 @@ #ifndef __BACKGROUND_H__ #define __BACKGROUND_H__ -#include "node.h" +#include "graph/node.h" -#include "util_types.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -30,7 +30,7 @@ class Shader; class Background : public Node { public: - NODE_DECLARE; + NODE_DECLARE float ao_factor; float ao_distance; diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 13310a61761..2bedf3668f7 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "bake.h" -#include "integrator.h" +#include "render/bake.h" +#include "render/integrator.h" CCL_NAMESPACE_BEGIN @@ -135,20 +135,16 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre { size_t num_pixels = bake_data->size(); - progress.reset_sample(); - this->num_parts = 0; + int num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; - /* calculate the total parts for the progress bar */ + /* calculate the total pixel samples for the progress bar */ + total_pixel_samples = 0; for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - - DeviceTask task(DeviceTask::SHADER); - task.shader_w = shader_size; - - this->num_parts += device->get_split_task_count(task); + total_pixel_samples += shader_size * num_samples; } - - this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; + progress.reset_sample(); + progress.set_total_pixel_samples(total_pixel_samples); for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); @@ -175,9 +171,10 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_alloc("bake_input", d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_READ_WRITE); + device->mem_alloc("bake_output", d_output, MEM_READ_WRITE); + device->mem_zero(d_output); DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; @@ -187,9 +184,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre task.shader_x = 0; task.offset = shader_offset; task.shader_w = d_output.size(); - task.num_samples = this->num_samples; + task.num_samples = num_samples; task.get_cancel = function_bind(&Progress::get_cancel, &progress); - task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); + task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2); device->task_add(task); device->task_wait(); diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 8377e387197..ceb94cfb682 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -17,11 +17,11 @@ #ifndef __BAKE_H__ #define __BAKE_H__ -#include "device.h" -#include "scene.h" +#include "device/device.h" +#include "render/scene.h" -#include "util_progress.h" -#include "util_vector.h" +#include "util/util_progress.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -73,8 +73,7 @@ public: bool need_update; - int num_samples; - int num_parts; + size_t total_pixel_samples; private: BakeData *m_bake_data; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index cb20e811708..98c7ff8ce14 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -16,17 +16,17 @@ #include <stdlib.h> -#include "buffers.h" -#include "device.h" - -#include "util_debug.h" -#include "util_foreach.h" -#include "util_hash.h" -#include "util_image.h" -#include "util_math.h" -#include "util_opengl.h" -#include "util_time.h" -#include "util_types.h" +#include "render/buffers.h" +#include "device/device.h" + +#include "util/util_debug.h" +#include "util/util_foreach.h" +#include "util/util_hash.h" +#include "util/util_image.h" +#include "util/util_math.h" +#include "util/util_opengl.h" +#include "util/util_time.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -42,6 +42,9 @@ BufferParams::BufferParams() full_width = 0; full_height = 0; + denoising_data_pass = false; + denoising_clean_pass = false; + Pass::add(PASS_COMBINED, passes); } @@ -68,10 +71,25 @@ int BufferParams::get_passes_size() for(size_t i = 0; i < passes.size(); i++) size += passes[i].components; - + + if(denoising_data_pass) { + size += DENOISING_PASS_SIZE_BASE; + if(denoising_clean_pass) size += DENOISING_PASS_SIZE_CLEAN; + } + return align_up(size, 4); } +int BufferParams::get_denoising_offset() +{ + int offset = 0; + + for(size_t i = 0; i < passes.size(); i++) + offset += passes[i].components; + + return offset; +} + /* Render Buffer Task */ RenderTile::RenderTile() @@ -90,7 +108,6 @@ RenderTile::RenderTile() stride = 0; buffer = 0; - rng_state = 0; buffers = NULL; } @@ -113,11 +130,6 @@ void RenderBuffers::device_free() device->mem_free(buffer); buffer.clear(); } - - if(rng_state.device_pointer) { - device->mem_free(rng_state); - rng_state.clear(); - } } void RenderBuffers::reset(Device *device, BufferParams& params_) @@ -129,21 +141,62 @@ void RenderBuffers::reset(Device *device, BufferParams& params_) /* allocate buffer */ buffer.resize(params.width*params.height*params.get_passes_size()); - device->mem_alloc(buffer, MEM_READ_WRITE); + device->mem_alloc("render_buffer", buffer, MEM_READ_WRITE); device->mem_zero(buffer); +} - /* allocate rng state */ - rng_state.resize(params.width, params.height); - - device->mem_alloc(rng_state, MEM_READ_WRITE); +void RenderBuffers::zero(Device *device) +{ + if(buffer.device_pointer) { + device->mem_zero(buffer); + } } -bool RenderBuffers::copy_from_device() +bool RenderBuffers::copy_from_device(Device *from_device) { if(!buffer.device_pointer) return false; - device->mem_copy_from(buffer, 0, params.width, params.height, params.get_passes_size()*sizeof(float)); + if(!from_device) { + from_device = device; + } + + from_device->mem_copy_from(buffer, 0, params.width, params.height, params.get_passes_size()*sizeof(float)); + + return true; +} + +bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels) +{ + float scale = 1.0f/sample; + + if(offset == DENOISING_PASS_COLOR) { + scale *= exposure; + } + else if(offset == DENOISING_PASS_COLOR_VAR) { + scale *= exposure*exposure; + } + + offset += params.get_denoising_offset(); + float *in = (float*)buffer.data_pointer + offset; + int pass_stride = params.get_passes_size(); + int size = params.width*params.height; + + if(components == 1) { + for(int i = 0; i < size; i++, in += pass_stride, pixels++) { + pixels[0] = in[0]*scale; + } + } + else if(components == 3) { + for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) { + pixels[0] = in[0]*scale; + pixels[1] = in[1]*scale; + pixels[2] = in[2]*scale; + } + } + else { + return false; + } return true; } @@ -185,13 +238,11 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int } } #ifdef WITH_CYCLES_DEBUG - else if(type == PASS_BVH_TRAVERSAL_STEPS) { - for(int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = f*scale; - } - } - else if(type == PASS_RAY_BOUNCES) { + else if(type == PASS_BVH_TRAVERSED_NODES || + type == PASS_BVH_TRAVERSED_INSTANCES || + type == PASS_BVH_INTERSECTIONS || + type == PASS_RAY_BOUNCES) + { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; pixels[0] = f*scale; diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index c9c2a21079a..2780fc8a68d 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -17,16 +17,16 @@ #ifndef __BUFFERS_H__ #define __BUFFERS_H__ -#include "device_memory.h" +#include "device/device_memory.h" -#include "film.h" +#include "render/film.h" -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "util_half.h" -#include "util_string.h" -#include "util_thread.h" -#include "util_types.h" +#include "util/util_half.h" +#include "util/util_string.h" +#include "util/util_thread.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -51,6 +51,9 @@ public: /* passes */ array<Pass> passes; + bool denoising_data_pass; + /* If only some light path types should be denoised, an additional pass is needed. */ + bool denoising_clean_pass; /* functions */ BufferParams(); @@ -59,6 +62,7 @@ public: bool modified(const BufferParams& params); void add_pass(PassType type); int get_passes_size(); + int get_denoising_offset(); }; /* Render Buffers */ @@ -70,21 +74,21 @@ public: /* float buffer */ device_vector<float> buffer; - /* random number generator state */ - device_vector<uint> rng_state; + + Device *device; explicit RenderBuffers(Device *device); ~RenderBuffers(); void reset(Device *device, BufferParams& params); + void zero(Device *device); - bool copy_from_device(); + bool copy_from_device(Device *from_device = NULL); bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels); + bool get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels); protected: void device_free(); - - Device *device; }; /* Display Buffer @@ -131,6 +135,9 @@ protected: class RenderTile { public: + typedef enum { PATH_TRACE, DENOISE } Task; + + Task task; int x, y, w, h; int start_sample; int num_samples; @@ -138,9 +145,9 @@ public: int resolution; int offset; int stride; + int tile_index; device_ptr buffer; - device_ptr rng_state; RenderBuffers *buffers; diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index c8c51ec96d2..83ff8a10618 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -14,18 +14,18 @@ * limitations under the License. */ -#include "camera.h" -#include "mesh.h" -#include "object.h" -#include "scene.h" -#include "tables.h" - -#include "device.h" - -#include "util_foreach.h" -#include "util_function.h" -#include "util_math_cdf.h" -#include "util_vector.h" +#include "render/camera.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/scene.h" +#include "render/tables.h" + +#include "device/device.h" + +#include "util/util_foreach.h" +#include "util/util_function.h" +#include "util/util_math_cdf.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index 141ef9cccef..dd6b831b347 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -17,13 +17,13 @@ #ifndef __CAMERA_H__ #define __CAMERA_H__ -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "node.h" +#include "graph/node.h" -#include "util_boundbox.h" -#include "util_transform.h" -#include "util_types.h" +#include "util/util_boundbox.h" +#include "util/util_transform.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -39,7 +39,7 @@ class Scene; class Camera : public Node { public: - NODE_DECLARE; + NODE_DECLARE /* Specifies an offset for the shutter's time interval. */ enum MotionPosition { diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp index b7f25663bc3..943b218f0e4 100644 --- a/intern/cycles/render/constant_fold.cpp +++ b/intern/cycles/render/constant_fold.cpp @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "constant_fold.h" -#include "graph.h" +#include "render/constant_fold.h" +#include "render/graph.h" -#include "util_foreach.h" -#include "util_logging.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" CCL_NAMESPACE_BEGIN @@ -160,6 +160,14 @@ bool ConstantFolder::try_bypass_or_make_constant(ShaderInput *input, bool clamp) bypass(input->link); return true; } + else { + /* disconnect other inputs if we can't fully bypass due to clamp */ + foreach(ShaderInput *other, node->inputs) { + if(other != input && other->link) { + graph->disconnect(other); + } + } + } return false; } diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h index 7962698319f..33f93b8c0ab 100644 --- a/intern/cycles/render/constant_fold.h +++ b/intern/cycles/render/constant_fold.h @@ -17,8 +17,8 @@ #ifndef __CONSTANT_FOLD_H__ #define __CONSTANT_FOLD_H__ -#include "util_types.h" -#include "svm_types.h" +#include "util/util_types.h" +#include "kernel/svm/svm_types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp index f671eb19cae..4c085b928fb 100644 --- a/intern/cycles/render/curves.cpp +++ b/intern/cycles/render/curves.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "device.h" -#include "curves.h" -#include "mesh.h" -#include "object.h" -#include "scene.h" - -#include "util_foreach.h" -#include "util_map.h" -#include "util_progress.h" -#include "util_vector.h" +#include "device/device.h" +#include "render/curves.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/scene.h" + +#include "util/util_foreach.h" +#include "util/util_map.h" +#include "util/util_progress.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/curves.h b/intern/cycles/render/curves.h index e41967eebf5..8834764bd63 100644 --- a/intern/cycles/render/curves.h +++ b/intern/cycles/render/curves.h @@ -17,8 +17,8 @@ #ifndef __CURVES_H__ #define __CURVES_H__ -#include "util_types.h" -#include "util_vector.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index e10a938e1eb..c8213d258d5 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -14,19 +14,19 @@ * limitations under the License. */ -#include "camera.h" -#include "device.h" -#include "film.h" -#include "integrator.h" -#include "mesh.h" -#include "scene.h" -#include "tables.h" - -#include "util_algorithm.h" -#include "util_debug.h" -#include "util_foreach.h" -#include "util_math.h" -#include "util_math_cdf.h" +#include "render/camera.h" +#include "device/device.h" +#include "render/film.h" +#include "render/integrator.h" +#include "render/mesh.h" +#include "render/scene.h" +#include "render/tables.h" + +#include "util/util_algorithm.h" +#include "util/util_debug.h" +#include "util/util_foreach.h" +#include "util/util_math.h" +#include "util/util_math_cdf.h" CCL_NAMESPACE_BEGIN @@ -154,14 +154,9 @@ void Pass::add(PassType type, array<Pass>& passes) pass.components = 0; break; #ifdef WITH_CYCLES_DEBUG - case PASS_BVH_TRAVERSAL_STEPS: - pass.components = 1; - pass.exposure = false; - break; + case PASS_BVH_TRAVERSED_NODES: case PASS_BVH_TRAVERSED_INSTANCES: - pass.components = 1; - pass.exposure = false; - break; + case PASS_BVH_INTERSECTIONS: case PASS_RAY_BOUNCES: pass.components = 1; pass.exposure = false; @@ -284,6 +279,10 @@ NODE_DEFINE(Film) SOCKET_BOOLEAN(use_sample_clamp, "Use Sample Clamp", false); + SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false); + SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); + SOCKET_INT(denoising_flags, "Denoising Flags", 0); + return type; } @@ -421,12 +420,15 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) break; #ifdef WITH_CYCLES_DEBUG - case PASS_BVH_TRAVERSAL_STEPS: - kfilm->pass_bvh_traversal_steps = kfilm->pass_stride; + case PASS_BVH_TRAVERSED_NODES: + kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride; break; case PASS_BVH_TRAVERSED_INSTANCES: kfilm->pass_bvh_traversed_instances = kfilm->pass_stride; break; + case PASS_BVH_INTERSECTIONS: + kfilm->pass_bvh_intersections = kfilm->pass_stride; + break; case PASS_RAY_BOUNCES: kfilm->pass_ray_bounces = kfilm->pass_stride; break; @@ -439,6 +441,20 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride += pass.components; } + kfilm->pass_denoising_data = 0; + kfilm->pass_denoising_clean = 0; + kfilm->denoising_flags = 0; + if(denoising_data_pass) { + kfilm->pass_denoising_data = kfilm->pass_stride; + kfilm->pass_stride += DENOISING_PASS_SIZE_BASE; + kfilm->denoising_flags = denoising_flags; + if(denoising_clean_pass) { + kfilm->pass_denoising_clean = kfilm->pass_stride; + kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN; + kfilm->use_light_pass = 1; + } + } + kfilm->pass_stride = align_up(kfilm->pass_stride, 4); kfilm->pass_alpha_threshold = pass_alpha_threshold; @@ -453,6 +469,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->mist_inv_depth = (mist_depth > 0.0f)? 1.0f/mist_depth: 0.0f; kfilm->mist_falloff = mist_falloff; + pass_stride = kfilm->pass_stride; + denoising_data_offset = kfilm->pass_denoising_data; + denoising_clean_offset = kfilm->pass_denoising_clean; + need_update = false; } diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 9fa51c51f52..29b1e7e9157 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -17,12 +17,12 @@ #ifndef __FILM_H__ #define __FILM_H__ -#include "util_string.h" -#include "util_vector.h" +#include "util/util_string.h" +#include "util/util_vector.h" -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "node.h" +#include "graph/node.h" CCL_NAMESPACE_BEGIN @@ -53,12 +53,19 @@ public: class Film : public Node { public: - NODE_DECLARE; + NODE_DECLARE float exposure; array<Pass> passes; + bool denoising_data_pass; + bool denoising_clean_pass; + int denoising_flags; float pass_alpha_threshold; + int pass_stride; + int denoising_data_offset; + int denoising_clean_offset; + FilterType filter_type; float filter_width; size_t filter_table_offset; diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index f6c83fb5c7e..08203163d1a 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -14,17 +14,18 @@ * limitations under the License. */ -#include "attribute.h" -#include "graph.h" -#include "nodes.h" -#include "shader.h" -#include "constant_fold.h" - -#include "util_algorithm.h" -#include "util_debug.h" -#include "util_foreach.h" -#include "util_queue.h" -#include "util_logging.h" +#include "render/attribute.h" +#include "render/graph.h" +#include "render/nodes.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/constant_fold.h" + +#include "util/util_algorithm.h" +#include "util/util_debug.h" +#include "util/util_foreach.h" +#include "util/util_queue.h" +#include "util/util_logging.h" CCL_NAMESPACE_BEGIN @@ -195,6 +196,7 @@ bool ShaderNode::equals(const ShaderNode& other) ShaderGraph::ShaderGraph() { finalized = false; + simplified = false; num_node_ids = 0; add(new OutputNode()); } @@ -207,6 +209,8 @@ ShaderGraph::~ShaderGraph() ShaderNode *ShaderGraph::add(ShaderNode *node) { assert(!finalized); + simplified = false; + node->id = num_node_ids++; nodes.push_back(node); return node; @@ -217,26 +221,6 @@ OutputNode *ShaderGraph::output() return (OutputNode*)nodes.front(); } -ShaderGraph *ShaderGraph::copy() -{ - ShaderGraph *newgraph = new ShaderGraph(); - - /* copy nodes */ - ShaderNodeSet nodes_all; - foreach(ShaderNode *node, nodes) - nodes_all.insert(node); - - ShaderNodeMap nodes_copy; - copy_nodes(nodes_all, nodes_copy); - - /* add nodes (in same order, so output is still first) */ - newgraph->clear_nodes(); - foreach(ShaderNode *node, nodes) - newgraph->add(nodes_copy[node]); - - return newgraph; -} - void ShaderGraph::connect(ShaderOutput *from, ShaderInput *to) { assert(!finalized); @@ -273,6 +257,7 @@ void ShaderGraph::connect(ShaderOutput *from, ShaderInput *to) void ShaderGraph::disconnect(ShaderOutput *from) { assert(!finalized); + simplified = false; foreach(ShaderInput *sock, from->links) { sock->link = NULL; @@ -285,6 +270,7 @@ void ShaderGraph::disconnect(ShaderInput *to) { assert(!finalized); assert(to->link); + simplified = false; ShaderOutput *from = to->link; @@ -294,6 +280,8 @@ void ShaderGraph::disconnect(ShaderInput *to) void ShaderGraph::relink(ShaderNode *node, ShaderOutput *from, ShaderOutput *to) { + simplified = false; + /* Copy because disconnect modifies this list */ vector<ShaderInput*> outputs = from->links; @@ -310,9 +298,19 @@ void ShaderGraph::relink(ShaderNode *node, ShaderOutput *from, ShaderOutput *to) } } +void ShaderGraph::simplify(Scene *scene) +{ + if(!simplified) { + default_inputs(scene->shader_manager->use_osl()); + clean(scene); + refine_bump_nodes(); + + simplified = true; + } +} + void ShaderGraph::finalize(Scene *scene, bool do_bump, - bool do_osl, bool do_simplify, bool bump_in_object_space) { @@ -322,9 +320,7 @@ void ShaderGraph::finalize(Scene *scene, * modified afterwards. */ if(!finalized) { - default_inputs(do_osl); - clean(scene); - refine_bump_nodes(); + simplify(scene); if(do_bump) bump_from_displacement(bump_in_object_space); @@ -405,7 +401,8 @@ void ShaderGraph::copy_nodes(ShaderNodeSet& nodes, ShaderNodeMap& nnodemap) /* Graph simplification */ /* ******************** */ -/* Step 1: Remove proxy nodes. +/* Remove proxy nodes. + * * These only exists temporarily when exporting groups, and we must remove them * early so that node->attributes() and default links do not see them. */ @@ -475,7 +472,8 @@ void ShaderGraph::remove_proxy_nodes() } } -/* Step 2: Constant folding. +/* Constant folding. + * * Try to constant fold some nodes, and pipe result directly to * the input socket of connected nodes. */ @@ -536,7 +534,7 @@ void ShaderGraph::constant_fold() } } -/* Step 3: Simplification. */ +/* Simplification. */ void ShaderGraph::simplify_settings(Scene *scene) { foreach(ShaderNode *node, nodes) { @@ -544,7 +542,7 @@ void ShaderGraph::simplify_settings(Scene *scene) } } -/* Step 4: Deduplicate nodes with same settings. */ +/* Deduplicate nodes with same settings. */ void ShaderGraph::deduplicate_nodes() { /* NOTES: @@ -620,6 +618,48 @@ void ShaderGraph::deduplicate_nodes() } } +/* Check whether volume output has meaningful nodes, otherwise + * disconnect the output. + */ +void ShaderGraph::verify_volume_output() +{ + /* Check whether we can optimize the whole volume graph out. */ + ShaderInput *volume_in = output()->input("Volume"); + if(volume_in->link == NULL) { + return; + } + bool has_valid_volume = false; + ShaderNodeSet scheduled; + queue<ShaderNode*> traverse_queue; + /* Schedule volume output. */ + traverse_queue.push(volume_in->link->parent); + scheduled.insert(volume_in->link->parent); + /* Traverse down the tree. */ + while(!traverse_queue.empty()) { + ShaderNode *node = traverse_queue.front(); + traverse_queue.pop(); + /* Node is fully valid for volume, can't optimize anything out. */ + if(node->has_volume_support()) { + has_valid_volume = true; + break; + } + foreach(ShaderInput *input, node->inputs) { + if(input->link == NULL) { + continue; + } + if(scheduled.find(input->link->parent) != scheduled.end()) { + continue; + } + traverse_queue.push(input->link->parent); + scheduled.insert(input->link->parent); + } + } + if(!has_valid_volume) { + VLOG(1) << "Disconnect meaningless volume output."; + disconnect(volume_in->link); + } +} + void ShaderGraph::break_cycles(ShaderNode *node, vector<bool>& visited, vector<bool>& on_stack) { visited[node->id] = true; @@ -648,16 +688,11 @@ void ShaderGraph::clean(Scene *scene) { /* Graph simplification */ - /* 1: Remove proxy nodes was already done. */ - - /* 2: Constant folding. */ + /* NOTE: Remove proxy nodes was already done. */ constant_fold(); - - /* 3: Simplification. */ simplify_settings(scene); - - /* 4: De-duplication. */ deduplicate_nodes(); + verify_volume_output(); /* we do two things here: find cycles and break them, and remove unused * nodes that don't feed into the output. how cycles are broken is @@ -980,6 +1015,12 @@ int ShaderGraph::get_num_closures() else if(CLOSURE_IS_BSDF_MULTISCATTER(closure_type)) { num_closures += 2; } + else if(CLOSURE_IS_PRINCIPLED(closure_type)) { + num_closures += 8; + } + else if(CLOSURE_IS_VOLUME(closure_type)) { + num_closures += VOLUME_STACK_SIZE; + } else { ++num_closures; } diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index 780fdf49ca4..f0fd789c6bd 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -17,17 +17,17 @@ #ifndef __GRAPH_H__ #define __GRAPH_H__ -#include "node.h" -#include "node_type.h" +#include "graph/node.h" +#include "graph/node_type.h" -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "util_list.h" -#include "util_map.h" -#include "util_param.h" -#include "util_set.h" -#include "util_types.h" -#include "util_vector.h" +#include "util/util_list.h" +#include "util/util_map.h" +#include "util/util_param.h" +#include "util/util_set.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -151,11 +151,12 @@ public: virtual bool has_surface_emission() { return false; } virtual bool has_surface_transparent() { return false; } virtual bool has_surface_bssrdf() { return false; } + virtual bool has_bump() { return false; } virtual bool has_bssrdf_bump() { return false; } virtual bool has_spatial_varying() { return false; } virtual bool has_object_dependency() { return false; } virtual bool has_integrator_dependency() { return false; } - + virtual bool has_volume_support() { return false; } vector<ShaderInput*> inputs; vector<ShaderOutput*> outputs; @@ -201,14 +202,14 @@ public: /* Node definition utility macros */ #define SHADER_NODE_CLASS(type) \ - NODE_DECLARE; \ + NODE_DECLARE \ type(); \ virtual ShaderNode *clone() const { return new type(*this); } \ virtual void compile(SVMCompiler& compiler); \ virtual void compile(OSLCompiler& compiler); \ #define SHADER_NODE_NO_CLONE_CLASS(type) \ - NODE_DECLARE; \ + NODE_DECLARE \ type(); \ virtual void compile(SVMCompiler& compiler); \ virtual void compile(OSLCompiler& compiler); \ @@ -240,12 +241,11 @@ public: list<ShaderNode*> nodes; size_t num_node_ids; bool finalized; + bool simplified; ShaderGraph(); ~ShaderGraph(); - ShaderGraph *copy(); - ShaderNode *add(ShaderNode *node); OutputNode *output(); @@ -255,9 +255,9 @@ public: void relink(ShaderNode *node, ShaderOutput *from, ShaderOutput *to); void remove_proxy_nodes(); + void simplify(Scene *scene); void finalize(Scene *scene, bool do_bump = false, - bool do_osl = false, bool do_simplify = false, bool bump_in_object_space = false); @@ -283,6 +283,7 @@ protected: void constant_fold(); void simplify_settings(Scene *scene); void deduplicate_nodes(); + void verify_volume_output(); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 11193bf4974..bb94b9bb82a 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "device.h" -#include "image.h" -#include "scene.h" +#include "device/device.h" +#include "render/image.h" +#include "render/scene.h" -#include "util_foreach.h" -#include "util_logging.h" -#include "util_path.h" -#include "util_progress.h" -#include "util_texture.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_path.h" +#include "util/util_progress.h" +#include "util/util_texture.h" #ifdef WITH_OSL #include <OSL/oslexec.h> @@ -30,10 +30,19 @@ CCL_NAMESPACE_BEGIN +/* Some helpers to silence warning in templated function. */ +static bool isfinite(uchar /*value*/) +{ + return false; +} +static bool isfinite(half /*value*/) +{ + return false; +} + ImageManager::ImageManager(const DeviceInfo& info) { need_update = true; - pack_images = false; osl_texture_system = NULL; animation_frame = 0; @@ -49,54 +58,24 @@ ImageManager::ImageManager(const DeviceInfo& info) } /* Set image limits */ -#define SET_TEX_IMAGES_LIMITS(ARCH) \ - { \ - tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \ - } - - if(device_type == DEVICE_CPU) { - SET_TEX_IMAGES_LIMITS(CPU); - } - else if(device_type == DEVICE_CUDA) { - if(info.has_bindless_textures) { - SET_TEX_IMAGES_LIMITS(CUDA_KEPLER); - } - else { - SET_TEX_IMAGES_LIMITS(CUDA); + max_num_images = TEX_NUM_MAX; + has_half_images = true; + cuda_fermi_limits = false; + + if(device_type == DEVICE_CUDA) { + if(!info.has_bindless_textures) { + /* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */ + cuda_fermi_limits = true; + has_half_images = false; } } else if(device_type == DEVICE_OPENCL) { - SET_TEX_IMAGES_LIMITS(OPENCL); - } - else { - /* Should not happen. */ - tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0; - tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0; - tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; - tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0; - tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_num_images[IMAGE_DATA_TYPE_HALF] = 0; - tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0; - tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0; - tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; - tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0; - tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_start_images[IMAGE_DATA_TYPE_HALF] = 0; - assert(0); + has_half_images = false; } -#undef SET_TEX_IMAGES_LIMITS + for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + tex_num_images[type] = 0; + } } ImageManager::~ImageManager() @@ -107,11 +86,6 @@ ImageManager::~ImageManager() } } -void ImageManager::set_pack_images(bool pack_images_) -{ - pack_images = pack_images_; -} - void ImageManager::set_osl_texture_system(void *texture_system) { osl_texture_system = texture_system; @@ -133,18 +107,20 @@ bool ImageManager::set_animation_frame_update(int frame) return false; } -ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filename, - void *builtin_data, - bool& is_linear) +ImageDataType ImageManager::get_image_metadata(const string& filename, + void *builtin_data, + bool& is_linear, + bool& builtin_free_cache) { bool is_float = false, is_half = false; is_linear = false; + builtin_free_cache = false; int channels = 4; if(builtin_data) { if(builtin_image_info_cb) { int width, height, depth; - builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels); + builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels, builtin_free_cache); } if(is_float) { @@ -156,6 +132,16 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } } + /* Perform preliminary checks, with meaningful logging. */ + if(!path_exists(filename)) { + VLOG(1) << "File '" << filename << "' does not exist."; + return IMAGE_DATA_TYPE_BYTE4; + } + if(path_is_directory(filename)) { + VLOG(1) << "File '" << filename << "' is a directory, can't use as image."; + return IMAGE_DATA_TYPE_BYTE4; + } + ImageInput *in = ImageInput::create(filename); if(in) { @@ -216,26 +202,28 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } } -/* We use a consecutive slot counting scheme on the devices, in order - * float4, byte4, half4, float, byte, half. +int ImageManager::max_flattened_slot(ImageDataType type) +{ + if(tex_num_images[type] == 0) { + /* No textures for the type, no slots needs allocation. */ + return 0; + } + return type_index_to_flattened_slot(tex_num_images[type], type); +} + +/* The lower three bits of a device texture slot number indicate its type. * These functions convert the slot ids from ImageManager "images" ones - * to device ones and vice versa. */ + * to device ones and vice verse. + */ int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type) { - return slot + tex_start_images[type]; + return (slot << IMAGE_DATA_TYPE_SHIFT) | (type); } int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type) { - for(int i = IMAGE_DATA_NUM_TYPES - 1; i >= 0; i--) { - if(flat_slot >= tex_start_images[i]) { - *type = (ImageDataType)i; - return flat_slot - tex_start_images[i]; - } - } - - /* Should not happen. */ - return flat_slot; + *type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK); + return flat_slot >> IMAGE_DATA_TYPE_SHIFT; } string ImageManager::name_from_type(int type) @@ -258,12 +246,14 @@ static bool image_equals(ImageManager::Image *image, const string& filename, void *builtin_data, InterpolationType interpolation, - ExtensionType extension) + ExtensionType extension, + bool use_alpha) { return image->filename == filename && image->builtin_data == builtin_data && image->interpolation == interpolation && - image->extension == extension; + image->extension == extension && + image->use_alpha == use_alpha; } int ImageManager::add_image(const string& filename, @@ -278,24 +268,32 @@ int ImageManager::add_image(const string& filename, { Image *img; size_t slot; + bool builtin_free_cache; - ImageDataType type = get_image_metadata(filename, builtin_data, is_linear); + ImageDataType type = get_image_metadata(filename, builtin_data, is_linear, builtin_free_cache); thread_scoped_lock device_lock(device_mutex); - /* Do we have a float? */ - if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) - is_float = true; + /* Check whether it's a float texture. */ + is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4); /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */ - if((type == IMAGE_DATA_TYPE_FLOAT || - type == IMAGE_DATA_TYPE_HALF4 || - type == IMAGE_DATA_TYPE_HALF) && - tex_num_images[type] == 0) { - type = IMAGE_DATA_TYPE_FLOAT4; + if(!has_half_images) { + if(type == IMAGE_DATA_TYPE_HALF4) { + type = IMAGE_DATA_TYPE_FLOAT4; + } + else if(type == IMAGE_DATA_TYPE_HALF) { + type = IMAGE_DATA_TYPE_FLOAT; + } } - if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) { - type = IMAGE_DATA_TYPE_BYTE4; + + if(cuda_fermi_limits) { + if(type == IMAGE_DATA_TYPE_FLOAT) { + type = IMAGE_DATA_TYPE_FLOAT4; + } + else if(type == IMAGE_DATA_TYPE_BYTE) { + type = IMAGE_DATA_TYPE_BYTE4; + } } /* Fnd existing image. */ @@ -305,7 +303,8 @@ int ImageManager::add_image(const string& filename, filename, builtin_data, interpolation, - extension)) + extension, + use_alpha)) { if(img->frame != frame) { img->frame = frame; @@ -326,14 +325,30 @@ int ImageManager::add_image(const string& filename, break; } - if(slot == images[type].size()) { - /* Max images limit reached. */ - if(images[type].size() == tex_num_images[type]) { + /* Count if we're over the limit */ + if(cuda_fermi_limits) { + if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA + || tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA) + { printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n", - name_from_type(type).c_str(), tex_num_images[type], filename.c_str()); + name_from_type(type).c_str(), tex_num_images[type], filename.c_str()); return -1; } + } + else { + /* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */ + int tex_count = 0; + for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + tex_count += tex_num_images[type]; + } + if(tex_count > max_num_images) { + printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n", + max_num_images, filename.c_str()); + return -1; + } + } + if(slot == images[type].size()) { images[type].resize(images[type].size() + 1); } @@ -341,6 +356,7 @@ int ImageManager::add_image(const string& filename, img = new Image(); img->filename = filename; img->builtin_data = builtin_data; + img->builtin_free_cache = builtin_free_cache; img->need_load = true; img->animated = animated; img->frame = frame; @@ -351,6 +367,8 @@ int ImageManager::add_image(const string& filename, images[type][slot] = img; + ++tex_num_images[type]; + need_update = true; return type_index_to_flattened_slot(slot, type); @@ -377,7 +395,8 @@ void ImageManager::remove_image(int flat_slot) void ImageManager::remove_image(const string& filename, void *builtin_data, InterpolationType interpolation, - ExtensionType extension) + ExtensionType extension, + bool use_alpha) { size_t slot; @@ -387,7 +406,8 @@ void ImageManager::remove_image(const string& filename, filename, builtin_data, interpolation, - extension)) + extension, + use_alpha)) { remove_image(type_index_to_flattened_slot(slot, (ImageDataType)type)); return; @@ -403,7 +423,8 @@ void ImageManager::remove_image(const string& filename, void ImageManager::tag_reload_image(const string& filename, void *builtin_data, InterpolationType interpolation, - ExtensionType extension) + ExtensionType extension, + bool use_alpha) { for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { for(size_t slot = 0; slot < images[type].size(); slot++) { @@ -411,7 +432,8 @@ void ImageManager::tag_reload_image(const string& filename, filename, builtin_data, interpolation, - extension)) + extension, + use_alpha)) { images[type][slot]->need_load = true; break; @@ -420,12 +442,22 @@ void ImageManager::tag_reload_image(const string& filename, } } -bool ImageManager::file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components) +bool ImageManager::file_load_image_generic(Image *img, + ImageInput **in, + int &width, + int &height, + int &depth, + int &components) { if(img->filename == "") return false; if(!img->builtin_data) { + /* NOTE: Error logging is done in meta data acquisition. */ + if(!path_exists(img->filename) || path_is_directory(img->filename)) { + return false; + } + /* load image from file through OIIO */ *in = ImageInput::create(img->filename); @@ -454,8 +486,8 @@ bool ImageManager::file_load_image_generic(Image *img, ImageInput **in, int &wid if(!builtin_image_info_cb || !builtin_image_pixels_cb) return false; - bool is_float; - builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components); + bool is_float, free_cache; + builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components, free_cache); } /* we only handle certain number of components */ @@ -490,6 +522,10 @@ bool ImageManager::file_load_image(Image *img, vector<StorageType> pixels_storage; StorageType *pixels; const size_t max_size = max(max(width, height), depth); + if(max_size == 0) { + /* Don't bother with invalid images. */ + return false; + } if(texture_limit > 0 && max_size > texture_limit) { pixels_storage.resize(((size_t)width)*height*depth*4); pixels = &pixels_storage[0]; @@ -497,7 +533,12 @@ bool ImageManager::file_load_image(Image *img, else { pixels = (StorageType*)tex_img.resize(width, height, depth); } + if(pixels == NULL) { + /* Could be that we've run out of memory. */ + return false; + } bool cmyk = false; + const size_t num_pixels = ((size_t)width) * height * depth; if(in) { StorageType *readpixels = pixels; vector<StorageType> tmppixels; @@ -534,12 +575,16 @@ bool ImageManager::file_load_image(Image *img, if(FileFormat == TypeDesc::FLOAT) { builtin_image_float_pixels_cb(img->filename, img->builtin_data, - (float*)&pixels[0]); + (float*)&pixels[0], + num_pixels * components, + img->builtin_free_cache); } else if(FileFormat == TypeDesc::UINT8) { builtin_image_pixels_cb(img->filename, img->builtin_data, - (uchar*)&pixels[0]); + (uchar*)&pixels[0], + num_pixels * components, + img->builtin_free_cache); } else { /* TODO(dingto): Support half for ImBuf. */ @@ -552,7 +597,6 @@ bool ImageManager::file_load_image(Image *img, type == IMAGE_DATA_TYPE_HALF4 || type == IMAGE_DATA_TYPE_BYTE4); if(is_rgba) { - size_t num_pixels = ((size_t)width) * height * depth; if(cmyk) { /* CMYK */ for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { @@ -595,6 +639,37 @@ bool ImageManager::file_load_image(Image *img, } } } + /* Make sure we don't have buggy values. */ + if(FileFormat == TypeDesc::FLOAT) { + /* For RGBA buffers we put all channels to 0 if either of them is not + * finite. This way we avoid possible artifacts caused by fully changed + * hue. + */ + if(is_rgba) { + for(size_t i = 0; i < num_pixels; i += 4) { + StorageType *pixel = &pixels[i*4]; + if(!isfinite(pixel[0]) || + !isfinite(pixel[1]) || + !isfinite(pixel[2]) || + !isfinite(pixel[3])) + { + pixel[0] = 0; + pixel[1] = 0; + pixel[2] = 0; + pixel[3] = 0; + } + } + } + else { + for(size_t i = 0; i < num_pixels; ++i) { + StorageType *pixel = &pixels[i]; + if(!isfinite(pixel[0])) { + pixel[0] = 0; + } + } + } + } + /* Scale image down if needed. */ if(pixels_storage.size() > 0) { float scale_factor = 1.0f; while(max_size * scale_factor > texture_limit) { @@ -643,16 +718,12 @@ void ImageManager::device_load_image(Device *device, /* Slot assignment */ int flat_slot = type_index_to_flattened_slot(slot, type); - string name; - if(flat_slot >= 100) - name = string_printf("__tex_image_%s_%d", name_from_type(type).c_str(), flat_slot); - else if(flat_slot >= 10) - name = string_printf("__tex_image_%s_0%d", name_from_type(type).c_str(), flat_slot); - else - name = string_printf("__tex_image_%s_00%d", name_from_type(type).c_str(), flat_slot); + string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); if(type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; + if(dscene->tex_float4_image[slot] == NULL) + dscene->tex_float4_image[slot] = new device_vector<float4>(); + device_vector<float4>& tex_img = *dscene->tex_float4_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -673,7 +744,7 @@ void ImageManager::device_load_image(Device *device, pixels[3] = TEX_IMAGE_MISSING_A; } - if(!pack_images) { + { thread_scoped_lock device_lock(device_mutex); device->tex_alloc(name.c_str(), tex_img, @@ -682,7 +753,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_FLOAT) { - device_vector<float>& tex_img = dscene->tex_float_image[slot]; + if(dscene->tex_float_image[slot] == NULL) + dscene->tex_float_image[slot] = new device_vector<float>(); + device_vector<float>& tex_img = *dscene->tex_float_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -700,7 +773,7 @@ void ImageManager::device_load_image(Device *device, pixels[0] = TEX_IMAGE_MISSING_R; } - if(!pack_images) { + { thread_scoped_lock device_lock(device_mutex); device->tex_alloc(name.c_str(), tex_img, @@ -709,7 +782,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_BYTE4) { - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; + if(dscene->tex_byte4_image[slot] == NULL) + dscene->tex_byte4_image[slot] = new device_vector<uchar4>(); + device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -730,7 +805,7 @@ void ImageManager::device_load_image(Device *device, pixels[3] = (TEX_IMAGE_MISSING_A * 255); } - if(!pack_images) { + { thread_scoped_lock device_lock(device_mutex); device->tex_alloc(name.c_str(), tex_img, @@ -739,7 +814,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_BYTE){ - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + if(dscene->tex_byte_image[slot] == NULL) + dscene->tex_byte_image[slot] = new device_vector<uchar>(); + device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -756,7 +833,7 @@ void ImageManager::device_load_image(Device *device, pixels[0] = (TEX_IMAGE_MISSING_R * 255); } - if(!pack_images) { + { thread_scoped_lock device_lock(device_mutex); device->tex_alloc(name.c_str(), tex_img, @@ -765,7 +842,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_HALF4){ - device_vector<half4>& tex_img = dscene->tex_half4_image[slot]; + if(dscene->tex_half4_image[slot] == NULL) + dscene->tex_half4_image[slot] = new device_vector<half4>(); + device_vector<half4>& tex_img = *dscene->tex_half4_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -785,7 +864,7 @@ void ImageManager::device_load_image(Device *device, pixels[3] = TEX_IMAGE_MISSING_A; } - if(!pack_images) { + { thread_scoped_lock device_lock(device_mutex); device->tex_alloc(name.c_str(), tex_img, @@ -794,7 +873,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_HALF){ - device_vector<half>& tex_img = dscene->tex_half_image[slot]; + if(dscene->tex_half_image[slot] == NULL) + dscene->tex_half_image[slot] = new device_vector<half>(); + device_vector<half>& tex_img = *dscene->tex_half_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -811,7 +892,7 @@ void ImageManager::device_load_image(Device *device, pixels[0] = TEX_IMAGE_MISSING_R; } - if(!pack_images) { + { thread_scoped_lock device_lock(device_mutex); device->tex_alloc(name.c_str(), tex_img, @@ -834,69 +915,100 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD ((OSL::TextureSystem*)osl_texture_system)->invalidate(filename); #endif } - else if(type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_FLOAT) { - device_vector<float>& tex_img = dscene->tex_float_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_BYTE4) { - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_BYTE){ - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_HALF4){ - device_vector<half4>& tex_img = dscene->tex_half4_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); + else { + device_memory *tex_img = NULL; + switch(type) { + case IMAGE_DATA_TYPE_FLOAT4: + if(slot >= dscene->tex_float4_image.size()) { + break; + } + tex_img = dscene->tex_float4_image[slot]; + dscene->tex_float4_image[slot] = NULL; + break; + case IMAGE_DATA_TYPE_BYTE4: + if(slot >= dscene->tex_byte4_image.size()) { + break; + } + tex_img = dscene->tex_byte4_image[slot]; + dscene->tex_byte4_image[slot]= NULL; + break; + case IMAGE_DATA_TYPE_HALF4: + if(slot >= dscene->tex_half4_image.size()) { + break; + } + tex_img = dscene->tex_half4_image[slot]; + dscene->tex_half4_image[slot]= NULL; + break; + case IMAGE_DATA_TYPE_FLOAT: + if(slot >= dscene->tex_float_image.size()) { + break; + } + tex_img = dscene->tex_float_image[slot]; + dscene->tex_float_image[slot] = NULL; + break; + case IMAGE_DATA_TYPE_BYTE: + if(slot >= dscene->tex_byte_image.size()) { + break; + } + tex_img = dscene->tex_byte_image[slot]; + dscene->tex_byte_image[slot]= NULL; + break; + case IMAGE_DATA_TYPE_HALF: + if(slot >= dscene->tex_half_image.size()) { + break; + } + tex_img = dscene->tex_half_image[slot]; + dscene->tex_half_image[slot]= NULL; + break; + default: + assert(0); + tex_img = NULL; } + if(tex_img) { + if(tex_img->device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(*tex_img); + } - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_HALF){ - device_vector<half>& tex_img = dscene->tex_half_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); + delete tex_img; } - - tex_img.clear(); } delete images[type][slot]; images[type][slot] = NULL; + --tex_num_images[type]; + } +} + +void ImageManager::device_prepare_update(DeviceScene *dscene) +{ + for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + switch(type) { + case IMAGE_DATA_TYPE_FLOAT4: + if(dscene->tex_float4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT4]) + dscene->tex_float4_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT4]); + break; + case IMAGE_DATA_TYPE_BYTE4: + if(dscene->tex_byte4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE4]) + dscene->tex_byte4_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE4]); + break; + case IMAGE_DATA_TYPE_HALF4: + if(dscene->tex_half4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF4]) + dscene->tex_half4_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF4]); + break; + case IMAGE_DATA_TYPE_BYTE: + if(dscene->tex_byte_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE]) + dscene->tex_byte_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE]); + break; + case IMAGE_DATA_TYPE_FLOAT: + if(dscene->tex_float_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT]) + dscene->tex_float_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT]); + break; + case IMAGE_DATA_TYPE_HALF: + if(dscene->tex_half_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF]) + dscene->tex_half_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF]); + break; + } } } @@ -905,11 +1017,14 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress& progress) { - if(!need_update) + if(!need_update) { return; + } - TaskPool pool; + /* Make sure arrays are proper size. */ + device_prepare_update(dscene); + TaskPool pool; for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -934,9 +1049,6 @@ void ImageManager::device_update(Device *device, pool.wait_work(); - if(pack_images) - device_pack_images(device, dscene, progress); - need_update = false; } @@ -966,199 +1078,6 @@ void ImageManager::device_update_slot(Device *device, } } -uint8_t ImageManager::pack_image_options(ImageDataType type, size_t slot) -{ - uint8_t options = 0; - - /* Image Options are packed into one uint: - * bit 0 -> Interpolation - * bit 1 + 2 + 3-> Extension */ - if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST) - options |= (1 << 0); - - if(images[type][slot]->extension == EXTENSION_REPEAT) - options |= (1 << 1); - else if(images[type][slot]->extension == EXTENSION_EXTEND) - options |= (1 << 2); - else /* EXTENSION_CLIP */ - options |= (1 << 3); - - return options; -} - -void ImageManager::device_pack_images(Device *device, - DeviceScene *dscene, - Progress& /*progess*/) -{ - /* For OpenCL, we pack all image textures into a single large texture, and - * do our own interpolation in the kernel. */ - size_t size = 0, offset = 0; - ImageDataType type; - - int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4] - + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE]; - uint4 *info = dscene->tex_image_packed_info.resize(info_size*2); - - /* Byte4 Textures*/ - type = IMAGE_DATA_TYPE_BYTE4; - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; - size += tex_img.size(); - } - - uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size); - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; - - uint8_t options = pack_image_options(type, slot); - - int index = type_index_to_flattened_slot(slot, type) * 2; - info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); - info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); - - memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); - offset += tex_img.size(); - } - - /* Float4 Textures*/ - type = IMAGE_DATA_TYPE_FLOAT4; - size = 0, offset = 0; - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; - size += tex_img.size(); - } - - float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size); - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; - - /* todo: support 3D textures, only CPU for now */ - - uint8_t options = pack_image_options(type, slot); - - int index = type_index_to_flattened_slot(slot, type) * 2; - info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); - info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); - - memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); - offset += tex_img.size(); - } - - /* Byte Textures*/ - type = IMAGE_DATA_TYPE_BYTE; - size = 0, offset = 0; - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; - size += tex_img.size(); - } - - uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size); - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; - - uint8_t options = pack_image_options(type, slot); - - int index = type_index_to_flattened_slot(slot, type) * 2; - info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); - info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); - - memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); - offset += tex_img.size(); - } - - /* Float Textures*/ - type = IMAGE_DATA_TYPE_FLOAT; - size = 0, offset = 0; - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<float>& tex_img = dscene->tex_float_image[slot]; - size += tex_img.size(); - } - - float *pixels_float = dscene->tex_image_float_packed.resize(size); - - for(size_t slot = 0; slot < images[type].size(); slot++) { - if(!images[type][slot]) - continue; - - device_vector<float>& tex_img = dscene->tex_float_image[slot]; - - /* todo: support 3D textures, only CPU for now */ - - uint8_t options = pack_image_options(type, slot); - - int index = type_index_to_flattened_slot(slot, type) * 2; - info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); - info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); - - memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); - offset += tex_img.size(); - } - - if(dscene->tex_image_byte4_packed.size()) { - if(dscene->tex_image_byte4_packed.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(dscene->tex_image_byte4_packed); - } - device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed); - } - if(dscene->tex_image_float4_packed.size()) { - if(dscene->tex_image_float4_packed.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(dscene->tex_image_float4_packed); - } - device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed); - } - if(dscene->tex_image_byte_packed.size()) { - if(dscene->tex_image_byte_packed.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(dscene->tex_image_byte_packed); - } - device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed); - } - if(dscene->tex_image_float_packed.size()) { - if(dscene->tex_image_float_packed.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(dscene->tex_image_float_packed); - } - device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed); - } - if(dscene->tex_image_packed_info.size()) { - if(dscene->tex_image_packed_info.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(dscene->tex_image_packed_info); - } - device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info); - } -} - void ImageManager::device_free_builtin(Device *device, DeviceScene *dscene) { for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { @@ -1178,17 +1097,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene) images[type].clear(); } - device->tex_free(dscene->tex_image_byte4_packed); - device->tex_free(dscene->tex_image_float4_packed); - device->tex_free(dscene->tex_image_byte_packed); - device->tex_free(dscene->tex_image_float_packed); - device->tex_free(dscene->tex_image_packed_info); - - dscene->tex_image_byte4_packed.clear(); - dscene->tex_image_float4_packed.clear(); - dscene->tex_image_byte_packed.clear(); - dscene->tex_image_float_packed.clear(); - dscene->tex_image_packed_info.clear(); + dscene->tex_float4_image.clear(); + dscene->tex_byte4_image.clear(); + dscene->tex_half4_image.clear(); + dscene->tex_float_image.clear(); + dscene->tex_byte_image.clear(); + dscene->tex_half_image.clear(); } CCL_NAMESPACE_END diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 3da7338985c..c86d1cbedbf 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -17,13 +17,13 @@ #ifndef __IMAGE_H__ #define __IMAGE_H__ -#include "device.h" -#include "device_memory.h" +#include "device/device.h" +#include "device/device_memory.h" -#include "util_image.h" -#include "util_string.h" -#include "util_thread.h" -#include "util_vector.h" +#include "util/util_image.h" +#include "util/util_string.h" +#include "util/util_thread.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -37,17 +37,6 @@ public: explicit ImageManager(const DeviceInfo& info); ~ImageManager(); - enum ImageDataType { - IMAGE_DATA_TYPE_FLOAT4 = 0, - IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_HALF4 = 2, - IMAGE_DATA_TYPE_FLOAT = 3, - IMAGE_DATA_TYPE_BYTE = 4, - IMAGE_DATA_TYPE_HALF = 5, - - IMAGE_DATA_NUM_TYPES - }; - int add_image(const string& filename, void *builtin_data, bool animated, @@ -61,13 +50,19 @@ public: void remove_image(const string& filename, void *builtin_data, InterpolationType interpolation, - ExtensionType extension); + ExtensionType extension, + bool use_alpha); void tag_reload_image(const string& filename, void *builtin_data, InterpolationType interpolation, - ExtensionType extension); - ImageDataType get_image_metadata(const string& filename, void *builtin_data, bool& is_linear); - + ExtensionType extension, + bool use_alpha); + ImageDataType get_image_metadata(const string& filename, + void *builtin_data, + bool& is_linear, + bool& builtin_free_cache); + + void device_prepare_update(DeviceScene *dscene); void device_update(Device *device, DeviceScene *dscene, Scene *scene, @@ -81,18 +76,37 @@ public: void device_free_builtin(Device *device, DeviceScene *dscene); void set_osl_texture_system(void *texture_system); - void set_pack_images(bool pack_images_); bool set_animation_frame_update(int frame); bool need_update; - function<void(const string &filename, void *data, bool &is_float, int &width, int &height, int &depth, int &channels)> builtin_image_info_cb; - function<bool(const string &filename, void *data, unsigned char *pixels)> builtin_image_pixels_cb; - function<bool(const string &filename, void *data, float *pixels)> builtin_image_float_pixels_cb; + /* NOTE: Here pixels_size is a size of storage, which equals to + * width * height * depth. + * Use this to avoid some nasty memory corruptions. + */ + function<void(const string &filename, + void *data, + bool &is_float, + int &width, + int &height, + int &depth, + int &channels, + bool &free_cache)> builtin_image_info_cb; + function<bool(const string &filename, + void *data, + unsigned char *pixels, + const size_t pixels_size, + const bool free_cache)> builtin_image_pixels_cb; + function<bool(const string &filename, + void *data, + float *pixels, + const size_t pixels_size, + const bool free_cache)> builtin_image_float_pixels_cb; struct Image { string filename; void *builtin_data; + bool builtin_free_cache; bool use_alpha; bool need_load; @@ -106,16 +120,22 @@ public: private: int tex_num_images[IMAGE_DATA_NUM_TYPES]; - int tex_start_images[IMAGE_DATA_NUM_TYPES]; + int max_num_images; + bool has_half_images; + bool cuda_fermi_limits; thread_mutex device_mutex; int animation_frame; vector<Image*> images[IMAGE_DATA_NUM_TYPES]; void *osl_texture_system; - bool pack_images; - bool file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components); + bool file_load_image_generic(Image *img, + ImageInput **in, + int &width, + int &height, + int &depth, + int &components); template<TypeDesc::BASETYPE FileFormat, typename StorageType, @@ -125,12 +145,11 @@ private: int texture_limit, device_vector<DeviceType>& tex_img); + int max_flattened_slot(ImageDataType type); int type_index_to_flattened_slot(int slot, ImageDataType type); int flattened_slot_to_type_index(int flat_slot, ImageDataType *type); string name_from_type(int type); - uint8_t pack_image_options(ImageDataType type, size_t slot); - void device_load_image(Device *device, DeviceScene *dscene, Scene *scene, @@ -141,10 +160,6 @@ private: DeviceScene *dscene, ImageDataType type, int slot); - - void device_pack_images(Device *device, - DeviceScene *dscene, - Progress& progess); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index a9a33d2e789..b268478e6d3 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "device.h" -#include "integrator.h" -#include "film.h" -#include "light.h" -#include "scene.h" -#include "shader.h" -#include "sobol.h" +#include "device/device.h" +#include "render/integrator.h" +#include "render/film.h" +#include "render/light.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/sobol.h" -#include "util_foreach.h" -#include "util_hash.h" +#include "util/util_foreach.h" +#include "util/util_hash.h" CCL_NAMESPACE_BEGIN @@ -31,7 +31,6 @@ NODE_DEFINE(Integrator) { NodeType *type = NodeType::add("integrator", create); - SOCKET_INT(min_bounce, "Min Bounce", 2); SOCKET_INT(max_bounce, "Max Bounce", 7); SOCKET_INT(max_diffuse_bounce, "Max Diffuse Bounce", 7); @@ -39,9 +38,9 @@ NODE_DEFINE(Integrator) SOCKET_INT(max_transmission_bounce, "Max Transmission Bounce", 7); SOCKET_INT(max_volume_bounce, "Max Volume Bounce", 7); - SOCKET_INT(transparent_min_bounce, "Transparent Min Bounce", 2); SOCKET_INT(transparent_max_bounce, "Transparent Max Bounce", 7); - SOCKET_BOOLEAN(transparent_shadows, "Transparent Shadows", false); + + SOCKET_INT(ao_bounces, "AO Bounces", 0); SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024); SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f); @@ -62,6 +61,7 @@ NODE_DEFINE(Integrator) SOCKET_INT(mesh_light_samples, "Mesh Light Samples", 1); SOCKET_INT(subsurface_samples, "Subsurface Samples", 1); SOCKET_INT(volume_samples, "Volume Samples", 1); + SOCKET_INT(start_sample, "Start Sample", 0); SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true); SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true); @@ -101,7 +101,6 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene /* integrator parameters */ kintegrator->max_bounce = max_bounce + 1; - kintegrator->min_bounce = min_bounce + 1; kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1; kintegrator->max_glossy_bounce = max_glossy_bounce + 1; @@ -109,25 +108,26 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->max_volume_bounce = max_volume_bounce + 1; kintegrator->transparent_max_bounce = transparent_max_bounce + 1; - kintegrator->transparent_min_bounce = transparent_min_bounce + 1; + + if(ao_bounces == 0) { + kintegrator->ao_bounces = INT_MAX; + } + else { + kintegrator->ao_bounces = ao_bounces - 1; + } /* Transparent Shadows * We only need to enable transparent shadows, if we actually have * transparent shaders in the scene. Otherwise we can disable it * to improve performance a bit. */ - if(transparent_shadows) { - kintegrator->transparent_shadows = false; - foreach(Shader *shader, scene->shaders) { - /* keep this in sync with SD_HAS_TRANSPARENT_SHADOW in shader.cpp */ - if((shader->has_surface_transparent && shader->use_transparent_shadow) || shader->has_volume) { - kintegrator->transparent_shadows = true; - break; - } + kintegrator->transparent_shadows = false; + foreach(Shader *shader, scene->shaders) { + /* keep this in sync with SD_HAS_TRANSPARENT_SHADOW in shader.cpp */ + if((shader->has_surface_transparent && shader->use_transparent_shadow) || shader->has_volume) { + kintegrator->transparent_shadows = true; + break; } } - else { - kintegrator->transparent_shadows = false; - } kintegrator->volume_max_steps = volume_max_steps; kintegrator->volume_step_size = volume_step_size; @@ -145,6 +145,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f)? FLT_MAX: sample_clamp_indirect*3.0f; kintegrator->branched = (method == BRANCHED_PATH); + kintegrator->volume_decoupled = device->info.has_volume_decoupled; kintegrator->diffuse_samples = diffuse_samples; kintegrator->glossy_samples = glossy_samples; kintegrator->transmission_samples = transmission_samples; @@ -152,6 +153,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->mesh_light_samples = mesh_light_samples; kintegrator->subsurface_samples = subsurface_samples; kintegrator->volume_samples = volume_samples; + kintegrator->start_sample = start_sample; if(method == BRANCHED_PATH) { kintegrator->sample_all_lights_direct = sample_all_lights_direct; diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 17fdd0ef1db..3cb430d72b4 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -17,9 +17,9 @@ #ifndef __INTEGRATOR_H__ #define __INTEGRATOR_H__ -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "node.h" +#include "graph/node.h" CCL_NAMESPACE_BEGIN @@ -29,9 +29,8 @@ class Scene; class Integrator : public Node { public: - NODE_DECLARE; + NODE_DECLARE - int min_bounce; int max_bounce; int max_diffuse_bounce; @@ -39,9 +38,9 @@ public: int max_transmission_bounce; int max_volume_bounce; - int transparent_min_bounce; int transparent_max_bounce; - bool transparent_shadows; + + int ao_bounces; int volume_max_steps; float volume_step_size; @@ -64,6 +63,7 @@ public: int mesh_light_samples; int subsurface_samples; int volume_samples; + int start_sample; bool sample_all_lights_direct; bool sample_all_lights_indirect; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 2245c861d5a..6a7f985b756 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -14,19 +14,19 @@ * limitations under the License. */ -#include "background.h" -#include "device.h" -#include "integrator.h" -#include "film.h" -#include "light.h" -#include "mesh.h" -#include "object.h" -#include "scene.h" -#include "shader.h" - -#include "util_foreach.h" -#include "util_progress.h" -#include "util_logging.h" +#include "render/background.h" +#include "device/device.h" +#include "render/integrator.h" +#include "render/film.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/scene.h" +#include "render/shader.h" + +#include "util/util_foreach.h" +#include "util/util_progress.h" +#include "util/util_logging.h" CCL_NAMESPACE_BEGIN @@ -57,9 +57,10 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_WRITE_ONLY); + device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY); + device->mem_zero(d_output); DeviceTask main_task(DeviceTask::SHADER); main_task.shader_input = d_input.device_pointer; @@ -224,12 +225,12 @@ void LightManager::disable_ineffective_light(Device *device, Scene *scene) bool LightManager::object_usable_as_light(Object *object) { Mesh *mesh = object->mesh; - /* Skip if we are not visible for BSDFs. */ - if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT))) { + /* Skip objects with NaNs */ + if(!object->bounds.valid()) { return false; } - /* Skip motion blurred deforming meshes, not supported yet. */ - if(mesh->has_motion_blur()) { + /* Skip if we are not visible for BSDFs. */ + if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT))) { return false; } /* Skip if we have no emission shaders. */ diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h index f56530b6490..7e9014eb823 100644 --- a/intern/cycles/render/light.h +++ b/intern/cycles/render/light.h @@ -17,12 +17,12 @@ #ifndef __LIGHT_H__ #define __LIGHT_H__ -#include "kernel_types.h" +#include "kernel/kernel_types.h" -#include "node.h" +#include "graph/node.h" -#include "util_types.h" -#include "util_vector.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index df4327d021a..c5eb3a0d3a8 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -14,29 +14,29 @@ * limitations under the License. */ -#include "bvh.h" -#include "bvh_build.h" - -#include "camera.h" -#include "curves.h" -#include "device.h" -#include "graph.h" -#include "shader.h" -#include "light.h" -#include "mesh.h" -#include "nodes.h" -#include "object.h" -#include "scene.h" - -#include "osl_globals.h" - -#include "subd_split.h" -#include "subd_patch_table.h" - -#include "util_foreach.h" -#include "util_logging.h" -#include "util_progress.h" -#include "util_set.h" +#include "bvh/bvh.h" +#include "bvh/bvh_build.h" + +#include "render/camera.h" +#include "render/curves.h" +#include "device/device.h" +#include "render/graph.h" +#include "render/shader.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/nodes.h" +#include "render/object.h" +#include "render/scene.h" + +#include "kernel/osl/osl_globals.h" + +#include "subd/subd_split.h" +#include "subd/subd_patch_table.h" + +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_progress.h" +#include "util/util_set.h" CCL_NAMESPACE_BEGIN @@ -49,6 +49,77 @@ void Mesh::Triangle::bounds_grow(const float3 *verts, BoundBox& bounds) const bounds.grow(verts[v[2]]); } +void Mesh::Triangle::motion_verts(const float3 *verts, + const float3 *vert_steps, + size_t num_verts, + size_t num_steps, + float time, + float3 r_verts[3]) const +{ + /* Figure out which steps we need to fetch and their interpolation factor. */ + const size_t max_step = num_steps - 1; + const size_t step = min((int)(time * max_step), max_step - 1); + const float t = time*max_step - step; + /* Fetch vertex coordinates. */ + float3 curr_verts[3]; + float3 next_verts[3]; + verts_for_step(verts, + vert_steps, + num_verts, + num_steps, + step, + curr_verts); + verts_for_step(verts, + vert_steps, + num_verts, + num_steps, + step + 1, + next_verts); + /* Interpolate between steps. */ + r_verts[0] = (1.0f - t)*curr_verts[0] + t*next_verts[0]; + r_verts[1] = (1.0f - t)*curr_verts[1] + t*next_verts[1]; + r_verts[2] = (1.0f - t)*curr_verts[2] + t*next_verts[2]; +} + +void Mesh::Triangle::verts_for_step(const float3 *verts, + const float3 *vert_steps, + size_t num_verts, + size_t num_steps, + size_t step, + float3 r_verts[3]) const +{ + const size_t center_step = ((num_steps - 1) / 2); + if(step == center_step) { + /* Center step: regular vertex location. */ + r_verts[0] = verts[v[0]]; + r_verts[1] = verts[v[1]]; + r_verts[2] = verts[v[2]]; + } + else { + /* Center step not stored in the attribute array array. */ + if(step > center_step) { + step--; + } + size_t offset = step * num_verts; + r_verts[0] = vert_steps[offset + v[0]]; + r_verts[1] = vert_steps[offset + v[1]]; + r_verts[2] = vert_steps[offset + v[2]]; + } +} + +float3 Mesh::Triangle::compute_normal(const float3 *verts) const +{ + const float3& v0 = verts[v[0]]; + const float3& v1 = verts[v[1]]; + const float3& v2 = verts[v[2]]; + const float3 norm = cross(v1 - v0, v2 - v0); + const float normlen = len(norm); + if(normlen == 0.0f) { + return make_float3(1.0f, 0.0f, 0.0f); + } + return norm / normlen; +} + /* Curve */ void Mesh::Curve::bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox& bounds) const @@ -104,6 +175,205 @@ void Mesh::Curve::bounds_grow(const int k, bounds.grow(upper, mr); } +void Mesh::Curve::bounds_grow(float4 keys[4], BoundBox& bounds) const +{ + float3 P[4] = { + float4_to_float3(keys[0]), + float4_to_float3(keys[1]), + float4_to_float3(keys[2]), + float4_to_float3(keys[3]), + }; + + float3 lower; + float3 upper; + + curvebounds(&lower.x, &upper.x, P, 0); + curvebounds(&lower.y, &upper.y, P, 1); + curvebounds(&lower.z, &upper.z, P, 2); + + float mr = max(keys[1].w, keys[2].w); + + bounds.grow(lower, mr); + bounds.grow(upper, mr); +} + +void Mesh::Curve::motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, size_t k1, + float4 r_keys[2]) const +{ + /* Figure out which steps we need to fetch and their interpolation factor. */ + const size_t max_step = num_steps - 1; + const size_t step = min((int)(time * max_step), max_step - 1); + const float t = time*max_step - step; + /* Fetch vertex coordinates. */ + float4 curr_keys[2]; + float4 next_keys[2]; + keys_for_step(curve_keys, + curve_radius, + key_steps, + num_curve_keys, + num_steps, + step, + k0, k1, + curr_keys); + keys_for_step(curve_keys, + curve_radius, + key_steps, + num_curve_keys, + num_steps, + step + 1, + k0, k1, + next_keys); + /* Interpolate between steps. */ + r_keys[0] = (1.0f - t)*curr_keys[0] + t*next_keys[0]; + r_keys[1] = (1.0f - t)*curr_keys[1] + t*next_keys[1]; +} + +void Mesh::Curve::cardinal_motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, size_t k1, + size_t k2, size_t k3, + float4 r_keys[4]) const +{ + /* Figure out which steps we need to fetch and their interpolation factor. */ + const size_t max_step = num_steps - 1; + const size_t step = min((int)(time * max_step), max_step - 1); + const float t = time*max_step - step; + /* Fetch vertex coordinates. */ + float4 curr_keys[4]; + float4 next_keys[4]; + cardinal_keys_for_step(curve_keys, + curve_radius, + key_steps, + num_curve_keys, + num_steps, + step, + k0, k1, k2, k3, + curr_keys); + cardinal_keys_for_step(curve_keys, + curve_radius, + key_steps, + num_curve_keys, + num_steps, + step + 1, + k0, k1, k2, k3, + next_keys); + /* Interpolate between steps. */ + r_keys[0] = (1.0f - t)*curr_keys[0] + t*next_keys[0]; + r_keys[1] = (1.0f - t)*curr_keys[1] + t*next_keys[1]; + r_keys[2] = (1.0f - t)*curr_keys[2] + t*next_keys[2]; + r_keys[3] = (1.0f - t)*curr_keys[3] + t*next_keys[3]; +} + +void Mesh::Curve::keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, size_t k1, + float4 r_keys[2]) const +{ + k0 = max(k0, 0); + k1 = min(k1, num_keys - 1); + const size_t center_step = ((num_steps - 1) / 2); + if(step == center_step) { + /* Center step: regular key location. */ + /* TODO(sergey): Consider adding make_float4(float3, float) + * function. + */ + r_keys[0] = make_float4(curve_keys[first_key + k0].x, + curve_keys[first_key + k0].y, + curve_keys[first_key + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(curve_keys[first_key + k1].x, + curve_keys[first_key + k1].y, + curve_keys[first_key + k1].z, + curve_radius[first_key + k1]); + } + else { + /* Center step is not stored in this array. */ + if(step > center_step) { + step--; + } + const size_t offset = first_key + step * num_curve_keys; + r_keys[0] = make_float4(key_steps[offset + k0].x, + key_steps[offset + k0].y, + key_steps[offset + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(key_steps[offset + k1].x, + key_steps[offset + k1].y, + key_steps[offset + k1].z, + curve_radius[first_key + k1]); + } +} + +void Mesh::Curve::cardinal_keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, size_t k1, + size_t k2, size_t k3, + float4 r_keys[4]) const +{ + k0 = max(k0, 0); + k3 = min(k3, num_keys - 1); + const size_t center_step = ((num_steps - 1) / 2); + if(step == center_step) { + /* Center step: regular key location. */ + r_keys[0] = make_float4(curve_keys[first_key + k0].x, + curve_keys[first_key + k0].y, + curve_keys[first_key + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(curve_keys[first_key + k1].x, + curve_keys[first_key + k1].y, + curve_keys[first_key + k1].z, + curve_radius[first_key + k1]); + r_keys[2] = make_float4(curve_keys[first_key + k2].x, + curve_keys[first_key + k2].y, + curve_keys[first_key + k2].z, + curve_radius[first_key + k2]); + r_keys[3] = make_float4(curve_keys[first_key + k3].x, + curve_keys[first_key + k3].y, + curve_keys[first_key + k3].z, + curve_radius[first_key + k3]); + } + else { + /* Center step is not stored in this array. */ + if(step > center_step) { + step--; + } + const size_t offset = first_key + step * num_curve_keys; + r_keys[0] = make_float4(key_steps[offset + k0].x, + key_steps[offset + k0].y, + key_steps[offset + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(key_steps[offset + k1].x, + key_steps[offset + k1].y, + key_steps[offset + k1].z, + curve_radius[first_key + k1]); + r_keys[2] = make_float4(key_steps[offset + k2].x, + key_steps[offset + k2].y, + key_steps[offset + k2].z, + curve_radius[first_key + k2]); + r_keys[3] = make_float4(key_steps[offset + k3].x, + key_steps[offset + k3].y, + key_steps[offset + k3].z, + curve_radius[first_key + k3]); + } +} + /* SubdFace */ float3 Mesh::SubdFace::normal(const Mesh *mesh) const @@ -394,7 +664,7 @@ void Mesh::compute_bounds() if(use_motion_blur && attr) { size_t steps_size = verts.size() * (motion_steps - 1); float3 *vert_steps = attr->data_float3(); - + for(size_t i = 0; i < steps_size; i++) bnds.grow(vert_steps[i]); } @@ -403,7 +673,7 @@ void Mesh::compute_bounds() if(use_motion_blur && curve_attr) { size_t steps_size = curve_keys.size() * (motion_steps - 1); float3 *key_steps = curve_attr->data_float3(); - + for(size_t i = 0; i < steps_size; i++) bnds.grow(key_steps[i]); } @@ -417,11 +687,11 @@ void Mesh::compute_bounds() for(size_t i = 0; i < curve_keys_size; i++) bnds.grow_safe(curve_keys[i], curve_radius[i]); - + if(use_motion_blur && attr) { size_t steps_size = verts.size() * (motion_steps - 1); float3 *vert_steps = attr->data_float3(); - + for(size_t i = 0; i < steps_size; i++) bnds.grow_safe(vert_steps[i]); } @@ -429,7 +699,7 @@ void Mesh::compute_bounds() if(use_motion_blur && curve_attr) { size_t steps_size = curve_keys.size() * (motion_steps - 1); float3 *key_steps = curve_attr->data_float3(); - + for(size_t i = 0; i < steps_size; i++) bnds.grow_safe(key_steps[i]); } @@ -444,27 +714,12 @@ void Mesh::compute_bounds() bounds = bnds; } -static float3 compute_face_normal(const Mesh::Triangle& t, float3 *verts) -{ - float3 v0 = verts[t.v[0]]; - float3 v1 = verts[t.v[1]]; - float3 v2 = verts[t.v[2]]; - - float3 norm = cross(v1 - v0, v2 - v0); - float normlen = len(norm); - - if(normlen == 0.0f) - return make_float3(1.0f, 0.0f, 0.0f); - - return norm / normlen; -} - void Mesh::add_face_normals() { /* don't compute if already there */ if(attributes.find(ATTR_STD_FACE_NORMAL)) return; - + /* get attributes */ Attribute *attr_fN = attributes.add(ATTR_STD_FACE_NORMAL); float3 *fN = attr_fN->data_float3(); @@ -476,7 +731,7 @@ void Mesh::add_face_normals() float3 *verts_ptr = verts.data(); for(size_t i = 0; i < triangles_size; i++) { - fN[i] = compute_face_normal(get_triangle(i), verts_ptr); + fN[i] = get_triangle(i).compute_normal(verts_ptr); } } @@ -538,7 +793,7 @@ void Mesh::add_vertex_normals() for(size_t i = 0; i < triangles_size; i++) { for(size_t j = 0; j < 3; j++) { - float3 fN = compute_face_normal(get_triangle(i), mP); + float3 fN = get_triangle(i).compute_normal(mP); mN[get_triangle(i).v[j]] += fN; } } @@ -646,7 +901,7 @@ void Mesh::pack_normals(Scene *scene, uint *tri_shader, float4 *vnormal) float3 vNi = vN[i]; if(do_transform) - vNi = normalize(transform_direction(&ntfm, vNi)); + vNi = safe_normalize(transform_direction(&ntfm, vNi)); vnormal[i] = make_float4(vNi.x, vNi.y, vNi.z, 0.0f); } @@ -759,7 +1014,8 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui } } -void Mesh::compute_bvh(DeviceScene *dscene, +void Mesh::compute_bvh(Device *device, + DeviceScene *dscene, SceneParams *params, Progress *progress, int n, @@ -793,9 +1049,11 @@ void Mesh::compute_bvh(DeviceScene *dscene, BVHParams bparams; bparams.use_spatial_split = params->use_bvh_spatial_split; - bparams.use_qbvh = params->use_qbvh; + bparams.use_qbvh = params->use_qbvh && device->info.has_qbvh; bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && params->use_bvh_unaligned_nodes; + bparams.num_motion_triangle_steps = params->num_bvh_time_steps; + bparams.num_motion_curve_steps = params->num_bvh_time_steps; delete bvh; bvh = BVH::create(bparams, objects); @@ -1002,7 +1260,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce if(attr_map_stride == 0) return; - + /* create attribute map */ uint4 *attr_map = dscene->attributes_map.resize(attr_map_stride*scene->objects.size()); memset(attr_map, 0, dscene->attributes_map.size()*sizeof(uint)); @@ -1555,15 +1813,17 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * /* bvh build */ progress.set_status("Updating Scene BVH", "Building"); - VLOG(1) << (scene->params.use_qbvh ? "Using QBVH optimization structure" - : "Using regular BVH optimization structure"); - BVHParams bparams; bparams.top_level = true; - bparams.use_qbvh = scene->params.use_qbvh; + bparams.use_qbvh = scene->params.use_qbvh && device->info.has_qbvh; bparams.use_spatial_split = scene->params.use_bvh_spatial_split; bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && scene->params.use_bvh_unaligned_nodes; + bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps; + bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps; + + VLOG(1) << (bparams.use_qbvh ? "Using QBVH optimization structure" + : "Using regular BVH optimization structure"); delete bvh; bvh = BVH::create(bparams, scene->objects); @@ -1612,9 +1872,14 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * dscene->prim_object.reference((uint*)&pack.prim_object[0], pack.prim_object.size()); device->tex_alloc("__prim_object", dscene->prim_object); } + if(pack.prim_time.size()) { + dscene->prim_time.reference((float2*)&pack.prim_time[0], pack.prim_time.size()); + device->tex_alloc("__prim_time", dscene->prim_time); + } dscene->data.bvh.root = pack.root_index; - dscene->data.bvh.use_qbvh = scene->params.use_qbvh; + dscene->data.bvh.use_qbvh = bparams.use_qbvh; + dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0); } void MeshManager::device_update_flags(Device * /*device*/, @@ -1659,16 +1924,7 @@ void MeshManager::device_update_displacement_images(Device *device, if(node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) { continue; } - if(device->info.pack_images) { - /* If device requires packed images we need to update all - * images now, even if they're not used for displacement. - */ - image_manager->device_update(device, - dscene, - scene, - progress); - return; - } + ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode*>(node); int slot = image_node->slot; if(slot != -1) { @@ -1678,6 +1934,7 @@ void MeshManager::device_update_displacement_images(Device *device, } } } + image_manager->device_prepare_update(dscene); foreach(int slot, bump_images) { pool.push(function_bind(&ImageManager::device_update_slot, image_manager, @@ -1826,6 +2083,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen if(mesh->need_update) { pool.push(function_bind(&Mesh::compute_bvh, mesh, + device, dscene, &scene->params, &progress, @@ -1891,6 +2149,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) device->tex_free(dscene->prim_visibility); device->tex_free(dscene->prim_index); device->tex_free(dscene->prim_object); + device->tex_free(dscene->prim_time); device->tex_free(dscene->tri_shader); device->tex_free(dscene->tri_vnormal); device->tex_free(dscene->tri_vindex); @@ -1912,6 +2171,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) dscene->prim_visibility.clear(); dscene->prim_index.clear(); dscene->prim_object.clear(); + dscene->prim_time.clear(); dscene->tri_shader.clear(); dscene->tri_vnormal.clear(); dscene->tri_vindex.clear(); @@ -1946,14 +2206,14 @@ bool Mesh::need_attribute(Scene *scene, AttributeStandard std) { if(std == ATTR_STD_NONE) return false; - + if(scene->need_global_attribute(std)) return true; foreach(Shader *shader, used_shaders) if(shader->attributes.find(std)) return true; - + return false; } @@ -1965,9 +2225,8 @@ bool Mesh::need_attribute(Scene * /*scene*/, ustring name) foreach(Shader *shader, used_shaders) if(shader->attributes.find(name)) return true; - + return false; } CCL_NAMESPACE_END - diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index c0310f45840..3483ab4fd69 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -17,20 +17,22 @@ #ifndef __MESH_H__ #define __MESH_H__ -#include "attribute.h" -#include "node.h" -#include "shader.h" - -#include "util_boundbox.h" -#include "util_list.h" -#include "util_map.h" -#include "util_param.h" -#include "util_transform.h" -#include "util_types.h" -#include "util_vector.h" +#include "graph/node.h" + +#include "render/attribute.h" +#include "render/shader.h" + +#include "util/util_boundbox.h" +#include "util/util_list.h" +#include "util/util_map.h" +#include "util/util_param.h" +#include "util/util_transform.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN +class Attribute; class BVH; class Device; class DeviceScene; @@ -47,18 +49,36 @@ struct PackedPatchTable; class Mesh : public Node { public: - NODE_DECLARE; + NODE_DECLARE /* Mesh Triangle */ struct Triangle { int v[3]; void bounds_grow(const float3 *verts, BoundBox& bounds) const; + + void motion_verts(const float3 *verts, + const float3 *vert_steps, + size_t num_verts, + size_t num_steps, + float time, + float3 r_verts[3]) const; + + void verts_for_step(const float3 *verts, + const float3 *vert_steps, + size_t num_verts, + size_t num_steps, + size_t step, + float3 r_verts[3]) const; + + float3 compute_normal(const float3 *verts) const; }; Triangle get_triangle(size_t i) const { - Triangle tri = {{triangles[i*3 + 0], triangles[i*3 + 1], triangles[i*3 + 2]}}; + Triangle tri = {{triangles[i*3 + 0], + triangles[i*3 + 1], + triangles[i*3 + 2]}}; return tri; } @@ -78,11 +98,48 @@ public: const float3 *curve_keys, const float *curve_radius, BoundBox& bounds) const; + void bounds_grow(float4 keys[4], BoundBox& bounds) const; void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, const Transform& aligned_space, BoundBox& bounds) const; + + void motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, size_t k1, + float4 r_keys[2]) const; + void cardinal_motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, size_t k1, + size_t k2, size_t k3, + float4 r_keys[4]) const; + + void keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, size_t k1, + float4 r_keys[2]) const; + void cardinal_keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, size_t k1, + size_t k2, size_t k3, + float4 r_keys[4]) const; }; Curve get_curve(size_t i) const @@ -227,7 +284,8 @@ public: void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset); void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset); - void compute_bvh(DeviceScene *dscene, + void compute_bvh(Device *device, + DeviceScene *dscene, SceneParams *params, Progress *progress, int n, diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index adc5b820298..350a56bf185 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "device.h" +#include "device/device.h" -#include "mesh.h" -#include "object.h" -#include "scene.h" -#include "shader.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/scene.h" +#include "render/shader.h" -#include "util_foreach.h" -#include "util_progress.h" +#include "util/util_foreach.h" +#include "util/util_progress.h" CCL_NAMESPACE_BEGIN @@ -121,9 +121,10 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_alloc("displace_input", d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_WRITE_ONLY); + device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY); + device->mem_zero(d_output); DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; @@ -169,6 +170,8 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me if(!done[t.v[j]]) { done[t.v[j]] = true; float3 off = float4_to_float3(offset[k++]); + /* Avoid illegal vertex coordinates. */ + off = ensure_finite3(off); mesh->verts[t.v[j]] += off; if(attr_mP != NULL) { for(int step = 0; step < mesh->motion_steps - 1; step++) { diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp index 913c3c74b42..585ed77b026 100644 --- a/intern/cycles/render/mesh_subdivision.cpp +++ b/intern/cycles/render/mesh_subdivision.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "mesh.h" -#include "attribute.h" -#include "camera.h" +#include "render/mesh.h" +#include "render/attribute.h" +#include "render/camera.h" -#include "subd_split.h" -#include "subd_patch.h" -#include "subd_patch_table.h" +#include "subd/subd_split.h" +#include "subd/subd_patch.h" +#include "subd/subd_patch_table.h" -#include "util_foreach.h" -#include "util_algorithm.h" +#include "util/util_foreach.h" +#include "util/util_algorithm.h" CCL_NAMESPACE_BEGIN @@ -92,7 +92,7 @@ namespace Far { if(vert_edges.size() == 2) { float sharpness = refiner.getLevel(0).getEdgeSharpness(vert_edges[0]); - sharpness = min(sharpness, refiner.getLevel(0).getEdgeSharpness(vert_edges[1])); + sharpness = ccl::min(sharpness, refiner.getLevel(0).getEdgeSharpness(vert_edges[1])); setBaseVertexSharpness(refiner, i, sharpness); } diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index f293af3c40a..2b682756c6a 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -14,20 +14,21 @@ * limitations under the License. */ -#include "image.h" -#include "integrator.h" -#include "nodes.h" -#include "scene.h" -#include "svm.h" -#include "svm_color_util.h" -#include "svm_ramp_util.h" -#include "svm_math_util.h" -#include "osl.h" -#include "constant_fold.h" - -#include "util_sky_model.h" -#include "util_foreach.h" -#include "util_transform.h" +#include "render/image.h" +#include "render/integrator.h" +#include "render/nodes.h" +#include "render/scene.h" +#include "render/svm.h" +#include "kernel/svm/svm_color_util.h" +#include "kernel/svm/svm_ramp_util.h" +#include "kernel/svm/svm_math_util.h" +#include "render/osl.h" +#include "render/constant_fold.h" + +#include "util/util_sky_model.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_transform.h" CCL_NAMESPACE_BEGIN @@ -263,7 +264,8 @@ ImageTextureNode::~ImageTextureNode() image_manager->remove_image(filename.string(), builtin_data, interpolation, - extension); + extension, + use_alpha); } } @@ -362,9 +364,10 @@ void ImageTextureNode::compile(OSLCompiler& compiler) image_manager = compiler.image_manager; if(is_float == -1) { if(builtin_data == NULL) { - ImageManager::ImageDataType type; - type = image_manager->get_image_metadata(filename.string(), NULL, is_linear); - if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4) + ImageDataType type; + bool builtin_free_cache; + type = image_manager->get_image_metadata(filename.string(), NULL, is_linear, builtin_free_cache); + if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = 1; } else { @@ -462,7 +465,8 @@ EnvironmentTextureNode::~EnvironmentTextureNode() image_manager->remove_image(filename.string(), builtin_data, interpolation, - EXTENSION_REPEAT); + EXTENSION_REPEAT, + use_alpha); } } @@ -550,9 +554,10 @@ void EnvironmentTextureNode::compile(OSLCompiler& compiler) image_manager = compiler.image_manager; if(is_float == -1) { if(builtin_data == NULL) { - ImageManager::ImageDataType type; - type = image_manager->get_image_metadata(filename.string(), NULL, is_linear); - if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4) + ImageDataType type; + bool builtin_free_cache; + type = image_manager->get_image_metadata(filename.string(), NULL, is_linear, builtin_free_cache); + if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = 1; } else { @@ -1381,7 +1386,8 @@ PointDensityTextureNode::~PointDensityTextureNode() image_manager->remove_image(filename.string(), builtin_data, interpolation, - EXTENSION_CLIP); + EXTENSION_CLIP, + true); } } @@ -1442,14 +1448,14 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler) else { if(use_density) { compiler.add_node(NODE_VALUE_F, - __float_as_int(0.0f), - compiler.stack_assign(density_out)); + __float_as_int(0.0f), + compiler.stack_assign(density_out)); } if(use_color) { compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out)); compiler.add_node(NODE_VALUE_V, make_float3(TEX_IMAGE_MISSING_R, - TEX_IMAGE_MISSING_G, - TEX_IMAGE_MISSING_B)); + TEX_IMAGE_MISSING_G, + TEX_IMAGE_MISSING_B)); } } } @@ -1787,12 +1793,27 @@ void ConvertNode::compile(OSLCompiler& compiler) assert(0); } +/* Base type for all closure-type nodes */ + +BsdfBaseNode::BsdfBaseNode(const NodeType *node_type) + : ShaderNode(node_type) +{ + special_type = SHADER_SPECIAL_TYPE_CLOSURE; +} + +bool BsdfBaseNode::has_bump() +{ + /* detect if anything is plugged into the normal input besides the default */ + ShaderInput *normal_in = input("Normal"); + return (normal_in && normal_in->link && + normal_in->link->parent->special_type != SHADER_SPECIAL_TYPE_GEOMETRY); +} + /* BSDF Closure */ BsdfNode::BsdfNode(const NodeType *node_type) -: ShaderNode(node_type) +: BsdfBaseNode(node_type) { - special_type = SHADER_SPECIAL_TYPE_CLOSURE; } void BsdfNode::compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3, ShaderInput *param4) @@ -1928,21 +1949,38 @@ GlossyBsdfNode::GlossyBsdfNode() void GlossyBsdfNode::simplify_settings(Scene *scene) { if(distribution_orig == NBUILTIN_CLOSURES) { + roughness_orig = roughness; distribution_orig = distribution; } + else { + /* By default we use original values, so we don't worry about restoring + * defaults later one and can only do override when needed. + */ + roughness = roughness_orig; + distribution = distribution_orig; + } Integrator *integrator = scene->integrator; + ShaderInput *roughness_input = input("Roughness"); if(integrator->filter_glossy == 0.0f) { /* Fallback to Sharp closure for Roughness close to 0. * Note: Keep the epsilon in sync with kernel! */ - ShaderInput *roughness_input = input("Roughness"); if(!roughness_input->link && roughness <= 1e-4f) { + VLOG(1) << "Using sharp glossy BSDF."; distribution = CLOSURE_BSDF_REFLECTION_ID; } } else { - /* Rollback to original distribution when filter glossy is used. */ - distribution = distribution_orig; + /* If filter glossy is used we replace Sharp glossy with GGX so we can + * benefit from closure blur to remove unwanted noise. + */ + if(roughness_input->link == NULL && + distribution == CLOSURE_BSDF_REFLECTION_ID) + { + VLOG(1) << "Using GGX glossy with filter glossy."; + distribution = CLOSURE_BSDF_MICROFACET_GGX_ID; + roughness = 0.0f; + } } closure = distribution; } @@ -1950,7 +1988,8 @@ void GlossyBsdfNode::simplify_settings(Scene *scene) bool GlossyBsdfNode::has_integrator_dependency() { ShaderInput *roughness_input = input("Roughness"); - return !roughness_input->link && roughness <= 1e-4f; + return !roughness_input->link && + (distribution == CLOSURE_BSDF_REFLECTION_ID || roughness <= 1e-4f); } void GlossyBsdfNode::compile(SVMCompiler& compiler) @@ -2005,21 +2044,38 @@ GlassBsdfNode::GlassBsdfNode() void GlassBsdfNode::simplify_settings(Scene *scene) { if(distribution_orig == NBUILTIN_CLOSURES) { + roughness_orig = roughness; distribution_orig = distribution; } + else { + /* By default we use original values, so we don't worry about restoring + * defaults later one and can only do override when needed. + */ + roughness = roughness_orig; + distribution = distribution_orig; + } Integrator *integrator = scene->integrator; + ShaderInput *roughness_input = input("Roughness"); if(integrator->filter_glossy == 0.0f) { /* Fallback to Sharp closure for Roughness close to 0. * Note: Keep the epsilon in sync with kernel! */ - ShaderInput *roughness_input = input("Roughness"); if(!roughness_input->link && roughness <= 1e-4f) { + VLOG(1) << "Using sharp glass BSDF."; distribution = CLOSURE_BSDF_SHARP_GLASS_ID; } } else { - /* Rollback to original distribution when filter glossy is used. */ - distribution = distribution_orig; + /* If filter glossy is used we replace Sharp glossy with GGX so we can + * benefit from closure blur to remove unwanted noise. + */ + if(roughness_input->link == NULL && + distribution == CLOSURE_BSDF_SHARP_GLASS_ID) + { + VLOG(1) << "Using GGX glass with filter glossy."; + distribution = CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID; + roughness = 0.0f; + } } closure = distribution; } @@ -2027,7 +2083,8 @@ void GlassBsdfNode::simplify_settings(Scene *scene) bool GlassBsdfNode::has_integrator_dependency() { ShaderInput *roughness_input = input("Roughness"); - return !roughness_input->link && roughness <= 1e-4f; + return !roughness_input->link && + (distribution == CLOSURE_BSDF_SHARP_GLASS_ID || roughness <= 1e-4f); } void GlassBsdfNode::compile(SVMCompiler& compiler) @@ -2082,21 +2139,38 @@ RefractionBsdfNode::RefractionBsdfNode() void RefractionBsdfNode::simplify_settings(Scene *scene) { if(distribution_orig == NBUILTIN_CLOSURES) { + roughness_orig = roughness; distribution_orig = distribution; } + else { + /* By default we use original values, so we don't worry about restoring + * defaults later one and can only do override when needed. + */ + roughness = roughness_orig; + distribution = distribution_orig; + } Integrator *integrator = scene->integrator; + ShaderInput *roughness_input = input("Roughness"); if(integrator->filter_glossy == 0.0f) { /* Fallback to Sharp closure for Roughness close to 0. * Note: Keep the epsilon in sync with kernel! */ - ShaderInput *roughness_input = input("Roughness"); if(!roughness_input->link && roughness <= 1e-4f) { + VLOG(1) << "Using sharp refraction BSDF."; distribution = CLOSURE_BSDF_REFRACTION_ID; } } else { - /* Rollback to original distribution when filter glossy is used. */ - distribution = distribution_orig; + /* If filter glossy is used we replace Sharp glossy with GGX so we can + * benefit from closure blur to remove unwanted noise. + */ + if(roughness_input->link == NULL && + distribution == CLOSURE_BSDF_REFRACTION_ID) + { + VLOG(1) << "Using GGX refraction with filter glossy."; + distribution = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + roughness = 0.0f; + } } closure = distribution; } @@ -2104,7 +2178,8 @@ void RefractionBsdfNode::simplify_settings(Scene *scene) bool RefractionBsdfNode::has_integrator_dependency() { ShaderInput *roughness_input = input("Roughness"); - return !roughness_input->link && roughness <= 1e-4f; + return !roughness_input->link && + (distribution == CLOSURE_BSDF_REFRACTION_ID || roughness <= 1e-4f); } void RefractionBsdfNode::compile(SVMCompiler& compiler) @@ -2228,6 +2303,153 @@ void DiffuseBsdfNode::compile(OSLCompiler& compiler) compiler.add(this, "node_diffuse_bsdf"); } +/* Disney principled BSDF Closure */ +NODE_DEFINE(PrincipledBsdfNode) +{ + NodeType* type = NodeType::add("principled_bsdf", create, NodeType::SHADER); + + static NodeEnum distribution_enum; + distribution_enum.insert("GGX", CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID); + distribution_enum.insert("Multiscatter GGX", CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); + SOCKET_ENUM(distribution, "Distribution", distribution_enum, CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); + SOCKET_IN_COLOR(base_color, "Base Color", make_float3(0.8f, 0.8f, 0.8f)); + SOCKET_IN_COLOR(subsurface_color, "Subsurface Color", make_float3(0.8f, 0.8f, 0.8f)); + SOCKET_IN_FLOAT(metallic, "Metallic", 0.0f); + SOCKET_IN_FLOAT(subsurface, "Subsurface", 0.0f); + SOCKET_IN_VECTOR(subsurface_radius, "Subsurface Radius", make_float3(0.1f, 0.1f, 0.1f)); + SOCKET_IN_FLOAT(specular, "Specular", 0.0f); + SOCKET_IN_FLOAT(roughness, "Roughness", 0.5f); + SOCKET_IN_FLOAT(specular_tint, "Specular Tint", 0.0f); + SOCKET_IN_FLOAT(anisotropic, "Anisotropic", 0.0f); + SOCKET_IN_FLOAT(sheen, "Sheen", 0.0f); + SOCKET_IN_FLOAT(sheen_tint, "Sheen Tint", 0.0f); + SOCKET_IN_FLOAT(clearcoat, "Clearcoat", 0.0f); + SOCKET_IN_FLOAT(clearcoat_roughness, "Clearcoat Roughness", 0.03f); + SOCKET_IN_FLOAT(ior, "IOR", 0.0f); + SOCKET_IN_FLOAT(transmission, "Transmission", 0.0f); + SOCKET_IN_FLOAT(transmission_roughness, "Transmission Roughness", 0.0f); + SOCKET_IN_FLOAT(anisotropic_rotation, "Anisotropic Rotation", 0.0f); + SOCKET_IN_NORMAL(normal, "Normal", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_NORMAL); + SOCKET_IN_NORMAL(clearcoat_normal, "Clearcoat Normal", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_NORMAL); + SOCKET_IN_NORMAL(tangent, "Tangent", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TANGENT); + SOCKET_IN_FLOAT(surface_mix_weight, "SurfaceMixWeight", 0.0f, SocketType::SVM_INTERNAL); + + SOCKET_OUT_CLOSURE(BSDF, "BSDF"); + + return type; +} + +PrincipledBsdfNode::PrincipledBsdfNode() + : BsdfBaseNode(node_type) +{ + closure = CLOSURE_BSDF_PRINCIPLED_ID; + distribution = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID; + distribution_orig = NBUILTIN_CLOSURES; +} + +bool PrincipledBsdfNode::has_surface_bssrdf() +{ + ShaderInput *subsurface_in = input("Subsurface"); + return (subsurface_in->link != NULL || subsurface > CLOSURE_WEIGHT_CUTOFF); +} + +void PrincipledBsdfNode::attributes(Shader *shader, AttributeRequestSet *attributes) +{ + if(shader->has_surface) { + ShaderInput *tangent_in = input("Tangent"); + + if(!tangent_in->link) + attributes->add(ATTR_STD_GENERATED); + } + + ShaderNode::attributes(shader, attributes); +} + +void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic, ShaderInput *p_subsurface, ShaderInput *p_subsurface_radius, + ShaderInput *p_specular, ShaderInput *p_roughness, ShaderInput *p_specular_tint, ShaderInput *p_anisotropic, + ShaderInput *p_sheen, ShaderInput *p_sheen_tint, ShaderInput *p_clearcoat, ShaderInput *p_clearcoat_roughness, + ShaderInput *p_ior, ShaderInput *p_transmission, ShaderInput *p_anisotropic_rotation, ShaderInput *p_transmission_roughness) +{ + ShaderInput *base_color_in = input("Base Color"); + ShaderInput *subsurface_color_in = input("Subsurface Color"); + ShaderInput *normal_in = input("Normal"); + ShaderInput *clearcoat_normal_in = input("Clearcoat Normal"); + ShaderInput *tangent_in = input("Tangent"); + + float3 weight = make_float3(1.0f, 1.0f, 1.0f); + + compiler.add_node(NODE_CLOSURE_SET_WEIGHT, weight); + + int normal_offset = compiler.stack_assign_if_linked(normal_in); + int clearcoat_normal_offset = compiler.stack_assign_if_linked(clearcoat_normal_in); + int tangent_offset = compiler.stack_assign_if_linked(tangent_in); + int specular_offset = compiler.stack_assign(p_specular); + int roughness_offset = compiler.stack_assign(p_roughness); + int specular_tint_offset = compiler.stack_assign(p_specular_tint); + int anisotropic_offset = compiler.stack_assign(p_anisotropic); + int sheen_offset = compiler.stack_assign(p_sheen); + int sheen_tint_offset = compiler.stack_assign(p_sheen_tint); + int clearcoat_offset = compiler.stack_assign(p_clearcoat); + int clearcoat_roughness_offset = compiler.stack_assign(p_clearcoat_roughness); + int ior_offset = compiler.stack_assign(p_ior); + int transmission_offset = compiler.stack_assign(p_transmission); + int transmission_roughness_offset = compiler.stack_assign(p_transmission_roughness); + int anisotropic_rotation_offset = compiler.stack_assign(p_anisotropic_rotation); + int subsurface_radius_offset = compiler.stack_assign(p_subsurface_radius); + + compiler.add_node(NODE_CLOSURE_BSDF, + compiler.encode_uchar4(closure, + compiler.stack_assign(p_metallic), + compiler.stack_assign(p_subsurface), + compiler.closure_mix_weight_offset()), + __float_as_int((p_metallic) ? get_float(p_metallic->socket_type) : 0.0f), + __float_as_int((p_subsurface) ? get_float(p_subsurface->socket_type) : 0.0f)); + + compiler.add_node(normal_offset, tangent_offset, + compiler.encode_uchar4(specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset), + compiler.encode_uchar4(sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset)); + + compiler.add_node(compiler.encode_uchar4(ior_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset), + distribution, SVM_STACK_INVALID, SVM_STACK_INVALID); + + float3 bc_default = get_float3(base_color_in->socket_type); + + compiler.add_node(((base_color_in->link) ? compiler.stack_assign(base_color_in) : SVM_STACK_INVALID), + __float_as_int(bc_default.x), __float_as_int(bc_default.y), __float_as_int(bc_default.z)); + + compiler.add_node(clearcoat_normal_offset, subsurface_radius_offset, SVM_STACK_INVALID, SVM_STACK_INVALID); + + float3 ss_default = get_float3(subsurface_color_in->socket_type); + + compiler.add_node(((subsurface_color_in->link) ? compiler.stack_assign(subsurface_color_in) : SVM_STACK_INVALID), + __float_as_int(ss_default.x), __float_as_int(ss_default.y), __float_as_int(ss_default.z)); +} + +bool PrincipledBsdfNode::has_integrator_dependency() +{ + ShaderInput *roughness_input = input("Roughness"); + return !roughness_input->link && roughness <= 1e-4f; +} + +void PrincipledBsdfNode::compile(SVMCompiler& compiler) +{ + compile(compiler, input("Metallic"), input("Subsurface"), input("Subsurface Radius"), input("Specular"), + input("Roughness"), input("Specular Tint"), input("Anisotropic"), input("Sheen"), input("Sheen Tint"), + input("Clearcoat"), input("Clearcoat Roughness"), input("IOR"), input("Transmission"), + input("Anisotropic Rotation"), input("Transmission Roughness")); +} + +void PrincipledBsdfNode::compile(OSLCompiler& compiler) +{ + compiler.parameter(this, "distribution"); + compiler.add(this, "node_principled_bsdf"); +} + +bool PrincipledBsdfNode::has_bssrdf_bump() +{ + return has_surface_bssrdf() && has_bump(); +} + /* Translucent BSDF Closure */ NODE_DEFINE(TranslucentBsdfNode) @@ -2421,7 +2643,7 @@ void BackgroundNode::compile(SVMCompiler& compiler) if(color_in->link || strength_in->link) { compiler.add_node(NODE_EMISSION_WEIGHT, compiler.stack_assign(color_in), - compiler.stack_assign(strength_in)); + compiler.stack_assign(strength_in)); } else compiler.add_node(NODE_CLOSURE_SET_WEIGHT, color*strength); @@ -3027,6 +3249,8 @@ NODE_DEFINE(LightPathNode) SOCKET_OUT_FLOAT(is_volume_scatter_ray, "Is Volume Scatter Ray"); SOCKET_OUT_FLOAT(ray_length, "Ray Length"); SOCKET_OUT_FLOAT(ray_depth, "Ray Depth"); + SOCKET_OUT_FLOAT(diffuse_depth, "Diffuse Depth"); + SOCKET_OUT_FLOAT(glossy_depth, "Glossy Depth"); SOCKET_OUT_FLOAT(transparent_depth, "Transparent Depth"); SOCKET_OUT_FLOAT(transmission_depth, "Transmission Depth"); @@ -3093,6 +3317,16 @@ void LightPathNode::compile(SVMCompiler& compiler) compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_depth, compiler.stack_assign(out)); } + out = output("Diffuse Depth"); + if(!out->links.empty()) { + compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_diffuse, compiler.stack_assign(out)); + } + + out = output("Glossy Depth"); + if(!out->links.empty()) { + compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_glossy, compiler.stack_assign(out)); + } + out = output("Transparent Depth"); if(!out->links.empty()) { compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_transparent, compiler.stack_assign(out)); @@ -3993,7 +4227,7 @@ NODE_DEFINE(SeparateRGBNode) SOCKET_IN_COLOR(color, "Image", make_float3(0.0f, 0.0f, 0.0f)); - SOCKET_OUT_FLOAT(g, "R"); + SOCKET_OUT_FLOAT(r, "R"); SOCKET_OUT_FLOAT(g, "G"); SOCKET_OUT_FLOAT(b, "B"); diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index eb0f7977dd1..4ec485d521b 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -17,10 +17,10 @@ #ifndef __NODES_H__ #define __NODES_H__ -#include "graph.h" -#include "node.h" +#include "render/graph.h" +#include "graph/node.h" -#include "util_string.h" +#include "util/util_string.h" CCL_NAMESPACE_BEGIN @@ -252,6 +252,7 @@ public: class PointDensityTextureNode : public ShaderNode { public: SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode) + virtual int get_group() { return NODE_GROUP_LEVEL_3; } ~PointDensityTextureNode(); ShaderNode *clone() const; @@ -321,25 +322,33 @@ private: static bool initialized; }; -class BsdfNode : public ShaderNode { +class BsdfBaseNode : public ShaderNode { public: - explicit BsdfNode(const NodeType *node_type); - SHADER_NODE_BASE_CLASS(BsdfNode); + BsdfBaseNode(const NodeType *node_type); bool has_spatial_varying() { return true; } - void compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3 = NULL, ShaderInput *param4 = NULL); virtual ClosureType get_closure_type() { return closure; } - - float3 color; - float3 normal; - float surface_mix_weight; - ClosureType closure; + virtual bool has_bump(); virtual bool equals(const ShaderNode& /*other*/) { /* TODO(sergey): With some care BSDF nodes can be de-duplicated. */ return false; } + + ClosureType closure; +}; + +class BsdfNode : public BsdfBaseNode { +public: + explicit BsdfNode(const NodeType *node_type); + SHADER_NODE_BASE_CLASS(BsdfNode) + + void compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3 = NULL, ShaderInput *param4 = NULL); + + float3 color; + float3 normal; + float surface_mix_weight; }; class AnisotropicBsdfNode : public BsdfNode { @@ -361,6 +370,31 @@ public: float roughness; }; +/* Disney principled BRDF */ +class PrincipledBsdfNode : public BsdfBaseNode { +public: + SHADER_NODE_CLASS(PrincipledBsdfNode) + + bool has_surface_bssrdf(); + bool has_bssrdf_bump(); + void compile(SVMCompiler& compiler, ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius, + ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, ShaderInput *anisotropic, + ShaderInput *sheen, ShaderInput *sheen_tint, ShaderInput *clearcoat, ShaderInput *clearcoat_roughness, + ShaderInput *ior, ShaderInput *transmission, ShaderInput *anisotropic_rotation, ShaderInput *transmission_roughness); + + float3 base_color; + float3 subsurface_color, subsurface_radius; + float metallic, subsurface, specular, roughness, specular_tint, anisotropic, + sheen, sheen_tint, clearcoat, clearcoat_roughness, ior, transmission, + anisotropic_rotation, transmission_roughness; + float3 normal, clearcoat_normal, tangent; + float surface_mix_weight; + ClosureType distribution, distribution_orig; + + bool has_integrator_dependency(); + void attributes(Shader *shader, AttributeRequestSet *attributes); +}; + class TranslucentBsdfNode : public BsdfNode { public: SHADER_NODE_CLASS(TranslucentBsdfNode) @@ -388,7 +422,7 @@ public: bool has_integrator_dependency(); ClosureType get_closure_type() { return distribution; } - float roughness; + float roughness, roughness_orig; ClosureType distribution, distribution_orig; }; @@ -400,7 +434,7 @@ public: bool has_integrator_dependency(); ClosureType get_closure_type() { return distribution; } - float roughness, IOR; + float roughness, roughness_orig, IOR; ClosureType distribution, distribution_orig; }; @@ -412,7 +446,7 @@ public: bool has_integrator_dependency(); ClosureType get_closure_type() { return distribution; } - float roughness, IOR; + float roughness, roughness_orig, IOR; ClosureType distribution, distribution_orig; }; @@ -445,6 +479,7 @@ public: virtual ClosureType get_closure_type() { return CLOSURE_EMISSION_ID; } bool has_surface_emission() { return true; } + bool has_volume_support() { return true; } float3 color; float strength; @@ -496,6 +531,7 @@ public: return ShaderNode::get_feature() | NODE_FEATURE_VOLUME; } virtual ClosureType get_closure_type() { return closure; } + virtual bool has_volume_support() { return true; } float3 color; float density; @@ -641,7 +677,7 @@ public: class MixClosureWeightNode : public ShaderNode { public: - SHADER_NODE_CLASS(MixClosureWeightNode); + SHADER_NODE_CLASS(MixClosureWeightNode) float weight; float fac; @@ -887,7 +923,7 @@ public: class CurvesNode : public ShaderNode { public: explicit CurvesNode(const NodeType *node_type); - SHADER_NODE_BASE_CLASS(CurvesNode); + SHADER_NODE_BASE_CLASS(CurvesNode) virtual int get_group() { return NODE_GROUP_LEVEL_3; } @@ -946,6 +982,8 @@ public: /* ideally we could beter detect this, but we can't query this now */ bool has_spatial_varying() { return true; } + bool has_volume_support() { return true; } + virtual bool equals(const ShaderNode& /*other*/) { return false; } string filepath; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 8b8b988b969..12690090066 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -14,22 +14,22 @@ * limitations under the License. */ -#include "camera.h" -#include "device.h" -#include "light.h" -#include "mesh.h" -#include "curves.h" -#include "object.h" -#include "particles.h" -#include "scene.h" - -#include "util_foreach.h" -#include "util_logging.h" -#include "util_map.h" -#include "util_progress.h" -#include "util_vector.h" - -#include "subd_patch_table.h" +#include "render/camera.h" +#include "device/device.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/curves.h" +#include "render/object.h" +#include "render/particles.h" +#include "render/scene.h" + +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_map.h" +#include "util/util_progress.h" +#include "util/util_vector.h" + +#include "subd/subd_patch_table.h" CCL_NAMESPACE_BEGIN @@ -49,6 +49,8 @@ NODE_DEFINE(Object) SOCKET_POINT(dupli_generated, "Dupli Generated", make_float3(0.0f, 0.0f, 0.0f)); SOCKET_POINT2(dupli_uv, "Dupli UV", make_float2(0.0f, 0.0f)); + SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", false); + return type; } @@ -166,7 +168,7 @@ void Object::apply_transform(bool apply_to_motion) float3 c0 = transform_get_column(&tfm, 0); float3 c1 = transform_get_column(&tfm, 1); float3 c2 = transform_get_column(&tfm, 2); - float scalar = pow(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f); + float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f); /* apply transform to curve keys */ for(size_t i = 0; i < mesh->curve_keys.size(); i++) { @@ -260,6 +262,17 @@ bool Object::is_traceable() return true; } +uint Object::visibility_for_tracing() const { + uint trace_visibility = visibility; + if (is_shadow_catcher) { + trace_visibility &= ~PATH_RAY_SHADOW_NON_CATCHER; + } + else { + trace_visibility &= ~PATH_RAY_SHADOW_CATCHER; + } + return trace_visibility; +} + /* Object Manager */ ObjectManager::ObjectManager() @@ -354,6 +367,13 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s /* OBJECT_PROPERTIES */ objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index)); + if(mesh->use_motion_blur) { + state->have_motion = true; + } + if(mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { + flag |= SD_OBJECT_HAS_VERTEX_MOTION; + } + if(state->need_motion == Scene::MOTION_PASS) { /* Motion transformations, is world/object space depending if mesh * comes with deformed position in object space, or if we transform @@ -374,9 +394,6 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s mtfm.pre = mtfm.pre * itfm; mtfm.post = mtfm.post * itfm; } - else { - flag |= SD_OBJECT_HAS_VERTEX_MOTION; - } memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm.pre, sizeof(float4)*3); memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm.post, sizeof(float4)*3); @@ -395,10 +412,6 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s } #endif - if(mesh->use_motion_blur) { - state->have_motion = true; - } - /* Dupli object coords and motion info. */ int totalsteps = mesh->motion_steps; int numsteps = (totalsteps - 1)/2; @@ -410,7 +423,7 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s /* Object flag. */ if(ob->use_holdout) { - flag |= SD_HOLDOUT_MASK; + flag |= SD_OBJECT_HOLDOUT_MASK; } state->object_flag[object_index] = flag; @@ -597,6 +610,12 @@ void ObjectManager::device_update_flags(Device *device, else { object_flag[object_index] &= ~SD_OBJECT_HAS_VOLUME; } + if(object->is_shadow_catcher) { + object_flag[object_index] |= SD_OBJECT_SHADOW_CATCHER; + } + else { + object_flag[object_index] &= ~SD_OBJECT_SHADOW_CATCHER; + } if(bounds_valid) { foreach(Object *volume_object, volume_objects) { @@ -716,9 +735,9 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u if(progress.get_cancel()) return; } - object_flag[i] |= SD_TRANSFORM_APPLIED; + object_flag[i] |= SD_OBJECT_TRANSFORM_APPLIED; if(object->mesh->transform_negative_scaled) - object_flag[i] |= SD_NEGATIVE_SCALE_APPLIED; + object_flag[i] |= SD_OBJECT_NEGATIVE_SCALE_APPLIED; } else have_instancing = true; diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 7e306fab2a8..6927bbfe4c7 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -17,14 +17,14 @@ #ifndef __OBJECT_H__ #define __OBJECT_H__ -#include "node.h" -#include "scene.h" +#include "graph/node.h" +#include "render/scene.h" -#include "util_boundbox.h" -#include "util_param.h" -#include "util_transform.h" -#include "util_thread.h" -#include "util_types.h" +#include "util/util_boundbox.h" +#include "util/util_param.h" +#include "util/util_transform.h" +#include "util/util_thread.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -40,7 +40,7 @@ struct Transform; class Object : public Node { public: - NODE_DECLARE; + NODE_DECLARE Mesh *mesh; Transform tfm; @@ -53,13 +53,14 @@ public: bool use_motion; bool hide_on_missing_motion; bool use_holdout; + bool is_shadow_catcher; float3 dupli_generated; float2 dupli_uv; ParticleSystem *particle_system; int particle_index; - + Object(); ~Object(); @@ -74,6 +75,11 @@ public: * kernel scene. */ bool is_traceable(); + + /* Combine object's visibility with all possible internal run-time + * determined flags which denotes trace-time visibility. + */ + uint visibility_for_tracing() const; }; /* Object Manager */ diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 67b68e63cb2..5c5ac6e2be9 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -14,26 +14,26 @@ * limitations under the License. */ -#include "device.h" +#include "device/device.h" -#include "graph.h" -#include "light.h" -#include "osl.h" -#include "scene.h" -#include "shader.h" -#include "nodes.h" +#include "render/graph.h" +#include "render/light.h" +#include "render/osl.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/nodes.h" #ifdef WITH_OSL -#include "osl_globals.h" -#include "osl_services.h" -#include "osl_shader.h" +#include "kernel/osl/osl_globals.h" +#include "kernel/osl/osl_services.h" +#include "kernel/osl/osl_shader.h" -#include "util_foreach.h" -#include "util_logging.h" -#include "util_md5.h" -#include "util_path.h" -#include "util_progress.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_md5.h" +#include "util/util_path.h" +#include "util/util_progress.h" #endif @@ -156,6 +156,7 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s og->surface_state.clear(); og->volume_state.clear(); og->displacement_state.clear(); + og->bump_state.clear(); og->background_state.reset(); } @@ -232,8 +233,10 @@ void OSLShaderManager::shading_system_init() "glossy", /* PATH_RAY_GLOSSY */ "singular", /* PATH_RAY_SINGULAR */ "transparent", /* PATH_RAY_TRANSPARENT */ - "shadow", /* PATH_RAY_SHADOW_OPAQUE */ - "shadow", /* PATH_RAY_SHADOW_TRANSPARENT */ + "shadow", /* PATH_RAY_SHADOW_OPAQUE_NON_CATCHER */ + "shadow", /* PATH_RAY_SHADOW_OPAQUE_CATCHER */ + "shadow", /* PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER */ + "shadow", /* PATH_RAY_SHADOW_TRANSPARENT_CATCHER */ "__unused__", "__unused__", @@ -718,6 +721,7 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) current_shader->has_surface_bssrdf = true; current_shader->has_bssrdf_bump = true; /* can't detect yet */ } + current_shader->has_bump = true; /* can't detect yet */ } if(node->has_spatial_varying()) { @@ -1026,6 +1030,9 @@ void OSLCompiler::generate_nodes(const ShaderNodeSet& nodes) if(node->has_bssrdf_bump()) current_shader->has_bssrdf_bump = true; } + if(node->has_bump()) { + current_shader->has_bump = true; + } } else if(current_type == SHADER_TYPE_VOLUME) { if(node->has_spatial_varying()) @@ -1088,23 +1095,14 @@ void OSLCompiler::compile(Scene *scene, OSLGlobals *og, Shader *shader) ShaderGraph *graph = shader->graph; ShaderNode *output = (graph)? graph->output(): NULL; - /* copy graph for shader with bump mapping */ - if(output->input("Surface")->link && output->input("Displacement")->link) - if(!shader->graph_bump) - shader->graph_bump = shader->graph->copy(); + bool has_bump = (shader->displacement_method != DISPLACE_TRUE) && + output->input("Surface")->link && output->input("Displacement")->link; /* finalize */ shader->graph->finalize(scene, - false, - true, - shader->has_integrator_dependency); - if(shader->graph_bump) { - shader->graph_bump->finalize(scene, - true, - true, - shader->has_integrator_dependency, - shader->displacement_method == DISPLACE_BOTH); - } + has_bump, + shader->has_integrator_dependency, + shader->displacement_method == DISPLACE_BOTH); current_shader = shader; @@ -1112,7 +1110,8 @@ void OSLCompiler::compile(Scene *scene, OSLGlobals *og, Shader *shader) shader->has_surface_emission = false; shader->has_surface_transparent = false; shader->has_surface_bssrdf = false; - shader->has_bssrdf_bump = false; + shader->has_bump = has_bump; + shader->has_bssrdf_bump = has_bump; shader->has_volume = false; shader->has_displacement = false; shader->has_surface_spatial_varying = false; @@ -1124,8 +1123,8 @@ void OSLCompiler::compile(Scene *scene, OSLGlobals *og, Shader *shader) if(shader->used && graph && output->input("Surface")->link) { shader->osl_surface_ref = compile_type(shader, shader->graph, SHADER_TYPE_SURFACE); - if(shader->graph_bump && shader->displacement_method != DISPLACE_TRUE) - shader->osl_surface_bump_ref = compile_type(shader, shader->graph_bump, SHADER_TYPE_BUMP); + if(has_bump) + shader->osl_surface_bump_ref = compile_type(shader, shader->graph, SHADER_TYPE_BUMP); else shader->osl_surface_bump_ref = OSL::ShaderGroupRef(); diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h index b131b672b8c..2be1126fdd3 100644 --- a/intern/cycles/render/osl.h +++ b/intern/cycles/render/osl.h @@ -17,13 +17,13 @@ #ifndef __OSL_H__ #define __OSL_H__ -#include "util_set.h" -#include "util_string.h" -#include "util_thread.h" +#include "util/util_set.h" +#include "util/util_string.h" +#include "util/util_thread.h" -#include "graph.h" -#include "nodes.h" -#include "shader.h" +#include "render/graph.h" +#include "render/nodes.h" +#include "render/shader.h" #ifdef WITH_OSL #include <OSL/oslcomp.h> diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index 1a35d60fb4b..a51822a08be 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "device.h" -#include "particles.h" -#include "scene.h" - -#include "util_foreach.h" -#include "util_logging.h" -#include "util_map.h" -#include "util_progress.h" -#include "util_vector.h" +#include "device/device.h" +#include "render/particles.h" +#include "render/scene.h" + +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_map.h" +#include "util/util_progress.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/particles.h b/intern/cycles/render/particles.h index 2509e27b44b..66d46114b3e 100644 --- a/intern/cycles/render/particles.h +++ b/intern/cycles/render/particles.h @@ -17,8 +17,8 @@ #ifndef __PARTICLES_H__ #define __PARTICLES_H__ -#include "util_types.h" -#include "util_vector.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 193e29d3bf4..1940ac22f6f 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -16,28 +16,28 @@ #include <stdlib.h> -#include "background.h" -#include "bake.h" -#include "camera.h" -#include "curves.h" -#include "device.h" -#include "film.h" -#include "integrator.h" -#include "light.h" -#include "mesh.h" -#include "object.h" -#include "osl.h" -#include "particles.h" -#include "scene.h" -#include "shader.h" -#include "svm.h" -#include "tables.h" +#include "render/background.h" +#include "render/bake.h" +#include "render/camera.h" +#include "render/curves.h" +#include "device/device.h" +#include "render/film.h" +#include "render/integrator.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/osl.h" +#include "render/particles.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/svm.h" +#include "render/tables.h" #include "volume.h" -#include "util_foreach.h" -#include "util_guarded_allocator.h" -#include "util_logging.h" -#include "util_progress.h" +#include "util/util_foreach.h" +#include "util/util_guarded_allocator.h" +#include "util/util_logging.h" +#include "util/util_progress.h" CCL_NAMESPACE_BEGIN @@ -155,8 +155,6 @@ void Scene::device_update(Device *device_, Progress& progress) * - Film needs light manager to run for use_light_visibility * - Lookup tables are done a second time to handle film tables */ - - image_manager->set_pack_images(device->info.pack_images); progress.set_status("Updating Shaders"); shader_manager->device_update(device, &dscene, this, progress); diff --git a/intern/cycles/render/scene.cpp.orig b/intern/cycles/render/scene.cpp.orig new file mode 100644 index 00000000000..0297e361f03 --- /dev/null +++ b/intern/cycles/render/scene.cpp.orig @@ -0,0 +1,377 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> + +<<<<<<< HEAD +#include "background.h" +#include "bake.h" +#include "camera.h" +#include "curves.h" +#include "device.h" +#include "film.h" +#include "integrator.h" +#include "light.h" +#include "mesh.h" +#include "object.h" +#include "osl.h" +#include "particles.h" +#include "scene.h" +#include "shader.h" +#include "svm.h" +#include "tables.h" +#include "volume.h" + +#include "util_foreach.h" +#include "util_guarded_allocator.h" +#include "util_logging.h" +#include "util_progress.h" +======= +#include "render/background.h" +#include "render/bake.h" +#include "render/camera.h" +#include "render/curves.h" +#include "device/device.h" +#include "render/film.h" +#include "render/integrator.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/osl.h" +#include "render/particles.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/svm.h" +#include "render/tables.h" + +#include "util/util_foreach.h" +#include "util/util_guarded_allocator.h" +#include "util/util_logging.h" +#include "util/util_progress.h" +>>>>>>> origin/master + +CCL_NAMESPACE_BEGIN + +Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) +: params(params_) +{ + device = NULL; + memset(&dscene.data, 0, sizeof(dscene.data)); + + camera = new Camera(); + lookup_tables = new LookupTables(); + film = new Film(); + background = new Background(); + light_manager = new LightManager(); + mesh_manager = new MeshManager(); + object_manager = new ObjectManager(); + integrator = new Integrator(); + image_manager = new ImageManager(device_info_); + particle_system_manager = new ParticleSystemManager(); + curve_system_manager = new CurveSystemManager(); + bake_manager = new BakeManager(); + volume_manager = new VolumeManager(); + + /* OSL only works on the CPU */ + if(device_info_.type == DEVICE_CPU) + shader_manager = ShaderManager::create(this, params.shadingsystem); + else + shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM); +} + +Scene::~Scene() +{ + free_memory(true); +} + +void Scene::free_memory(bool final) +{ + foreach(Shader *s, shaders) + delete s; + foreach(Mesh *m, meshes) + delete m; + foreach(Object *o, objects) + delete o; + foreach(Light *l, lights) + delete l; + foreach(ParticleSystem *p, particle_systems) + delete p; + foreach(Volume *v, volumes) + delete v; + + shaders.clear(); + meshes.clear(); + objects.clear(); + lights.clear(); + particle_systems.clear(); + volumes.clear(); + + if(device) { + camera->device_free(device, &dscene, this); + film->device_free(device, &dscene, this); + background->device_free(device, &dscene); + integrator->device_free(device, &dscene); + + object_manager->device_free(device, &dscene); + mesh_manager->device_free(device, &dscene); + shader_manager->device_free(device, &dscene, this); + light_manager->device_free(device, &dscene); + + particle_system_manager->device_free(device, &dscene); + curve_system_manager->device_free(device, &dscene); + + bake_manager->device_free(device, &dscene); + + if(!params.persistent_data || final) + image_manager->device_free(device, &dscene); + else + image_manager->device_free_builtin(device, &dscene); + + lookup_tables->device_free(device, &dscene); + volume_manager->device_free(device, &dscene); + } + + if(final) { + delete lookup_tables; + delete camera; + delete film; + delete background; + delete integrator; + delete object_manager; + delete mesh_manager; + delete shader_manager; + delete light_manager; + delete particle_system_manager; + delete curve_system_manager; + delete image_manager; + delete bake_manager; + delete volume_manager; + } +} + +void Scene::device_update(Device *device_, Progress& progress) +{ + if(!device) + device = device_; + + bool print_stats = need_data_update(); + + /* The order of updates is important, because there's dependencies between + * the different managers, using data computed by previous managers. + * + * - Image manager uploads images used by shaders. + * - Camera may be used for adaptive subdivision. + * - Displacement shader must have all shader data available. + * - Light manager needs lookup tables and final mesh data to compute emission CDF. + * - Film needs light manager to run for use_light_visibility + * - Lookup tables are done a second time to handle film tables + */ + + progress.set_status("Updating Shaders"); + shader_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Background"); + background->device_update(device, &dscene, this); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Camera"); + camera->device_update(device, &dscene, this); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Meshes Flags"); + mesh_manager->device_update_flags(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Objects"); + object_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Meshes"); + mesh_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Objects Flags"); + object_manager->device_update_flags(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Images"); + image_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Camera Volume"); + camera->device_update_volume(device, &dscene, this); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Hair Systems"); + curve_system_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Lookup Tables"); + lookup_tables->device_update(device, &dscene); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Lights"); + light_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Particle Systems"); + particle_system_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Integrator"); + integrator->device_update(device, &dscene, this); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Film"); + film->device_update(device, &dscene, this); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Lookup Tables"); + lookup_tables->device_update(device, &dscene); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating Baking"); + bake_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + progress.set_status("Updating OpenVDB Volumes"); + volume_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel() || device->have_error()) return; + + if(device->have_error() == false) { + progress.set_status("Updating Device", "Writing constant memory"); + device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); + } + + if(print_stats) { + size_t mem_used = util_guarded_get_mem_used(); + size_t mem_peak = util_guarded_get_mem_peak(); + + VLOG(1) << "System memory statistics after full device sync:\n" + << " Usage: " << string_human_readable_number(mem_used) + << " (" << string_human_readable_size(mem_used) << ")\n" + << " Peak: " << string_human_readable_number(mem_peak) + << " (" << string_human_readable_size(mem_peak) << ")"; + } +} + +Scene::MotionType Scene::need_motion(bool advanced_shading) +{ + if(integrator->motion_blur) + return (advanced_shading)? MOTION_BLUR: MOTION_NONE; + else if(Pass::contains(film->passes, PASS_MOTION)) + return MOTION_PASS; + else + return MOTION_NONE; +} + +float Scene::motion_shutter_time() +{ + if(need_motion() == Scene::MOTION_PASS) + return 2.0f; + else + return camera->shuttertime; +} + +bool Scene::need_global_attribute(AttributeStandard std) +{ + if(std == ATTR_STD_UV) + return Pass::contains(film->passes, PASS_UV); + else if(std == ATTR_STD_MOTION_VERTEX_POSITION) + return need_motion() != MOTION_NONE; + else if(std == ATTR_STD_MOTION_VERTEX_NORMAL) + return need_motion() == MOTION_BLUR; + + return false; +} + +void Scene::need_global_attributes(AttributeRequestSet& attributes) +{ + for(int std = ATTR_STD_NONE; std < ATTR_STD_NUM; std++) + if(need_global_attribute((AttributeStandard)std)) + attributes.add((AttributeStandard)std); +} + +bool Scene::need_update() +{ + return (need_reset() || film->need_update); +} + +bool Scene::need_data_update() +{ + return (background->need_update + || image_manager->need_update + || object_manager->need_update + || mesh_manager->need_update + || light_manager->need_update + || lookup_tables->need_update + || integrator->need_update + || shader_manager->need_update + || particle_system_manager->need_update + || curve_system_manager->need_update + || bake_manager->need_update + || volume_manager->need_update + || film->need_update); +} + +bool Scene::need_reset() +{ + return need_data_update() || camera->need_update; +} + +void Scene::reset() +{ + shader_manager->reset(this); + shader_manager->add_default(this); + + /* ensure all objects are updated */ + camera->tag_update(); + film->tag_update(this); + background->tag_update(this); + integrator->tag_update(this); + object_manager->tag_update(this); + mesh_manager->tag_update(this); + light_manager->tag_update(this); + particle_system_manager->tag_update(this); + curve_system_manager->tag_update(this); +} + +void Scene::device_free() +{ + free_memory(false); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 6829fa6ef16..db7831ac14b 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -17,18 +17,18 @@ #ifndef __SCENE_H__ #define __SCENE_H__ -#include "image.h" -#include "shader.h" +#include "render/image.h" +#include "render/shader.h" -#include "device_memory.h" +#include "device/device_memory.h" -#include "util_param.h" -#include "util_string.h" -#include "util_system.h" -#include "util_texture.h" -#include "util_thread.h" -#include "util_types.h" -#include "util_vector.h" +#include "util/util_param.h" +#include "util/util_string.h" +#include "util/util_system.h" +#include "util/util_texture.h" +#include "util/util_thread.h" +#include "util/util_types.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -71,6 +71,7 @@ public: device_vector<uint> prim_visibility; device_vector<uint> prim_index; device_vector<uint> prim_object; + device_vector<float2> prim_time; /* mesh */ device_vector<uint> tri_shader; @@ -115,19 +116,12 @@ public: device_vector<uint> sobol_directions; /* cpu images */ - device_vector<uchar4> tex_byte4_image[TEX_NUM_BYTE4_CPU]; - device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_CPU]; - device_vector<float> tex_float_image[TEX_NUM_FLOAT_CPU]; - device_vector<uchar> tex_byte_image[TEX_NUM_BYTE_CPU]; - device_vector<half4> tex_half4_image[TEX_NUM_HALF4_CPU]; - device_vector<half> tex_half_image[TEX_NUM_HALF_CPU]; - - /* opencl images */ - device_vector<uchar4> tex_image_byte4_packed; - device_vector<float4> tex_image_float4_packed; - device_vector<uchar> tex_image_byte_packed; - device_vector<float> tex_image_float_packed; - device_vector<uint4> tex_image_packed_info; + vector<device_vector<float4>* > tex_float4_image; + vector<device_vector<uchar4>* > tex_byte4_image; + vector<device_vector<half4>* > tex_half4_image; + vector<device_vector<float>* > tex_float_image; + vector<device_vector<uchar>* > tex_byte_image; + vector<device_vector<half>* > tex_half_image; /* volume */ device_vector<uint> vol_shader; @@ -148,6 +142,7 @@ public: } bvh_type; bool use_bvh_spatial_split; bool use_bvh_unaligned_nodes; + int num_bvh_time_steps; bool use_qbvh; bool persistent_data; int texture_limit; @@ -158,7 +153,8 @@ public: bvh_type = BVH_DYNAMIC; use_bvh_spatial_split = false; use_bvh_unaligned_nodes = true; - use_qbvh = false; + num_bvh_time_steps = 0; + use_qbvh = true; persistent_data = false; texture_limit = 0; } @@ -168,6 +164,7 @@ public: && bvh_type == params.bvh_type && use_bvh_spatial_split == params.use_bvh_spatial_split && use_bvh_unaligned_nodes == params.use_bvh_unaligned_nodes + && num_bvh_time_steps == params.num_bvh_time_steps && use_qbvh == params.use_qbvh && persistent_data == params.persistent_data && texture_limit == params.texture_limit); } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 6ea902f4431..0080b1d138a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -17,24 +17,24 @@ #include <string.h> #include <limits.h> -#include "buffers.h" -#include "camera.h" -#include "device.h" -#include "graph.h" -#include "integrator.h" -#include "mesh.h" -#include "object.h" -#include "scene.h" -#include "session.h" -#include "bake.h" - -#include "util_foreach.h" -#include "util_function.h" -#include "util_logging.h" -#include "util_math.h" -#include "util_opengl.h" -#include "util_task.h" -#include "util_time.h" +#include "render/buffers.h" +#include "render/camera.h" +#include "device/device.h" +#include "render/graph.h" +#include "render/integrator.h" +#include "render/mesh.h" +#include "render/object.h" +#include "render/scene.h" +#include "render/session.h" +#include "render/bake.h" + +#include "util/util_foreach.h" +#include "util/util_function.h" +#include "util/util_logging.h" +#include "util/util_math.h" +#include "util/util_opengl.h" +#include "util/util_task.h" +#include "util/util_time.h" CCL_NAMESPACE_BEGIN @@ -46,7 +46,7 @@ Session::Session(const SessionParams& params_) : params(params_), tile_manager(params.progressive, params.samples, params.tile_size, params.start_resolution, params.background == false || params.progressive_refine, params.background, params.tile_order, - max(params.device.multi_devices.size(), 1)), + max(params.device.multi_devices.size(), 1), params.pixel_size), stats() { device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background); @@ -67,10 +67,7 @@ Session::Session(const SessionParams& params_) session_thread = NULL; scene = NULL; - start_time = 0.0; reset_time = 0.0; - preview_time = 0.0; - paused_time = 0.0; last_update_time = 0.0; delayed_reset.do_reset = false; @@ -117,8 +114,9 @@ Session::~Session() } /* clean up */ - foreach(RenderBuffers *buffers, tile_buffers) - delete buffers; + foreach(RenderTile &rtile, render_tiles) + delete rtile.buffers; + tile_manager.free_device(); delete buffers; delete display; @@ -201,12 +199,10 @@ void Session::run_gpu() { bool tiles_written = false; - start_time = time_dt(); reset_time = time_dt(); - paused_time = 0.0; last_update_time = time_dt(); - progress.set_render_start_time(start_time + paused_time); + progress.set_render_start_time(); while(!progress.get_cancel()) { /* advance to next tile */ @@ -233,13 +229,11 @@ void Session::run_gpu() update_status_time(pause, no_tiles); while(1) { - double pause_start = time_dt(); + scoped_timer pause_timer; pause_cond.wait(pause_lock); - paused_time += time_dt() - pause_start; - - if(!params.background) - progress.set_start_time(start_time + paused_time); - progress.set_render_start_time(start_time + paused_time); + if(pause) { + progress.add_skip_time(pause_timer, params.background); + } update_status_time(pause, no_tiles); progress.set_update(); @@ -255,7 +249,9 @@ void Session::run_gpu() if(!no_tiles) { /* update scene */ + scoped_timer update_timer; update_scene(); + progress.add_skip_time(update_timer, params.background); if(!device->error_message().empty()) progress.set_error(device->error_message()); @@ -273,8 +269,8 @@ void Session::run_gpu() /* update status and timing */ update_status_time(); - /* path trace */ - path_trace(); + /* render */ + render(); device->task_wait(); @@ -363,20 +359,22 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) thread_scoped_lock tile_lock(tile_mutex); /* get next tile from manager */ - Tile tile; + Tile *tile; int device_num = device->device_number(tile_device); if(!tile_manager.next_tile(tile, device_num)) return false; /* fill render tile */ - rtile.x = tile_manager.state.buffer.full_x + tile.x; - rtile.y = tile_manager.state.buffer.full_y + tile.y; - rtile.w = tile.w; - rtile.h = tile.h; + rtile.x = tile_manager.state.buffer.full_x + tile->x; + rtile.y = tile_manager.state.buffer.full_y + tile->y; + rtile.w = tile->w; + rtile.h = tile->h; rtile.start_sample = tile_manager.state.sample; rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; + rtile.tile_index = tile->index; + rtile.task = (tile->state == Tile::DENOISE)? RenderTile::DENOISE: RenderTile::PATH_TRACE; tile_lock.unlock(); @@ -386,56 +384,70 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); rtile.buffer = buffers->buffer.device_pointer; - rtile.rng_state = buffers->rng_state.device_pointer; rtile.buffers = buffers; + tile->buffers = buffers; device->map_tile(tile_device, rtile); return true; } - /* fill buffer parameters */ - BufferParams buffer_params = tile_manager.params; - buffer_params.full_x = rtile.x; - buffer_params.full_y = rtile.y; - buffer_params.width = rtile.w; - buffer_params.height = rtile.h; - - buffer_params.get_offset_stride(rtile.offset, rtile.stride); - - RenderBuffers *tilebuffers; + bool store_rtile = false; + if(tile->buffers == NULL) { + /* fill buffer parameters */ + BufferParams buffer_params = tile_manager.params; + buffer_params.full_x = rtile.x; + buffer_params.full_y = rtile.y; + buffer_params.width = rtile.w; + buffer_params.height = rtile.h; + + /* allocate buffers */ + if(params.progressive_refine) { + tile_lock.lock(); + + if(render_tiles.size() == 0) { + RenderTile nulltile; + nulltile.buffers = NULL; + render_tiles.resize(tile_manager.state.num_tiles, nulltile); + } - /* allocate buffers */ - if(params.progressive_refine) { - tile_lock.lock(); + /* In certain circumstances number of tiles in the tile manager could + * be changed. This is not supported by the progressive refine feature. + */ + assert(render_tiles.size() == tile_manager.state.num_tiles); - if(tile_buffers.size() == 0) - tile_buffers.resize(tile_manager.state.num_tiles, NULL); + RenderTile &stored_rtile = render_tiles[tile->index]; + if(stored_rtile.buffers == NULL) { + tile->buffers = new RenderBuffers(tile_device); + tile->buffers->reset(tile_device, buffer_params); + store_rtile = true; + } + else { + assert(rtile.x == stored_rtile.x && + rtile.y == stored_rtile.y && + rtile.w == stored_rtile.w && + rtile.h == stored_rtile.h); + tile_lock.unlock(); + tile->buffers = stored_rtile.buffers; + } + } + else { + tile->buffers = new RenderBuffers(tile_device); - /* In certain circumstances number of tiles in the tile manager could - * be changed. This is not supported by the progressive refine feature. - */ - assert(tile_buffers.size() == tile_manager.state.num_tiles); + tile->buffers->reset(tile_device, buffer_params); + } + } - tilebuffers = tile_buffers[tile.index]; - if(tilebuffers == NULL) { - tilebuffers = new RenderBuffers(tile_device); - tile_buffers[tile.index] = tilebuffers; + tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - tilebuffers->reset(tile_device, buffer_params); - } + rtile.buffer = tile->buffers->buffer.device_pointer; + rtile.buffers = tile->buffers; + rtile.sample = 0; + if(store_rtile) { + render_tiles[tile->index] = rtile; tile_lock.unlock(); } - else { - tilebuffers = new RenderBuffers(tile_device); - - tilebuffers->reset(tile_device, buffer_params); - } - - rtile.buffer = tilebuffers->buffer.device_pointer; - rtile.rng_state = tilebuffers->rng_state.device_pointer; - rtile.buffers = tilebuffers; /* this will tag tile as IN PROGRESS in blender-side render pipeline, * which is needed to highlight currently rendering tile before first @@ -454,7 +466,7 @@ void Session::update_tile_sample(RenderTile& rtile) if(params.progressive_refine == false) { /* todo: optimize this by making it thread safe and removing lock */ - update_render_tile_cb(rtile); + update_render_tile_cb(rtile, true); } } @@ -465,18 +477,77 @@ void Session::release_tile(RenderTile& rtile) { thread_scoped_lock tile_lock(tile_mutex); - if(write_render_tile_cb) { - if(params.progressive_refine == false) { - /* todo: optimize this by making it thread safe and removing lock */ - write_render_tile_cb(rtile); + progress.add_finished_tile(rtile.task == RenderTile::DENOISE); - delete rtile.buffers; + bool delete_tile; + + if(tile_manager.finish_tile(rtile.tile_index, delete_tile)) { + if(write_render_tile_cb && params.progressive_refine == false) { + write_render_tile_cb(rtile); + if(delete_tile) { + delete rtile.buffers; + tile_manager.state.tiles[rtile.tile_index].buffers = NULL; + } + } + } + else { + if(update_render_tile_cb && params.progressive_refine == false) { + update_render_tile_cb(rtile, false); } } update_status_time(); } +void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) +{ + thread_scoped_lock tile_lock(tile_mutex); + + int center_idx = tiles[4].tile_index; + assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); + BufferParams buffer_params = tile_manager.params; + int4 image_region = make_int4(buffer_params.full_x, buffer_params.full_y, + buffer_params.full_x + buffer_params.width, buffer_params.full_y + buffer_params.height); + + for(int dy = -1, i = 0; dy <= 1; dy++) { + for(int dx = -1; dx <= 1; dx++, i++) { + int px = tiles[4].x + dx*params.tile_size.x; + int py = tiles[4].y + dy*params.tile_size.y; + if(px >= image_region.x && py >= image_region.y && + px < image_region.z && py < image_region.w) { + int tile_index = center_idx + dy*tile_manager.state.tile_stride + dx; + Tile *tile = &tile_manager.state.tiles[tile_index]; + assert(tile->buffers); + + tiles[i].buffer = tile->buffers->buffer.device_pointer; + tiles[i].x = tile_manager.state.buffer.full_x + tile->x; + tiles[i].y = tile_manager.state.buffer.full_y + tile->y; + tiles[i].w = tile->w; + tiles[i].h = tile->h; + tiles[i].buffers = tile->buffers; + + tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); + } + else { + tiles[i].buffer = (device_ptr)NULL; + tiles[i].buffers = NULL; + tiles[i].x = clamp(px, image_region.x, image_region.z); + tiles[i].y = clamp(py, image_region.y, image_region.w); + tiles[i].w = tiles[i].h = 0; + } + } + } + + assert(tiles[4].buffers); + device->map_neighbor_tiles(tile_device, tiles); +} + +void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device) +{ + thread_scoped_lock tile_lock(tile_mutex); + device->unmap_neighbor_tiles(tile_device, tiles); +} + void Session::run_cpu() { bool tiles_written = false; @@ -523,13 +594,11 @@ void Session::run_cpu() update_status_time(pause, no_tiles); while(1) { - double pause_start = time_dt(); + scoped_timer pause_timer; pause_cond.wait(pause_lock); - paused_time += time_dt() - pause_start; - - if(!params.background) - progress.set_start_time(start_time + paused_time); - progress.set_render_start_time(start_time + paused_time); + if(pause) { + progress.add_skip_time(pause_timer, params.background); + } update_status_time(pause, no_tiles); progress.set_update(); @@ -550,7 +619,9 @@ void Session::run_cpu() thread_scoped_lock buffers_lock(buffers_mutex); /* update scene */ + scoped_timer update_timer; update_scene(); + progress.add_skip_time(update_timer, params.background); if(!device->error_message().empty()) progress.set_error(device->error_message()); @@ -561,8 +632,8 @@ void Session::run_cpu() /* update status and timing */ update_status_time(); - /* path trace */ - path_trace(); + /* render */ + render(); /* update status and timing */ update_status_time(); @@ -641,23 +712,33 @@ DeviceRequestedFeatures Session::get_requested_device_features() requested_features.use_patch_evaluation = true; } #endif + if(object->is_shadow_catcher) { + requested_features.use_shadow_tricks = true; + } } BakeManager *bake_manager = scene->bake_manager; requested_features.use_baking = bake_manager->get_baking(); requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH); + requested_features.use_denoising = params.use_denoising; return requested_features; } -void Session::load_kernels() +void Session::load_kernels(bool lock_scene) { - thread_scoped_lock scene_lock(scene->mutex); + thread_scoped_lock scene_lock; + if(lock_scene) { + scene_lock = thread_scoped_lock(scene->mutex); + } + + DeviceRequestedFeatures requested_features = get_requested_device_features(); - if(!kernels_loaded) { + if(!kernels_loaded || loaded_kernel_features.modified(requested_features)) { progress.set_status("Loading render kernels (may take a few minutes the first time)"); - DeviceRequestedFeatures requested_features = get_requested_device_features(); + scoped_timer timer; + VLOG(2) << "Requested features:\n" << requested_features; if(!device->load_kernels(requested_features)) { string message = device->error_message(); @@ -670,7 +751,11 @@ void Session::load_kernels() return; } + progress.add_skip_time(timer, false); + VLOG(1) << "Total time spent loading kernels: " << time_dt() - timer.get_start(); + kernels_loaded = true; + loaded_kernel_features = requested_features; } } @@ -719,14 +804,14 @@ void Session::reset_(BufferParams& buffer_params, int samples) } tile_manager.reset(buffer_params, samples); + progress.reset_sample(); - start_time = time_dt(); - preview_time = 0.0; - paused_time = 0.0; + bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX; + progress.set_total_pixel_samples(show_progress? tile_manager.state.total_pixel_samples : 0); if(!params.background) - progress.set_start_time(start_time); - progress.set_render_start_time(start_time); + progress.set_start_time(); + progress.set_render_start_time(); } void Session::reset(BufferParams& buffer_params, int samples) @@ -739,10 +824,10 @@ void Session::reset(BufferParams& buffer_params, int samples) if(params.progressive_refine) { thread_scoped_lock buffers_lock(buffers_mutex); - foreach(RenderBuffers *buffers, tile_buffers) - delete buffers; + foreach(RenderTile &rtile, render_tiles) + delete rtile.buffers; - tile_buffers.clear(); + render_tiles.clear(); } } @@ -821,6 +906,8 @@ void Session::update_scene() /* update scene */ if(scene->need_update()) { + load_kernels(false); + progress.set_status("Updating Scene"); MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); } @@ -828,67 +915,51 @@ void Session::update_scene() void Session::update_status_time(bool show_pause, bool show_done) { - int sample = tile_manager.state.sample; - int resolution = tile_manager.state.resolution_divider; + int progressive_sample = tile_manager.state.sample; + int num_samples = tile_manager.get_num_effective_samples(); + + int tile = progress.get_rendered_tiles(); int num_tiles = tile_manager.state.num_tiles; - int tile = tile_manager.state.num_rendered_tiles; /* update status */ string status, substatus; if(!params.progressive) { - const int progress_sample = progress.get_sample(), - num_samples = tile_manager.get_num_effective_samples(); - const bool is_gpu = params.device.type == DEVICE_CUDA || params.device.type == DEVICE_OPENCL; - const bool is_multidevice = params.device.multi_devices.size() > 1; const bool is_cpu = params.device.type == DEVICE_CPU; - const bool is_last_tile = (num_samples * num_tiles - progress_sample) < num_samples; - - substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles); - - if((is_gpu && !is_multidevice && !device->info.use_split_kernel) || - (is_cpu && (num_tiles == 1 || is_last_tile))) - { - /* When using split-kernel (OpenCL) each thread in a tile will be working on a different - * sample. Can't display sample number when device uses split-kernel + const bool rendering_finished = (tile == num_tiles); + const bool is_last_tile = (tile + 1) == num_tiles; + + substatus = string_printf("Rendered %d/%d Tiles", tile, num_tiles); + + if(!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) { + /* Some devices automatically support showing the sample number: + * - CUDADevice + * - OpenCLDevice when using the megakernel (the split kernel renders multiple + * samples at the same time, so the current sample isn't really defined) + * - CPUDevice when using one thread + * For these devices, the current sample is always shown. + * + * The other option is when the last tile is currently being rendered by the CPU. */ - - /* when rendering on GPU multithreading happens within single tile, as in - * tiles are handling sequentially and in this case we could display - * currently rendering sample number - * this helps a lot from feedback point of view. - * also display the info on CPU, when using 1 tile only - */ - - int status_sample = progress_sample; - if(tile > 1) { - /* sample counter is global for all tiles, subtract samples - * from already finished tiles to get sample counter for - * current tile only - */ - if(is_cpu && is_last_tile && num_tiles > 1) { - status_sample = num_samples - (num_samples * num_tiles - progress_sample); - } - else { - status_sample -= (tile - 1) * num_samples; - } - } - - substatus += string_printf(", Sample %d/%d", status_sample, num_samples); + substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples); + } + if(params.use_denoising) { + substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles()); } } else if(tile_manager.num_samples == INT_MAX) - substatus = string_printf("Path Tracing Sample %d", sample+1); + substatus = string_printf("Path Tracing Sample %d", progressive_sample+1); else substatus = string_printf("Path Tracing Sample %d/%d", - sample+1, - tile_manager.get_num_effective_samples()); + progressive_sample+1, + num_samples); if(show_pause) { status = "Paused"; } else if(show_done) { status = "Done"; + progress.set_end_time(); /* Save end time so that further calls to get_time are accurate. */ } else { status = substatus; @@ -896,37 +967,41 @@ void Session::update_status_time(bool show_pause, bool show_done) } progress.set_status(status, substatus); - - /* update timing */ - if(preview_time == 0.0 && resolution == 1) - preview_time = time_dt(); - - double tile_time = (tile == 0 || sample == 0)? 0.0: (time_dt() - preview_time - paused_time) / sample; - - /* negative can happen when we pause a bit before rendering, can discard that */ - if(preview_time < 0.0) preview_time = 0.0; - - progress.set_tile(tile, tile_time); } -void Session::update_progress_sample() +void Session::render() { - progress.increment_sample(); -} + /* Clear buffers. */ + if(buffers && tile_manager.state.sample == 0) { + buffers->zero(device); + } -void Session::path_trace() -{ - /* add path trace task */ - DeviceTask task(DeviceTask::PATH_TRACE); + /* Add path trace task. */ + DeviceTask task(DeviceTask::RENDER); task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2); task.release_tile = function_bind(&Session::release_tile, this, _1); + task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); + task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); - task.update_progress_sample = function_bind(&Session::update_progress_sample, this); + task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); task.need_finish_queue = params.progressive_refine; task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; task.requested_tile_size = params.tile_size; + task.passes_size = tile_manager.params.get_passes_size(); + + if(params.use_denoising) { + task.denoising_radius = params.denoising_radius; + task.denoising_strength = params.denoising_strength; + task.denoising_feature_strength = params.denoising_feature_strength; + task.denoising_relative_pca = params.denoising_relative_pca; + + assert(!scene->film->need_update); + task.pass_stride = scene->film->pass_stride; + task.pass_denoising_data = scene->film->denoising_data_offset; + task.pass_denoising_clean = scene->film->denoising_clean_offset; + } device->task_add(task); } @@ -971,9 +1046,7 @@ bool Session::update_progressive_refine(bool cancel) } if(params.progressive_refine) { - foreach(RenderBuffers *buffers, tile_buffers) { - RenderTile rtile; - rtile.buffers = buffers; + foreach(RenderTile &rtile, render_tiles) { rtile.sample = sample; if(write) { @@ -982,7 +1055,7 @@ bool Session::update_progressive_refine(bool cancel) } else { if(update_render_tile_cb) - update_render_tile_cb(rtile); + update_render_tile_cb(rtile, true); } } } @@ -996,10 +1069,11 @@ void Session::device_free() { scene->device_free(); - foreach(RenderBuffers *buffers, tile_buffers) - delete buffers; + foreach(RenderTile &tile, render_tiles) + delete tile.buffers; + tile_manager.free_device(); - tile_buffers.clear(); + render_tiles.clear(); /* used from background render only, so no need to * re-create render/display buffers here diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 1db4692e171..980eda0876d 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -17,15 +17,15 @@ #ifndef __SESSION_H__ #define __SESSION_H__ -#include "buffers.h" -#include "device.h" -#include "shader.h" -#include "tile.h" +#include "render/buffers.h" +#include "device/device.h" +#include "render/shader.h" +#include "render/tile.h" -#include "util_progress.h" -#include "util_stats.h" -#include "util_thread.h" -#include "util_vector.h" +#include "util/util_progress.h" +#include "util/util_stats.h" +#include "util/util_thread.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -53,10 +53,17 @@ public: int2 tile_size; TileOrder tile_order; int start_resolution; + int pixel_size; int threads; bool display_buffer_linear; + bool use_denoising; + int denoising_radius; + float denoising_strength; + float denoising_feature_strength; + bool denoising_relative_pca; + double cancel_timeout; double reset_timeout; double text_timeout; @@ -75,8 +82,15 @@ public: samples = INT_MAX; tile_size = make_int2(64, 64); start_resolution = INT_MAX; + pixel_size = 1; threads = 0; + use_denoising = false; + denoising_radius = 8; + denoising_strength = 0.0f; + denoising_feature_strength = 0.0f; + denoising_relative_pca = false; + display_buffer_linear = false; cancel_timeout = 0.1; @@ -98,6 +112,7 @@ public: && experimental == params.experimental && tile_size == params.tile_size && start_resolution == params.start_resolution + && pixel_size == params.pixel_size && threads == params.threads && display_buffer_linear == params.display_buffer_linear && cancel_timeout == params.cancel_timeout @@ -126,7 +141,7 @@ public: Stats stats; function<void(RenderTile&)> write_render_tile_cb; - function<void(RenderTile&)> update_render_tile_cb; + function<void(RenderTile&, bool)> update_render_tile_cb; explicit Session(const SessionParams& params); ~Session(); @@ -141,10 +156,14 @@ public: void set_pause(bool pause); void update_scene(); - void load_kernels(); + void load_kernels(bool lock_scene=true); void device_free(); + /* Returns the rendering progress or 0 if no progress can be determined + * (for example, when rendering with unlimited samples). */ + float get_progress(); + protected: struct DelayedReset { thread_mutex mutex; @@ -158,7 +177,7 @@ protected: void update_status_time(bool show_pause = false, bool show_done = false); void tonemap(int sample); - void path_trace(); + void render(); void reset_(BufferParams& params, int samples); void run_cpu(); @@ -173,7 +192,8 @@ protected: void update_tile_sample(RenderTile& tile); void release_tile(RenderTile& tile); - void update_progress_sample(); + void map_neighbor_tiles(RenderTile *tiles, Device *tile_device); + void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device); bool device_use_gl; @@ -193,17 +213,15 @@ protected: thread_mutex display_mutex; bool kernels_loaded; + DeviceRequestedFeatures loaded_kernel_features; - double start_time; double reset_time; - double preview_time; - double paused_time; /* progressive refine */ double last_update_time; bool update_progressive_refine(bool cancel); - vector<RenderBuffers *> tile_buffers; + vector<RenderTile> render_tiles; DeviceRequestedFeatures get_requested_device_features(); diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index e47bfe71ef9..efdf49f2720 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -14,26 +14,28 @@ * limitations under the License. */ -#include "background.h" -#include "camera.h" -#include "device.h" -#include "graph.h" -#include "integrator.h" -#include "light.h" -#include "mesh.h" -#include "nodes.h" -#include "object.h" -#include "osl.h" -#include "scene.h" -#include "shader.h" -#include "svm.h" -#include "tables.h" - -#include "util_foreach.h" +#include "render/background.h" +#include "render/camera.h" +#include "device/device.h" +#include "render/graph.h" +#include "render/integrator.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/nodes.h" +#include "render/object.h" +#include "render/osl.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/svm.h" +#include "render/tables.h" + +#include "util/util_foreach.h" CCL_NAMESPACE_BEGIN +thread_mutex ShaderManager::lookup_table_mutex; vector<float> ShaderManager::beckmann_table; +bool ShaderManager::beckmann_table_ready = false; /* Beckmann sampling precomputed table, see bsdf_microfacet.h */ @@ -49,6 +51,16 @@ static float beckmann_table_slope_max() return 6.0; } + +/* MSVC 2015 needs this ugly hack to prevent a codegen bug on x86 + * see T50176 for details + */ +#if defined(_MSC_VER) && (_MSC_VER == 1900) +# define MSVC_VOLATILE volatile +#else +# define MSVC_VOLATILE +#endif + /* Paper used: Importance Sampling Microfacet-Based BSDFs with the * Distribution of Visible Normals. Supplemental Material 2/2. * @@ -72,7 +84,7 @@ static void beckmann_table_rows(float *table, int row_from, int row_to) slope_x[0] = (double)-beckmann_table_slope_max(); CDF_P22_omega_i[0] = 0; - for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) { + for(MSVC_VOLATILE int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) { /* slope_x */ slope_x[index_slope_x] = (double)(-beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * index_slope_x/(DATA_TMP_SIZE - 1.0f)); @@ -116,6 +128,8 @@ static void beckmann_table_rows(float *table, int row_from, int row_to) } } +#undef MSVC_VOLATILE + static void beckmann_table_build(vector<float>& table) { table.resize(BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE); @@ -165,7 +179,6 @@ Shader::Shader() pass_id = 0; graph = NULL; - graph_bump = NULL; has_surface = false; has_surface_transparent = false; @@ -173,11 +186,13 @@ Shader::Shader() has_surface_bssrdf = false; has_volume = false; has_displacement = false; + has_bump = false; has_bssrdf_bump = false; has_surface_spatial_varying = false; has_volume_spatial_varying = false; has_object_dependency = false; has_integrator_dependency = false; + has_volume_connected = false; displacement_method = DISPLACE_BUMP; @@ -191,7 +206,6 @@ Shader::Shader() Shader::~Shader() { delete graph; - delete graph_bump; } bool Shader::is_constant_emission(float3 *emission) @@ -226,9 +240,11 @@ void Shader::set_graph(ShaderGraph *graph_) /* assign graph */ delete graph; - delete graph_bump; graph = graph_; - graph_bump = NULL; + + /* Store info here before graph optimization to make sure that + * nodes that get optimized away still count. */ + has_volume_connected = (graph->output()->input("Volume")->link != NULL); } void Shader::tag_update(Scene *scene) @@ -319,11 +335,14 @@ ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem) (void)shadingsystem; /* Ignored when built without OSL. */ #ifdef WITH_OSL - if(shadingsystem == SHADINGSYSTEM_OSL) + if(shadingsystem == SHADINGSYSTEM_OSL) { manager = new OSLShaderManager(); + } else #endif + { manager = new SVMShaderManager(); + } add_default(scene); @@ -332,6 +351,8 @@ ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem) uint ShaderManager::get_attribute_id(ustring name) { + thread_scoped_spin_lock lock(attribute_lock_); + /* get a unique id for each name, for SVM attribute lookup */ AttributeIDMap::iterator it = unique_attribute_id.find(name); @@ -419,34 +440,31 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_HAS_VOLUME; has_volumes = true; - /* in this case we can assume transparent surface */ - if(!shader->has_surface) - flag |= SD_HAS_ONLY_VOLUME; - /* todo: this could check more fine grained, to skip useless volumes * enclosed inside an opaque bsdf. */ flag |= SD_HAS_TRANSPARENT_SHADOW; } + /* in this case we can assume transparent surface */ + if(shader->has_volume_connected && !shader->has_surface) + flag |= SD_HAS_ONLY_VOLUME; if(shader->heterogeneous_volume && shader->has_volume_spatial_varying) flag |= SD_HETEROGENEOUS_VOLUME; if(shader->has_bssrdf_bump) flag |= SD_HAS_BSSRDF_BUMP; - if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR) - flag |= SD_VOLUME_EQUIANGULAR; - if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) - flag |= SD_VOLUME_MIS; + if(device->info.has_volume_decoupled) { + if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR) + flag |= SD_VOLUME_EQUIANGULAR; + if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) + flag |= SD_VOLUME_MIS; + } if(shader->volume_interpolation_method == VOLUME_INTERPOLATION_CUBIC) flag |= SD_VOLUME_CUBIC; - if(shader->graph_bump) + if(shader->has_bump) flag |= SD_HAS_BUMP; if(shader->displacement_method != DISPLACE_BUMP) flag |= SD_HAS_DISPLACEMENT; - /* shader with bump mapping */ - if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump) - flag |= SD_HAS_BSSRDF_BUMP; - /* constant emission check */ float3 constant_emission = make_float3(0.0f, 0.0f, 0.0f); if(shader->is_constant_emission(&constant_emission)) @@ -469,10 +487,11 @@ void ShaderManager::device_update_common(Device *device, /* beckmann lookup table */ if(beckmann_table_offset == TABLE_OFFSET_INVALID) { - if(beckmann_table.size() == 0) { + if(!beckmann_table_ready) { thread_scoped_lock lock(lookup_table_mutex); - if(beckmann_table.size() == 0) { + if(!beckmann_table_ready) { beckmann_table_build(beckmann_table); + beckmann_table_ready = true; } } beckmann_table_offset = scene->lookup_tables->add_table(dscene, beckmann_table); @@ -483,9 +502,7 @@ void ShaderManager::device_update_common(Device *device, KernelIntegrator *kintegrator = &dscene->data.integrator; kintegrator->use_volumes = has_volumes; /* TODO(sergey): De-duplicate with flags set in integrator.cpp. */ - if(scene->integrator->transparent_shadows) { - kintegrator->transparent_shadows = has_transparent_shadow; - } + kintegrator->transparent_shadows = has_transparent_shadow; } void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene) @@ -585,10 +602,16 @@ void ShaderManager::get_requested_graph_features(ShaderGraph *graph, if(CLOSURE_IS_VOLUME(bsdf_node->closure)) { requested_features->nodes_features |= NODE_FEATURE_VOLUME; } + else if(CLOSURE_IS_PRINCIPLED(bsdf_node->closure)) { + requested_features->use_principled = true; + } } if(node->has_surface_bssrdf()) { requested_features->use_subsurface = true; } + if(node->has_surface_transparent()) { + requested_features->use_transparent = true; + } } } @@ -601,11 +624,6 @@ void ShaderManager::get_requested_features(Scene *scene, Shader *shader = scene->shaders[i]; /* Gather requested features from all the nodes from the graph nodes. */ get_requested_graph_features(shader->graph, requested_features); - /* Gather requested features from the graph itself. */ - if(shader->graph_bump) { - get_requested_graph_features(shader->graph_bump, - requested_features); - } ShaderNode *output_node = shader->graph->output(); if(output_node->input("Displacement")->link != NULL) { requested_features->nodes_features |= NODE_FEATURE_BUMP; diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index 7d896652196..3fdcd3c0c5b 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -19,20 +19,20 @@ #ifdef WITH_OSL /* So no context pollution happens from indirectly included windows.h */ -# include "util_windows.h" +# include "util/util_windows.h" # include <OSL/oslexec.h> #endif -#include "attribute.h" -#include "kernel_types.h" +#include "render/attribute.h" +#include "kernel/kernel_types.h" -#include "node.h" +#include "graph/node.h" -#include "util_map.h" -#include "util_param.h" -#include "util_string.h" -#include "util_thread.h" -#include "util_types.h" +#include "util/util_map.h" +#include "util/util_param.h" +#include "util/util_string.h" +#include "util/util_thread.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -82,18 +82,13 @@ enum DisplacementMethod { class Shader : public Node { public: - NODE_DECLARE; + NODE_DECLARE int pass_id; /* shader graph */ ShaderGraph *graph; - /* shader graph with auto bump mapping included, we compile two shaders, - * with and without bump, because the displacement method is a mesh - * level setting, so we need to handle both */ - ShaderGraph *graph_bump; - /* sampling */ bool use_mis; bool use_transparent_shadow; @@ -105,6 +100,15 @@ public: bool need_update; bool need_update_attributes; + /* If the shader has only volume components, the surface is assumed to + * be transparent. + * However, graph optimization might remove the volume subgraph, but + * since the user connected something to the volume output the surface + * should still be transparent. + * Therefore, has_volume_connected stores whether some volume subtree + * was connected before optimization. */ + bool has_volume_connected; + /* information about shader after compiling */ bool has_surface; bool has_surface_emission; @@ -112,6 +116,7 @@ public: bool has_volume; bool has_displacement; bool has_surface_bssrdf; + bool has_bump; bool has_bssrdf_bump; bool has_surface_spatial_varying; bool has_volume_spatial_varying; @@ -195,13 +200,16 @@ protected: typedef unordered_map<ustring, uint, ustringHash> AttributeIDMap; AttributeIDMap unique_attribute_id; - thread_mutex lookup_table_mutex; + static thread_mutex lookup_table_mutex; static vector<float> beckmann_table; + static bool beckmann_table_ready; size_t beckmann_table_offset; void get_requested_graph_features(ShaderGraph *graph, DeviceRequestedFeatures *requested_features); + + thread_spin_lock attribute_lock_; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/sobol.cpp b/intern/cycles/render/sobol.cpp index e3c2e802067..ce93dc8c5d5 100644 --- a/intern/cycles/render/sobol.cpp +++ b/intern/cycles/render/sobol.cpp @@ -46,10 +46,10 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "util_debug.h" -#include "util_types.h" +#include "util/util_debug.h" +#include "util/util_types.h" -#include "sobol.h" +#include "render/sobol.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/sobol.h b/intern/cycles/render/sobol.h index 574f148b9a2..9fbce4e14a5 100644 --- a/intern/cycles/render/sobol.h +++ b/intern/cycles/render/sobol.h @@ -17,7 +17,7 @@ #ifndef __SOBOL_H__ #define __SOBOL_H__ -#include "util_types.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index 4a7c4ffc0a0..f06be4a0ca2 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -14,20 +14,20 @@ * limitations under the License. */ -#include "device.h" -#include "graph.h" -#include "light.h" -#include "mesh.h" -#include "nodes.h" -#include "scene.h" -#include "shader.h" -#include "svm.h" - -#include "util_debug.h" -#include "util_logging.h" -#include "util_foreach.h" -#include "util_progress.h" -#include "util_task.h" +#include "device/device.h" +#include "render/graph.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/nodes.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/svm.h" + +#include "util/util_debug.h" +#include "util/util_logging.h" +#include "util/util_foreach.h" +#include "util/util_progress.h" +#include "util/util_task.h" CCL_NAMESPACE_BEGIN @@ -67,18 +67,17 @@ void SVMShaderManager::device_update_shader(Scene *scene, << "Shader name: " << shader->name << "\n" << summary.full_report(); + nodes_lock_.lock(); if(shader->use_mis && shader->has_surface_emission) { scene->light_manager->need_update = true; } - /* We only calculate offset and do re-allocation from the locked block, - * actual copy we do after the lock is releases to hopefully gain some - * percent of performance. + /* The copy needs to be done inside the lock, if another thread resizes the array + * while memcpy is running, it'll be copying into possibly invalid/freed ram. */ - nodes_lock_.lock(); size_t global_nodes_size = global_svm_nodes->size(); global_svm_nodes->resize(global_nodes_size + svm_nodes.size()); - nodes_lock_.unlock(); + /* Offset local SVM nodes to a global address space. */ int4& jump_node = global_svm_nodes->at(shader->id); jump_node.y = svm_nodes[0].y + global_nodes_size - 1; @@ -88,6 +87,7 @@ void SVMShaderManager::device_update_shader(Scene *scene, memcpy(&global_svm_nodes->at(global_nodes_size), &svm_nodes[1], sizeof(int4) * (svm_nodes.size() - 1)); + nodes_lock_.unlock(); } void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) @@ -522,6 +522,9 @@ void SVMCompiler::generate_closure_node(ShaderNode *node, if(node->has_bssrdf_bump()) current_shader->has_bssrdf_bump = true; } + if(node->has_bump()) { + current_shader->has_bump = true; + } } } @@ -800,31 +803,21 @@ void SVMCompiler::compile(Scene *scene, Summary *summary) { /* copy graph for shader with bump mapping */ - ShaderNode *node = shader->graph->output(); + ShaderNode *output = shader->graph->output(); int start_num_svm_nodes = svm_nodes.size(); const double time_start = time_dt(); - if(node->input("Surface")->link && node->input("Displacement")->link) - if(!shader->graph_bump) - shader->graph_bump = shader->graph->copy(); + bool has_bump = (shader->displacement_method != DISPLACE_TRUE) && + output->input("Surface")->link && output->input("Displacement")->link; /* finalize */ { scoped_timer timer((summary != NULL)? &summary->time_finalize: NULL); shader->graph->finalize(scene, - false, - false, - shader->has_integrator_dependency); - } - - if(shader->graph_bump) { - scoped_timer timer((summary != NULL)? &summary->time_finalize_bump: NULL); - shader->graph_bump->finalize(scene, - true, - false, - shader->has_integrator_dependency, - shader->displacement_method == DISPLACE_BOTH); + has_bump, + shader->has_integrator_dependency, + shader->displacement_method == DISPLACE_BOTH); } current_shader = shader; @@ -833,7 +826,8 @@ void SVMCompiler::compile(Scene *scene, shader->has_surface_emission = false; shader->has_surface_transparent = false; shader->has_surface_bssrdf = false; - shader->has_bssrdf_bump = false; + shader->has_bump = has_bump; + shader->has_bssrdf_bump = has_bump; shader->has_volume = false; shader->has_displacement = false; shader->has_surface_spatial_varying = false; @@ -842,9 +836,9 @@ void SVMCompiler::compile(Scene *scene, shader->has_integrator_dependency = false; /* generate bump shader */ - if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump) { + if(has_bump) { scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL); - compile_type(shader, shader->graph_bump, SHADER_TYPE_BUMP); + compile_type(shader, shader->graph, SHADER_TYPE_BUMP); svm_nodes[index].y = svm_nodes.size(); svm_nodes.insert(svm_nodes.end(), current_svm_nodes.begin(), @@ -856,7 +850,7 @@ void SVMCompiler::compile(Scene *scene, scoped_timer timer((summary != NULL)? &summary->time_generate_surface: NULL); compile_type(shader, shader->graph, SHADER_TYPE_SURFACE); /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */ - if(shader->displacement_method == DISPLACE_TRUE || !shader->graph_bump) { + if(!has_bump) { svm_nodes[index].y = svm_nodes.size(); } svm_nodes.insert(svm_nodes.end(), @@ -898,7 +892,6 @@ SVMCompiler::Summary::Summary() : num_svm_nodes(0), peak_stack_usage(0), time_finalize(0.0), - time_finalize_bump(0.0), time_generate_surface(0.0), time_generate_bump(0.0), time_generate_volume(0.0), @@ -914,10 +907,7 @@ string SVMCompiler::Summary::full_report() const report += string_printf("Peak stack usage: %d\n", peak_stack_usage); report += string_printf("Time (in seconds):\n"); - report += string_printf(" Finalize: %f\n", time_finalize); - report += string_printf(" Bump finalize: %f\n", time_finalize_bump); - report += string_printf("Finalize: %f\n", time_finalize + - time_finalize_bump); + report += string_printf("Finalize: %f\n", time_finalize); report += string_printf(" Surface: %f\n", time_generate_surface); report += string_printf(" Bump: %f\n", time_generate_bump); report += string_printf(" Volume: %f\n", time_generate_volume); diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h index eee8ba4ded8..a8f95a48a0f 100644 --- a/intern/cycles/render/svm.h +++ b/intern/cycles/render/svm.h @@ -17,13 +17,13 @@ #ifndef __SVM_H__ #define __SVM_H__ -#include "attribute.h" -#include "graph.h" -#include "shader.h" +#include "render/attribute.h" +#include "render/graph.h" +#include "render/shader.h" -#include "util_set.h" -#include "util_string.h" -#include "util_thread.h" +#include "util/util_set.h" +#include "util/util_string.h" +#include "util/util_thread.h" CCL_NAMESPACE_BEGIN @@ -75,9 +75,6 @@ public: /* Time spent on surface graph finalization. */ double time_finalize; - /* Time spent on bump graph finalization. */ - double time_finalize_bump; - /* Time spent on generating SVM nodes for surface shader. */ double time_generate_surface; diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index dfafd99961b..bf1ef12d602 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "device.h" -#include "scene.h" -#include "tables.h" +#include "device/device.h" +#include "render/scene.h" +#include "render/tables.h" -#include "util_debug.h" -#include "util_logging.h" +#include "util/util_debug.h" +#include "util/util_logging.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/tables.h b/intern/cycles/render/tables.h index 1bb70b22762..bc261c2a74d 100644 --- a/intern/cycles/render/tables.h +++ b/intern/cycles/render/tables.h @@ -17,7 +17,7 @@ #ifndef __TABLES_H__ #define __TABLES_H__ -#include <util_list.h> +#include "util/util_list.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 3a6dfea11a7..a9620f79fa0 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "tile.h" +#include "render/tile.h" -#include "util_algorithm.h" -#include "util_types.h" +#include "util/util_algorithm.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -25,37 +25,39 @@ namespace { class TileComparator { public: - TileComparator(TileOrder order, int2 center) - : order_(order), - center_(center) + TileComparator(TileOrder order_, int2 center_, Tile *tiles_) + : order(order_), + center(center_), + tiles(tiles_) {} - bool operator()(Tile &a, Tile &b) + bool operator()(int a, int b) { - switch(order_) { + switch(order) { case TILE_CENTER: { - float2 dist_a = make_float2(center_.x - (a.x + a.w/2), - center_.y - (a.y + a.h/2)); - float2 dist_b = make_float2(center_.x - (b.x + b.w/2), - center_.y - (b.y + b.h/2)); + float2 dist_a = make_float2(center.x - (tiles[a].x + tiles[a].w/2), + center.y - (tiles[a].y + tiles[a].h/2)); + float2 dist_b = make_float2(center.x - (tiles[b].x + tiles[b].w/2), + center.y - (tiles[b].y + tiles[b].h/2)); return dot(dist_a, dist_a) < dot(dist_b, dist_b); } case TILE_LEFT_TO_RIGHT: - return (a.x == b.x)? (a.y < b.y): (a.x < b.x); + return (tiles[a].x == tiles[b].x)? (tiles[a].y < tiles[b].y): (tiles[a].x < tiles[b].x); case TILE_RIGHT_TO_LEFT: - return (a.x == b.x)? (a.y < b.y): (a.x > b.x); + return (tiles[a].x == tiles[b].x)? (tiles[a].y < tiles[b].y): (tiles[a].x > tiles[b].x); case TILE_TOP_TO_BOTTOM: - return (a.y == b.y)? (a.x < b.x): (a.y > b.y); + return (tiles[a].y == tiles[b].y)? (tiles[a].x < tiles[b].x): (tiles[a].y > tiles[b].y); case TILE_BOTTOM_TO_TOP: default: - return (a.y == b.y)? (a.x < b.x): (a.y < b.y); + return (tiles[a].y == tiles[b].y)? (tiles[a].x < tiles[b].x): (tiles[a].y < tiles[b].y); } } protected: - TileOrder order_; - int2 center_; + TileOrder order; + int2 center; + Tile *tiles; }; inline int2 hilbert_index_to_pos(int n, int d) @@ -86,16 +88,19 @@ enum SpiralDirection { } /* namespace */ TileManager::TileManager(bool progressive_, int num_samples_, int2 tile_size_, int start_resolution_, - bool preserve_tile_device_, bool background_, TileOrder tile_order_, int num_devices_) + bool preserve_tile_device_, bool background_, TileOrder tile_order_, + int num_devices_, int pixel_size_) { progressive = progressive_; tile_size = tile_size_; tile_order = tile_order_; start_resolution = start_resolution_; + pixel_size = pixel_size_; num_samples = num_samples_; num_devices = num_devices_; preserve_tile_device = preserve_tile_device_; background = background_; + schedule_denoising = false; range_start_sample = 0; range_num_samples = -1; @@ -108,36 +113,73 @@ TileManager::~TileManager() { } -void TileManager::reset(BufferParams& params_, int num_samples_) +void TileManager::free_device() { - params = params_; + if(schedule_denoising) { + for(int i = 0; i < state.tiles.size(); i++) { + delete state.tiles[i].buffers; + state.tiles[i].buffers = NULL; + } + } +} +static int get_divider(int w, int h, int start_resolution) +{ int divider = 1; - int w = params.width, h = params.height; - if(start_resolution != INT_MAX) { while(w*h > start_resolution*start_resolution) { w = max(1, w/2); h = max(1, h/2); - divider *= 2; + divider <<= 1; } } + return divider; +} - num_samples = num_samples_; +void TileManager::reset(BufferParams& params_, int num_samples_) +{ + params = params_; + + set_samples(num_samples_); state.buffer = BufferParams(); state.sample = range_start_sample - 1; state.num_tiles = 0; - state.num_rendered_tiles = 0; state.num_samples = 0; - state.resolution_divider = divider; + state.resolution_divider = get_divider(params.width, params.height, start_resolution); + state.render_tiles.clear(); + state.denoising_tiles.clear(); state.tiles.clear(); } void TileManager::set_samples(int num_samples_) { num_samples = num_samples_; + + /* No real progress indication is possible when using unlimited samples. */ + if(num_samples == INT_MAX) { + state.total_pixel_samples = 0; + } + else { + uint64_t pixel_samples = 0; + /* While rendering in the viewport, the initial preview resolution is increased to the native resolution + * before the actual rendering begins. Therefore, additional pixel samples will be rendered. */ + int divider = max(get_divider(params.width, params.height, start_resolution) / 2, pixel_size); + while(divider > pixel_size) { + int image_w = max(1, params.width/divider); + int image_h = max(1, params.height/divider); + pixel_samples += image_w * image_h; + divider >>= 1; + } + + int image_w = max(1, params.width/divider); + int image_h = max(1, params.height/divider); + state.total_pixel_samples = pixel_samples + (uint64_t)get_num_effective_samples() * image_w*image_h; + if(schedule_denoising) { + state.total_pixel_samples += params.width*params.height; + } + } } /* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device. @@ -149,32 +191,36 @@ int TileManager::gen_tiles(bool sliced) int image_h = max(1, params.height/resolution); int2 center = make_int2(image_w/2, image_h/2); - state.tiles.clear(); - int num_logical_devices = preserve_tile_device? num_devices: 1; int num = min(image_h, num_logical_devices); int slice_num = sliced? num: 1; - int tile_index = 0; + int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); state.tiles.clear(); - state.tiles.resize(num); - vector<list<Tile> >::iterator tile_list = state.tiles.begin(); + state.render_tiles.clear(); + state.denoising_tiles.clear(); + state.render_tiles.resize(num); + state.denoising_tiles.resize(num); + state.tile_stride = tile_w; + vector<list<int> >::iterator tile_list; + tile_list = state.render_tiles.begin(); if(tile_order == TILE_HILBERT_SPIRAL) { assert(!sliced); + int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); + state.tiles.resize(tile_w*tile_h); + /* Size of blocks in tiles, must be a power of 2 */ const int hilbert_size = (max(tile_size.x, tile_size.y) <= 12)? 8: 4; - int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; - int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; - int tiles_per_device = (tile_w * tile_h + num - 1) / num; + int tiles_per_device = divide_up(tile_w * tile_h, num); int cur_device = 0, cur_tiles = 0; int2 block_size = tile_size * make_int2(hilbert_size, hilbert_size); /* Number of blocks to fill the image */ - int blocks_x = (block_size.x >= image_w)? 1: (image_w + block_size.x - 1)/block_size.x; - int blocks_y = (block_size.y >= image_h)? 1: (image_h + block_size.y - 1)/block_size.y; + int blocks_x = (block_size.x >= image_w)? 1: divide_up(image_w, block_size.x); + int blocks_y = (block_size.y >= image_h)? 1: divide_up(image_h, block_size.y); int n = max(blocks_x, blocks_y) | 0x1; /* Side length of the spiral (must be odd) */ /* Offset of spiral (to keep it centered) */ int2 offset = make_int2((image_w - n*block_size.x)/2, (image_h - n*block_size.y)/2); @@ -205,9 +251,11 @@ int TileManager::gen_tiles(bool sliced) if(pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) { int w = min(tile_size.x, image_w - pos.x); int h = min(tile_size.y, image_h - pos.y); - tile_list->push_front(Tile(tile_index, pos.x, pos.y, w, h, cur_device)); + int2 ipos = pos / tile_size; + int idx = ipos.y*tile_w + ipos.x; + state.tiles[idx] = Tile(idx, pos.x, pos.y, w, h, cur_device, Tile::RENDER); + tile_list->push_front(idx); cur_tiles++; - tile_index++; if(cur_tiles == tiles_per_device) { tile_list++; @@ -251,27 +299,28 @@ int TileManager::gen_tiles(bool sliced) break; } } - return tile_index; + return tile_w*tile_h; } + int idx = 0; for(int slice = 0; slice < slice_num; slice++) { int slice_y = (image_h/slice_num)*slice; int slice_h = (slice == slice_num-1)? image_h - slice*(image_h/slice_num): image_h/slice_num; - int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; - int tile_h = (tile_size.y >= slice_h)? 1: (slice_h + tile_size.y - 1)/tile_size.y; + int tile_h = (tile_size.y >= slice_h)? 1: divide_up(slice_h, tile_size.y); - int tiles_per_device = (tile_w * tile_h + num - 1) / num; + int tiles_per_device = divide_up(tile_w * tile_h, num); int cur_device = 0, cur_tiles = 0; for(int tile_y = 0; tile_y < tile_h; tile_y++) { - for(int tile_x = 0; tile_x < tile_w; tile_x++, tile_index++) { + for(int tile_x = 0; tile_x < tile_w; tile_x++, idx++) { int x = tile_x * tile_size.x; int y = tile_y * tile_size.y; int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? slice_h - y: tile_size.y; - tile_list->push_back(Tile(tile_index, x, y + slice_y, w, h, sliced? slice: cur_device)); + state.tiles.push_back(Tile(idx, x, y + slice_y, w, h, sliced? slice: cur_device, Tile::RENDER)); + tile_list->push_back(idx); if(!sliced) { cur_tiles++; @@ -279,7 +328,7 @@ int TileManager::gen_tiles(bool sliced) if(cur_tiles == tiles_per_device) { /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that case. */ if(tile_order != TILE_BOTTOM_TO_TOP) { - tile_list->sort(TileComparator(tile_order, center)); + tile_list->sort(TileComparator(tile_order, center, &state.tiles[0])); } tile_list++; cur_tiles = 0; @@ -293,7 +342,7 @@ int TileManager::gen_tiles(bool sliced) } } - return tile_index; + return idx; } void TileManager::set_tiles() @@ -313,16 +362,111 @@ void TileManager::set_tiles() state.buffer.full_height = max(1, params.full_height/resolution); } -bool TileManager::next_tile(Tile& tile, int device) +int TileManager::get_neighbor_index(int index, int neighbor) +{ + static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0}; + + int resolution = state.resolution_divider; + int image_w = max(1, params.width/resolution); + int image_h = max(1, params.height/resolution); + int tile_w = (tile_size.x >= image_w)? 1: divide_up(image_w, tile_size.x); + int tile_h = (tile_size.y >= image_h)? 1: divide_up(image_h, tile_size.y); + + int nx = state.tiles[index].x/tile_size.x + dx[neighbor], ny = state.tiles[index].y/tile_size.y + dy[neighbor]; + if(nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) + return -1; + + return ny*state.tile_stride + nx; +} + +/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state min_state. */ +bool TileManager::check_neighbor_state(int index, Tile::State min_state) +{ + if(index < 0 || state.tiles[index].state < min_state) { + return false; + } + for(int neighbor = 0; neighbor < 9; neighbor++) { + int nindex = get_neighbor_index(index, neighbor); + /* Out-of-bounds tiles don't matter. */ + if(nindex >= 0 && state.tiles[nindex].state < min_state) { + return false; + } + } + + return true; +} + +/* Returns whether the tile should be written (and freed if no denoising is used) instead of updating. */ +bool TileManager::finish_tile(int index, bool &delete_tile) +{ + delete_tile = false; + + switch(state.tiles[index].state) { + case Tile::RENDER: + { + if(!schedule_denoising) { + state.tiles[index].state = Tile::DONE; + delete_tile = true; + return true; + } + state.tiles[index].state = Tile::RENDERED; + /* For each neighbor and the tile itself, check whether all of its neighbors have been rendered. If yes, it can be denoised. */ + for(int neighbor = 0; neighbor < 9; neighbor++) { + int nindex = get_neighbor_index(index, neighbor); + if(check_neighbor_state(nindex, Tile::RENDERED)) { + state.tiles[nindex].state = Tile::DENOISE; + state.denoising_tiles[state.tiles[nindex].device].push_back(nindex); + } + } + return false; + } + case Tile::DENOISE: + { + state.tiles[index].state = Tile::DENOISED; + /* For each neighbor and the tile itself, check whether all of its neighbors have been denoised. If yes, it can be freed. */ + for(int neighbor = 0; neighbor < 9; neighbor++) { + int nindex = get_neighbor_index(index, neighbor); + if(check_neighbor_state(nindex, Tile::DENOISED)) { + state.tiles[nindex].state = Tile::DONE; + /* It can happen that the tile just finished denoising and already can be freed here. + * However, in that case it still has to be written before deleting, so we can't delete it yet. */ + if(neighbor == 8) { + delete_tile = true; + } + else { + delete state.tiles[nindex].buffers; + state.tiles[nindex].buffers = NULL; + } + } + } + return true; + } + default: + assert(false); + return true; + } +} + +bool TileManager::next_tile(Tile* &tile, int device) { int logical_device = preserve_tile_device? device: 0; - if((logical_device >= state.tiles.size()) || state.tiles[logical_device].empty()) + if(logical_device >= state.render_tiles.size()) + return false; + + if(!state.denoising_tiles[logical_device].empty()) { + int idx = state.denoising_tiles[logical_device].front(); + state.denoising_tiles[logical_device].pop_front(); + tile = &state.tiles[idx]; + return true; + } + + if(state.render_tiles[logical_device].empty()) return false; - tile = Tile(state.tiles[logical_device].front()); - state.tiles[logical_device].pop_front(); - state.num_rendered_tiles++; + int idx = state.render_tiles[logical_device].front(); + state.render_tiles[logical_device].pop_front(); + tile = &state.tiles[idx]; return true; } @@ -331,7 +475,7 @@ bool TileManager::done() int end_sample = (range_num_samples == -1) ? num_samples : range_start_sample + range_num_samples; - return (state.resolution_divider == 1) && + return (state.resolution_divider == pixel_size) && (state.sample+state.num_samples >= end_sample); } @@ -340,9 +484,9 @@ bool TileManager::next() if(done()) return false; - if(progressive && state.resolution_divider > 1) { + if(progressive && state.resolution_divider > pixel_size) { state.sample = 0; - state.resolution_divider /= 2; + state.resolution_divider = max(state.resolution_divider/2, pixel_size); state.num_samples = 1; set_tiles(); } @@ -356,7 +500,7 @@ bool TileManager::next() else state.num_samples = range_num_samples; - state.resolution_divider = 1; + state.resolution_divider = pixel_size; set_tiles(); } diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index af1b1ed8b0f..4cd57b7b30c 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -19,8 +19,8 @@ #include <limits.h> -#include "buffers.h" -#include "util_list.h" +#include "render/buffers.h" +#include "util/util_list.h" CCL_NAMESPACE_BEGIN @@ -31,12 +31,20 @@ public: int index; int x, y, w, h; int device; + /* RENDER: The tile has to be rendered. + * RENDERED: The tile has been rendered, but can't be denoised yet (waiting for neighbors). + * DENOISE: The tile can be denoised now. + * DENOISED: The tile has been denoised, but can't be freed yet (waiting for neighbors). + * DONE: The tile is finished and has been freed. */ + typedef enum { RENDER = 0, RENDERED, DENOISE, DENOISED, DONE } State; + State state; + RenderBuffers *buffers; Tile() {} - Tile(int index_, int x_, int y_, int w_, int h_, int device_) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_) {} + Tile(int index_, int x_, int y_, int w_, int h_, int device_, State state_ = RENDER) + : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), state(state_), buffers(NULL) {} }; /* Tile order */ @@ -58,27 +66,37 @@ public: BufferParams params; struct State { + vector<Tile> tiles; + int tile_stride; BufferParams buffer; int sample; int num_samples; int resolution_divider; int num_tiles; - int num_rendered_tiles; - /* This vector contains a list of tiles for every logical device in the session. - * In each list, the tiles are sorted according to the tile order setting. */ - vector<list<Tile> > tiles; + + /* Total samples over all pixels: Generally num_samples*num_pixels, + * but can be higher due to the initial resolution division for previews. */ + uint64_t total_pixel_samples; + + /* These lists contain the indices of the tiles to be rendered/denoised and are used + * when acquiring a new tile for the device. + * Each list in each vector is for one logical device. */ + vector<list<int> > render_tiles; + vector<list<int> > denoising_tiles; } state; int num_samples; TileManager(bool progressive, int num_samples, int2 tile_size, int start_resolution, - bool preserve_tile_device, bool background, TileOrder tile_order, int num_devices = 1); + bool preserve_tile_device, bool background, TileOrder tile_order, int num_devices = 1, int pixel_size = 1); ~TileManager(); + void free_device(); void reset(BufferParams& params, int num_samples); void set_samples(int num_samples); bool next(); - bool next_tile(Tile& tile, int device = 0); + bool next_tile(Tile* &tile, int device = 0); + bool finish_tile(int index, bool& delete_tile); bool done(); void set_tile_order(TileOrder tile_order_) { tile_order = tile_order_; } @@ -91,8 +109,11 @@ public: /* Number to samples in the rendering range. */ int range_num_samples; - /* get number of actual samples to render. */ + /* Get number of actual samples to render. */ int get_num_effective_samples(); + + /* Schedule tiles for denoising after they've been rendered. */ + bool schedule_denoising; protected: void set_tiles(); @@ -101,6 +122,7 @@ protected: int2 tile_size; TileOrder tile_order; int start_resolution; + int pixel_size; int num_devices; /* in some cases it is important that the same tile will be returned for the same @@ -124,6 +146,9 @@ protected: /* Generate tile list, return number of tiles. */ int gen_tiles(bool sliced); + + int get_neighbor_index(int index, int neighbor); + bool check_neighbor_state(int index, Tile::State state); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/volume.cpp b/intern/cycles/render/volume.cpp index c677f1df576..e7d1d7d6595 100644 --- a/intern/cycles/render/volume.cpp +++ b/intern/cycles/render/volume.cpp @@ -17,10 +17,10 @@ #include "scene.h" #include "volume.h" -#include "util_foreach.h" -#include "util_logging.h" -#include "util_progress.h" -#include "util_task.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_progress.h" +#include "util/util_task.h" #include "../kernel/openvdb/vdb_globals.h" diff --git a/intern/cycles/render/volume.h b/intern/cycles/render/volume.h index 0d706629685..3dcafab6995 100644 --- a/intern/cycles/render/volume.h +++ b/intern/cycles/render/volume.h @@ -19,8 +19,8 @@ #include "attribute.h" -#include "util_string.h" -#include "util_types.h" +#include "util/util_string.h" +#include "util/util_types.h" #ifdef WITH_OPENVDB #include <openvdb/openvdb.h> |