diff options
Diffstat (limited to 'intern/cycles/render')
-rw-r--r-- | intern/cycles/render/background.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/bake.cpp | 128 | ||||
-rw-r--r-- | intern/cycles/render/bake.h | 16 | ||||
-rw-r--r-- | intern/cycles/render/camera.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/render/camera.h | 3 | ||||
-rw-r--r-- | intern/cycles/render/film.cpp | 12 | ||||
-rw-r--r-- | intern/cycles/render/image.cpp | 27 | ||||
-rw-r--r-- | intern/cycles/render/image.h | 1 | ||||
-rw-r--r-- | intern/cycles/render/integrator.cpp | 15 | ||||
-rw-r--r-- | intern/cycles/render/integrator.h | 3 | ||||
-rw-r--r-- | intern/cycles/render/light.cpp | 17 | ||||
-rw-r--r-- | intern/cycles/render/light.h | 1 | ||||
-rw-r--r-- | intern/cycles/render/mesh.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/render/mesh.h | 3 | ||||
-rw-r--r-- | intern/cycles/render/object.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/osl.cpp | 13 | ||||
-rw-r--r-- | intern/cycles/render/scene.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/svm.cpp | 67 | ||||
-rw-r--r-- | intern/cycles/render/svm.h | 12 |
19 files changed, 231 insertions, 100 deletions
diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp index a877c52fbed..3926ecb99d6 100644 --- a/intern/cycles/render/background.cpp +++ b/intern/cycles/render/background.cpp @@ -78,6 +78,8 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene kbackground->surface_shader |= SHADER_EXCLUDE_GLOSSY; if(!(visibility & PATH_RAY_TRANSMIT)) kbackground->surface_shader |= SHADER_EXCLUDE_TRANSMIT; + if(!(visibility & PATH_RAY_VOLUME_SCATTER)) + kbackground->surface_shader |= SHADER_EXCLUDE_SCATTER; if(!(visibility & PATH_RAY_CAMERA)) kbackground->surface_shader |= SHADER_EXCLUDE_CAMERA; diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index c68f6e1f08e..5723a22dd84 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN -BakeData::BakeData(const int object, const int tri_offset, const int num_pixels): +BakeData::BakeData(const int object, const size_t tri_offset, const size_t num_pixels): m_object(object), m_tri_offset(tri_offset), m_num_pixels(num_pixels) @@ -60,7 +60,7 @@ int BakeData::object() return m_object; } -int BakeData::size() +size_t BakeData::size() { return m_num_pixels; } @@ -95,6 +95,7 @@ BakeManager::BakeManager() m_bake_data = NULL; m_is_baking = false; need_update = true; + m_shader_limit = 512 * 512; } BakeManager::~BakeManager() @@ -113,76 +114,105 @@ void BakeManager::set_baking(const bool value) m_is_baking = value; } -BakeData *BakeManager::init(const int object, const int tri_offset, const int num_pixels) +BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels) { m_bake_data = new BakeData(object, tri_offset, num_pixels); return m_bake_data; } +void BakeManager::set_shader_limit(const size_t x, const size_t y) +{ + m_shader_limit = x * y; + m_shader_limit = (size_t)pow(2, ceil(log(m_shader_limit)/log(2))); +} + bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) { - size_t limit = bake_data->size(); + size_t num_pixels = bake_data->size(); - /* setup input for device task */ - device_vector<uint4> d_input; - uint4 *d_input_data = d_input.resize(limit * 2); - size_t d_input_size = 0; + progress.reset_sample(); + this->num_parts = 0; - for(size_t i = 0; i < limit; i++) { - d_input_data[d_input_size++] = bake_data->data(i); - d_input_data[d_input_size++] = bake_data->differentials(i); + /* calculate the total parts for the progress bar */ + for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { + size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); + + DeviceTask task(DeviceTask::SHADER); + task.shader_w = shader_size; + + this->num_parts += device->get_split_task_count(task); } - if(d_input_size == 0) - return false; + this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; - /* run device task */ - device_vector<float4> d_output; - d_output.resize(limit); + for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { + size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - /* needs to be up to data for attribute access */ - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + /* setup input for device task */ + device_vector<uint4> d_input; + uint4 *d_input_data = d_input.resize(shader_size * 2); + size_t d_input_size = 0; - device->mem_alloc(d_input, MEM_READ_ONLY); - device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_WRITE_ONLY); + for(size_t i = shader_offset; i < (shader_offset + shader_size); i++) { + d_input_data[d_input_size++] = bake_data->data(i); + d_input_data[d_input_size++] = bake_data->differentials(i); + } - DeviceTask task(DeviceTask::SHADER); - task.shader_input = d_input.device_pointer; - task.shader_output = d_output.device_pointer; - task.shader_eval_type = shader_type; - task.shader_x = 0; - task.shader_w = d_output.size(); - task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1; - task.get_cancel = function_bind(&Progress::get_cancel, &progress); + if(d_input_size == 0) { + m_is_baking = false; + return false; + } - device->task_add(task); - device->task_wait(); + /* run device task */ + device_vector<float4> d_output; + d_output.resize(shader_size); + + /* needs to be up to data for attribute access */ + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + + device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_copy_to(d_input); + device->mem_alloc(d_output, MEM_WRITE_ONLY); + + DeviceTask task(DeviceTask::SHADER); + task.shader_input = d_input.device_pointer; + task.shader_output = d_output.device_pointer; + task.shader_eval_type = shader_type; + task.shader_x = 0; + task.offset = shader_offset; + task.shader_w = d_output.size(); + task.num_samples = this->num_samples; + task.get_cancel = function_bind(&Progress::get_cancel, &progress); + task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); + + device->task_add(task); + device->task_wait(); + + if(progress.get_cancel()) { + device->mem_free(d_input); + device->mem_free(d_output); + m_is_baking = false; + return false; + } - if(progress.get_cancel()) { + device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); device->mem_free(d_input); device->mem_free(d_output); - m_is_baking = false; - return false; - } - - device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); - device->mem_free(d_input); - device->mem_free(d_output); - /* read result */ - int k = 0; + /* read result */ + int k = 0; - float4 *offset = (float4*)d_output.data_pointer; + float4 *offset = (float4*)d_output.data_pointer; - size_t depth = 4; - for(size_t i = 0; i < limit; i++) { - size_t index = i * depth; - float4 out = offset[k++]; + size_t depth = 4; + for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) { + size_t index = i * depth; + float4 out = offset[k++]; - if(bake_data->is_valid(i)) { - for(size_t j=0; j < 4; j++) { - result[index + j] = out[j]; + if(bake_data->is_valid(i)) { + for(size_t j=0; j < 4; j++) { + result[index + j] = out[j]; + } } } } diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index f91ba589b8b..186fbbeea4d 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -27,20 +27,20 @@ CCL_NAMESPACE_BEGIN class BakeData { public: - BakeData(const int object, const int tri_offset, const int num_pixels); + BakeData(const int object, const size_t tri_offset, const size_t num_pixels); ~BakeData(); void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy); int object(); - int size(); + size_t size(); uint4 data(int i); uint4 differentials(int i); bool is_valid(int i); private: int m_object; - int m_tri_offset; - int m_num_pixels; + size_t m_tri_offset; + size_t m_num_pixels; vector<int>m_primitive; vector<float>m_u; vector<float>m_v; @@ -58,7 +58,9 @@ public: bool get_baking(); void set_baking(const bool value); - BakeData *init(const int object, const int tri_offset, const int num_pixels); + BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels); + + void set_shader_limit(const size_t x, const size_t y); bool bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]); @@ -70,9 +72,13 @@ public: bool need_update; + int num_samples; + int num_parts; + private: BakeData *m_bake_data; bool m_is_baking; + size_t m_shader_limit; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 8659fe4f7a3..bb0fec759a9 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -38,6 +38,8 @@ Camera::Camera() motion.post = transform_identity(); use_motion = false; + aperture_ratio = 1.0f; + type = CAMERA_PERSPECTIVE; panorama_type = PANORAMA_EQUIRECTANGULAR; fisheye_fov = M_PI_F; @@ -241,6 +243,9 @@ void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) /* type */ kcam->type = type; + /* anamorphic lens bokeh */ + kcam->inv_aperture_ratio = 1.0f / aperture_ratio; + /* panorama */ kcam->panorama_type = panorama_type; kcam->fisheye_fov = fisheye_fov; @@ -291,6 +296,7 @@ bool Camera::modified(const Camera& cam) (viewplane == cam.viewplane) && (border == cam.border) && (matrix == cam.matrix) && + (aperture_ratio == cam.aperture_ratio) && (panorama_type == cam.panorama_type) && (fisheye_fov == cam.fisheye_fov) && (fisheye_lens == cam.fisheye_lens)); diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index c28670bc55f..50889968a90 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -54,6 +54,9 @@ public: float fisheye_fov; float fisheye_lens; + /* anamorphic lens bokeh */ + float aperture_ratio; + /* sensor */ float sensorwidth; float sensorheight; diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index c1aefbcfbbc..09973e8bc86 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -80,22 +80,13 @@ void Pass::add(PassType type, vector<Pass>& passes) pass.components = 1; break; case PASS_OBJECT_ID: - pass.components = 1; - pass.filter = false; - break; case PASS_MATERIAL_ID: pass.components = 1; pass.filter = false; break; case PASS_DIFFUSE_COLOR: - pass.components = 4; - break; case PASS_GLOSSY_COLOR: - pass.components = 4; - break; case PASS_TRANSMISSION_COLOR: - pass.components = 4; - break; case PASS_SUBSURFACE_COLOR: pass.components = 4; break; @@ -141,9 +132,6 @@ void Pass::add(PassType type, vector<Pass>& passes) break; case PASS_EMISSION: - pass.components = 4; - pass.exposure = true; - break; case PASS_BACKGROUND: pass.components = 4; pass.exposure = true; diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 8369df5e137..076cc3d8b63 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -135,6 +135,7 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo (colorspace == "" && (strcmp(in->format_name(), "png") == 0 || strcmp(in->format_name(), "tiff") == 0 || + strcmp(in->format_name(), "dpx") == 0 || strcmp(in->format_name(), "jpeg2000") == 0))); } else { @@ -313,6 +314,32 @@ void ImageManager::remove_image(const string& filename, void *builtin_data, Inte } } +/* TODO(sergey): Deduplicate with the iteration above, but make it pretty, + * without bunch of arguments passing around making code readability even + * more cluttered. + */ +void ImageManager::tag_reload_image(const string& filename, void *builtin_data, InterpolationType interpolation) +{ + size_t slot; + + for(slot = 0; slot < images.size(); slot++) { + if(images[slot] && image_equals(images[slot], filename, builtin_data, interpolation)) { + images[slot]->need_load = true; + break; + } + } + + if(slot == images.size()) { + /* see if it's in a float texture slot */ + for(slot = 0; slot < float_images.size(); slot++) { + if(float_images[slot] && image_equals(float_images[slot], filename, builtin_data, interpolation)) { + float_images[slot]->need_load = true; + break; + } + } + } +} + bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) { if(img->filename == "") diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 50ea346c034..535f0ff156d 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -59,6 +59,7 @@ public: bool& is_float, bool& is_linear, InterpolationType interpolation, bool use_alpha); void remove_image(int slot); void remove_image(const string& filename, void *builtin_data, InterpolationType interpolation); + void tag_reload_image(const string& filename, void *builtin_data, InterpolationType interpolation); bool is_float_image(const string& filename, void *builtin_data, bool& is_linear); void device_update(Device *device, DeviceScene *dscene, Progress& progress); diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 4a8b490b1ad..b7a87ac14da 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -43,7 +43,8 @@ Integrator::Integrator() volume_max_steps = 1024; volume_step_size = 0.1f; - no_caustics = false; + caustics_reflective = true; + caustics_refractive = true; filter_glossy = 0.0f; seed = 0; layer_flag = ~0; @@ -86,11 +87,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1; kintegrator->max_glossy_bounce = max_glossy_bounce + 1; kintegrator->max_transmission_bounce = max_transmission_bounce + 1; - - if(kintegrator->use_volumes) - kintegrator->max_volume_bounce = max_volume_bounce + 1; - else - kintegrator->max_volume_bounce = 1; + kintegrator->max_volume_bounce = max_volume_bounce + 1; kintegrator->transparent_max_bounce = transparent_max_bounce + 1; kintegrator->transparent_min_bounce = transparent_min_bounce + 1; @@ -104,7 +101,8 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->volume_max_steps = volume_max_steps; kintegrator->volume_step_size = volume_step_size; - kintegrator->no_caustics = no_caustics; + kintegrator->caustics_reflective = caustics_reflective; + kintegrator->caustics_refractive = caustics_refractive; kintegrator->filter_glossy = (filter_glossy == 0.0f)? FLT_MAX: 1.0f/filter_glossy; kintegrator->seed = hash_int(seed); @@ -183,7 +181,8 @@ bool Integrator::modified(const Integrator& integrator) volume_homogeneous_sampling == integrator.volume_homogeneous_sampling && volume_max_steps == integrator.volume_max_steps && volume_step_size == integrator.volume_step_size && - no_caustics == integrator.no_caustics && + caustics_reflective == integrator.caustics_reflective && + caustics_refractive == integrator.caustics_refractive && filter_glossy == integrator.filter_glossy && layer_flag == integrator.layer_flag && seed == integrator.seed && diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 380c1a65722..13c10e8ca94 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -43,7 +43,8 @@ public: int volume_max_steps; float volume_step_size; - bool no_caustics; + bool caustics_reflective; + bool caustics_refractive; float filter_glossy; int seed; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 9a0a7ead696..1f006637e67 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -121,6 +121,7 @@ Light::Light() use_diffuse = true; use_glossy = true; use_transmission = true; + use_scatter = true; shader = 0; samples = 1; @@ -206,8 +207,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen } /* skip motion blurred deforming meshes, not supported yet */ - if(mesh->has_motion_blur()) + if(mesh->has_motion_blur()) { + j++; continue; + } /* skip if we have no emission shaders */ foreach(uint sindex, mesh->used_shaders) { @@ -241,6 +244,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen shader_flag |= SHADER_EXCLUDE_TRANSMIT; use_light_visibility = true; } + if(!(object->visibility & PATH_RAY_VOLUME_SCATTER)) { + shader_flag |= SHADER_EXCLUDE_SCATTER; + use_light_visibility = true; + } for(size_t i = 0; i < mesh->triangles.size(); i++) { Shader *shader = scene->shaders[mesh->shader[i]]; @@ -498,6 +505,10 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce shader_id |= SHADER_EXCLUDE_TRANSMIT; use_light_visibility = true; } + if(!light->use_scatter) { + shader_id |= SHADER_EXCLUDE_SCATTER; + use_light_visibility = true; + } if(light->type == LIGHT_POINT) { shader_id &= ~SHADER_AREA_LIGHT; @@ -552,6 +563,10 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce shader_id |= SHADER_EXCLUDE_TRANSMIT; use_light_visibility = true; } + if(!(visibility & PATH_RAY_VOLUME_SCATTER)) { + shader_id |= SHADER_EXCLUDE_SCATTER; + use_light_visibility = true; + } light_data[i*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), 0.0f, 0.0f, 0.0f); light_data[i*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h index 82308cf3e88..89091bb5f9e 100644 --- a/intern/cycles/render/light.h +++ b/intern/cycles/render/light.h @@ -54,6 +54,7 @@ public: bool use_diffuse; bool use_glossy; bool use_transmission; + bool use_scatter; int shader; int samples; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 273443034c5..8299cd02fef 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -132,6 +132,7 @@ void Mesh::clear() transform_applied = false; transform_negative_scaled = false; transform_normal = transform_identity(); + geometry_synced = false; } int Mesh::split_vertex(int vertex) diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index 5ee774bacc1..d45905611fa 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -71,6 +71,9 @@ public: ustring name; /* Mesh Data */ + bool geometry_synced; /* used to distinguish meshes with no verts + and meshed for which geometry is not created */ + vector<float3> verts; vector<Triangle> triangles; vector<uint> shader; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 027bfd71931..1f148d34ea6 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -449,6 +449,8 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u } object_flag[i] |= SD_TRANSFORM_APPLIED; + if(object->mesh->transform_negative_scaled) + object_flag[i] |= SD_NEGATIVE_SCALE_APPLIED; } else have_instancing = true; diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 94866102f60..f57e16471a1 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -248,20 +248,27 @@ void OSLShaderManager::shading_system_free() bool OSLShaderManager::osl_compile(const string& inputfile, const string& outputfile) { - vector<string> options; +#if OSL_LIBRARY_VERSION_CODE < 10500 + typedef string string_view; +#endif + + vector<string_view> options; string stdosl_path; + string shader_path = path_get("shader"); /* specify output file name */ options.push_back("-o"); options.push_back(outputfile); /* specify standard include path */ - options.push_back("-I" + path_get("shader")); + options.push_back("-I"); + options.push_back(shader_path); + stdosl_path = path_get("shader/stdosl.h"); /* compile */ OSL::OSLCompiler *compiler = OSL::OSLCompiler::create(); - bool ok = compiler->compile(inputfile, options, stdosl_path); + bool ok = compiler->compile(string_view(inputfile), options, string_view(stdosl_path)); delete compiler; return ok; diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index cf0f0bc1055..796007b64a8 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -141,7 +141,7 @@ void Scene::device_update(Device *device_, Progress& progress) * the different managers, using data computed by previous managers. * * - Image manager uploads images used by shaders. - * - Camera may be used for adapative subdivison. + * - Camera may be used for adaptive subdivision. * - Displacement shader must have all shader data available. * - Light manager needs lookup tables and final mesh data to compute emission CDF. * - Film needs light manager to run for use_light_visibility diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index 576c176759c..13c63d9420c 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -363,14 +363,17 @@ bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input) return false; } -void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input) +void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, + const set<ShaderNode*>& done, + ShaderInput *input, + ShaderNode *skip_node) { ShaderNode *node = (input->link)? input->link->parent: NULL; - if(node && done.find(node) == done.end()) { + if(node && done.find(node) == done.end() && node != skip_node) { foreach(ShaderInput *in, node->inputs) if(!node_skip_input(node, in)) - find_dependencies(dependencies, done, in); + find_dependencies(dependencies, done, in, skip_node); dependencies.insert(node); } @@ -459,20 +462,28 @@ void SVMCompiler::generate_closure_node(ShaderNode *node, set<ShaderNode*>& done } } -void SVMCompiler::generated_shared_closure_nodes(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done, const set<ShaderNode*>& shared) +void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node, + ShaderNode *node, + set<ShaderNode*>& done, + set<ShaderNode*>& closure_done, + const set<ShaderNode*>& shared) { if(shared.find(node) != shared.end()) { - generate_multi_closure(node, done, closure_done); + generate_multi_closure(root_node, node, done, closure_done); } else { foreach(ShaderInput *in, node->inputs) { if(in->type == SHADER_SOCKET_CLOSURE && in->link) - generated_shared_closure_nodes(in->link->parent, done, closure_done, shared); + generated_shared_closure_nodes(root_node, in->link->parent, + done, closure_done, shared); } } } -void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done) +void SVMCompiler::generate_multi_closure(ShaderNode *root_node, + ShaderNode *node, + set<ShaderNode*>& done, + set<ShaderNode*>& closure_done) { /* only generate once */ if(closure_done.find(node) != closure_done.end()) @@ -509,12 +520,33 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don set_intersection(cl1deps.begin(), cl1deps.end(), cl2deps.begin(), cl2deps.end(), std::inserter(shareddeps, shareddeps.begin())); - + + /* it's possible some nodes are not shared between this mix node + * inputs, but still needed to be always executed, this mainly + * happens when a node of current subbranch is used by a parent + * node or so */ + if(root_node != node) { + foreach(ShaderInput *in, root_node->inputs) { + set<ShaderNode*> rootdeps; + find_dependencies(rootdeps, done, in, node); + set_intersection(rootdeps.begin(), rootdeps.end(), + cl1deps.begin(), cl1deps.end(), + std::inserter(shareddeps, shareddeps.begin())); + set_intersection(rootdeps.begin(), rootdeps.end(), + cl2deps.begin(), cl2deps.end(), + std::inserter(shareddeps, shareddeps.begin())); + } + } + if(!shareddeps.empty()) { - if(cl1in->link) - generated_shared_closure_nodes(cl1in->link->parent, done, closure_done, shareddeps); - if(cl2in->link) - generated_shared_closure_nodes(cl2in->link->parent, done, closure_done, shareddeps); + if(cl1in->link) { + generated_shared_closure_nodes(root_node, cl1in->link->parent, + done, closure_done, shareddeps); + } + if(cl2in->link) { + generated_shared_closure_nodes(root_node, cl2in->link->parent, + done, closure_done, shareddeps); + } generate_svm_nodes(shareddeps, done); } @@ -525,7 +557,7 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE, 0, facin->stack_offset, 0)); int node_jump_skip_index = svm_nodes.size() - 1; - generate_multi_closure(cl1in->link->parent, done, closure_done); + generate_multi_closure(root_node, cl1in->link->parent, done, closure_done); /* fill in jump instruction location to be after closure */ svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1; @@ -537,7 +569,7 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO, 0, facin->stack_offset, 0)); int node_jump_skip_index = svm_nodes.size() - 1; - generate_multi_closure(cl2in->link->parent, done, closure_done); + generate_multi_closure(root_node, cl2in->link->parent, done, closure_done); /* fill in jump instruction location to be after closure */ svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1; @@ -551,9 +583,9 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don * to skip closures here because was already optimized due to * fixed weight or add closure that always needs both */ if(cl1in->link) - generate_multi_closure(cl1in->link->parent, done, closure_done); + generate_multi_closure(root_node, cl1in->link->parent, done, closure_done); if(cl2in->link) - generate_multi_closure(cl2in->link->parent, done, closure_done); + generate_multi_closure(root_node, cl2in->link->parent, done, closure_done); } } else { @@ -638,7 +670,8 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty if(generate) { set<ShaderNode*> done, closure_done; - generate_multi_closure(clin->link->parent, done, closure_done); + generate_multi_closure(clin->link->parent, clin->link->parent, + done, closure_done); } } diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h index 45aa4d26926..c1dd96e4d80 100644 --- a/intern/cycles/render/svm.h +++ b/intern/cycles/render/svm.h @@ -123,15 +123,21 @@ protected: /* single closure */ void find_dependencies(set<ShaderNode*>& dependencies, - const set<ShaderNode*>& done, ShaderInput *input); + const set<ShaderNode*>& done, + ShaderInput *input, + ShaderNode *skip_node = NULL); void generate_node(ShaderNode *node, set<ShaderNode*>& done); void generate_closure_node(ShaderNode *node, set<ShaderNode*>& done); - void generated_shared_closure_nodes(ShaderNode *node, set<ShaderNode*>& done, + void generated_shared_closure_nodes(ShaderNode *root_node, ShaderNode *node, + set<ShaderNode*>& done, set<ShaderNode*>& closure_done, const set<ShaderNode*>& shared); void generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done); /* multi closure */ - void generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done); + void generate_multi_closure(ShaderNode *root_node, + ShaderNode *node, + set<ShaderNode*>& done, + set<ShaderNode*>& closure_done); /* compile */ void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type); |