diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/blender/blender_mesh.cpp | 9 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh_build.cpp | 11 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh_split.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 2 | ||||
-rw-r--r-- | intern/cycles/render/light.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/mesh.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/render/object.cpp | 338 | ||||
-rw-r--r-- | intern/cycles/render/object.h | 60 | ||||
-rw-r--r-- | intern/cycles/render/osl.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/particles.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/render/scene.cpp | 20 | ||||
-rw-r--r-- | intern/cycles/render/scene.h | 5 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/svm.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/tables.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/util/util_guarded_allocator.h | 35 | ||||
-rw-r--r-- | intern/cycles/util/util_stack_allocator.h | 25 | ||||
-rw-r--r-- | intern/cycles/util/util_vector.h | 6 |
19 files changed, 387 insertions, 164 deletions
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index 1f0aa5eef34..55ef913408f 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -657,7 +657,7 @@ static void create_mesh(Scene *scene, static void create_subd_mesh(Scene *scene, Mesh *mesh, - BL::Object b_ob, + BL::Object& b_ob, BL::Mesh& b_mesh, PointerRNA *cmesh, const vector<uint>& used_shaders, @@ -976,7 +976,12 @@ void BlenderSync::sync_mesh_motion(BL::Object& b_ob, memcmp(mP, &mesh->verts[0], sizeof(float3)*numverts) == 0) { /* no motion, remove attributes again */ - VLOG(1) << "No actual deformation motion for object " << b_ob.name(); + if(b_mesh.vertices.length() != numverts) { + VLOG(1) << "Topology differs, disabling motion blur."; + } + else { + VLOG(1) << "No actual deformation motion for object " << b_ob.name(); + } mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); if(attr_mN) mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL); diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 9e63485c04e..5c474c8c3e9 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -327,6 +327,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) data.x += prim_offset; data.y += prim_offset; pack_leaf_nodes[pack_leaf_nodes_offset] = data; + for(int j = 1; j < nsize_leaf; ++j) { + pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j]; + } pack_leaf_nodes_offset += nsize_leaf; } } diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index a0b09c780ce..bba89a8f35c 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -617,7 +617,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, BoundBox::empty, BoundBox::empty}; int ob_num = 0; - + int num_new_prims = 0; /* Fill in per-type type/index array. */ for(int i = 0; i < range.size(); i++) { const BVHReference& ref = references[range.start() + i]; @@ -629,10 +629,11 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, bounds[type_index].grow(ref.bounds()); visibility[type_index] |= objects[ref.prim_object()]->visibility; + ++num_new_prims; } else { object_references.push_back(ref); - ob_num++; + ++ob_num; } } @@ -651,11 +652,11 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object; + local_prim_type.resize(num_new_prims); + local_prim_index.resize(num_new_prims); + local_prim_object.resize(num_new_prims); for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) { int num = (int)p_type[i].size(); - local_prim_type.resize(start_index + num); - local_prim_index.resize(start_index + num); - local_prim_object.resize(start_index + num); if(num != 0) { assert(p_type[i].size() == p_index[i].size()); assert(p_type[i].size() == p_object[i].size()); diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp index 9185bd99d10..8084975565e 100644 --- a/intern/cycles/bvh/bvh_split.cpp +++ b/intern/cycles/bvh/bvh_split.cpp @@ -44,6 +44,8 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, const BVHReference *ref_ptr = &references_->at(range.start()); float min_sah = FLT_MAX; + storage_->right_bounds.resize(range.size()); + for(int dim = 0; dim < 3; dim++) { /* Sort references. */ bvh_reference_sort(range.start(), @@ -53,8 +55,6 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, /* sweep right to left and determine bounds. */ BoundBox right_bounds = BoundBox::empty; - - storage_->right_bounds.resize(range.size()); for(int i = range.size() - 1; i > 0; i--) { right_bounds.grow(ref_ptr[i].bounds()); storage_->right_bounds[i - 1] = right_bounds; @@ -157,11 +157,10 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder, } /* select best split plane. */ + storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS); for(int dim = 0; dim < 3; dim++) { /* sweep right to left and determine bounds. */ BoundBox right_bounds = BoundBox::empty; - - storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS); for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { right_bounds.grow(storage_->bins[dim][i].bounds); storage_->right_bounds[i - 1] = right_bounds; diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 8e7a2c1b62b..d0ca256f323 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -303,7 +303,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, /* dummy initilizations copied from SHADER_EVAL_DISPLACE */ float3 I = Ng; - float t = 0.0f; + float t = 1.0f; float time = TIME_INVALID; /* light passes */ diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 1637045ce84..fef28b25f3e 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -771,11 +771,11 @@ void LightManager::device_update_points(Device *device, void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->lights.size() << " lights."; - if(!need_update) return; + VLOG(1) << "Total " << scene->lights.size() << " lights."; + device_free(device, dscene); use_light_visibility = false; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 241a1c44ebf..cc8519219ed 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -528,7 +528,7 @@ void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total delete bvh; bvh = BVH::create(bparams, objects); - bvh->build(*progress); + MEM_GUARDED_CALL(progress, bvh->build, *progress); } } @@ -1232,11 +1232,11 @@ void MeshManager::device_update_displacement_images(Device *device, void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->meshes.size() << " meshes."; - if(!need_update) return; + VLOG(1) << "Total " << scene->meshes.size() << " meshes."; + /* update normals */ foreach(Mesh *mesh, scene->meshes) { foreach(uint shader, mesh->used_shaders) { diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 42bb665cb9f..a7ea75820ea 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -231,160 +231,260 @@ ObjectManager::~ObjectManager() { } -void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress) +void ObjectManager::device_update_object_transform(UpdateObejctTransformState *state, + Object *ob, + int object_index) { - float4 *objects; - float4 *objects_vector = NULL; - int i = 0; - map<Mesh*, float> surface_area_map; - map<ParticleSystem*, int> particle_offset; - Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); - bool have_motion = false; - bool have_curves = false; - - objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size()); - if(need_motion == Scene::MOTION_PASS) - objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size()); - - /* particle system device offsets - * 0 is dummy particle, index starts at 1 + float4 *objects = state->objects; + float4 *objects_vector = state->objects_vector; + + Mesh *mesh = ob->mesh; + uint flag = 0; + + /* Compute transformations. */ + Transform tfm = ob->tfm; + Transform itfm = transform_inverse(tfm); + + /* Compute surface area. for uniform scale we can do avoid the many + * transform calls and share computation for instances. + * + * TODO(brecht): Correct for displacement, and move to a better place. */ - int numparticles = 1; - foreach(ParticleSystem *psys, scene->particle_systems) { - particle_offset[psys] = numparticles; - numparticles += psys->particles.size(); - } + float uniform_scale; + float surface_area = 0.0f; + float pass_id = ob->pass_id; + float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF); + int particle_index = (ob->particle_system) + ? ob->particle_index + state->particle_offset[ob->particle_system] + : 0; + + if(transform_uniform_scale(tfm, uniform_scale)) { + map<Mesh*, float>::iterator it; + + /* NOTE: This isn't fully optimal and could in theory lead to multiple + * threads calculating area of the same mesh in parallel. However, this + * also prevents suspending all the threads when some mesh's area is + * not yet known. + */ + state->surface_area_lock.lock(); + it = state->surface_area_map.find(mesh); + state->surface_area_lock.unlock(); - foreach(Object *ob, scene->objects) { - Mesh *mesh = ob->mesh; - uint flag = 0; - - /* compute transformations */ - Transform tfm = ob->tfm; - Transform itfm = transform_inverse(tfm); - - /* compute surface area. for uniform scale we can do avoid the many - * transform calls and share computation for instances */ - /* todo: correct for displacement, and move to a better place */ - float uniform_scale; - float surface_area = 0.0f; - float pass_id = ob->pass_id; - float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF); - int particle_index = (ob->particle_system)? ob->particle_index + particle_offset[ob->particle_system]: 0; - - if(transform_uniform_scale(tfm, uniform_scale)) { - map<Mesh*, float>::iterator it = surface_area_map.find(mesh); - - if(it == surface_area_map.end()) { - foreach(Mesh::Triangle& t, mesh->triangles) { - float3 p1 = mesh->verts[t.v[0]]; - float3 p2 = mesh->verts[t.v[1]]; - float3 p3 = mesh->verts[t.v[2]]; - - surface_area += triangle_area(p1, p2, p3); - } + if(it == state->surface_area_map.end()) { + foreach(Mesh::Triangle& t, mesh->triangles) { + float3 p1 = mesh->verts[t.v[0]]; + float3 p2 = mesh->verts[t.v[1]]; + float3 p3 = mesh->verts[t.v[2]]; - surface_area_map[mesh] = surface_area; + surface_area += triangle_area(p1, p2, p3); } - else - surface_area = it->second; - surface_area *= uniform_scale; + state->surface_area_lock.lock(); + state->surface_area_map[mesh] = surface_area; + state->surface_area_lock.unlock(); } else { - foreach(Mesh::Triangle& t, mesh->triangles) { - float3 p1 = transform_point(&tfm, mesh->verts[t.v[0]]); - float3 p2 = transform_point(&tfm, mesh->verts[t.v[1]]); - float3 p3 = transform_point(&tfm, mesh->verts[t.v[2]]); + surface_area = it->second; + } - surface_area += triangle_area(p1, p2, p3); - } + surface_area *= uniform_scale; + } + else { + foreach(Mesh::Triangle& t, mesh->triangles) { + float3 p1 = transform_point(&tfm, mesh->verts[t.v[0]]); + float3 p2 = transform_point(&tfm, mesh->verts[t.v[1]]); + float3 p3 = transform_point(&tfm, mesh->verts[t.v[2]]); + + surface_area += triangle_area(p1, p2, p3); } + } - /* pack in texture */ - int offset = i*OBJECT_SIZE; - - /* OBJECT_TRANSFORM */ - memcpy(&objects[offset], &tfm, sizeof(float4)*3); - /* OBJECT_INVERSE_TRANSFORM */ - memcpy(&objects[offset+4], &itfm, sizeof(float4)*3); - /* OBJECT_PROPERTIES */ - objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index)); - - if(need_motion == Scene::MOTION_PASS) { - /* motion transformations, is world/object space depending if mesh - * comes with deformed position in object space, or if we transform - * the shading point in world space */ - Transform mtfm_pre = ob->motion.pre; - Transform mtfm_post = ob->motion.post; - - if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { - mtfm_pre = mtfm_pre * itfm; - mtfm_post = mtfm_post * itfm; - } - else { - flag |= SD_OBJECT_HAS_VERTEX_MOTION; - } + /* Pack in texture. */ + int offset = object_index*OBJECT_SIZE; - memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3); - memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3); + /* OBJECT_TRANSFORM */ + memcpy(&objects[offset], &tfm, sizeof(float4)*3); + /* OBJECT_INVERSE_TRANSFORM */ + memcpy(&objects[offset+4], &itfm, sizeof(float4)*3); + /* OBJECT_PROPERTIES */ + objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index)); + + if(state->need_motion == Scene::MOTION_PASS) { + /* Motion transformations, is world/object space depending if mesh + * comes with deformed position in object space, or if we transform + * the shading point in world space. + */ + Transform mtfm_pre = ob->motion.pre; + Transform mtfm_post = ob->motion.post; + + if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { + mtfm_pre = mtfm_pre * itfm; + mtfm_post = mtfm_post * itfm; + } + else { + flag |= SD_OBJECT_HAS_VERTEX_MOTION; } + + memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3); + memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3); + } #ifdef __OBJECT_MOTION__ - else if(need_motion == Scene::MOTION_BLUR) { - if(ob->use_motion) { - /* decompose transformations for interpolation */ - DecompMotionTransform decomp; - - transform_motion_decompose(&decomp, &ob->motion, &ob->tfm); - memcpy(&objects[offset], &decomp, sizeof(float4)*8); - flag |= SD_OBJECT_MOTION; - have_motion = true; - } + else if(state->need_motion == Scene::MOTION_BLUR) { + if(ob->use_motion) { + /* decompose transformations for interpolation. */ + DecompMotionTransform decomp; + + transform_motion_decompose(&decomp, &ob->motion, &ob->tfm); + memcpy(&objects[offset], &decomp, sizeof(float4)*8); + flag |= SD_OBJECT_MOTION; + state->have_motion = true; } + } #endif - if(mesh->use_motion_blur) - have_motion = true; + if(mesh->use_motion_blur) { + state->have_motion = true; + } - /* dupli object coords and motion info */ - int totalsteps = mesh->motion_steps; - int numsteps = (totalsteps - 1)/2; - int numverts = mesh->verts.size(); - int numkeys = mesh->curve_keys.size(); + /* Dupli object coords and motion info. */ + int totalsteps = mesh->motion_steps; + int numsteps = (totalsteps - 1)/2; + int numverts = mesh->verts.size(); + int numkeys = mesh->curve_keys.size(); - objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys)); - objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts)); + objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys)); + objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts)); - /* object flag */ - if(ob->use_holdout) - flag |= SD_HOLDOUT_MASK; - object_flag[i] = flag; + /* Object flag. */ + if(ob->use_holdout) { + flag |= SD_HOLDOUT_MASK; + } + state->object_flag[object_index] = flag; - /* have curves */ - if(mesh->curves.size()) - have_curves = true; + /* Have curves. */ + if(mesh->curves.size()) { + state->have_curves = true; + } +} - i++; +bool ObjectManager::device_update_object_transform_pop_work( + UpdateObejctTransformState *state, + int *start_index, + int *num_objects) +{ + /* Tweakable parameter, number of objects per chunk. + * Too small value will cause some extra overhead due to spin lock, + * too big value might not use all threads nicely. + */ + static const int OBJECTS_PER_TASK = 32; + bool have_work = false; + state->queue_lock.lock(); + int num_scene_objects = state->scene->objects.size(); + if(state->queue_start_object < num_scene_objects) { + int count = min(OBJECTS_PER_TASK, + num_scene_objects - state->queue_start_object); + *start_index = state->queue_start_object; + *num_objects = count; + state->queue_start_object += count; + have_work = true; + } + state->queue_lock.unlock(); + return have_work; +} + +void ObjectManager::device_update_object_transform_task( + UpdateObejctTransformState *state) +{ + int start_index, num_objects; + while(device_update_object_transform_pop_work(state, + &start_index, + &num_objects)) + { + for(int i = 0; i < num_objects; ++i) { + const int object_index = start_index + i; + Object *ob = state->scene->objects[object_index]; + device_update_object_transform(state, ob, object_index); + } + } +} - if(progress.get_cancel()) return; +void ObjectManager::device_update_transforms(Device *device, + DeviceScene *dscene, + Scene *scene, + uint *object_flag, + Progress& progress) +{ + UpdateObejctTransformState state; + state.need_motion = scene->need_motion(device->info.advanced_shading); + state.have_motion = false; + state.have_curves = false; + state.scene = scene; + state.queue_start_object = 0; + + state.object_flag = object_flag; + state.objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size()); + if(state.need_motion == Scene::MOTION_PASS) { + state.objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size()); + } + else { + state.objects_vector = NULL; + } + + /* Particle system device offsets + * 0 is dummy particle, index starts at 1. + */ + int numparticles = 1; + foreach(ParticleSystem *psys, scene->particle_systems) { + state.particle_offset[psys] = numparticles; + numparticles += psys->particles.size(); + } + + /* NOTE: If it's just a handful of objects we deal with them in a single + * thread to avoid threading overhead. However, this threshold is might + * need some tweaks to make mid-complex scenes optimal. + */ + if(scene->objects.size() < 64) { + int object_index = 0; + foreach(Object *ob, scene->objects) { + device_update_object_transform(&state, ob, object_index); + object_index++; + if(progress.get_cancel()) { + return; + } + } + } + else { + const int num_threads = TaskScheduler::num_threads(); + TaskPool pool; + for(int i = 0; i < num_threads; ++i) { + pool.push(function_bind( + &ObjectManager::device_update_object_transform_task, + this, + &state)); + } + pool.wait_work(); + if(progress.get_cancel()) { + return; + } } device->tex_alloc("__objects", dscene->objects); - if(need_motion == Scene::MOTION_PASS) + if(state.need_motion == Scene::MOTION_PASS) { device->tex_alloc("__objects_vector", dscene->objects_vector); + } - dscene->data.bvh.have_motion = have_motion; - dscene->data.bvh.have_curves = have_curves; + dscene->data.bvh.have_motion = state.have_motion; + dscene->data.bvh.have_curves = state.have_curves; dscene->data.bvh.have_instancing = true; } void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->objects.size() << " objects."; - if(!need_update) return; - + + VLOG(1) << "Total " << scene->objects.size() << " objects."; + device_free(device, dscene); if(scene->objects.size() == 0) diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 379d1748cdd..c2a79ca8dc4 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -17,9 +17,12 @@ #ifndef __OBJECT_H__ #define __OBJECT_H__ +#include "scene.h" + #include "util_boundbox.h" #include "util_param.h" #include "util_transform.h" +#include "util_thread.h" #include "util_types.h" CCL_NAMESPACE_BEGIN @@ -76,7 +79,12 @@ public: ~ObjectManager(); void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); - void device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress); + void device_update_transforms(Device *device, + DeviceScene *dscene, + Scene *scene, + uint *object_flag, + Progress& progress); + void device_update_flags(Device *device, DeviceScene *dscene, Scene *scene, @@ -87,6 +95,56 @@ public: void tag_update(Scene *scene); void apply_static_transforms(DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress); + +protected: + /* Global state of object transform update. */ + struct UpdateObejctTransformState { + /* Global state used by device_update_object_transform(). + * Common for both threaded and non-threaded update. + */ + + /* Type of the motion required by the scene settings. */ + Scene::MotionType need_motion; + + /* Mapping from particle system to a index in packed particle array. + * Only used for read. + */ + map<ParticleSystem*, int> particle_offset; + + /* Mesh area. + * Used to avoid calculation of mesh area multiple times. Used for both + * read and write. Acquire surface_area_lock to keep it all thread safe. + */ + map<Mesh*, float> surface_area_map; + + /* Packed object arrays. Those will be filled in. */ + uint *object_flag; + float4 *objects; + float4 *objects_vector; + + /* Flags which will be synchronized to Integrator. */ + bool have_motion; + bool have_curves; + + /* ** Scheduling queue. ** */ + + Scene *scene; + + /* Some locks to keep everything thread-safe. */ + thread_spin_lock queue_lock; + thread_spin_lock surface_area_lock; + + /* First unused object index in the queue. */ + int queue_start_object; + }; + void device_update_object_transform(UpdateObejctTransformState *state, + Object *ob, + const int object_index); + void device_update_object_transform_task(UpdateObejctTransformState *state); + bool device_update_object_transform_pop_work( + UpdateObejctTransformState *state, + int *start_index, + int *num_objects); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index e1c5416b024..cb3cb8b9b1b 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -75,11 +75,11 @@ void OSLShaderManager::reset(Scene * /*scene*/) void OSLShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->shaders.size() << " shaders."; - if(!need_update) return; + VLOG(1) << "Total " << scene->shaders.size() << " shaders."; + device_free(device, dscene, scene); /* determine which shaders are in use */ diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index 8f9e8c6d639..50726bb4574 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -93,12 +93,12 @@ void ParticleSystemManager::device_update_particles(Device *device, DeviceScene void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { + if(!need_update) + return; + VLOG(1) << "Total " << scene->particle_systems.size() << " particle systems."; - if(!need_update) - return; - device_free(device, dscene); progress.set_status("Updating Particle Systems", "Copying Particles to device"); diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 29163c53109..b0052c30af4 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -135,7 +135,9 @@ void Scene::device_update(Device *device_, Progress& progress) { if(!device) device = device_; - + + bool print_stats = need_data_update(); + /* The order of updates is important, because there's dependencies between * the different managers, using data computed by previous managers. * @@ -239,9 +241,11 @@ void Scene::device_update(Device *device_, Progress& progress) device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); } - VLOG(1) << "System memory statistics after full device sync:\n" - << " Usage: " << util_guarded_get_mem_used() << "\n" - << " Peak: " << util_guarded_get_mem_peak(); + if(print_stats) { + VLOG(1) << "System memory statistics after full device sync:\n" + << " Usage: " << util_guarded_get_mem_used() << "\n" + << " Peak: " << util_guarded_get_mem_peak(); + } } Scene::MotionType Scene::need_motion(bool advanced_shading) @@ -278,11 +282,10 @@ bool Scene::need_update() return (need_reset() || film->need_update); } -bool Scene::need_reset() +bool Scene::need_data_update() { return (background->need_update || image_manager->need_update - || camera->need_update || object_manager->need_update || mesh_manager->need_update || light_manager->need_update @@ -295,6 +298,11 @@ bool Scene::need_reset() || film->need_update); } +bool Scene::need_reset() +{ + return need_data_update() || camera->need_update; +} + void Scene::reset() { shader_manager->reset(this); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index d30a0cb45fe..b29aff88c01 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -213,6 +213,11 @@ public: void device_free(); protected: + /* Check if some heavy data worth logging was updated. + * Mainly used to suppress extra annoying logging. + */ + bool need_data_update(); + void free_memory(bool final); }; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 24f48b61349..63037311889 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -816,7 +816,7 @@ void Session::update_scene() /* update scene */ if(scene->need_update()) { progress.set_status("Updating Scene"); - scene->device_update(device, progress); + MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); } } diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index f3d39c1bd72..56fb57e9667 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -46,11 +46,11 @@ void SVMShaderManager::reset(Scene * /*scene*/) void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->shaders.size() << " shaders."; - if(!need_update) return; + VLOG(1) << "Total " << scene->shaders.size() << " shaders."; + /* test if we need to update */ device_free(device, dscene, scene); diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index ad3f4866072..cde024cc11c 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -37,11 +37,11 @@ LookupTables::~LookupTables() void LookupTables::device_update(Device *device, DeviceScene *dscene) { - VLOG(1) << "Total " << lookup_tables.size() << " lookup tables."; - if(!need_update) return; + VLOG(1) << "Total " << lookup_tables.size() << " lookup tables."; + device->tex_free(dscene->lookup_table); if(lookup_tables.size() > 0) diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h index f6004749a13..78453d214be 100644 --- a/intern/cycles/util/util_guarded_allocator.h +++ b/intern/cycles/util/util_guarded_allocator.h @@ -53,19 +53,24 @@ public: size_t size = n * sizeof(T); util_guarded_mem_alloc(size); (void)hint; -#ifdef WITH_BLENDER_GUARDEDALLOC if(n == 0) { return NULL; } + T *mem; +#ifdef WITH_BLENDER_GUARDEDALLOC /* C++ standard requires allocation functions to allocate memory suitably * aligned for any standard type. This is 16 bytes for 64 bit platform as * far as i concerned. We might over-align on 32bit here, but that should * be all safe actually. */ - return (T*)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); + mem = (T*)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); #else - return (T*)malloc(size); + mem = (T*)malloc(size); #endif + if(mem == NULL) { + throw std::bad_alloc(); + } + return mem; } void deallocate(T *p, size_t n) @@ -97,7 +102,9 @@ public: void construct(T *p, const T& val) { - new ((T *)p) T(val); + if(p != NULL) { + new ((T *)p) T(val); + } } void destroy(T *p) @@ -157,6 +164,26 @@ public: size_t util_guarded_get_mem_used(void); size_t util_guarded_get_mem_peak(void); +/* Call given function and keep track if it runs out of memory. + * + * If it does run out f memory, stop execution and set progress + * to do a global cancel. + * + * It's not fully robust, but good enough to catch obvious issues + * when running out of memory. + */ +#define MEM_GUARDED_CALL(progress, func, ...) \ + do { \ + try { \ + (func)(__VA_ARGS__); \ + } \ + catch (std::bad_alloc&) { \ + fprintf(stderr, "Error: run out of memory!\n"); \ + fflush(stderr); \ + (progress)->set_error("Out of memory"); \ + } \ + } while(false) + CCL_NAMESPACE_END #endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_stack_allocator.h b/intern/cycles/util/util_stack_allocator.h index 29260888eef..d7aab5b250c 100644 --- a/intern/cycles/util/util_stack_allocator.h +++ b/intern/cycles/util/util_stack_allocator.h @@ -40,14 +40,17 @@ public: /* Allocator construction/destruction. */ StackAllocator() - : pointer_(0) {} + : pointer_(0), + use_stack_(true) {} StackAllocator(const StackAllocator&) - : pointer_(0) {} + : pointer_(0), + use_stack_(true) {} template <class U> StackAllocator(const StackAllocator<SIZE, U>&) - : pointer_(0) {} + : pointer_(0), + use_stack_(false) {} /* Memory allocation/deallocation. */ @@ -57,14 +60,19 @@ public: if(n == 0) { return NULL; } - if(pointer_ + n >= SIZE) { + if(pointer_ + n >= SIZE || use_stack_ == false) { size_t size = n * sizeof(T); util_guarded_mem_alloc(size); + T *mem; #ifdef WITH_BLENDER_GUARDEDALLOC - return (T*)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); + mem = (T*)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); #else - return (T*)malloc(size); + mem = (T*)malloc(size); #endif + if(mem == NULL) { + throw std::bad_alloc(); + } + return mem; } T *mem = &data_[pointer_]; pointer_ += n; @@ -104,7 +112,9 @@ public: void construct(T *p, const T& val) { - new ((T *)p) T(val); + if(p != NULL) { + new ((T *)p) T(val); + } } void destroy(T *p) @@ -151,6 +161,7 @@ public: private: int pointer_; + bool use_stack_; T data_[SIZE]; }; diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 4eb0dde8308..ad579da2d2e 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -218,10 +218,16 @@ public: protected: inline T* mem_allocate(size_t N) { + if(N == 0) { + return NULL; + } T *mem = (T*)util_aligned_malloc(sizeof(T)*N, alignment); if(mem != NULL) { util_guarded_mem_alloc(sizeof(T)*N); } + else { + throw std::bad_alloc(); + } return mem; } |