diff options
author | Lukas Tönne <lukas.toenne@gmail.com> | 2016-04-28 18:33:19 +0300 |
---|---|---|
committer | Lukas Tönne <lukas.toenne@gmail.com> | 2016-04-28 18:33:19 +0300 |
commit | 1f723603c882e6d79ab69bea6e5c034ae21a6ce7 (patch) | |
tree | 098b0133beea06c460c3e9e92010bc669d151e5d /intern/cycles/render | |
parent | 3632c4997f5019d2a519996d9e216d474aa05d3b (diff) | |
parent | a48d7407986d04492107a2ef16d0ff6f0d012c45 (diff) |
Merge branch 'master' into temp_remove_particles
Diffstat (limited to 'intern/cycles/render')
-rw-r--r-- | intern/cycles/render/light.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/mesh.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/render/object.cpp | 338 | ||||
-rw-r--r-- | intern/cycles/render/object.h | 60 | ||||
-rw-r--r-- | intern/cycles/render/osl.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/particles.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/render/scene.cpp | 20 | ||||
-rw-r--r-- | intern/cycles/render/scene.h | 5 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/svm.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/tables.cpp | 4 |
11 files changed, 312 insertions, 141 deletions
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 1637045ce84..fef28b25f3e 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -771,11 +771,11 @@ void LightManager::device_update_points(Device *device, void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->lights.size() << " lights."; - if(!need_update) return; + VLOG(1) << "Total " << scene->lights.size() << " lights."; + device_free(device, dscene); use_light_visibility = false; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 241a1c44ebf..cc8519219ed 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -528,7 +528,7 @@ void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total delete bvh; bvh = BVH::create(bparams, objects); - bvh->build(*progress); + MEM_GUARDED_CALL(progress, bvh->build, *progress); } } @@ -1232,11 +1232,11 @@ void MeshManager::device_update_displacement_images(Device *device, void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->meshes.size() << " meshes."; - if(!need_update) return; + VLOG(1) << "Total " << scene->meshes.size() << " meshes."; + /* update normals */ foreach(Mesh *mesh, scene->meshes) { foreach(uint shader, mesh->used_shaders) { diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 42bb665cb9f..a7ea75820ea 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -231,160 +231,260 @@ ObjectManager::~ObjectManager() { } -void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress) +void ObjectManager::device_update_object_transform(UpdateObejctTransformState *state, + Object *ob, + int object_index) { - float4 *objects; - float4 
*objects_vector = NULL; - int i = 0; - map<Mesh*, float> surface_area_map; - map<ParticleSystem*, int> particle_offset; - Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); - bool have_motion = false; - bool have_curves = false; - - objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size()); - if(need_motion == Scene::MOTION_PASS) - objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size()); - - /* particle system device offsets - * 0 is dummy particle, index starts at 1 + float4 *objects = state->objects; + float4 *objects_vector = state->objects_vector; + + Mesh *mesh = ob->mesh; + uint flag = 0; + + /* Compute transformations. */ + Transform tfm = ob->tfm; + Transform itfm = transform_inverse(tfm); + + /* Compute surface area. for uniform scale we can do avoid the many + * transform calls and share computation for instances. + * + * TODO(brecht): Correct for displacement, and move to a better place. */ - int numparticles = 1; - foreach(ParticleSystem *psys, scene->particle_systems) { - particle_offset[psys] = numparticles; - numparticles += psys->particles.size(); - } + float uniform_scale; + float surface_area = 0.0f; + float pass_id = ob->pass_id; + float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF); + int particle_index = (ob->particle_system) + ? ob->particle_index + state->particle_offset[ob->particle_system] + : 0; + + if(transform_uniform_scale(tfm, uniform_scale)) { + map<Mesh*, float>::iterator it; + + /* NOTE: This isn't fully optimal and could in theory lead to multiple + * threads calculating area of the same mesh in parallel. However, this + * also prevents suspending all the threads when some mesh's area is + * not yet known. 
+ */ + state->surface_area_lock.lock(); + it = state->surface_area_map.find(mesh); + state->surface_area_lock.unlock(); - foreach(Object *ob, scene->objects) { - Mesh *mesh = ob->mesh; - uint flag = 0; - - /* compute transformations */ - Transform tfm = ob->tfm; - Transform itfm = transform_inverse(tfm); - - /* compute surface area. for uniform scale we can do avoid the many - * transform calls and share computation for instances */ - /* todo: correct for displacement, and move to a better place */ - float uniform_scale; - float surface_area = 0.0f; - float pass_id = ob->pass_id; - float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF); - int particle_index = (ob->particle_system)? ob->particle_index + particle_offset[ob->particle_system]: 0; - - if(transform_uniform_scale(tfm, uniform_scale)) { - map<Mesh*, float>::iterator it = surface_area_map.find(mesh); - - if(it == surface_area_map.end()) { - foreach(Mesh::Triangle& t, mesh->triangles) { - float3 p1 = mesh->verts[t.v[0]]; - float3 p2 = mesh->verts[t.v[1]]; - float3 p3 = mesh->verts[t.v[2]]; - - surface_area += triangle_area(p1, p2, p3); - } + if(it == state->surface_area_map.end()) { + foreach(Mesh::Triangle& t, mesh->triangles) { + float3 p1 = mesh->verts[t.v[0]]; + float3 p2 = mesh->verts[t.v[1]]; + float3 p3 = mesh->verts[t.v[2]]; - surface_area_map[mesh] = surface_area; + surface_area += triangle_area(p1, p2, p3); } - else - surface_area = it->second; - surface_area *= uniform_scale; + state->surface_area_lock.lock(); + state->surface_area_map[mesh] = surface_area; + state->surface_area_lock.unlock(); } else { - foreach(Mesh::Triangle& t, mesh->triangles) { - float3 p1 = transform_point(&tfm, mesh->verts[t.v[0]]); - float3 p2 = transform_point(&tfm, mesh->verts[t.v[1]]); - float3 p3 = transform_point(&tfm, mesh->verts[t.v[2]]); + surface_area = it->second; + } - surface_area += triangle_area(p1, p2, p3); - } + surface_area *= uniform_scale; + } + else { + foreach(Mesh::Triangle& t, 
mesh->triangles) { + float3 p1 = transform_point(&tfm, mesh->verts[t.v[0]]); + float3 p2 = transform_point(&tfm, mesh->verts[t.v[1]]); + float3 p3 = transform_point(&tfm, mesh->verts[t.v[2]]); + + surface_area += triangle_area(p1, p2, p3); } + } - /* pack in texture */ - int offset = i*OBJECT_SIZE; - - /* OBJECT_TRANSFORM */ - memcpy(&objects[offset], &tfm, sizeof(float4)*3); - /* OBJECT_INVERSE_TRANSFORM */ - memcpy(&objects[offset+4], &itfm, sizeof(float4)*3); - /* OBJECT_PROPERTIES */ - objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index)); - - if(need_motion == Scene::MOTION_PASS) { - /* motion transformations, is world/object space depending if mesh - * comes with deformed position in object space, or if we transform - * the shading point in world space */ - Transform mtfm_pre = ob->motion.pre; - Transform mtfm_post = ob->motion.post; - - if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { - mtfm_pre = mtfm_pre * itfm; - mtfm_post = mtfm_post * itfm; - } - else { - flag |= SD_OBJECT_HAS_VERTEX_MOTION; - } + /* Pack in texture. */ + int offset = object_index*OBJECT_SIZE; - memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3); - memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3); + /* OBJECT_TRANSFORM */ + memcpy(&objects[offset], &tfm, sizeof(float4)*3); + /* OBJECT_INVERSE_TRANSFORM */ + memcpy(&objects[offset+4], &itfm, sizeof(float4)*3); + /* OBJECT_PROPERTIES */ + objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index)); + + if(state->need_motion == Scene::MOTION_PASS) { + /* Motion transformations, is world/object space depending if mesh + * comes with deformed position in object space, or if we transform + * the shading point in world space. 
+ */ + Transform mtfm_pre = ob->motion.pre; + Transform mtfm_post = ob->motion.post; + + if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { + mtfm_pre = mtfm_pre * itfm; + mtfm_post = mtfm_post * itfm; + } + else { + flag |= SD_OBJECT_HAS_VERTEX_MOTION; } + + memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3); + memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3); + } #ifdef __OBJECT_MOTION__ - else if(need_motion == Scene::MOTION_BLUR) { - if(ob->use_motion) { - /* decompose transformations for interpolation */ - DecompMotionTransform decomp; - - transform_motion_decompose(&decomp, &ob->motion, &ob->tfm); - memcpy(&objects[offset], &decomp, sizeof(float4)*8); - flag |= SD_OBJECT_MOTION; - have_motion = true; - } + else if(state->need_motion == Scene::MOTION_BLUR) { + if(ob->use_motion) { + /* decompose transformations for interpolation. */ + DecompMotionTransform decomp; + + transform_motion_decompose(&decomp, &ob->motion, &ob->tfm); + memcpy(&objects[offset], &decomp, sizeof(float4)*8); + flag |= SD_OBJECT_MOTION; + state->have_motion = true; } + } #endif - if(mesh->use_motion_blur) - have_motion = true; + if(mesh->use_motion_blur) { + state->have_motion = true; + } - /* dupli object coords and motion info */ - int totalsteps = mesh->motion_steps; - int numsteps = (totalsteps - 1)/2; - int numverts = mesh->verts.size(); - int numkeys = mesh->curve_keys.size(); + /* Dupli object coords and motion info. 
*/ + int totalsteps = mesh->motion_steps; + int numsteps = (totalsteps - 1)/2; + int numverts = mesh->verts.size(); + int numkeys = mesh->curve_keys.size(); - objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys)); - objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts)); + objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys)); + objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts)); - /* object flag */ - if(ob->use_holdout) - flag |= SD_HOLDOUT_MASK; - object_flag[i] = flag; + /* Object flag. */ + if(ob->use_holdout) { + flag |= SD_HOLDOUT_MASK; + } + state->object_flag[object_index] = flag; - /* have curves */ - if(mesh->curves.size()) - have_curves = true; + /* Have curves. */ + if(mesh->curves.size()) { + state->have_curves = true; + } +} - i++; +bool ObjectManager::device_update_object_transform_pop_work( + UpdateObejctTransformState *state, + int *start_index, + int *num_objects) +{ + /* Tweakable parameter, number of objects per chunk. + * Too small value will cause some extra overhead due to spin lock, + * too big value might not use all threads nicely. 
+ */ + static const int OBJECTS_PER_TASK = 32; + bool have_work = false; + state->queue_lock.lock(); + int num_scene_objects = state->scene->objects.size(); + if(state->queue_start_object < num_scene_objects) { + int count = min(OBJECTS_PER_TASK, + num_scene_objects - state->queue_start_object); + *start_index = state->queue_start_object; + *num_objects = count; + state->queue_start_object += count; + have_work = true; + } + state->queue_lock.unlock(); + return have_work; +} + +void ObjectManager::device_update_object_transform_task( + UpdateObejctTransformState *state) +{ + int start_index, num_objects; + while(device_update_object_transform_pop_work(state, + &start_index, + &num_objects)) + { + for(int i = 0; i < num_objects; ++i) { + const int object_index = start_index + i; + Object *ob = state->scene->objects[object_index]; + device_update_object_transform(state, ob, object_index); + } + } +} - if(progress.get_cancel()) return; +void ObjectManager::device_update_transforms(Device *device, + DeviceScene *dscene, + Scene *scene, + uint *object_flag, + Progress& progress) +{ + UpdateObejctTransformState state; + state.need_motion = scene->need_motion(device->info.advanced_shading); + state.have_motion = false; + state.have_curves = false; + state.scene = scene; + state.queue_start_object = 0; + + state.object_flag = object_flag; + state.objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size()); + if(state.need_motion == Scene::MOTION_PASS) { + state.objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size()); + } + else { + state.objects_vector = NULL; + } + + /* Particle system device offsets + * 0 is dummy particle, index starts at 1. 
+ */ + int numparticles = 1; + foreach(ParticleSystem *psys, scene->particle_systems) { + state.particle_offset[psys] = numparticles; + numparticles += psys->particles.size(); + } + + /* NOTE: If it's just a handful of objects we deal with them in a single + * thread to avoid threading overhead. However, this threshold is might + * need some tweaks to make mid-complex scenes optimal. + */ + if(scene->objects.size() < 64) { + int object_index = 0; + foreach(Object *ob, scene->objects) { + device_update_object_transform(&state, ob, object_index); + object_index++; + if(progress.get_cancel()) { + return; + } + } + } + else { + const int num_threads = TaskScheduler::num_threads(); + TaskPool pool; + for(int i = 0; i < num_threads; ++i) { + pool.push(function_bind( + &ObjectManager::device_update_object_transform_task, + this, + &state)); + } + pool.wait_work(); + if(progress.get_cancel()) { + return; + } } device->tex_alloc("__objects", dscene->objects); - if(need_motion == Scene::MOTION_PASS) + if(state.need_motion == Scene::MOTION_PASS) { device->tex_alloc("__objects_vector", dscene->objects_vector); + } - dscene->data.bvh.have_motion = have_motion; - dscene->data.bvh.have_curves = have_curves; + dscene->data.bvh.have_motion = state.have_motion; + dscene->data.bvh.have_curves = state.have_curves; dscene->data.bvh.have_instancing = true; } void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->objects.size() << " objects."; - if(!need_update) return; - + + VLOG(1) << "Total " << scene->objects.size() << " objects."; + device_free(device, dscene); if(scene->objects.size() == 0) diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 379d1748cdd..c2a79ca8dc4 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -17,9 +17,12 @@ #ifndef __OBJECT_H__ #define __OBJECT_H__ +#include "scene.h" + #include "util_boundbox.h" #include 
"util_param.h" #include "util_transform.h" +#include "util_thread.h" #include "util_types.h" CCL_NAMESPACE_BEGIN @@ -76,7 +79,12 @@ public: ~ObjectManager(); void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); - void device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress); + void device_update_transforms(Device *device, + DeviceScene *dscene, + Scene *scene, + uint *object_flag, + Progress& progress); + void device_update_flags(Device *device, DeviceScene *dscene, Scene *scene, @@ -87,6 +95,56 @@ public: void tag_update(Scene *scene); void apply_static_transforms(DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress); + +protected: + /* Global state of object transform update. */ + struct UpdateObejctTransformState { + /* Global state used by device_update_object_transform(). + * Common for both threaded and non-threaded update. + */ + + /* Type of the motion required by the scene settings. */ + Scene::MotionType need_motion; + + /* Mapping from particle system to a index in packed particle array. + * Only used for read. + */ + map<ParticleSystem*, int> particle_offset; + + /* Mesh area. + * Used to avoid calculation of mesh area multiple times. Used for both + * read and write. Acquire surface_area_lock to keep it all thread safe. + */ + map<Mesh*, float> surface_area_map; + + /* Packed object arrays. Those will be filled in. */ + uint *object_flag; + float4 *objects; + float4 *objects_vector; + + /* Flags which will be synchronized to Integrator. */ + bool have_motion; + bool have_curves; + + /* ** Scheduling queue. ** */ + + Scene *scene; + + /* Some locks to keep everything thread-safe. */ + thread_spin_lock queue_lock; + thread_spin_lock surface_area_lock; + + /* First unused object index in the queue. 
*/ + int queue_start_object; + }; + void device_update_object_transform(UpdateObejctTransformState *state, + Object *ob, + const int object_index); + void device_update_object_transform_task(UpdateObejctTransformState *state); + bool device_update_object_transform_pop_work( + UpdateObejctTransformState *state, + int *start_index, + int *num_objects); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index e1c5416b024..cb3cb8b9b1b 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -75,11 +75,11 @@ void OSLShaderManager::reset(Scene * /*scene*/) void OSLShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->shaders.size() << " shaders."; - if(!need_update) return; + VLOG(1) << "Total " << scene->shaders.size() << " shaders."; + device_free(device, dscene, scene); /* determine which shaders are in use */ diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index 8f9e8c6d639..50726bb4574 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -93,12 +93,12 @@ void ParticleSystemManager::device_update_particles(Device *device, DeviceScene void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { + if(!need_update) + return; + VLOG(1) << "Total " << scene->particle_systems.size() << " particle systems."; - if(!need_update) - return; - device_free(device, dscene); progress.set_status("Updating Particle Systems", "Copying Particles to device"); diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 29163c53109..b0052c30af4 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -135,7 +135,9 @@ void Scene::device_update(Device *device_, Progress& progress) { if(!device) device = device_; - + + bool print_stats = need_data_update(); + /* The order 
of updates is important, because there's dependencies between * the different managers, using data computed by previous managers. * @@ -239,9 +241,11 @@ void Scene::device_update(Device *device_, Progress& progress) device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); } - VLOG(1) << "System memory statistics after full device sync:\n" - << " Usage: " << util_guarded_get_mem_used() << "\n" - << " Peak: " << util_guarded_get_mem_peak(); + if(print_stats) { + VLOG(1) << "System memory statistics after full device sync:\n" + << " Usage: " << util_guarded_get_mem_used() << "\n" + << " Peak: " << util_guarded_get_mem_peak(); + } } Scene::MotionType Scene::need_motion(bool advanced_shading) @@ -278,11 +282,10 @@ bool Scene::need_update() return (need_reset() || film->need_update); } -bool Scene::need_reset() +bool Scene::need_data_update() { return (background->need_update || image_manager->need_update - || camera->need_update || object_manager->need_update || mesh_manager->need_update || light_manager->need_update @@ -295,6 +298,11 @@ bool Scene::need_reset() || film->need_update); } +bool Scene::need_reset() +{ + return need_data_update() || camera->need_update; +} + void Scene::reset() { shader_manager->reset(this); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index d30a0cb45fe..b29aff88c01 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -213,6 +213,11 @@ public: void device_free(); protected: + /* Check if some heavy data worth logging was updated. + * Mainly used to suppress extra annoying logging. 
+ */ + bool need_data_update(); + void free_memory(bool final); }; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 24f48b61349..63037311889 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -816,7 +816,7 @@ void Session::update_scene() /* update scene */ if(scene->need_update()) { progress.set_status("Updating Scene"); - scene->device_update(device, progress); + MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); } } diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index f3d39c1bd72..56fb57e9667 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -46,11 +46,11 @@ void SVMShaderManager::reset(Scene * /*scene*/) void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) { - VLOG(1) << "Total " << scene->shaders.size() << " shaders."; - if(!need_update) return; + VLOG(1) << "Total " << scene->shaders.size() << " shaders."; + /* test if we need to update */ device_free(device, dscene, scene); diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index ad3f4866072..cde024cc11c 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -37,11 +37,11 @@ LookupTables::~LookupTables() void LookupTables::device_update(Device *device, DeviceScene *dscene) { - VLOG(1) << "Total " << lookup_tables.size() << " lookup tables."; - if(!need_update) return; + VLOG(1) << "Total " << lookup_tables.size() << " lookup tables."; + device->tex_free(dscene->lookup_table); if(lookup_tables.size() > 0) |