Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/render/object.cpp332
-rw-r--r--intern/cycles/render/object.h60
2 files changed, 275 insertions, 117 deletions
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 42bb665cb9f..c9cd6921a56 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -231,150 +231,250 @@ ObjectManager::~ObjectManager()
{
}
-void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress)
+void ObjectManager::device_update_object_transform(UpdateObejctTransformState *state,
+ Object *ob,
+ int object_index)
{
- float4 *objects;
- float4 *objects_vector = NULL;
- int i = 0;
- map<Mesh*, float> surface_area_map;
- map<ParticleSystem*, int> particle_offset;
- Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);
- bool have_motion = false;
- bool have_curves = false;
-
- objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
- if(need_motion == Scene::MOTION_PASS)
- objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size());
-
- /* particle system device offsets
- * 0 is dummy particle, index starts at 1
+ float4 *objects = state->objects;
+ float4 *objects_vector = state->objects_vector;
+
+ Mesh *mesh = ob->mesh;
+ uint flag = 0;
+
+ /* Compute transformations. */
+ Transform tfm = ob->tfm;
+ Transform itfm = transform_inverse(tfm);
+
+ /* Compute surface area. for uniform scale we can do avoid the many
+ * transform calls and share computation for instances.
+ *
+ * TODO(brecht): Correct for displacement, and move to a better place.
*/
- int numparticles = 1;
- foreach(ParticleSystem *psys, scene->particle_systems) {
- particle_offset[psys] = numparticles;
- numparticles += psys->particles.size();
- }
+ float uniform_scale;
+ float surface_area = 0.0f;
+ float pass_id = ob->pass_id;
+ float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF);
+ int particle_index = (ob->particle_system)
+ ? ob->particle_index + state->particle_offset[ob->particle_system]
+ : 0;
+
+ if(transform_uniform_scale(tfm, uniform_scale)) {
+ map<Mesh*, float>::iterator it;
+
+ /* NOTE: This isn't fully optimal and could in theory lead to multiple
+ * threads calculating area of the same mesh in parallel. However, this
+ * also prevents suspending all the threads when some mesh's area is
+ * not yet known.
+ */
+ state->surface_area_lock.lock();
+ it = state->surface_area_map.find(mesh);
+ state->surface_area_lock.unlock();
- foreach(Object *ob, scene->objects) {
- Mesh *mesh = ob->mesh;
- uint flag = 0;
-
- /* compute transformations */
- Transform tfm = ob->tfm;
- Transform itfm = transform_inverse(tfm);
-
- /* compute surface area. for uniform scale we can do avoid the many
- * transform calls and share computation for instances */
- /* todo: correct for displacement, and move to a better place */
- float uniform_scale;
- float surface_area = 0.0f;
- float pass_id = ob->pass_id;
- float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF);
- int particle_index = (ob->particle_system)? ob->particle_index + particle_offset[ob->particle_system]: 0;
-
- if(transform_uniform_scale(tfm, uniform_scale)) {
- map<Mesh*, float>::iterator it = surface_area_map.find(mesh);
-
- if(it == surface_area_map.end()) {
- foreach(Mesh::Triangle& t, mesh->triangles) {
- float3 p1 = mesh->verts[t.v[0]];
- float3 p2 = mesh->verts[t.v[1]];
- float3 p3 = mesh->verts[t.v[2]];
-
- surface_area += triangle_area(p1, p2, p3);
- }
+ if(it == state->surface_area_map.end()) {
+ foreach(Mesh::Triangle& t, mesh->triangles) {
+ float3 p1 = mesh->verts[t.v[0]];
+ float3 p2 = mesh->verts[t.v[1]];
+ float3 p3 = mesh->verts[t.v[2]];
- surface_area_map[mesh] = surface_area;
+ surface_area += triangle_area(p1, p2, p3);
}
- else
- surface_area = it->second;
- surface_area *= uniform_scale;
+ state->surface_area_lock.lock();
+ state->surface_area_map[mesh] = surface_area;
+ state->surface_area_lock.unlock();
}
else {
- foreach(Mesh::Triangle& t, mesh->triangles) {
- float3 p1 = transform_point(&tfm, mesh->verts[t.v[0]]);
- float3 p2 = transform_point(&tfm, mesh->verts[t.v[1]]);
- float3 p3 = transform_point(&tfm, mesh->verts[t.v[2]]);
+ surface_area = it->second;
+ }
- surface_area += triangle_area(p1, p2, p3);
- }
+ surface_area *= uniform_scale;
+ }
+ else {
+ foreach(Mesh::Triangle& t, mesh->triangles) {
+ float3 p1 = transform_point(&tfm, mesh->verts[t.v[0]]);
+ float3 p2 = transform_point(&tfm, mesh->verts[t.v[1]]);
+ float3 p3 = transform_point(&tfm, mesh->verts[t.v[2]]);
+
+ surface_area += triangle_area(p1, p2, p3);
}
+ }
- /* pack in texture */
- int offset = i*OBJECT_SIZE;
-
- /* OBJECT_TRANSFORM */
- memcpy(&objects[offset], &tfm, sizeof(float4)*3);
- /* OBJECT_INVERSE_TRANSFORM */
- memcpy(&objects[offset+4], &itfm, sizeof(float4)*3);
- /* OBJECT_PROPERTIES */
- objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index));
-
- if(need_motion == Scene::MOTION_PASS) {
- /* motion transformations, is world/object space depending if mesh
- * comes with deformed position in object space, or if we transform
- * the shading point in world space */
- Transform mtfm_pre = ob->motion.pre;
- Transform mtfm_post = ob->motion.post;
-
- if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
- mtfm_pre = mtfm_pre * itfm;
- mtfm_post = mtfm_post * itfm;
- }
- else {
- flag |= SD_OBJECT_HAS_VERTEX_MOTION;
- }
+ /* Pack in texture. */
+ int offset = object_index*OBJECT_SIZE;
+
+ /* OBJECT_TRANSFORM */
+ memcpy(&objects[offset], &tfm, sizeof(float4)*3);
+ /* OBJECT_INVERSE_TRANSFORM */
+ memcpy(&objects[offset+4], &itfm, sizeof(float4)*3);
+ /* OBJECT_PROPERTIES */
+ objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index));
+
+ if(state->need_motion == Scene::MOTION_PASS) {
+ /* Motion transformations, is world/object space depending if mesh
+ * comes with deformed position in object space, or if we transform
+ * the shading point in world space.
+ */
+ Transform mtfm_pre = ob->motion.pre;
+ Transform mtfm_post = ob->motion.post;
- memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3);
- memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3);
+ if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
+ mtfm_pre = mtfm_pre * itfm;
+ mtfm_post = mtfm_post * itfm;
+ }
+ else {
+ flag |= SD_OBJECT_HAS_VERTEX_MOTION;
}
+
+ memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3);
+ memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3);
+ }
#ifdef __OBJECT_MOTION__
- else if(need_motion == Scene::MOTION_BLUR) {
- if(ob->use_motion) {
- /* decompose transformations for interpolation */
- DecompMotionTransform decomp;
-
- transform_motion_decompose(&decomp, &ob->motion, &ob->tfm);
- memcpy(&objects[offset], &decomp, sizeof(float4)*8);
- flag |= SD_OBJECT_MOTION;
- have_motion = true;
- }
+ else if(state->need_motion == Scene::MOTION_BLUR) {
+ if(ob->use_motion) {
+ /* decompose transformations for interpolation. */
+ DecompMotionTransform decomp;
+
+ transform_motion_decompose(&decomp, &ob->motion, &ob->tfm);
+ memcpy(&objects[offset], &decomp, sizeof(float4)*8);
+ flag |= SD_OBJECT_MOTION;
+ state->have_motion = true;
}
+ }
#endif
- if(mesh->use_motion_blur)
- have_motion = true;
+ if(mesh->use_motion_blur) {
+ state->have_motion = true;
+ }
- /* dupli object coords and motion info */
- int totalsteps = mesh->motion_steps;
- int numsteps = (totalsteps - 1)/2;
- int numverts = mesh->verts.size();
- int numkeys = mesh->curve_keys.size();
+ /* Dupli object coords and motion info. */
+ int totalsteps = mesh->motion_steps;
+ int numsteps = (totalsteps - 1)/2;
+ int numverts = mesh->verts.size();
+ int numkeys = mesh->curve_keys.size();
- objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys));
- objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts));
+ objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys));
+ objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts));
- /* object flag */
- if(ob->use_holdout)
- flag |= SD_HOLDOUT_MASK;
- object_flag[i] = flag;
+ /* Object flag. */
+ if(ob->use_holdout) {
+ flag |= SD_HOLDOUT_MASK;
+ }
+ state->object_flag[object_index] = flag;
- /* have curves */
- if(mesh->curves.size())
- have_curves = true;
+ /* Have curves. */
+ if(mesh->curves.size()) {
+ state->have_curves = true;
+ }
+}
- i++;
+bool ObjectManager::device_update_object_transform_pop_work(
+ UpdateObejctTransformState *state,
+ int *start_index,
+ int *num_objects)
+{
+ /* Tweakable parameter, number of objects per chunk.
+ * Too small value will cause some extra overhead due to spin lock,
+ * too big value might not use all threads nicely.
+ */
+ static const int OBJECTS_PER_TASK = 32;
+ bool have_work = false;
+ state->queue_lock.lock();
+ int num_scene_objects = state->scene->objects.size();
+ if(state->queue_start_object < num_scene_objects) {
+ int count = min(OBJECTS_PER_TASK,
+ num_scene_objects - state->queue_start_object);
+ *start_index = state->queue_start_object;
+ *num_objects = count;
+ state->queue_start_object += count;
+ have_work = true;
+ }
+ state->queue_lock.unlock();
+ return have_work;
+}
+
+void ObjectManager::device_update_object_transform_task(
+ UpdateObejctTransformState *state)
+{
+ int start_index, num_objects;
+ while(device_update_object_transform_pop_work(state,
+ &start_index,
+ &num_objects))
+ {
+ for(int i = 0; i < num_objects; ++i) {
+ const int object_index = start_index + i;
+ Object *ob = state->scene->objects[object_index];
+ device_update_object_transform(state, ob, object_index);
+ }
+ }
+}
+
+void ObjectManager::device_update_transforms(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ uint *object_flag,
+ Progress& progress)
+{
+ UpdateObejctTransformState state;
+ state.need_motion = scene->need_motion(device->info.advanced_shading);
+ state.have_motion = false;
+ state.have_curves = false;
+ state.scene = scene;
+ state.queue_start_object = 0;
+
+ state.object_flag = object_flag;
+ state.objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
+ if(state.need_motion == Scene::MOTION_PASS) {
+ state.objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size());
+ }
+ else {
+ state.objects_vector = NULL;
+ }
+
+ /* Particle system device offsets
+ * 0 is dummy particle, index starts at 1.
+ */
+ int numparticles = 1;
+ foreach(ParticleSystem *psys, scene->particle_systems) {
+ state.particle_offset[psys] = numparticles;
+ numparticles += psys->particles.size();
+ }
- if(progress.get_cancel()) return;
+ /* NOTE: If it's just a handful of objects we deal with them in a single
+ * thread to avoid threading overhead. However, this threshold is might
+ * need some tweaks to make mid-complex scenes optimal.
+ */
+ if(scene->objects.size() < 64) {
+ int object_index = 0;
+ foreach(Object *ob, scene->objects) {
+ device_update_object_transform(&state, ob, object_index);
+ object_index++;
+ if(progress.get_cancel()) {
+ return;
+ }
+ }
+ }
+ else {
+ const int num_threads = TaskScheduler::num_threads();
+ TaskPool pool;
+ for(int i = 0; i < num_threads; ++i) {
+ pool.push(function_bind(
+ &ObjectManager::device_update_object_transform_task,
+ this,
+ &state));
+ }
+ pool.wait_work();
+ if(progress.get_cancel()) {
+ return;
+ }
}
device->tex_alloc("__objects", dscene->objects);
- if(need_motion == Scene::MOTION_PASS)
+ if(state.need_motion == Scene::MOTION_PASS) {
device->tex_alloc("__objects_vector", dscene->objects_vector);
+ }
- dscene->data.bvh.have_motion = have_motion;
- dscene->data.bvh.have_curves = have_curves;
+ dscene->data.bvh.have_motion = state.have_motion;
+ dscene->data.bvh.have_curves = state.have_curves;
dscene->data.bvh.have_instancing = true;
}
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 379d1748cdd..c2a79ca8dc4 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -17,9 +17,12 @@
#ifndef __OBJECT_H__
#define __OBJECT_H__
+#include "scene.h"
+
#include "util_boundbox.h"
#include "util_param.h"
#include "util_transform.h"
+#include "util_thread.h"
#include "util_types.h"
CCL_NAMESPACE_BEGIN
@@ -76,7 +79,12 @@ public:
~ObjectManager();
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
- void device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress);
+ void device_update_transforms(Device *device,
+ DeviceScene *dscene,
+ Scene *scene,
+ uint *object_flag,
+ Progress& progress);
+
void device_update_flags(Device *device,
DeviceScene *dscene,
Scene *scene,
@@ -87,6 +95,56 @@ public:
void tag_update(Scene *scene);
void apply_static_transforms(DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress);
+
+protected:
+ /* Global state of object transform update. */
+ struct UpdateObejctTransformState {
+ /* Global state used by device_update_object_transform().
+ * Common for both threaded and non-threaded update.
+ */
+
+ /* Type of the motion required by the scene settings. */
+ Scene::MotionType need_motion;
+
+ /* Mapping from particle system to a index in packed particle array.
+ * Only used for read.
+ */
+ map<ParticleSystem*, int> particle_offset;
+
+ /* Mesh area.
+ * Used to avoid calculation of mesh area multiple times. Used for both
+ * read and write. Acquire surface_area_lock to keep it all thread safe.
+ */
+ map<Mesh*, float> surface_area_map;
+
+ /* Packed object arrays. Those will be filled in. */
+ uint *object_flag;
+ float4 *objects;
+ float4 *objects_vector;
+
+ /* Flags which will be synchronized to Integrator. */
+ bool have_motion;
+ bool have_curves;
+
+ /* ** Scheduling queue. ** */
+
+ Scene *scene;
+
+ /* Some locks to keep everything thread-safe. */
+ thread_spin_lock queue_lock;
+ thread_spin_lock surface_area_lock;
+
+ /* First unused object index in the queue. */
+ int queue_start_object;
+ };
+ void device_update_object_transform(UpdateObejctTransformState *state,
+ Object *ob,
+ const int object_index);
+ void device_update_object_transform_task(UpdateObejctTransformState *state);
+ bool device_update_object_transform_pop_work(
+ UpdateObejctTransformState *state,
+ int *start_index,
+ int *num_objects);
};
CCL_NAMESPACE_END