Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r--intern/audaspace/intern/AUD_C-API.cpp42
-rw-r--r--intern/audaspace/intern/AUD_C-API.h15
-rw-r--r--intern/audaspace/intern/AUD_FileWriter.cpp36
-rw-r--r--intern/audaspace/intern/AUD_FileWriter.h10
-rw-r--r--intern/audaspace/intern/AUD_Reference.h18
-rw-r--r--intern/audaspace/intern/AUD_ReferenceHandler.cpp21
-rw-r--r--intern/container/CTR_Map.h2
-rw-r--r--intern/cycles/blender/addon/properties.py8
-rw-r--r--intern/cycles/blender/addon/ui.py48
-rw-r--r--intern/cycles/blender/blender_session.cpp7
-rw-r--r--intern/cycles/blender/blender_sync.cpp6
-rw-r--r--intern/cycles/blender/blender_sync.h5
-rw-r--r--intern/cycles/bvh/CMakeLists.txt4
-rw-r--r--intern/cycles/bvh/bvh.cpp4
-rw-r--r--intern/cycles/bvh/bvh_binning.cpp223
-rw-r--r--intern/cycles/bvh/bvh_binning.h86
-rw-r--r--intern/cycles/bvh/bvh_build.cpp636
-rw-r--r--intern/cycles/bvh/bvh_build.h110
-rw-r--r--intern/cycles/bvh/bvh_node.cpp22
-rw-r--r--intern/cycles/bvh/bvh_node.h18
-rw-r--r--intern/cycles/bvh/bvh_params.h91
-rw-r--r--intern/cycles/bvh/bvh_sort.cpp16
-rw-r--r--intern/cycles/bvh/bvh_sort.h2
-rw-r--r--intern/cycles/bvh/bvh_split.cpp293
-rw-r--r--intern/cycles/bvh/bvh_split.h110
-rw-r--r--intern/cycles/device/device.cpp9
-rw-r--r--intern/cycles/device/device.h4
-rw-r--r--intern/cycles/device/device_cpu.cpp63
-rw-r--r--intern/cycles/device/device_multi.cpp7
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h9
-rw-r--r--intern/cycles/kernel/kernel_path.h33
-rw-r--r--intern/cycles/kernel/kernel_types.h4
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h27
-rw-r--r--intern/cycles/kernel/svm/svm_types.h1
-rw-r--r--intern/cycles/render/integrator.cpp4
-rw-r--r--intern/cycles/render/integrator.h1
-rw-r--r--intern/cycles/render/mesh.cpp7
-rw-r--r--intern/cycles/render/nodes.cpp7
-rw-r--r--intern/cycles/render/object.cpp1
-rw-r--r--intern/cycles/render/session.cpp5
-rw-r--r--intern/cycles/subd/subd_patch.cpp12
-rw-r--r--intern/cycles/util/CMakeLists.txt2
-rw-r--r--intern/cycles/util/util_boundbox.h86
-rw-r--r--intern/cycles/util/util_math.h485
-rw-r--r--intern/cycles/util/util_task.cpp223
-rw-r--r--intern/cycles/util/util_task.h122
-rw-r--r--intern/cycles/util/util_thread.h127
-rw-r--r--intern/cycles/util/util_transform.cpp23
-rw-r--r--intern/cycles/util/util_types.h268
-rw-r--r--intern/ghost/GHOST_Rect.h4
-rw-r--r--intern/guardedalloc/intern/mallocn.c80
-rw-r--r--intern/mikktspace/mikktspace.c406
-rw-r--r--intern/smoke/extern/smoke_API.h7
-rw-r--r--intern/smoke/intern/FLUID_3D.cpp228
-rw-r--r--intern/smoke/intern/FLUID_3D.h17
-rw-r--r--intern/smoke/intern/OBSTACLE.h6
-rw-r--r--intern/smoke/intern/WTURBULENCE.cpp9
-rw-r--r--intern/smoke/intern/smoke_API.cpp53
-rw-r--r--intern/utfconv/utf_winfunc.c12
59 files changed, 2996 insertions, 1189 deletions
diff --git a/intern/audaspace/intern/AUD_C-API.cpp b/intern/audaspace/intern/AUD_C-API.cpp
index 9100a277124..50b47650696 100644
--- a/intern/audaspace/intern/AUD_C-API.cpp
+++ b/intern/audaspace/intern/AUD_C-API.cpp
@@ -41,6 +41,7 @@
#include <cstdlib>
#include <cstring>
#include <cmath>
+#include <sstream>
#include "AUD_NULLDevice.h"
#include "AUD_I3DDevice.h"
@@ -1236,6 +1237,47 @@ const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int lengt
}
}
+const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate)
+{
+ try
+ {
+ AUD_SequencerFactory* f = dynamic_cast<AUD_SequencerFactory*>(sound->get());
+
+ f->setSpecs(specs.specs);
+
+ std::vector<AUD_Reference<AUD_IWriter> > writers;
+
+ int channels = specs.channels;
+ specs.channels = AUD_CHANNELS_MONO;
+
+ for(int i = 0; i < channels; i++)
+ {
+ std::stringstream stream;
+ std::string fn = filename;
+ size_t index = fn.find_last_of('.');
+ size_t index_slash = fn.find_last_of('/');
+ size_t index_backslash = fn.find_last_of('\\');
+ if((index == std::string::npos) ||
+ ((index < index_slash) && (index_slash != std::string::npos)) ||
+ ((index < index_backslash) && (index_backslash != std::string::npos)))
+ stream << filename << "_" << (i + 1);
+ else
+ stream << fn.substr(0, index) << "_" << (i + 1) << fn.substr(index);
+ writers.push_back(AUD_FileWriter::createWriter(stream.str(), specs, format, codec, bitrate));
+ }
+
+ AUD_Reference<AUD_IReader> reader = f->createQualityReader();
+ reader->seek(start);
+ AUD_FileWriter::writeReader(reader, writers, length, buffersize);
+
+ return NULL;
+ }
+ catch(AUD_Exception& e)
+ {
+ return e.str;
+ }
+}
+
AUD_Device* AUD_openMixdownDevice(AUD_DeviceSpecs specs, AUD_Sound* sequencer, float volume, float start)
{
try
diff --git a/intern/audaspace/intern/AUD_C-API.h b/intern/audaspace/intern/AUD_C-API.h
index 8388af2170d..a52a1fa8369 100644
--- a/intern/audaspace/intern/AUD_C-API.h
+++ b/intern/audaspace/intern/AUD_C-API.h
@@ -710,6 +710,21 @@ extern void* AUD_getSet(void* set);
extern const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate);
/**
+ * Mixes a sound down into multiple files, one mono file per channel.
+ * \param sound The sound scene to mix down.
+ * \param start The start frame.
+ * \param length The count of frames to write.
+ * \param buffersize How many samples should be written at once.
+ * \param filename The file to write to; an underscore and the 1-based channel number are inserted before the file extension (or appended if there is none).
+ * \param specs The file's audio specification.
+ * \param format The file's container format.
+ * \param codec The codec used for encoding the audio data.
+ * \param bitrate The bitrate for encoding.
+ * \return An error message or NULL in case of success.
+ */
+extern const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate);
+
+/**
* Opens a read device and prepares it for mixdown of the sound scene.
* \param specs Output audio specifications.
* \param sequencer The sound scene to mix down.
diff --git a/intern/audaspace/intern/AUD_FileWriter.cpp b/intern/audaspace/intern/AUD_FileWriter.cpp
index df76b667e3f..f74021acad1 100644
--- a/intern/audaspace/intern/AUD_FileWriter.cpp
+++ b/intern/audaspace/intern/AUD_FileWriter.cpp
@@ -93,3 +93,39 @@ void AUD_FileWriter::writeReader(AUD_Reference<AUD_IReader> reader, AUD_Referenc
writer->write(len, buf);
}
}
+
+void AUD_FileWriter::writeReader(AUD_Reference<AUD_IReader> reader, std::vector<AUD_Reference<AUD_IWriter> >& writers, unsigned int length, unsigned int buffersize)
+{
+ AUD_Buffer buffer(buffersize * AUD_SAMPLE_SIZE(reader->getSpecs()));
+ AUD_Buffer buffer2(buffersize * sizeof(sample_t));
+ sample_t* buf = buffer.getBuffer();
+ sample_t* buf2 = buffer2.getBuffer();
+
+ int len;
+ bool eos = false;
+ int channels = reader->getSpecs().channels;
+
+ for(unsigned int pos = 0; ((pos < length) || (length <= 0)) && !eos; pos += len)
+ {
+ len = buffersize;
+ if((len > length - pos) && (length > 0))
+ len = length - pos;
+ reader->read(len, eos, buf);
+
+ for(int channel = 0; channel < channels; channel++)
+ {
+ for(int i = 0; i < len; i++)
+ {
+ // clamping!
+ if(buf[i * channels + channel] > 1)
+ buf2[i] = 1;
+ else if(buf[i * channels + channel] < -1)
+ buf2[i] = -1;
+ else
+ buf2[i] = buf[i * channels + channel];
+ }
+
+ writers[channel]->write(len, buf2);
+ }
+ }
+}
diff --git a/intern/audaspace/intern/AUD_FileWriter.h b/intern/audaspace/intern/AUD_FileWriter.h
index c9ee2b1ee12..385aba5ef45 100644
--- a/intern/audaspace/intern/AUD_FileWriter.h
+++ b/intern/audaspace/intern/AUD_FileWriter.h
@@ -31,6 +31,7 @@
#define __AUD_FILEWRITER_H__
#include <string>
+#include <vector>
#include "AUD_Reference.h"
@@ -68,6 +69,15 @@ public:
* \param buffersize How many samples should be transfered at once.
*/
static void writeReader(AUD_Reference<AUD_IReader> reader, AUD_Reference<AUD_IWriter> writer, unsigned int length, unsigned int buffersize);
+
+ /**
+ * Writes a reader to several writers, one channel of the reader per writer.
+ * \param reader The reader to read from.
+ * \param writers The writers to write to.
+ * \param length How many samples should be transferred.
+ * \param buffersize How many samples should be transferred at once.
+ */
+ static void writeReader(AUD_Reference<AUD_IReader> reader, std::vector<AUD_Reference<AUD_IWriter> >& writers, unsigned int length, unsigned int buffersize);
};
#endif //__AUD_FILEWRITER_H__
diff --git a/intern/audaspace/intern/AUD_Reference.h b/intern/audaspace/intern/AUD_Reference.h
index 2e07417154b..0c9f02c0155 100644
--- a/intern/audaspace/intern/AUD_Reference.h
+++ b/intern/audaspace/intern/AUD_Reference.h
@@ -31,6 +31,7 @@
#include <map>
#include <cstddef>
+#include <pthread.h>
// #define MEM_DEBUG
@@ -49,8 +50,13 @@ private:
* Saves the reference counts.
*/
static std::map<void*, unsigned int> m_references;
+ static pthread_mutex_t m_mutex;
+ static bool m_mutex_initialised;
public:
+
+ static pthread_mutex_t* getMutex();
+
/**
* Reference increment.
* \param reference The reference.
@@ -108,6 +114,7 @@ public:
template <class U>
AUD_Reference(U* reference)
{
+ pthread_mutex_lock(AUD_ReferenceHandler::getMutex());
m_original = reference;
m_reference = dynamic_cast<T*>(reference);
AUD_ReferenceHandler::incref(m_original);
@@ -115,6 +122,7 @@ public:
if(m_reference != NULL)
std::cerr << "+" << typeid(*m_reference).name() << std::endl;
#endif
+ pthread_mutex_unlock(AUD_ReferenceHandler::getMutex());
}
AUD_Reference()
@@ -129,6 +137,7 @@ public:
*/
AUD_Reference(const AUD_Reference& ref)
{
+ pthread_mutex_lock(AUD_ReferenceHandler::getMutex());
m_original = ref.m_original;
m_reference = ref.m_reference;
AUD_ReferenceHandler::incref(m_original);
@@ -136,11 +145,13 @@ public:
if(m_reference != NULL)
std::cerr << "+" << typeid(*m_reference).name() << std::endl;
#endif
+ pthread_mutex_unlock(AUD_ReferenceHandler::getMutex());
}
template <class U>
explicit AUD_Reference(const AUD_Reference<U>& ref)
{
+ pthread_mutex_lock(AUD_ReferenceHandler::getMutex());
m_original = ref.get();
m_reference = dynamic_cast<T*>(ref.get());
AUD_ReferenceHandler::incref(m_original);
@@ -148,6 +159,7 @@ public:
if(m_reference != NULL)
std::cerr << "+" << typeid(*m_reference).name() << std::endl;
#endif
+ pthread_mutex_unlock(AUD_ReferenceHandler::getMutex());
}
/**
@@ -156,12 +168,14 @@ public:
*/
~AUD_Reference()
{
+ pthread_mutex_lock(AUD_ReferenceHandler::getMutex());
#ifdef MEM_DEBUG
if(m_reference != NULL)
std::cerr << "-" << typeid(*m_reference).name() << std::endl;
#endif
if(AUD_ReferenceHandler::decref(m_original))
delete m_reference;
+ pthread_mutex_unlock(AUD_ReferenceHandler::getMutex());
}
/**
@@ -173,6 +187,8 @@ public:
if(&ref == this)
return *this;
+ pthread_mutex_lock(AUD_ReferenceHandler::getMutex());
+
#ifdef MEM_DEBUG
if(m_reference != NULL)
std::cerr << "-" << typeid(*m_reference).name() << std::endl;
@@ -188,6 +204,8 @@ public:
std::cerr << "+" << typeid(*m_reference).name() << std::endl;
#endif
+ pthread_mutex_unlock(AUD_ReferenceHandler::getMutex());
+
return *this;
}
diff --git a/intern/audaspace/intern/AUD_ReferenceHandler.cpp b/intern/audaspace/intern/AUD_ReferenceHandler.cpp
index 24f645df761..3e9f6707262 100644
--- a/intern/audaspace/intern/AUD_ReferenceHandler.cpp
+++ b/intern/audaspace/intern/AUD_ReferenceHandler.cpp
@@ -29,3 +29,24 @@
#include "AUD_Reference.h"
std::map<void*, unsigned int> AUD_ReferenceHandler::m_references;
+pthread_mutex_t AUD_ReferenceHandler::m_mutex;
+bool AUD_ReferenceHandler::m_mutex_initialised = false;
+
+pthread_mutex_t *AUD_ReferenceHandler::getMutex()
+{
+ if(!m_mutex_initialised)
+ {
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+
+ pthread_mutex_init(&m_mutex, &attr);
+
+ pthread_mutexattr_destroy(&attr);
+
+ m_mutex_initialised = true;
+ }
+
+ return &m_mutex;
+}
+
diff --git a/intern/container/CTR_Map.h b/intern/container/CTR_Map.h
index 8b6d84337c2..9557821d642 100644
--- a/intern/container/CTR_Map.h
+++ b/intern/container/CTR_Map.h
@@ -63,7 +63,7 @@ public:
for (int i = 0; i < m_num_buckets; ++i) {
m_buckets[i] = 0;
- for(Entry *entry = map.m_buckets[i]; entry; entry=entry->m_next)
+ for (Entry *entry = map.m_buckets[i]; entry; entry=entry->m_next)
insert(entry->m_key, entry->m_value);
}
}
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index cb99ea3b499..35f97bf629f 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -85,10 +85,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Leave out caustics, resulting in a darker image with less noise",
default=False,
)
- cls.blur_caustics = FloatProperty(
- name="Blur Caustics",
- description="Blur caustics to reduce noise",
- min=0.0, max=1.0,
+ cls.blur_glossy = FloatProperty(
+ name="Filter Glossy",
+ description="Adaptively blur glossy shaders after blurry bounces, to reduce noise at the cost of accuracy",
+ min=0.0, max=10.0,
default=0.0,
)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 4a8b639b390..0ed08589327 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -87,11 +87,11 @@ class CyclesRender_PT_integrator(CyclesButtonsPanel, Panel):
sub.prop(cscene, "diffuse_bounces", text="Diffuse")
sub.prop(cscene, "glossy_bounces", text="Glossy")
sub.prop(cscene, "transmission_bounces", text="Transmission")
- sub.prop(cscene, "no_caustics")
- #row = col.row()
- #row.prop(cscene, "blur_caustics")
- #row.active = not cscene.no_caustics
+ col.separator()
+
+ col.prop(cscene, "no_caustics")
+ col.prop(cscene, "blur_glossy")
class CyclesRender_PT_film(CyclesButtonsPanel, Panel):
@@ -178,10 +178,7 @@ class CyclesRender_PT_layers(CyclesButtonsPanel, Panel):
col = split.column()
col.prop(scene, "layers", text="Scene")
- col.label(text="Material:")
- col.prop(rl, "material_override", text="")
-
- col.prop(rl, "use_sky", "Use Environment")
+ col.prop(rl, "layers_exclude", text="Exclude")
col = split.column()
col.prop(rl, "layers", text="Layer")
@@ -191,6 +188,16 @@ class CyclesRender_PT_layers(CyclesButtonsPanel, Panel):
split = layout.split()
col = split.column()
+ col.label(text="Material:")
+ col.prop(rl, "material_override", text="")
+
+ col = split.column()
+ col.prop(rl, "samples")
+ col.prop(rl, "use_sky", "Use Environment")
+
+ split = layout.split()
+
+ col = split.column()
col.label(text="Passes:")
col.prop(rl, "use_pass_combined")
col.prop(rl, "use_pass_z")
@@ -783,6 +790,31 @@ class CyclesTexture_PT_colors(CyclesButtonsPanel, Panel):
layout.template_color_ramp(mapping, "color_ramp", expand=True)
+class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel):
+ bl_label = "Simplify"
+ bl_context = "scene"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw_header(self, context):
+ rd = context.scene.render
+ self.layout.prop(rd, "use_simplify", text="")
+
+ def draw(self, context):
+ layout = self.layout
+
+ rd = context.scene.render
+
+ layout.active = rd.use_simplify
+
+ split = layout.split()
+
+ col = split.column()
+ col.prop(rd, "simplify_subdivision", text="Subdivision")
+
+ col = split.column()
+ col.prop(rd, "simplify_child_particles", text="Child Particles")
+
+
def draw_device(self, context):
scene = context.scene
layout = self.layout
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index dc6c69e2904..5ece7aa26e2 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -218,12 +218,13 @@ void BlenderSession::render()
scene->film->passes = passes;
scene->film->tag_update(scene);
- /* update session */
- session->reset(buffer_params, session_params.samples);
-
/* update scene */
sync->sync_data(b_v3d, b_iter->name().c_str());
+ /* update session */
+ int samples = sync->get_layer_samples();
+ session->reset(buffer_params, (samples == 0)? session_params.samples: samples);
+
/* render */
session->start();
session->wait();
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 5a286298774..41cd200d003 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -153,6 +153,8 @@ void BlenderSync::sync_integrator()
integrator->transparent_shadows = get_boolean(cscene, "use_transparent_shadows");
integrator->no_caustics = get_boolean(cscene, "no_caustics");
+ integrator->filter_glossy = get_float(cscene, "blur_glossy");
+
integrator->seed = get_int(cscene, "seed");
integrator->layer_flag = render_layer.layer;
@@ -208,6 +210,7 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer)
render_layer.holdout_layer = 0;
render_layer.material_override = PointerRNA_NULL;
render_layer.use_background = true;
+ render_layer.samples = 0;
return;
}
}
@@ -220,12 +223,13 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer)
for(r.layers.begin(b_rlay); b_rlay != r.layers.end(); ++b_rlay) {
if((!layer && first_layer) || (layer && b_rlay->name() == layer)) {
render_layer.name = b_rlay->name();
- render_layer.scene_layer = get_layer(b_scene.layers());
+ render_layer.scene_layer = get_layer(b_scene.layers()) & ~get_layer(b_rlay->layers_exclude());
render_layer.layer = get_layer(b_rlay->layers());
render_layer.holdout_layer = get_layer(b_rlay->layers_zmask());
render_layer.layer |= render_layer.holdout_layer;
render_layer.material_override = b_rlay->material_override();
render_layer.use_background = b_rlay->use_sky();
+ render_layer.samples = b_rlay->samples();
}
first_layer = false;
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index d2550a1ffd7..ab8e4bd8d00 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -57,6 +57,7 @@ public:
void sync_data(BL::SpaceView3D b_v3d, const char *layer = 0);
void sync_camera(BL::Object b_override, int width, int height);
void sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int width, int height);
+ int get_layer_samples() { return render_layer.samples; }
/* get parameters */
static SceneParams get_scene_params(BL::Scene b_scene, bool background);
@@ -108,7 +109,8 @@ private:
RenderLayerInfo()
: scene_layer(0), layer(0), holdout_layer(0),
material_override(PointerRNA_NULL),
- use_background(true)
+ use_background(true),
+ samples(0)
{}
string name;
@@ -117,6 +119,7 @@ private:
uint holdout_layer;
BL::Material material_override;
bool use_background;
+ int samples;
} render_layer;
};
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index decc576fe51..131a7a1f750 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -10,17 +10,21 @@ set(INC
set(SRC
bvh.cpp
+ bvh_binning.cpp
bvh_build.cpp
bvh_node.cpp
bvh_sort.cpp
+ bvh_split.cpp
)
set(SRC_HEADERS
bvh.h
+ bvh_binning.h
bvh_build.h
bvh_node.h
bvh_params.h
bvh_sort.h
+ bvh_split.h
)
include_directories(${INC})
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index c9bfa964332..15695dddf45 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -530,7 +530,7 @@ void RegularBVH::refit_nodes()
{
assert(!params.top_level);
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.is_leaf[0])? true: false, bbox, visibility);
}
@@ -572,7 +572,7 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
}
else {
/* refit inner node, set bbox from children */
- BoundBox bbox0, bbox1;
+ BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
uint visibility0 = 0, visibility1 = 0;
refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0);
diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp
new file mode 100644
index 00000000000..661541a8d23
--- /dev/null
+++ b/intern/cycles/bvh/bvh_binning.cpp
@@ -0,0 +1,223 @@
+/*
+ * Adapted from code copyright 2009-2011 Intel Corporation
+ * Modifications Copyright 2012, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define __KERNEL_SSE__
+
+#include <stdlib.h>
+
+#include "bvh_binning.h"
+
+#include "util_algorithm.h"
+#include "util_boundbox.h"
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* SSE replacements */
+
+__forceinline void prefetch_L1 (const void* ptr) { }
+__forceinline void prefetch_L2 (const void* ptr) { }
+__forceinline void prefetch_L3 (const void* ptr) { }
+__forceinline void prefetch_NTA(const void* ptr) { }
+
+template<size_t src> __forceinline float extract(const int4& b)
+{ return b[src]; }
+template<size_t dst> __forceinline const float4 insert(const float4& a, const float b)
+{ float4 r = a; r[dst] = b; return r; }
+
+__forceinline int get_best_dimension(const float4& bestSAH)
+{
+ // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));
+
+ float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
+
+ if(bestSAH.x == minSAH) return 0;
+ else if(bestSAH.y == minSAH) return 1;
+ else return 2;
+}
+
+/* BVH Object Binning */
+
+BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims)
+: BVHRange(job), splitSAH(FLT_MAX), dim(0), pos(0)
+{
+ /* compute number of bins to use and precompute scaling factor for binning */
+ num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f*size()));
+ scale = rcp(cent_bounds().size()) * make_float3((float)num_bins);
+
+ /* initialize binning counter and bounds */
+ BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
+ int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */
+
+ for(size_t i = 0; i < num_bins; i++) {
+ bin_count[i] = make_int4(0);
+ bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
+ }
+
+ /* map geometry to bins, unrolled once */
+ {
+ ssize_t i;
+
+ for(i = 0; i < ssize_t(size()) - 1; i += 2) {
+ prefetch_L2(&prims[start() + i + 8]);
+
+ /* map even and odd primitive to bin */
+ BVHReference prim0 = prims[start() + i + 0];
+ BVHReference prim1 = prims[start() + i + 1];
+
+ int4 bin0 = get_bin(prim0.bounds());
+ int4 bin1 = get_bin(prim1.bounds());
+
+ /* increase bounds for bins for even primitive */
+ int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
+ int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
+ int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
+
+ /* increase bounds of bins for odd primitive */
+ int b10 = extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds());
+ int b11 = extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds());
+ int b12 = extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds());
+ }
+
+ /* handle the last primitive when the number of primitives is odd */
+ if(i < ssize_t(size())) {
+ /* map primitive to bin */
+ BVHReference prim0 = prims[start() + i];
+ int4 bin0 = get_bin(prim0.bounds());
+
+ /* increase bounds of bins */
+ int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
+ int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
+ int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
+ }
+ }
+
+ /* sweep from right to left and compute parallel prefix of merged bounds */
+ float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */
+ float4 r_count[MAX_BINS]; /* number of primitives on the right */
+ int4 count = make_int4(0);
+
+ BoundBox bx = BoundBox::empty;
+ BoundBox by = BoundBox::empty;
+ BoundBox bz = BoundBox::empty;
+
+ for(size_t i = num_bins - 1; i > 0; i--) {
+ count = count + bin_count[i];
+ r_count[i] = blocks(count);
+
+ bx = merge(bx,bin_bounds[i][0]); r_area[i][0] = bx.half_area();
+ by = merge(by,bin_bounds[i][1]); r_area[i][1] = by.half_area();
+ bz = merge(bz,bin_bounds[i][2]); r_area[i][2] = bz.half_area();
+ }
+
+ /* sweep from left to right and compute SAH */
+ int4 ii = make_int4(1);
+ float4 bestSAH = make_float4(FLT_MAX);
+ int4 bestSplit = make_int4(-1);
+
+ count = make_int4(0);
+
+ bx = BoundBox::empty;
+ by = BoundBox::empty;
+ bz = BoundBox::empty;
+
+ for(size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
+ count = count + bin_count[i-1];
+
+ bx = merge(bx,bin_bounds[i-1][0]); float Ax = bx.half_area();
+ by = merge(by,bin_bounds[i-1][1]); float Ay = by.half_area();
+ bz = merge(bz,bin_bounds[i-1][2]); float Az = bz.half_area();
+
+ float4 lCount = blocks(count);
+ float4 lArea = make_float4(Ax,Ay,Az,Az);
+ float4 sah = lArea*lCount + r_area[i]*r_count[i];
+
+ bestSplit = select(sah < bestSAH,ii,bestSplit);
+ bestSAH = min(sah,bestSAH);
+ }
+
+ int4 mask = float3_to_float4(cent_bounds().size()) <= make_float4(0.0f);
+ bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);
+
+ /* find best dimension */
+ dim = get_best_dimension(bestSAH);
+ splitSAH = bestSAH[dim];
+ pos = bestSplit[dim];
+ leafSAH = bounds().half_area() * blocks(size());
+}
+
+void BVHObjectBinning::split(BVHReference* prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const
+{
+ size_t N = size();
+
+ BoundBox lgeom_bounds = BoundBox::empty;
+ BoundBox rgeom_bounds = BoundBox::empty;
+ BoundBox lcent_bounds = BoundBox::empty;
+ BoundBox rcent_bounds = BoundBox::empty;
+
+ ssize_t l = 0, r = N-1;
+
+ while(l <= r) {
+ prefetch_L2(&prims[start() + l + 8]);
+ prefetch_L2(&prims[start() + r - 8]);
+
+ BVHReference prim = prims[start() + l];
+ float3 center = prim.bounds().center2();
+
+ if(get_bin(center)[dim] < pos) {
+ lgeom_bounds.grow(prim.bounds());
+ lcent_bounds.grow(center);
+ l++;
+ }
+ else {
+ rgeom_bounds.grow(prim.bounds());
+ rcent_bounds.grow(center);
+ swap(prims[start()+l],prims[start()+r]);
+ r--;
+ }
+ }
+
+ /* finish */
+ if(l != 0 && N-1-r != 0) {
+ right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims);
+ left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
+ return;
+ }
+
+ /* object median split if we did not make progress, can happen when all
+ primitives have same centroid */
+ lgeom_bounds = BoundBox::empty;
+ rgeom_bounds = BoundBox::empty;
+ lcent_bounds = BoundBox::empty;
+ rcent_bounds = BoundBox::empty;
+
+ for(size_t i = 0; i < N/2; i++) {
+ lgeom_bounds.grow(prims[start()+i].bounds());
+ lcent_bounds.grow(prims[start()+i].bounds().center2());
+ }
+
+ for(size_t i = N/2; i < N; i++) {
+ rgeom_bounds.grow(prims[start()+i].bounds());
+ rcent_bounds.grow(prims[start()+i].bounds().center2());
+ }
+
+ right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims);
+ left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N/2), prims);
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/bvh/bvh_binning.h b/intern/cycles/bvh/bvh_binning.h
new file mode 100644
index 00000000000..60742157055
--- /dev/null
+++ b/intern/cycles/bvh/bvh_binning.h
@@ -0,0 +1,86 @@
+/*
+ * Adapted from code copyright 2009-2011 Intel Corporation
+ * Modifications Copyright 2012, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH_BINNING_H__
+#define __BVH_BINNING_H__
+
+#include "bvh_params.h"
+
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Single-threaded object binner. Finds the split with the best SAH cost by
+ * testing, for each dimension, multiple partitionings at regularly spaced
+ * partition locations. The partitioning for a location is computed by putting
+ * primitives whose centroids lie to the left and to the right of the split
+ * location into different sets. The SAH is evaluated by computing the number
+ * of blocks occupied by the primitives in the partitions. */
+
+class BVHObjectBinning : public BVHRange
+{
+public:
+ __forceinline BVHObjectBinning() {}
+ BVHObjectBinning(const BVHRange& job, BVHReference *prims);
+
+ void split(BVHReference *prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const;
+
+ float splitSAH; /* SAH cost of the best split */
+ float leafSAH; /* SAH cost of creating a leaf */
+
+protected:
+ int dim; /* best split dimension */
+ int pos; /* best split position */
+ size_t num_bins; /* actual number of bins to use */
+ float3 scale; /* scaling factor to compute bin */
+
+ enum { MAX_BINS = 32 };
+ enum { LOG_BLOCK_SIZE = 2 };
+
+ /* computes the bin numbers for each dimension for a box. */
+ __forceinline int4 get_bin(const BoundBox& box) const
+ {
+ int4 a = make_int4((box.center2() - cent_bounds().min)*scale - make_float3(0.5f));
+ int4 mn = make_int4(0);
+ int4 mx = make_int4((int)num_bins-1);
+
+ return clamp(a, mn, mx);
+ }
+
+ /* computes the bin numbers for each dimension for a point. */
+ __forceinline int4 get_bin(const float3& c) const
+ {
+ return make_int4((c - cent_bounds().min)*scale - make_float3(0.5f));
+ }
+
+ /* compute the number of blocks occupied for each dimension. */
+ __forceinline float4 blocks(const int4& a) const
+ {
+ return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
+ }
+
+ /* compute the number of blocks occupied in one dimension. */
+ __forceinline int blocks(size_t a) const
+ {
+ return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
+ }
+};
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 38674c2c561..c5b4f1d01ae 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -15,22 +15,36 @@
* limitations under the License.
*/
+#include "bvh_binning.h"
#include "bvh_build.h"
#include "bvh_node.h"
#include "bvh_params.h"
-#include "bvh_sort.h"
+#include "bvh_split.h"
#include "mesh.h"
#include "object.h"
#include "scene.h"
-#include "util_algorithm.h"
+#include "util_debug.h"
#include "util_foreach.h"
#include "util_progress.h"
#include "util_time.h"
CCL_NAMESPACE_BEGIN
+/* BVH Build Task */
+
+class BVHBuildTask : public Task {
+public:
+ BVHBuildTask(InnerNode *node_, int child_, BVHObjectBinning& range_, int level_)
+ : node(node_), child(child_), level(level_), range(range_) {}
+
+ InnerNode *node;
+ int child;
+ int level;
+ BVHObjectBinning range;
+};
+
/* Constructor / Destructor */
BVHBuild::BVHBuild(const vector<Object*>& objects_,
@@ -41,10 +55,10 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
prim_object(prim_object_),
params(params_),
progress(progress_),
- progress_start_time(0.0)
+ progress_start_time(0.0),
+ task_pool(function_bind(&BVHBuild::thread_build_node, this, _1, _2))
{
spatial_min_overlap = 0.0f;
- progress_num_duplicates = 0;
}
BVHBuild::~BVHBuild()
@@ -53,57 +67,63 @@ BVHBuild::~BVHBuild()
/* Adding References */
-void BVHBuild::add_reference_mesh(NodeSpec& root, Mesh *mesh, int i)
+void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
for(uint j = 0; j < mesh->triangles.size(); j++) {
Mesh::Triangle t = mesh->triangles[j];
- Reference ref;
+ BoundBox bounds = BoundBox::empty;
for(int k = 0; k < 3; k++) {
float3 pt = mesh->verts[t.v[k]];
- ref.bounds.grow(pt);
+ bounds.grow(pt);
}
- if(ref.bounds.valid()) {
- ref.prim_index = j;
- ref.prim_object = i;
-
- references.push_back(ref);
- root.bounds.grow(ref.bounds);
+ if(bounds.valid()) {
+ references.push_back(BVHReference(bounds, j, i));
+ root.grow(bounds);
+ center.grow(bounds.center2());
}
}
}
-void BVHBuild::add_reference_object(NodeSpec& root, Object *ob, int i)
+void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
{
- Reference ref;
-
- ref.prim_index = -1;
- ref.prim_object = i;
- ref.bounds = ob->bounds;
-
- references.push_back(ref);
- root.bounds.grow(ref.bounds);
+ references.push_back(BVHReference(ob->bounds, -1, i));
+ root.grow(ob->bounds);
+ center.grow(ob->bounds.center2());
}
-void BVHBuild::add_references(NodeSpec& root)
+void BVHBuild::add_references(BVHRange& root)
{
- /* init root spec */
- root.num = 0;
- root.bounds = BoundBox();
+ /* reserve space for references */
+ size_t num_alloc_references = 0;
+
+ foreach(Object *ob, objects) {
+ if(params.top_level) {
+ if(ob->mesh->transform_applied)
+ num_alloc_references += ob->mesh->triangles.size();
+ else
+ num_alloc_references++;
+ }
+ else
+ num_alloc_references += ob->mesh->triangles.size();
+ }
+
+ references.reserve(num_alloc_references);
- /* add objects */
+ /* add references from objects */
+ BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
int i = 0;
foreach(Object *ob, objects) {
if(params.top_level) {
if(ob->mesh->transform_applied)
- add_reference_mesh(root, ob->mesh, i);
+ add_reference_mesh(bounds, center, ob->mesh, i);
else
- add_reference_object(root, ob, i);
+ add_reference_object(bounds, center, ob, i);
}
else
- add_reference_mesh(root, ob->mesh, i);
+ add_reference_mesh(bounds, center, ob->mesh, i);
i++;
@@ -111,129 +131,213 @@ void BVHBuild::add_references(NodeSpec& root)
}
/* happens mostly on empty meshes */
- if(!root.bounds.valid())
- root.bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
+ if(!bounds.valid())
+ bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
- root.num = references.size();
+ root = BVHRange(bounds, center, 0, references.size());
}
/* Build */
BVHNode* BVHBuild::run()
{
- NodeSpec root;
+ BVHRange root;
/* add references */
add_references(root);
- if(progress.get_cancel()) return NULL;
+ if(progress.get_cancel())
+ return NULL;
/* init spatial splits */
if(params.top_level) /* todo: get rid of this */
params.use_spatial_split = false;
- spatial_min_overlap = root.bounds.area() * params.spatial_split_alpha;
+ spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
spatial_right_bounds.clear();
- spatial_right_bounds.resize(max(root.num, (int)BVHParams::NUM_SPATIAL_BINS) - 1);
+ spatial_right_bounds.resize(max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1);
/* init progress updates */
- progress_num_duplicates = 0;
progress_start_time = time_dt();
+ progress_count = 0;
+ progress_total = references.size();
+ progress_original_total = progress_total;
+
+ prim_index.resize(references.size());
+ prim_object.resize(references.size());
/* build recursively */
- return build_node(root, 0, 0.0f, 1.0f);
+ BVHNode *rootnode;
+
+ if(params.use_spatial_split) {
+ /* singlethreaded spatial split build */
+ rootnode = build_node(root, 0);
+ }
+ else {
+ /* multithreaded binning build */
+ BVHObjectBinning rootbin(root, &references[0]);
+ rootnode = build_node(rootbin, 0);
+ task_pool.wait();
+ }
+
+ /* delete if we cancelled */
+ if(rootnode) {
+ if(progress.get_cancel()) {
+ rootnode->deleteSubtree();
+ rootnode = NULL;
+ }
+ else if(!params.use_spatial_split) {
+ /*rotate(rootnode, 4, 5);*/
+ rootnode->update_visibility();
+ }
+ }
+
+ return rootnode;
}
-void BVHBuild::progress_update(float progress_start, float progress_end)
+void BVHBuild::progress_update()
{
if(time_dt() - progress_start_time < 0.25f)
return;
+
+ double progress_start = (double)progress_count/(double)progress_total;
+ double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
- float duplicates = (float)progress_num_duplicates/(float)references.size();
string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
progress_start*100.0f, duplicates*100.0f);
progress.set_substatus(msg);
- progress_start_time = time_dt();
+ progress_start_time = time_dt();
}
-BVHNode* BVHBuild::build_node(const NodeSpec& spec, int level, float progress_start, float progress_end)
+void BVHBuild::thread_build_node(Task *task_, int thread_id)
{
- /* progress update */
- progress_update(progress_start, progress_end);
- if(progress.get_cancel()) return NULL;
+ if(progress.get_cancel())
+ return;
- /* small enough or too deep => create leaf. */
- if(spec.num <= params.min_leaf_size || level >= BVHParams::MAX_DEPTH)
- return create_leaf_node(spec);
-
- /* find split candidates. */
- float area = spec.bounds.area();
- float leafSAH = area * params.triangle_cost(spec.num);
- float nodeSAH = area * params.node_cost(2);
- ObjectSplit object = find_object_split(spec, nodeSAH);
- SpatialSplit spatial;
-
- if(params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
- BoundBox overlap = object.left_bounds;
- overlap.intersect(object.right_bounds);
-
- if(overlap.area() >= spatial_min_overlap)
- spatial = find_spatial_split(spec, nodeSAH);
- }
+ /* build nodes */
+ BVHBuildTask *task = (BVHBuildTask*)task_;
+ BVHNode *node = build_node(task->range, task->level);
+
+ /* set child in inner node */
+ task->node->children[task->child] = node;
- /* leaf SAH is the lowest => create leaf. */
- float minSAH = min(min(leafSAH, object.sah), spatial.sah);
+ /* update progress */
+ if(task->range.size() < THREAD_TASK_SIZE) {
+ /*rotate(node, INT_MAX, 5);*/
- if(minSAH == leafSAH && spec.num <= params.max_leaf_size)
- return create_leaf_node(spec);
+ thread_scoped_lock lock(build_mutex);
- /* perform split. */
- NodeSpec left, right;
+ progress_count += task->range.size();
+ progress_update();
+ }
+}
+
+/* multithreaded binning builder */
+BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
+{
+ size_t size = range.size();
+ float leafSAH = params.sah_triangle_cost * range.leafSAH;
+ float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_triangle_cost * range.splitSAH;
- if(params.use_spatial_split && minSAH == spatial.sah)
- do_spatial_split(left, right, spec, spatial);
- if(!left.num || !right.num)
- do_object_split(left, right, spec, object);
+ /* make leaf node when threshold reached or SAH tells us */
+ if(params.small_enough_for_leaf(size, level) || (size <= params.max_leaf_size && leafSAH < splitSAH))
+ return create_leaf_node(range);
+
+ /* perform split */
+ BVHObjectBinning left, right;
+ range.split(&references[0], left, right);
/* create inner node. */
- progress_num_duplicates += left.num + right.num - spec.num;
+ InnerNode *inner;
- float progress_mid = lerp(progress_start, progress_end, (float)right.num / (float)(left.num + right.num));
+ if(range.size() < THREAD_TASK_SIZE) {
+ /* local build */
+ BVHNode *leftnode = build_node(left, level + 1);
+ BVHNode *rightnode = build_node(right, level + 1);
- BVHNode* rightNode = build_node(right, level + 1, progress_start, progress_mid);
- if(progress.get_cancel()) {
- if(rightNode) rightNode->deleteSubtree();
- return NULL;
+ inner = new InnerNode(range.bounds(), leftnode, rightnode);
}
+ else {
+ /* threaded build */
+ inner = new InnerNode(range.bounds());
+
+ task_pool.push(new BVHBuildTask(inner, 0, left, level + 1), true);
+ task_pool.push(new BVHBuildTask(inner, 1, right, level + 1), true);
+ }
+
+ return inner;
+}
- BVHNode* leftNode = build_node(left, level + 1, progress_mid, progress_end);
- if(progress.get_cancel()) {
- if(leftNode) leftNode->deleteSubtree();
+/* single threaded spatial split builder */
+BVHNode* BVHBuild::build_node(const BVHRange& range, int level)
+{
+ /* progress update */
+ progress_update();
+ if(progress.get_cancel())
return NULL;
+
+ /* small enough or too deep => create leaf. */
+ if(params.small_enough_for_leaf(range.size(), level)) {
+ progress_count += range.size();
+ return create_leaf_node(range);
+ }
+
+ /* splitting test */
+ BVHMixedSplit split(this, range, level);
+
+ if(split.no_split) {
+ progress_count += range.size();
+ return create_leaf_node(range);
}
+
+ /* do split */
+ BVHRange left, right;
+ split.split(this, left, right, range);
+
+ progress_total += left.size() + right.size() - range.size();
+ size_t total = progress_total;
+
+ /* left node */
+ BVHNode *leftnode = build_node(left, level + 1);
+
+ /* right node (modify start for splits) */
+ right.set_start(right.start() + progress_total - total);
+ BVHNode *rightnode = build_node(right, level + 1);
- return new InnerNode(spec.bounds, leftNode, rightNode);
+ /* inner node */
+ return new InnerNode(range.bounds(), leftnode, rightnode);
}
-BVHNode *BVHBuild::create_object_leaf_nodes(const Reference *ref, int num)
+/* Create Nodes */
+
+BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
{
if(num == 0) {
- BoundBox bounds;
+ BoundBox bounds = BoundBox::empty;
return new LeafNode(bounds, 0, 0, 0);
}
else if(num == 1) {
- prim_index.push_back(ref[0].prim_index);
- prim_object.push_back(ref[0].prim_object);
- uint visibility = objects[ref[0].prim_object]->visibility;
- return new LeafNode(ref[0].bounds, visibility, prim_index.size()-1, prim_index.size());
+ if(start == prim_index.size()) {
+ assert(params.use_spatial_split);
+
+ prim_index.push_back(ref->prim_index());
+ prim_object.push_back(ref->prim_object());
+ }
+ else {
+ prim_index[start] = ref->prim_index();
+ prim_object[start] = ref->prim_object();
+ }
+
+ uint visibility = objects[ref->prim_object()]->visibility;
+ return new LeafNode(ref->bounds(), visibility, start, start+1);
}
else {
int mid = num/2;
- BVHNode *leaf0 = create_object_leaf_nodes(ref, mid);
- BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, num-mid);
+ BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
+ BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
- BoundBox bounds;
+ BoundBox bounds = BoundBox::empty;
bounds.grow(leaf0->m_bounds);
bounds.grow(leaf1->m_bounds);
@@ -241,310 +345,136 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const Reference *ref, int num)
}
}
-BVHNode* BVHBuild::create_leaf_node(const NodeSpec& spec)
+BVHNode* BVHBuild::create_leaf_node(const BVHRange& range)
{
vector<int>& p_index = prim_index;
vector<int>& p_object = prim_object;
- BoundBox bounds;
- int num = 0;
+ BoundBox bounds = BoundBox::empty;
+ int num = 0, ob_num = 0;
uint visibility = 0;
- for(int i = 0; i < spec.num; i++) {
- if(references.back().prim_index != -1) {
- p_index.push_back(references.back().prim_index);
- p_object.push_back(references.back().prim_object);
- bounds.grow(references.back().bounds);
- visibility |= objects[references.back().prim_object]->visibility;
- references.pop_back();
+ for(int i = 0; i < range.size(); i++) {
+ BVHReference& ref = references[range.start() + i];
+
+ if(ref.prim_index() != -1) {
+ if(range.start() + num == prim_index.size()) {
+ assert(params.use_spatial_split);
+
+ p_index.push_back(ref.prim_index());
+ p_object.push_back(ref.prim_object());
+ }
+ else {
+ p_index[range.start() + num] = ref.prim_index();
+ p_object[range.start() + num] = ref.prim_object();
+ }
+
+ bounds.grow(ref.bounds());
+ visibility |= objects[ref.prim_object()]->visibility;
num++;
}
+ else {
+ if(ob_num < i)
+ references[range.start() + ob_num] = ref;
+ ob_num++;
+ }
}
BVHNode *leaf = NULL;
if(num > 0) {
- leaf = new LeafNode(bounds, visibility, p_index.size() - num, p_index.size());
+ leaf = new LeafNode(bounds, visibility, range.start(), range.start() + num);
- if(num == spec.num)
+ if(num == range.size())
return leaf;
}
/* while there may be multiple triangles in a leaf, for object primitives
- * we want them to be the only one, so we */
- int ob_num = spec.num - num;
- const Reference *ref = (ob_num)? &references.back() - (ob_num - 1): NULL;
- BVHNode *oleaf = create_object_leaf_nodes(ref, ob_num);
- for(int i = 0; i < ob_num; i++)
- references.pop_back();
+ * we want each one to be alone in its leaf, so we keep splitting */
+ const BVHReference *ref = (ob_num)? &references[range.start()]: NULL;
+ BVHNode *oleaf = create_object_leaf_nodes(ref, range.start() + num, ob_num);
if(leaf)
- return new InnerNode(spec.bounds, leaf, oleaf);
+ return new InnerNode(range.bounds(), leaf, oleaf);
else
return oleaf;
}
-/* Object Split */
+/* Tree Rotations */
-BVHBuild::ObjectSplit BVHBuild::find_object_split(const NodeSpec& spec, float nodeSAH)
+void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
{
- ObjectSplit split;
- const Reference *ref_ptr = &references[references.size() - spec.num];
-
- for(int dim = 0; dim < 3; dim++) {
- /* sort references */
- bvh_reference_sort(references.size() - spec.num, references.size(), &references[0], dim);
-
- /* sweep right to left and determine bounds. */
- BoundBox right_bounds;
-
- for(int i = spec.num - 1; i > 0; i--) {
- right_bounds.grow(ref_ptr[i].bounds);
- spatial_right_bounds[i - 1] = right_bounds;
- }
-
- /* sweep left to right and select lowest SAH. */
- BoundBox left_bounds;
-
- for(int i = 1; i < spec.num; i++) {
- left_bounds.grow(ref_ptr[i - 1].bounds);
- right_bounds = spatial_right_bounds[i - 1];
-
- float sah = nodeSAH +
- left_bounds.area() * params.triangle_cost(i) +
- right_bounds.area() * params.triangle_cost(spec.num - i);
-
- if(sah < split.sah) {
- split.sah = sah;
- split.dim = dim;
- split.num_left = i;
- split.left_bounds = left_bounds;
- split.right_bounds = right_bounds;
- }
- }
- }
-
- return split;
+ /* in tested scenes, this resulted in slightly slower raytracing, so disabled
+ * it for now. could be implementation bug, or depend on the scene */
+ if(node)
+ for(int i = 0; i < iterations; i++)
+ rotate(node, max_depth);
}
-void BVHBuild::do_object_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const ObjectSplit& split)
+void BVHBuild::rotate(BVHNode *node, int max_depth)
{
- /* sort references according to split */
- int start = references.size() - spec.num;
- int end = references.size(); /* todo: is this right? */
-
- bvh_reference_sort(start, end, &references[0], split.dim);
-
- /* split node specs */
- left.num = split.num_left;
- left.bounds = split.left_bounds;
- right.num = spec.num - split.num_left;
- right.bounds = split.right_bounds;
-}
-
-/* Spatial Split */
-
-BVHBuild::SpatialSplit BVHBuild::find_spatial_split(const NodeSpec& spec, float nodeSAH)
-{
- /* initialize bins. */
- float3 origin = spec.bounds.min;
- float3 binSize = (spec.bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
- float3 invBinSize = 1.0f / binSize;
-
- for(int dim = 0; dim < 3; dim++) {
- for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) {
- SpatialBin& bin = spatial_bins[dim][i];
-
- bin.bounds = BoundBox();
- bin.enter = 0;
- bin.exit = 0;
- }
- }
-
- /* chop references into bins. */
- for(unsigned int refIdx = references.size() - spec.num; refIdx < references.size(); refIdx++) {
- const Reference& ref = references[refIdx];
- float3 firstBinf = (ref.bounds.min - origin) * invBinSize;
- float3 lastBinf = (ref.bounds.max - origin) * invBinSize;
- int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z);
- int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z);
+ /* nothing to rotate if we reached a leaf node. */
+ if(node->is_leaf() || max_depth < 0)
+ return;
+
+ InnerNode *parent = (InnerNode*)node;
- firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1);
- lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1);
+ /* rotate all children first */
+ for(size_t c = 0; c < 2; c++)
+ rotate(parent->children[c], max_depth-1);
- for(int dim = 0; dim < 3; dim++) {
- Reference currRef = ref;
+ /* compute current area of all children */
+ BoundBox bounds0 = parent->children[0]->m_bounds;
+ BoundBox bounds1 = parent->children[1]->m_bounds;
- for(int i = firstBin[dim]; i < lastBin[dim]; i++) {
- Reference leftRef, rightRef;
+ float area0 = bounds0.half_area();
+ float area1 = bounds1.half_area();
+ float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
- split_reference(leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
- spatial_bins[dim][i].bounds.grow(leftRef.bounds);
- currRef = rightRef;
- }
+ /* find best rotation. we pick a target child of a first child, and swap
+ * this with another child. we perform the best such swap. */
+ float best_cost = FLT_MAX;
+ int best_child = -1, bets_target = -1, best_other = -1;
- spatial_bins[dim][lastBin[dim]].bounds.grow(currRef.bounds);
- spatial_bins[dim][firstBin[dim]].enter++;
- spatial_bins[dim][lastBin[dim]].exit++;
- }
- }
+ for(size_t c = 0; c < 2; c++) {
+ /* ignore leaf nodes as we cannot descend into them */
+ if(parent->children[c]->is_leaf())
+ continue;
- /* select best split plane. */
- SpatialSplit split;
+ InnerNode *child = (InnerNode*)parent->children[c];
+ BoundBox& other = (c == 0)? bounds1: bounds0;
- for(int dim = 0; dim < 3; dim++) {
- /* sweep right to left and determine bounds. */
- BoundBox right_bounds;
+ /* transpose child bounds */
+ BoundBox target0 = child->children[0]->m_bounds;
+ BoundBox target1 = child->children[1]->m_bounds;
- for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) {
- right_bounds.grow(spatial_bins[dim][i].bounds);
- spatial_right_bounds[i - 1] = right_bounds;
- }
+ /* compute cost for both possible swaps */
+ float cost0 = merge(other, target1).half_area() - child_area[c];
+ float cost1 = merge(target0, other).half_area() - child_area[c];
- /* sweep left to right and select lowest SAH. */
- BoundBox left_bounds;
- int leftNum = 0;
- int rightNum = spec.num;
+ if(min(cost0,cost1) < best_cost) {
+ best_child = (int)c;
+ best_other = (int)(1-c);
- for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) {
- left_bounds.grow(spatial_bins[dim][i - 1].bounds);
- leftNum += spatial_bins[dim][i - 1].enter;
- rightNum -= spatial_bins[dim][i - 1].exit;
-
- float sah = nodeSAH +
- left_bounds.area() * params.triangle_cost(leftNum) +
- spatial_right_bounds[i - 1].area() * params.triangle_cost(rightNum);
-
- if(sah < split.sah) {
- split.sah = sah;
- split.dim = dim;
- split.pos = origin[dim] + binSize[dim] * (float)i;
+ if(cost0 < cost1) {
+     /* swapping grandchild 0 with the other child is cheaper */
+     best_cost = cost0;
+     bets_target = 0;
+ }
+ else {
+     /* swapping grandchild 1 is at least as cheap: record ITS cost, so
+      * that the later best_cost comparisons and the final
+      * "best_cost >= 0" test judge the swap that is actually selected */
+     best_cost = cost1;
+     bets_target = 1;
+ }
}
}
- return split;
-}
-
-void BVHBuild::do_spatial_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const SpatialSplit& split)
-{
- /* Categorize references and compute bounds.
- *
- * Left-hand side: [left_start, left_end[
- * Uncategorized/split: [left_end, right_start[
- * Right-hand side: [right_start, refs.size()[ */
-
- vector<Reference>& refs = references;
- int left_start = refs.size() - spec.num;
- int left_end = left_start;
- int right_start = refs.size();
-
- left.bounds = right.bounds = BoundBox();
-
- for(int i = left_end; i < right_start; i++) {
- if(refs[i].bounds.max[split.dim] <= split.pos) {
- /* entirely on the left-hand side */
- left.bounds.grow(refs[i].bounds);
- swap(refs[i], refs[left_end++]);
- }
- else if(refs[i].bounds.min[split.dim] >= split.pos) {
- /* entirely on the right-hand side */
- right.bounds.grow(refs[i].bounds);
- swap(refs[i--], refs[--right_start]);
- }
- }
-
- /* duplicate or unsplit references intersecting both sides. */
- while(left_end < right_start) {
- /* split reference. */
- Reference lref, rref;
-
- split_reference(lref, rref, refs[left_end], split.dim, split.pos);
-
- /* compute SAH for duplicate/unsplit candidates. */
- BoundBox lub = left.bounds; // Unsplit to left: new left-hand bounds.
- BoundBox rub = right.bounds; // Unsplit to right: new right-hand bounds.
- BoundBox ldb = left.bounds; // Duplicate: new left-hand bounds.
- BoundBox rdb = right.bounds; // Duplicate: new right-hand bounds.
-
- lub.grow(refs[left_end].bounds);
- rub.grow(refs[left_end].bounds);
- ldb.grow(lref.bounds);
- rdb.grow(rref.bounds);
-
- float lac = params.triangle_cost(left_end - left_start);
- float rac = params.triangle_cost(refs.size() - right_start);
- float lbc = params.triangle_cost(left_end - left_start + 1);
- float rbc = params.triangle_cost(refs.size() - right_start + 1);
-
- float unsplitLeftSAH = lub.area() * lbc + right.bounds.area() * rac;
- float unsplitRightSAH = left.bounds.area() * lac + rub.area() * rbc;
- float duplicateSAH = ldb.area() * lbc + rdb.area() * rbc;
- float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH);
-
- if(minSAH == unsplitLeftSAH) {
- /* unsplit to left */
- left.bounds = lub;
- left_end++;
- }
- else if(minSAH == unsplitRightSAH) {
- /* unsplit to right */
- right.bounds = rub;
- swap(refs[left_end], refs[--right_start]);
- }
- else {
- /* duplicate */
- left.bounds = ldb;
- right.bounds = rdb;
- refs[left_end++] = lref;
- refs.push_back(rref);
- }
- }
-
- left.num = left_end - left_start;
- right.num = refs.size() - right_start;
-}
+ /* if we did not find a swap that improves the SAH then do nothing */
+ if(best_cost >= 0)
+ return;
-void BVHBuild::split_reference(Reference& left, Reference& right, const Reference& ref, int dim, float pos)
-{
- /* initialize references. */
- left.prim_index = right.prim_index = ref.prim_index;
- left.prim_object = right.prim_object = ref.prim_object;
- left.bounds = right.bounds = BoundBox();
-
- /* loop over vertices/edges. */
- Object *ob = objects[ref.prim_object];
- const Mesh *mesh = ob->mesh;
- const int *inds = mesh->triangles[ref.prim_index].v;
- const float3 *verts = &mesh->verts[0];
- const float3* v1 = &verts[inds[2]];
-
- for(int i = 0; i < 3; i++) {
- const float3* v0 = v1;
- int vindex = inds[i];
- v1 = &verts[vindex];
- float v0p = (*v0)[dim];
- float v1p = (*v1)[dim];
-
- /* insert vertex to the boxes it belongs to. */
- if(v0p <= pos)
- left.bounds.grow(*v0);
-
- if(v0p >= pos)
- right.bounds.grow(*v0);
-
- /* edge intersects the plane => insert intersection to both boxes. */
- if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
- float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
- left.bounds.grow(t);
- right.bounds.grow(t);
- }
- }
+ /* perform the best found tree rotation */
+ InnerNode *child = (InnerNode*)parent->children[best_child];
- /* intersect with original bounds. */
- left.bounds.max[dim] = pos;
- right.bounds.min[dim] = pos;
- left.bounds.intersect(ref.bounds);
- right.bounds.intersect(ref.bounds);
+ swap(parent->children[best_other], child->children[bets_target]);
+ child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 1fa1951d7f2..84e14632b4b 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -21,8 +21,10 @@
#include <float.h>
#include "bvh.h"
+#include "bvh_binning.h"
#include "util_boundbox.h"
+#include "util_task.h"
#include "util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -37,28 +39,7 @@ class Progress;
class BVHBuild
{
public:
- struct Reference
- {
- int prim_index;
- int prim_object;
- BoundBox bounds;
-
- Reference()
- {
- }
- };
-
- struct NodeSpec
- {
- int num;
- BoundBox bounds;
-
- NodeSpec()
- {
- num = 0;
- }
- };
-
+ /* Constructor/Destructor */
BVHBuild(
const vector<Object*>& objects,
vector<int>& prim_index,
@@ -70,63 +51,37 @@ public:
BVHNode *run();
protected:
+ friend class BVHMixedSplit;
+ friend class BVHObjectSplit;
+ friend class BVHSpatialSplit;
+
/* adding references */
- void add_reference_mesh(NodeSpec& root, Mesh *mesh, int i);
- void add_reference_object(NodeSpec& root, Object *ob, int i);
- void add_references(NodeSpec& root);
+ void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
+ void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
+ void add_references(BVHRange& root);
/* building */
- BVHNode *build_node(const NodeSpec& spec, int level, float progress_start, float progress_end);
- BVHNode *create_leaf_node(const NodeSpec& spec);
- BVHNode *create_object_leaf_nodes(const Reference *ref, int num);
-
- void progress_update(float progress_start, float progress_end);
-
- /* object splits */
- struct ObjectSplit
- {
- float sah;
- int dim;
- int num_left;
- BoundBox left_bounds;
- BoundBox right_bounds;
-
- ObjectSplit()
- : sah(FLT_MAX), dim(0), num_left(0)
- {
- }
- };
-
- ObjectSplit find_object_split(const NodeSpec& spec, float nodeSAH);
- void do_object_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const ObjectSplit& split);
-
- /* spatial splits */
- struct SpatialSplit
- {
- float sah;
- int dim;
- float pos;
-
- SpatialSplit()
- : sah(FLT_MAX), dim(0), pos(0.0f)
- {
- }
- };
-
- struct SpatialBin
- {
- BoundBox bounds;
- int enter;
- int exit;
- };
-
- SpatialSplit find_spatial_split(const NodeSpec& spec, float nodeSAH);
- void do_spatial_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const SpatialSplit& split);
- void split_reference(Reference& left, Reference& right, const Reference& ref, int dim, float pos);
+ BVHNode *build_node(const BVHRange& range, int level);
+ BVHNode *build_node(const BVHObjectBinning& range, int level);
+ BVHNode *create_leaf_node(const BVHRange& range);
+ BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
+
+ /* threads */
+ enum { THREAD_TASK_SIZE = 4096 };
+ void thread_build_node(Task *task_, int thread_id);
+ thread_mutex build_mutex;
+
+ /* progress */
+ void progress_update();
+
+ /* tree rotations */
+ void rotate(BVHNode *node, int max_depth);
+ void rotate(BVHNode *node, int max_depth, int iterations);
/* objects and primitive references */
vector<Object*> objects;
- vector<Reference> references;
+ vector<BVHReference> references;
+ int num_original_references;
/* output primitive indexes and objects */
vector<int>& prim_index;
@@ -138,12 +93,17 @@ protected:
/* progress reporting */
Progress& progress;
double progress_start_time;
- int progress_num_duplicates;
+ size_t progress_count;
+ size_t progress_total;
+ size_t progress_original_total;
/* spatial splitting */
float spatial_min_overlap;
vector<BoundBox> spatial_right_bounds;
- SpatialBin spatial_bins[3][BVHParams::NUM_SPATIAL_BINS];
+ BVHSpatialBin spatial_bins[3][BVHParams::NUM_SPATIAL_BINS];
+
+ /* threads */
+ TaskPool task_pool;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 63683bae4a3..4edfb4b70a4 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -24,6 +24,8 @@
CCL_NAMESPACE_BEGIN
+/* BVH Node */
+
int BVHNode::getSubtreeSize(BVH_STAT stat) const
{
int cnt = 0;
@@ -59,7 +61,8 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
void BVHNode::deleteSubtree()
{
for(int i=0;i<num_children();i++)
- get_child(i)->deleteSubtree();
+ if(get_child(i))
+ get_child(i)->deleteSubtree();
delete this;
}
@@ -70,12 +73,27 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons
for(int i=0;i<num_children();i++) {
BVHNode *child = get_child(i);
- SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.area()/m_bounds.area());
+ SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.safe_area()/m_bounds.safe_area());
}
return SAH;
}
+uint BVHNode::update_visibility()
+{
+    /* leaves, and inner nodes whose flags are already non-zero, keep their
+     * value; a visibility of 0 on an inner node is treated as "not yet
+     * computed" */
+    if(is_leaf() || m_visibility != 0)
+        return m_visibility;
+
+    /* combine the flags of both subtrees, computing them on demand */
+    InnerNode *self = (InnerNode*)this;
+    uint left = self->children[0]->update_visibility();
+    uint right = self->children[1]->update_visibility();
+
+    m_visibility = left|right;
+    return m_visibility;
+}
+
+/* Inner Node */
+
void InnerNode::print(int depth) const
{
for(int i = 0; i < depth; i++)
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index 5e0a17a1193..5c00f7b7a38 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -49,8 +49,6 @@ public:
virtual int num_triangles() const { return 0; }
virtual void print(int depth = 0) const = 0;
- float getArea() const { return m_bounds.area(); }
-
BoundBox m_bounds;
uint m_visibility;
@@ -58,6 +56,8 @@ public:
int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const;
void deleteSubtree();
+
+ uint update_visibility();
};
class InnerNode : public BVHNode
@@ -66,9 +66,21 @@ public:
InnerNode(const BoundBox& bounds, BVHNode* child0, BVHNode* child1)
{
m_bounds = bounds;
- m_visibility = child0->m_visibility|child1->m_visibility;
children[0] = child0;
children[1] = child1;
+
+ if(child0 && child1)
+ m_visibility = child0->m_visibility|child1->m_visibility;
+ else
+ m_visibility = 0; /* happens on build cancel */
+ }
+
+ InnerNode(const BoundBox& bounds) /* node with known bounds but no children yet */
+ {
+ m_bounds = bounds;
+ m_visibility = 0; /* no child visibility to combine yet */
+ children[0] = NULL; /* both children start out NULL and are assigned later */
+ children[1] = NULL;
+ }
bool is_leaf() const { return false; }
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index 38093438500..0cf5e905fea 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -18,6 +18,8 @@
#ifndef __BVH_PARAMS_H__
#define __BVH_PARAMS_H__
+#include "util_boundbox.h"
+
CCL_NAMESPACE_BEGIN
/* BVH Parameters */
@@ -73,14 +75,97 @@ public:
}
/* SAH costs */
- float cost(int num_nodes, int num_tris) const
+ __forceinline float cost(int num_nodes, int num_tris) const
{ return node_cost(num_nodes) + triangle_cost(num_tris); }
- float triangle_cost(int n) const
+ __forceinline float triangle_cost(int n) const
{ return n*sah_triangle_cost; }
- float node_cost(int n) const
+ __forceinline float node_cost(int n) const
{ return n*sah_node_cost; }
+
+ /* true when a range must become a leaf: it holds at most min_leaf_size
+  * entries, or the tree has reached MAX_DEPTH levels. const like the other
+  * queries of this class, so it can be used through a const BVHParams. */
+ __forceinline bool small_enough_for_leaf(int size, int level) const
+ { return (size <= min_leaf_size || level >= MAX_DEPTH); }
+};
+
+/* BVH Reference
+ *
+ * Reference to a primitive. Primitive index and object are sneakily packed
+ * into BoundBox to reduce memory usage and align nicely.
+ *
+ * The primitive index is stored as the bit pattern of rbounds.min.w and the
+ * object index as the bit pattern of rbounds.max.w. The builder passes a
+ * primitive index of -1 for references that stand for a whole object. */
+
+class BVHReference
+{
+public:
+ __forceinline BVHReference() {} /* leaves rbounds default-constructed */
+
+ __forceinline BVHReference(const BoundBox& bounds_, int prim_index, int prim_object)
+ : rbounds(bounds_)
+ {
+ rbounds.min.w = __int_as_float(prim_index); /* overwrite w after copying the box */
+ rbounds.max.w = __int_as_float(prim_object);
+ }
+
+ __forceinline const BoundBox& bounds() const { return rbounds; } /* w components hold the packed integers */
+ __forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
+ __forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
+
+protected:
+ BoundBox rbounds; /* bounds plus the two packed integers */
+};
+
+/* BVH Range
+ *
+ * Build range used during construction, to indicate the bounds and place in
+ * the reference array of a subset of primitives. Again uses trickery to pack
+ * integers into BoundBox for alignment purposes.
+ *
+ * The start index is stored as the bit pattern of rbounds.min.w and the
+ * number of references as the bit pattern of rbounds.max.w. */
+
+class BVHRange
+{
+public:
+ __forceinline BVHRange() /* empty range: start 0, size 0; only the w components are set here */
+ {
+ rbounds.min.w = __int_as_float(0);
+ rbounds.max.w = __int_as_float(0);
+ }
+
+ __forceinline BVHRange(const BoundBox& bounds_, int start_, int size_) /* cbounds stays default-constructed */
+ : rbounds(bounds_)
+ {
+ rbounds.min.w = __int_as_float(start_);
+ rbounds.max.w = __int_as_float(size_);
+ }
+
+ __forceinline BVHRange(const BoundBox& bounds_, const BoundBox& cbounds_, int start_, int size_) /* also stores cbounds_, returned by cent_bounds() */
+ : rbounds(bounds_), cbounds(cbounds_)
+ {
+ rbounds.min.w = __int_as_float(start_);
+ rbounds.max.w = __int_as_float(size_);
+ }
+
+ __forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); } /* changes only the start, not the size */
+
+ __forceinline const BoundBox& bounds() const { return rbounds; }
+ __forceinline const BoundBox& cent_bounds() const { return cbounds; }
+ __forceinline int start() const { return __float_as_int(rbounds.min.w); }
+ __forceinline int size() const { return __float_as_int(rbounds.max.w); }
+ __forceinline int end() const { return start() + size(); } /* one past the last index of the range */
+
+protected:
+ BoundBox rbounds; /* bounds, with start and size packed into the w components */
+ BoundBox cbounds; /* second box, set only by the four-argument constructor */
+};
+
+/* BVH Spatial Bin
+ *
+ * One bin of the spatial split search: a bounding box plus an enter and an
+ * exit counter. */
+
+struct BVHSpatialBin
+{
+ BoundBox bounds;
+ int enter;
+ int exit;
+
+ __forceinline BVHSpatialBin() /* does not initialize enter or exit */
+ {
+ }
+};
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp
index ee4531a4843..bef384be592 100644
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -32,23 +32,23 @@ public:
dim = dim_;
}
- bool operator()(const BVHBuild::Reference& ra, const BVHBuild::Reference& rb)
+ bool operator()(const BVHReference& ra, const BVHReference& rb)
{
- float ca = ra.bounds.min[dim] + ra.bounds.max[dim];
- float cb = rb.bounds.min[dim] + rb.bounds.max[dim];
+ float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
+ float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
if(ca < cb) return true;
else if(ca > cb) return false;
- else if(ra.prim_object < rb.prim_object) return true;
- else if(ra.prim_object > rb.prim_object) return false;
- else if(ra.prim_index < rb.prim_index) return true;
- else if(ra.prim_index > rb.prim_index) return false;
+ else if(ra.prim_object() < rb.prim_object()) return true;
+ else if(ra.prim_object() > rb.prim_object()) return false;
+ else if(ra.prim_index() < rb.prim_index()) return true;
+ else if(ra.prim_index() > rb.prim_index()) return false;
return false;
}
};
-void bvh_reference_sort(int start, int end, BVHBuild::Reference *data, int dim)
+void bvh_reference_sort(int start, int end, BVHReference *data, int dim)
{
sort(data+start, data+end, BVHReferenceCompare(dim));
}
diff --git a/intern/cycles/bvh/bvh_sort.h b/intern/cycles/bvh/bvh_sort.h
index f0676948146..ba35ba3fae7 100644
--- a/intern/cycles/bvh/bvh_sort.h
+++ b/intern/cycles/bvh/bvh_sort.h
@@ -20,7 +20,7 @@
CCL_NAMESPACE_BEGIN
-void bvh_reference_sort(int start, int end, BVHBuild::Reference *data, int dim);
+void bvh_reference_sort(int start, int end, BVHReference *data, int dim);
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp
new file mode 100644
index 00000000000..263c5834428
--- /dev/null
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -0,0 +1,293 @@
+/*
+ * Adapted from code copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bvh_build.h"
+#include "bvh_split.h"
+#include "bvh_sort.h"
+
+#include "mesh.h"
+#include "object.h"
+
+#include "util_algorithm.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Object Split
+ *
+ * Searches for the cheapest way to partition the references of `range` into
+ * a left and a right group without cutting any reference. For each of the
+ * three axes the references of the range are sorted along that axis with
+ * bvh_reference_sort(), and every split position i in 1 .. size-1 is costed as
+ *
+ *   nodeSAH + area(left) x triangle_cost(i) + area(right) x triangle_cost(size - i)
+ *
+ * The cheapest candidate over all axes is stored in the members sah, dim,
+ * num_left, left_bounds and right_bounds. When the range holds fewer than two
+ * references neither loop runs and sah stays at FLT_MAX.
+ *
+ * builder->spatial_right_bounds is used as scratch storage and is written at
+ * indices 0 .. size-2. The locals dim, sah, left_bounds and right_bounds
+ * shadow the members of the same name, which is why the members are written
+ * through this-> below. */
+
+BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH)
+: sah(FLT_MAX), dim(0), num_left(0), left_bounds(BoundBox::empty), right_bounds(BoundBox::empty)
+{
+ const BVHReference *ref_ptr = &builder->references[range.start()];
+ float min_sah = FLT_MAX;
+
+ for(int dim = 0; dim < 3; dim++) {
+ /* sort the references of the range along the current axis; this reorders
+ * builder->references in place. */
+ bvh_reference_sort(range.start(), range.end(), &builder->references[0], dim);
+
+ /* sweep right to left and determine bounds: after this loop
+ * spatial_right_bounds[i - 1] holds the union of references i .. size-1. */
+ BoundBox right_bounds = BoundBox::empty;
+
+ for(int i = range.size() - 1; i > 0; i--) {
+ right_bounds.grow(ref_ptr[i].bounds());
+ builder->spatial_right_bounds[i - 1] = right_bounds;
+ }
+
+ /* sweep left to right and select lowest SAH: left_bounds accumulates
+ * references 0 .. i-1, the matching right side comes from the table
+ * filled above. */
+ BoundBox left_bounds = BoundBox::empty;
+
+ for(int i = 1; i < range.size(); i++) {
+ left_bounds.grow(ref_ptr[i - 1].bounds());
+ right_bounds = builder->spatial_right_bounds[i - 1];
+
+ float sah = nodeSAH +
+ left_bounds.safe_area() * builder->params.triangle_cost(i) +
+ right_bounds.safe_area() * builder->params.triangle_cost(range.size() - i);
+
+ if(sah < min_sah) {
+ min_sah = sah;
+
+ this->sah = sah;
+ this->dim = dim;
+ this->num_left = i;
+ this->left_bounds = left_bounds;
+ this->right_bounds = right_bounds;
+ }
+ }
+ }
+}
+
+void BVHObjectSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range)
+{
+ /* Apply the split found by the constructor.
+ *
+ * sort references according to split: re-sort the range along the chosen
+ * axis (this->dim) so that its first num_left references form the left
+ * child and the remainder the right child. */
+ bvh_reference_sort(range.start(), range.end(), &builder->references[0], this->dim);
+
+ /* split node ranges: left starts at range.start(), right starts where left
+ * ends; both reuse the bounds recorded by the constructor. */
+ left = BVHRange(this->left_bounds, range.start(), this->num_left);
+ right = BVHRange(this->right_bounds, left.end(), range.size() - this->num_left);
+
+}
+
+/* Spatial Split
+ *
+ * Searches for the cheapest axis-aligned plane that cuts the bounds of
+ * `range`. Along each axis the range bounds are divided into
+ * BVHParams::NUM_SPATIAL_BINS equally sized bins. Every reference is clipped
+ * at the bin boundaries it crosses with split_reference(), the clipped boxes
+ * are merged into the bins, and the first/last bin touched get their
+ * enter/exit counter incremented. The bin boundaries are then evaluated as
+ * candidate planes and the one with the lowest SAH cost is stored in the
+ * members sah, dim and pos. No child bounds are recorded here.
+ *
+ * builder->spatial_bins and builder->spatial_right_bounds are used as scratch
+ * storage. */
+
+BVHSpatialSplit::BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH)
+: sah(FLT_MAX), dim(0), pos(0.0f)
+{
+ /* initialize bins.
+ *
+ * NOTE(review): if the range bounds have zero extent along an axis, binSize
+ * is 0 on that axis and invBinSize is the result of a division by zero;
+ * confirm whether callers can reach this case. */
+ float3 origin = range.bounds().min;
+ float3 binSize = (range.bounds().max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
+ float3 invBinSize = 1.0f / binSize;
+
+ for(int dim = 0; dim < 3; dim++) {
+ for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) {
+ BVHSpatialBin& bin = builder->spatial_bins[dim][i];
+
+ bin.bounds = BoundBox::empty;
+ bin.enter = 0;
+ bin.exit = 0;
+ }
+ }
+
+ /* chop references into bins: firstBin/lastBin are the bins containing the
+ * reference's min/max corner per axis, clamped into the valid bin range
+ * with lastBin >= firstBin.
+ *
+ * NOTE(review): refIdx is unsigned while range.start() and range.end()
+ * return int, so the loop condition is a signed/unsigned comparison. */
+ for(unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) {
+ const BVHReference& ref = builder->references[refIdx];
+ float3 firstBinf = (ref.bounds().min - origin) * invBinSize;
+ float3 lastBinf = (ref.bounds().max - origin) * invBinSize;
+ int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z);
+ int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z);
+
+ firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1);
+ lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1);
+
+ for(int dim = 0; dim < 3; dim++) {
+ BVHReference currRef = ref;
+
+ for(int i = firstBin[dim]; i < lastBin[dim]; i++) {
+ BVHReference leftRef, rightRef;
+
+ split_reference(builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
+ builder->spatial_bins[dim][i].bounds.grow(leftRef.bounds());
+ currRef = rightRef;
+ }
+
+ builder->spatial_bins[dim][lastBin[dim]].bounds.grow(currRef.bounds());
+ builder->spatial_bins[dim][firstBin[dim]].enter++;
+ builder->spatial_bins[dim][lastBin[dim]].exit++;
+ }
+ }
+
+ /* select best split plane. */
+ for(int dim = 0; dim < 3; dim++) {
+ /* sweep right to left and determine bounds: spatial_right_bounds[i - 1]
+ * becomes the union of bins i .. NUM_SPATIAL_BINS-1. */
+ BoundBox right_bounds = BoundBox::empty;
+
+ for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) {
+ right_bounds.grow(builder->spatial_bins[dim][i].bounds);
+ builder->spatial_right_bounds[i - 1] = right_bounds;
+ }
+
+ /* sweep left to right and select lowest SAH: for the plane between bin
+ * i-1 and bin i, leftNum is the sum of the enter counters of bins
+ * 0 .. i-1 and rightNum is range.size() minus the sum of their exit
+ * counters. */
+ BoundBox left_bounds = BoundBox::empty;
+ int leftNum = 0;
+ int rightNum = range.size();
+
+ for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) {
+ left_bounds.grow(builder->spatial_bins[dim][i - 1].bounds);
+ leftNum += builder->spatial_bins[dim][i - 1].enter;
+ rightNum -= builder->spatial_bins[dim][i - 1].exit;
+
+ float sah = nodeSAH +
+ left_bounds.safe_area() * builder->params.triangle_cost(leftNum) +
+ builder->spatial_right_bounds[i - 1].safe_area() * builder->params.triangle_cost(rightNum);
+
+ if(sah < this->sah) {
+ this->sah = sah;
+ this->dim = dim;
+ this->pos = origin[dim] + binSize[dim] * (float)i;
+ }
+ }
+ }
+}
+
+void BVHSpatialSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range)
+{
+ /* Apply the plane (this->dim, this->pos) chosen by the constructor to the
+ * references of `range` and return the two child ranges in left/right.
+ *
+ * Categorize references and compute bounds.
+ *
+ * Left-hand side: [left_start, left_end[
+ * Uncategorized/split: [left_end, right_start[
+ * Right-hand side: [right_start, right_end[
+ *
+ * A reference swapped in from the right end has not been examined yet, so
+ * i is decremented in that branch to visit the same slot again. */
+
+ vector<BVHReference>& refs = builder->references;
+ int left_start = range.start();
+ int left_end = left_start;
+ int right_start = range.end();
+ int right_end = range.end();
+ BoundBox left_bounds = BoundBox::empty;
+ BoundBox right_bounds = BoundBox::empty;
+
+ for(int i = left_end; i < right_start; i++) {
+ if(refs[i].bounds().max[this->dim] <= this->pos) {
+ /* entirely on the left-hand side */
+ left_bounds.grow(refs[i].bounds());
+ swap(refs[i], refs[left_end++]);
+ }
+ else if(refs[i].bounds().min[this->dim] >= this->pos) {
+ /* entirely on the right-hand side */
+ right_bounds.grow(refs[i].bounds());
+ swap(refs[i--], refs[--right_start]);
+ }
+ }
+
+ /* duplicate or unsplit references intersecting both sides: each remaining
+ * reference straddles the plane and is either moved whole to one side or
+ * replaced by its two clipped halves, whichever gives the lowest SAH. */
+ while(left_end < right_start) {
+ /* split reference. */
+ BVHReference lref, rref;
+
+ split_reference(builder, lref, rref, refs[left_end], this->dim, this->pos);
+
+ /* compute SAH for duplicate/unsplit candidates. lac/rac are the
+ * triangle costs for the current left/right counts, lbc/rbc for those
+ * counts plus one. */
+ BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds.
+ BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds.
+ BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds.
+ BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds.
+
+ lub.grow(refs[left_end].bounds());
+ rub.grow(refs[left_end].bounds());
+ ldb.grow(lref.bounds());
+ rdb.grow(rref.bounds());
+
+ float lac = builder->params.triangle_cost(left_end - left_start);
+ float rac = builder->params.triangle_cost(right_end - right_start);
+ float lbc = builder->params.triangle_cost(left_end - left_start + 1);
+ float rbc = builder->params.triangle_cost(right_end - right_start + 1);
+
+ float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac;
+ float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc;
+ float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc;
+ float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH);
+
+ if(minSAH == unsplitLeftSAH) {
+ /* unsplit to left */
+ left_bounds = lub;
+ left_end++;
+ }
+ else if(minSAH == unsplitRightSAH) {
+ /* unsplit to right */
+ right_bounds = rub;
+ swap(refs[left_end], refs[--right_start]);
+ }
+ else {
+ /* duplicate: the left half overwrites the original slot, the right
+ * half is inserted at right_end, which grows builder->references by
+ * one element and shifts everything stored after the range. */
+ left_bounds = ldb;
+ right_bounds = rdb;
+ refs[left_end++] = lref;
+ refs.insert(refs.begin() + right_end, rref);
+ right_end++;
+ }
+ }
+
+ left = BVHRange(left_bounds, left_start, left_end - left_start);
+ right = BVHRange(right_bounds, right_start, right_end - right_start);
+}
+
+void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVHReference& right, const BVHReference& ref, int dim, float pos)
+{
+ /* Clip the triangle referenced by `ref` against the plane at coordinate
+ * `pos` on axis `dim` and return the bounds of the part on each side in
+ * `left` and `right`. Both results keep the prim_index and prim_object of
+ * `ref`. The primitive is looked up in mesh->triangles of the referenced
+ * object, so `ref` is expected to refer to a triangle.
+ *
+ * initialize boundboxes */
+ BoundBox left_bounds = BoundBox::empty;
+ BoundBox right_bounds = BoundBox::empty;
+
+ /* loop over vertices/edges: v1 starts at the triangle's last vertex, so
+ * iteration i handles the edge from the previous vertex (v0) to vertex i
+ * (v1) and all three edges are visited. */
+ Object *ob = builder->objects[ref.prim_object()];
+ const Mesh *mesh = ob->mesh;
+ const int *inds = mesh->triangles[ref.prim_index()].v;
+ const float3 *verts = &mesh->verts[0];
+ const float3* v1 = &verts[inds[2]];
+
+ for(int i = 0; i < 3; i++) {
+ const float3* v0 = v1;
+ int vindex = inds[i];
+ v1 = &verts[vindex];
+ float v0p = (*v0)[dim];
+ float v1p = (*v1)[dim];
+
+ /* insert vertex to the boxes it belongs to; a vertex lying exactly on
+ * the plane goes into both. */
+ if(v0p <= pos)
+ left_bounds.grow(*v0);
+
+ if(v0p >= pos)
+ right_bounds.grow(*v0);
+
+ /* edge intersects the plane => insert intersection to both boxes. */
+ if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
+ float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
+ left_bounds.grow(t);
+ right_bounds.grow(t);
+ }
+ }
+
+ /* intersect with original bounds: pin both boxes to the plane on the split
+ * axis, then clip them so neither exceeds the bounds of `ref`. */
+ left_bounds.max[dim] = pos;
+ right_bounds.min[dim] = pos;
+ left_bounds.intersect(ref.bounds());
+ right_bounds.intersect(ref.bounds());
+
+ /* set references */
+ left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object());
+ right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object());
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
new file mode 100644
index 00000000000..1f4befbe8e2
--- /dev/null
+++ b/intern/cycles/bvh/bvh_split.h
@@ -0,0 +1,110 @@
+/*
+ * Adapted from code copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH_SPLIT_H__
+#define __BVH_SPLIT_H__
+
+#include "bvh_build.h"
+#include "bvh_params.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BVHBuild;
+
+/* Object Split
+ *
+ * Candidate split that partitions the references of a range into two groups
+ * without cutting any of them. The three-argument constructor evaluates the
+ * candidates and fills in the members below; split() then produces the two
+ * child ranges.
+ *
+ * sah:          cost of the best candidate found
+ * dim:          axis (0..2) the references were sorted along for it
+ * num_left:     number of references assigned to the left child
+ * left_bounds,
+ * right_bounds: bounds of the two children
+ *
+ * The default constructor has an empty body and does not assign sah, dim or
+ * num_left. */
+
+class BVHObjectSplit
+{
+public:
+ float sah;
+ int dim;
+ int num_left;
+ BoundBox left_bounds;
+ BoundBox right_bounds;
+
+ BVHObjectSplit() {}
+ BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH);
+
+ void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range);
+};
+
+/* Spatial Split
+ *
+ * Candidate split described by a plane at coordinate `pos` on axis `dim`,
+ * with `sah` holding its cost. The default constructor sets sah to FLT_MAX,
+ * dim to 0 and pos to 0.0f. split() partitions a range by the stored plane
+ * and split_reference() computes the bounds of a single reference on either
+ * side of a given plane. */
+
+class BVHSpatialSplit
+{
+public:
+ float sah;
+ int dim;
+ float pos;
+
+ BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f) {}
+ BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH);
+
+ void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range);
+ void split_reference(BVHBuild *builder, BVHReference& left, BVHReference& right, const BVHReference& ref, int dim, float pos);
+};
+
+/* Mixed Object-Spatial Split
+ *
+ * Evaluates the options for one build range and remembers the cheapest:
+ *
+ * leafSAH: cost of turning the range into a leaf
+ * nodeSAH: cost of an inner node with two children, passed to the splits
+ * minSAH:  minimum of leafSAH, object.sah and spatial.sah
+ * no_split: true when the leaf is the cheapest option and the range holds
+ *           at most params.max_leaf_size references
+ *
+ * The object split is always evaluated. The spatial split is evaluated only
+ * when params.use_spatial_split is set, level is below
+ * BVHParams::MAX_SPATIAL_DEPTH, and the overlap of the object split's two
+ * child bounds has an area of at least builder->spatial_min_overlap;
+ * otherwise `spatial` keeps its default sah of FLT_MAX.
+ *
+ * split() runs the spatial split when spatial splits are enabled and it has
+ * the minimum cost, and then runs the object split if either resulting range
+ * is empty.
+ * NOTE(review): when the spatial split is not run, split() tests
+ * left.size() and right.size() as passed in by the caller, so it presumably
+ * relies on both being empty on entry (e.g. default-constructed BVHRange);
+ * confirm against the caller. */
+
+class BVHMixedSplit
+{
+public:
+ BVHObjectSplit object;
+ BVHSpatialSplit spatial;
+
+ float leafSAH;
+ float nodeSAH;
+ float minSAH;
+
+ bool no_split;
+
+ __forceinline BVHMixedSplit(BVHBuild *builder, const BVHRange& range, int level)
+ {
+ /* find split candidates. */
+ float area = range.bounds().safe_area();
+
+ leafSAH = area * builder->params.triangle_cost(range.size());
+ nodeSAH = area * builder->params.node_cost(2);
+
+ object = BVHObjectSplit(builder, range, nodeSAH);
+
+ if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
+ BoundBox overlap = object.left_bounds;
+ overlap.intersect(object.right_bounds);
+
+ if(overlap.safe_area() >= builder->spatial_min_overlap)
+ spatial = BVHSpatialSplit(builder, range, nodeSAH);
+ }
+
+ /* leaf SAH is the lowest => create leaf, but only if the range also fits
+ * within params.max_leaf_size. */
+ minSAH = min(min(leafSAH, object.sah), spatial.sah);
+ no_split = (minSAH == leafSAH && range.size() <= builder->params.max_leaf_size);
+ }
+
+ __forceinline void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range)
+ {
+ if(builder->params.use_spatial_split && minSAH == spatial.sah)
+ spatial.split(builder, left, right, range);
+ if(!left.size() || !right.size())
+ object.split(builder, left, right, range);
+ }
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BVH_SPLIT_H__ */
+
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index cceec8b8e5c..42dda1180c7 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -58,15 +58,6 @@ void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
split(tasks, num);
}
-void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
-{
- list<DeviceTask> tasks;
- split(tasks, num);
-
- foreach(DeviceTask& task, tasks)
- queue.push(task);
-}
-
void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
if(type == SHADER) {
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index af2567498d9..87f255e54e7 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -25,6 +25,7 @@
#include "util_list.h"
#include "util_string.h"
+#include "util_task.h"
#include "util_thread.h"
#include "util_types.h"
#include "util_vector.h"
@@ -66,7 +67,7 @@ public:
/* Device Task */
-class DeviceTask {
+class DeviceTask : public Task {
public:
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
Type type;
@@ -87,7 +88,6 @@ public:
DeviceTask(Type type = PATH_TRACE);
void split(list<DeviceTask>& tasks, int num);
- void split(ThreadQueue<DeviceTask>& tasks, int num);
void split_max_size(list<DeviceTask>& tasks, int max_size);
};
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index da977ed8472..ec84047c44f 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -40,35 +40,21 @@ CCL_NAMESPACE_BEGIN
class CPUDevice : public Device
{
public:
- vector<thread*> threads;
- ThreadQueue<DeviceTask> tasks;
+ TaskPool task_pool;
KernelGlobals *kg;
CPUDevice(int threads_num)
+ : task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2))
{
kg = kernel_globals_create();
/* do now to avoid thread issues */
system_cpu_support_optimized();
-
- if(threads_num == 0)
- threads_num = system_cpu_thread_count();
-
- threads.resize(threads_num);
-
- for(size_t i = 0; i < threads.size(); i++)
- threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
}
~CPUDevice()
{
- tasks.stop();
-
- foreach(thread *t, threads) {
- t->join();
- delete t;
- }
-
+ task_pool.stop();
kernel_globals_free(kg);
}
@@ -127,25 +113,21 @@ public:
#endif
}
- void thread_run(int t)
+ void thread_run(Task *task_, int thread_id)
{
- DeviceTask task;
-
- while(tasks.worker_wait_pop(task)) {
- if(task.type == DeviceTask::PATH_TRACE)
- thread_path_trace(task);
- else if(task.type == DeviceTask::TONEMAP)
- thread_tonemap(task);
- else if(task.type == DeviceTask::SHADER)
- thread_shader(task);
-
- tasks.worker_done();
- }
+ DeviceTask *task = (DeviceTask*)task_;
+
+ if(task->type == DeviceTask::PATH_TRACE)
+ thread_path_trace(*task);
+ else if(task->type == DeviceTask::TONEMAP)
+ thread_tonemap(*task);
+ else if(task->type == DeviceTask::SHADER)
+ thread_shader(*task);
}
void thread_path_trace(DeviceTask& task)
{
- if(tasks.worker_cancel())
+ if(task_pool.cancelled())
return;
#ifdef WITH_OSL
@@ -160,7 +142,7 @@ public:
kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
task.sample, x, y, task.offset, task.stride);
- if(tasks.worker_cancel())
+ if(task_pool.cancelled())
break;
}
}
@@ -172,7 +154,7 @@ public:
kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
task.sample, x, y, task.offset, task.stride);
- if(tasks.worker_cancel())
+ if(task_pool.cancelled())
break;
}
}
@@ -214,7 +196,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(tasks.worker_cancel())
+ if(task_pool.cancelled())
break;
}
}
@@ -224,7 +206,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(tasks.worker_cancel())
+ if(task_pool.cancelled())
break;
}
}
@@ -239,17 +221,22 @@ public:
{
/* split task into smaller ones, more than number of threads for uneven
workloads where some parts of the image render slower than others */
- task.split(tasks, threads.size()*10);
+ list<DeviceTask> tasks;
+
+ task.split(tasks, TaskScheduler::num_threads()*10);
+
+ foreach(DeviceTask& task, tasks)
+ task_pool.push(new DeviceTask(task));
}
void task_wait()
{
- tasks.wait_done();
+ task_pool.wait();
}
void task_cancel()
{
- tasks.cancel();
+ task_pool.cancel();
}
};
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 1f69f2c53fa..9f7d65e640b 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -257,13 +257,14 @@ public:
void task_add(DeviceTask& task)
{
- ThreadQueue<DeviceTask> tasks;
+ list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) {
- DeviceTask subtask;
+ if(!tasks.empty()) {
+ DeviceTask subtask = tasks.front();
+ tasks.pop_front();
- if(tasks.worker_wait_pop(subtask)) {
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state];
if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba];
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 9a52531eec0..6c3ade1c531 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -266,7 +266,7 @@ __device_inline void path_radiance_accum_background(PathRadiance *L, float3 thro
#endif
}
-__device_inline float3 path_radiance_sum(PathRadiance *L)
+__device_inline float3 path_radiance_sum(KernelGlobals *kg, PathRadiance *L)
{
#ifdef __PASSES__
if(L->use_light_pass) {
@@ -283,9 +283,14 @@ __device_inline float3 path_radiance_sum(PathRadiance *L)
L->indirect_glossy *= L->indirect;
L->indirect_transmission *= L->indirect;
- return L->emission + L->background
+ float3 L_sum = L->emission
+ L->direct_diffuse + L->direct_glossy + L->direct_transmission
+ L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission;
+
+ if(!kernel_data.background.transparent)
+ L_sum += L->background;
+
+ return L_sum;
}
else
return L->emission;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index ff12e85375c..8ebac177277 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -223,6 +223,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
path_radiance_init(&L, kernel_data.film.use_light_pass);
+ float min_ray_pdf = FLT_MAX;
float ray_pdf = 0.0f;
PathState state;
int rng_offset = PRNG_BASE_NUM;
@@ -239,13 +240,17 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
/* eval background shader if nothing hit */
if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
L_transparent += average(throughput);
+
+#ifdef __PASSES__
+ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
+#endif
+ break;
}
+
#ifdef __BACKGROUND__
- else {
- /* sample background shader */
- float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf);
- path_radiance_accum_background(&L, throughput, L_background, state.bounce);
- }
+ /* sample background shader */
+ float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf);
+ path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
break;
@@ -259,6 +264,18 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
+ /* blurring of bsdf after bounces, for rays that have a small likelihood
+ of following this particular path (diffuse, rough glossy) */
+ if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+ float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
+
+ if(blur_pdf < 1.0f) {
+ float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
+ shader_bsdf_blur(kg, &sd, blur_roughness);
+ }
+ }
+
+ /* holdout */
#ifdef __HOLDOUT__
if((sd.flag & SD_HOLDOUT) && (state.flag & PATH_RAY_CAMERA)) {
float3 holdout_weight = shader_holdout_eval(kg, &sd);
@@ -378,8 +395,10 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
path_radiance_bsdf_bounce(&L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
/* set labels */
- if(!(label & LABEL_TRANSPARENT))
+ if(!(label & LABEL_TRANSPARENT)) {
ray_pdf = bsdf_pdf;
+ min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf);
+ }
/* update path state */
path_state_next(kg, &state, label);
@@ -394,7 +413,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
#endif
}
- float3 L_sum = path_radiance_sum(&L);
+ float3 L_sum = path_radiance_sum(kg, &L);
#ifdef __CLAMP_SAMPLE__
path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 391dcd12dad..102a2bb036d 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -516,6 +516,7 @@ typedef struct KernelIntegrator {
/* caustics */
int no_caustics;
+ float filter_glossy;
/* seed */
int seed;
@@ -525,9 +526,6 @@ typedef struct KernelIntegrator {
/* clamp */
float sample_clamp;
-
- /* padding */
- int pad;
} KernelIntegrator;
typedef struct KernelBVH {
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index f494b6d66e1..98f8734aed2 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -40,6 +40,15 @@ __device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, float *stack
data = sd->P;
break;
}
+ case NODE_TEXCO_NORMAL: {
+ if(sd->object != ~0) {
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ data = transform_direction(&tfm, sd->N);
+ }
+ else
+ data = sd->N;
+ break;
+ }
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
@@ -85,6 +94,15 @@ __device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, floa
data = sd->P + sd->dP.dx;
break;
}
+ case NODE_TEXCO_NORMAL: {
+ if(sd->object != ~0) {
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ data = transform_direction(&tfm, sd->N);
+ }
+ else
+ data = sd->N;
+ break;
+ }
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
@@ -133,6 +151,15 @@ __device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, floa
data = sd->P + sd->dP.dy;
break;
}
+ case NODE_TEXCO_NORMAL: {
+ if(sd->object != ~0) {
+ Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ data = normalize(transform_direction(&tfm, sd->N));
+ }
+ else
+ data = sd->N;
+ break;
+ }
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 68eb39bdd29..fa7c211b5f9 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -119,6 +119,7 @@ typedef enum NodeLightPath {
} NodeLightPath;
typedef enum NodeTexCoord {
+ NODE_TEXCO_NORMAL,
NODE_TEXCO_OBJECT,
NODE_TEXCO_CAMERA,
NODE_TEXCO_WINDOW,
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 6e6d30f3879..c1f066df10c 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -41,6 +41,7 @@ Integrator::Integrator()
transparent_shadows = false;
no_caustics = false;
+ filter_glossy = 0.0f;
seed = 0;
layer_flag = ~0;
sample_clamp = 0.0f;
@@ -81,6 +82,8 @@ void Integrator::device_update(Device *device, DeviceScene *dscene)
kintegrator->transparent_shadows = transparent_shadows;
kintegrator->no_caustics = no_caustics;
+ kintegrator->filter_glossy = (filter_glossy == 0.0f)? FLT_MAX: 1.0f/filter_glossy;
+
kintegrator->seed = hash_int(seed);
kintegrator->layer_flag = layer_flag << PATH_RAY_LAYER_SHIFT;
@@ -119,6 +122,7 @@ bool Integrator::modified(const Integrator& integrator)
transparent_probalistic == integrator.transparent_probalistic &&
transparent_shadows == integrator.transparent_shadows &&
no_caustics == integrator.no_caustics &&
+ filter_glossy == integrator.filter_glossy &&
layer_flag == integrator.layer_flag &&
seed == integrator.seed &&
sample_clamp == integrator.sample_clamp);
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index abbbaca894c..0817fcaa457 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -41,6 +41,7 @@ public:
bool transparent_shadows;
bool no_caustics;
+ float filter_glossy;
int seed;
int layer_flag;
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index a7eb365f983..0ce16e65621 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -43,6 +43,7 @@ Mesh::Mesh()
transform_applied = false;
transform_negative_scaled = false;
displacement_method = DISPLACE_BUMP;
+ bounds = BoundBox::empty;
bvh = NULL;
@@ -96,7 +97,7 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_)
void Mesh::compute_bounds()
{
- BoundBox bnds;
+ BoundBox bnds = BoundBox::empty;
size_t verts_size = verts.size();
for(size_t i = 0; i < verts_size; i++)
@@ -697,6 +698,8 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
progress.set_status(msg, "Building BVH");
mesh->compute_bvh(&scene->params, progress);
+
+ i++;
}
if(progress.get_cancel()) return;
@@ -704,8 +707,6 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
mesh->need_update = false;
mesh->need_update_rebuild = false;
}
-
- i++;
}
foreach(Shader *shader, scene->shaders)
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index db696993737..d71438ebae1 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -1503,6 +1503,7 @@ TextureCoordinateNode::TextureCoordinateNode()
{
add_input("Normal", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL, true);
add_output("Generated", SHADER_SOCKET_POINT);
+ add_output("Normal", SHADER_SOCKET_NORMAL);
add_output("UV", SHADER_SOCKET_POINT);
add_output("Object", SHADER_SOCKET_POINT);
add_output("Camera", SHADER_SOCKET_POINT);
@@ -1551,6 +1552,12 @@ void TextureCoordinateNode::compile(SVMCompiler& compiler)
}
}
+ out = output("Normal");
+ if(!out->links.empty()) {
+ compiler.stack_assign(out);
+ compiler.add_node(texco_node, NODE_TEXCO_NORMAL, out->stack_offset);
+ }
+
out = output("UV");
if(!out->links.empty()) {
int attr = compiler.attribute(Attribute::STD_UV);
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 5f7a5810c09..28645d856a8 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -37,6 +37,7 @@ Object::Object()
tfm = transform_identity();
visibility = ~0;
pass_id = 0;
+ bounds = BoundBox::empty;
}
Object::~Object()
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 676f42be790..34a0c0ff877 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -27,6 +27,7 @@
#include "util_foreach.h"
#include "util_function.h"
+#include "util_task.h"
#include "util_time.h"
CCL_NAMESPACE_BEGIN
@@ -37,6 +38,8 @@ Session::Session(const SessionParams& params_)
{
device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background);
+ TaskScheduler::init(params.threads);
+
device = Device::create(params.device, params.background, params.threads);
buffers = new RenderBuffers(device);
display = new DisplayBuffer(device);
@@ -88,6 +91,8 @@ Session::~Session()
delete display;
delete scene;
delete device;
+
+ TaskScheduler::exit();
}
void Session::start()
diff --git a/intern/cycles/subd/subd_patch.cpp b/intern/cycles/subd/subd_patch.cpp
index ff477296c7e..f6acc358959 100644
--- a/intern/cycles/subd/subd_patch.cpp
+++ b/intern/cycles/subd/subd_patch.cpp
@@ -93,7 +93,7 @@ void LinearQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, float
BoundBox LinearQuadPatch::bound()
{
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
for(int i = 0; i < 4; i++)
bbox.grow(hull[i]);
@@ -115,7 +115,7 @@ void LinearTrianglePatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, f
BoundBox LinearTrianglePatch::bound()
{
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
for(int i = 0; i < 3; i++)
bbox.grow(hull[i]);
@@ -132,7 +132,7 @@ void BicubicPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, float v)
BoundBox BicubicPatch::bound()
{
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
for(int i = 0; i < 16; i++)
bbox.grow(hull[i]);
@@ -152,7 +152,7 @@ void BicubicTangentPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, f
BoundBox BicubicTangentPatch::bound()
{
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
for(int i = 0; i < 16; i++)
bbox.grow(hull[i]);
@@ -205,7 +205,7 @@ void GregoryQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, floa
BoundBox GregoryQuadPatch::bound()
{
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
for(int i = 0; i < 20; i++)
bbox.grow(hull[i]);
@@ -276,7 +276,7 @@ void GregoryTrianglePatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u,
BoundBox GregoryTrianglePatch::bound()
{
- BoundBox bbox;
+ BoundBox bbox = BoundBox::empty;
for(int i = 0; i < 20; i++)
bbox.grow(hull[i]);
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 9182ee4cbe1..87bd84b4e0f 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRC
util_path.cpp
util_string.cpp
util_system.cpp
+ util_task.cpp
util_time.cpp
util_transform.cpp
)
@@ -50,6 +51,7 @@ set(SRC_HEADERS
util_set.h
util_string.h
util_system.h
+ util_task.h
util_thread.h
util_time.h
util_transform.h
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index bb1df0b220f..9511b48e103 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -23,6 +23,7 @@
#include <float.h>
#include "util_math.h"
+#include "util_string.h"
#include "util_transform.h"
#include "util_types.h"
@@ -35,45 +36,81 @@ class BoundBox
public:
float3 min, max;
- BoundBox(void)
+ __forceinline BoundBox()
{
- min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX);
- max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
}
- BoundBox(const float3& min_, const float3& max_)
+ __forceinline BoundBox(const float3& pt)
+ : min(pt), max(pt)
+ {
+ }
+
+ __forceinline BoundBox(const float3& min_, const float3& max_)
: min(min_), max(max_)
{
}
- void grow(const float3& pt)
+ static struct empty_t {} empty;
+
+ __forceinline BoundBox(empty_t)
+ : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX))
+ {
+ }
+
+ __forceinline void grow(const float3& pt)
{
min = ccl::min(min, pt);
max = ccl::max(max, pt);
}
- void grow(const BoundBox& bbox)
+ __forceinline void grow(const BoundBox& bbox)
{
grow(bbox.min);
grow(bbox.max);
}
- void intersect(const BoundBox& bbox)
+ __forceinline void intersect(const BoundBox& bbox)
{
min = ccl::max(min, bbox.min);
max = ccl::min(max, bbox.max);
}
- float area(void) const
+ /* todo: avoid using this */
+ __forceinline float safe_area() const
{
- if(!valid())
+ if(!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z)))
return 0.0f;
+ return area();
+ }
+
+ __forceinline float area() const
+ {
+ return half_area()*2.0f;
+ }
+
+ __forceinline float half_area() const
+ {
float3 d = max - min;
- return dot(d, d)*2.0f;
+ return (d.x*d.z + d.y*d.z + d.x*d.y);
+ }
+
+ __forceinline float3 center() const
+ {
+ return 0.5f*(min + max);
}
- bool valid(void) const
+ __forceinline float3 center2() const
+ {
+ return min + max;
+ }
+
+ __forceinline float3 size() const
+ {
+ return max - min;
+ }
+
+ __forceinline bool valid() const
{
return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) &&
(isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) &&
@@ -82,7 +119,7 @@ public:
BoundBox transformed(const Transform *tfm)
{
- BoundBox result;
+ BoundBox result = BoundBox::empty;
for(int i = 0; i < 8; i++) {
float3 p;
@@ -98,6 +135,31 @@ public:
}
};
+__forceinline BoundBox merge(const BoundBox& bbox, const float3& pt)
+{
+ return BoundBox(min(bbox.min, pt), max(bbox.max, pt));
+}
+
+__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b)
+{
+ return BoundBox(min(a.min, b.min), max(a.max, b.max));
+}
+
+__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b, const BoundBox& c, const BoundBox& d)
+{
+ return merge(merge(a, b), merge(c, d));
+}
+
+__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b)
+{
+ return BoundBox(max(a.min, b.min), min(a.max, b.max));
+}
+
+__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b, const BoundBox& c)
+{
+ return intersect(a, intersect(b, c));
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_BOUNDBOX_H__ */
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 019dede07fa..33e351c74e9 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -182,93 +182,74 @@ __device_inline float average(const float2 a)
__device_inline float2 operator-(const float2 a)
{
- float2 r = {-a.x, -a.y};
- return r;
+ return make_float2(-a.x, -a.y);
}
__device_inline float2 operator*(const float2 a, const float2 b)
{
- float2 r = {a.x*b.x, a.y*b.y};
- return r;
+ return make_float2(a.x*b.x, a.y*b.y);
}
__device_inline float2 operator*(const float2 a, float f)
{
- float2 r = {a.x*f, a.y*f};
- return r;
+ return make_float2(a.x*f, a.y*f);
}
__device_inline float2 operator*(float f, const float2 a)
{
- float2 r = {a.x*f, a.y*f};
- return r;
+ return make_float2(a.x*f, a.y*f);
}
__device_inline float2 operator/(float f, const float2 a)
{
- float2 r = {f/a.x, f/a.y};
- return r;
+ return make_float2(f/a.x, f/a.y);
}
__device_inline float2 operator/(const float2 a, float f)
{
float invf = 1.0f/f;
- float2 r = {a.x*invf, a.y*invf};
- return r;
+ return make_float2(a.x*invf, a.y*invf);
}
__device_inline float2 operator/(const float2 a, const float2 b)
{
- float2 r = {a.x/b.x, a.y/b.y};
- return r;
+ return make_float2(a.x/b.x, a.y/b.y);
}
__device_inline float2 operator+(const float2 a, const float2 b)
{
- float2 r = {a.x+b.x, a.y+b.y};
- return r;
+ return make_float2(a.x+b.x, a.y+b.y);
}
__device_inline float2 operator-(const float2 a, const float2 b)
{
- float2 r = {a.x-b.x, a.y-b.y};
- return r;
+ return make_float2(a.x-b.x, a.y-b.y);
}
__device_inline float2 operator+=(float2& a, const float2 b)
{
- a.x += b.x;
- a.y += b.y;
- return a;
+ return a = a + b;
}
__device_inline float2 operator*=(float2& a, const float2 b)
{
- a.x *= b.x;
- a.y *= b.y;
- return a;
+ return a = a * b;
}
__device_inline float2 operator*=(float2& a, float f)
{
- a.x *= f;
- a.y *= f;
- return a;
+ return a = a * f;
}
__device_inline float2 operator/=(float2& a, const float2 b)
{
- a.x /= b.x;
- a.y /= b.y;
- return a;
+ return a = a / b;
}
__device_inline float2 operator/=(float2& a, float f)
{
float invf = 1.0f/f;
- a.x *= invf;
- a.y *= invf;
- return a;
+ return a = a * invf;
}
@@ -314,14 +295,12 @@ __device_inline bool operator!=(const float2 a, const float2 b)
__device_inline float2 min(float2 a, float2 b)
{
- float2 r = {min(a.x, b.x), min(a.y, b.y)};
- return r;
+ return make_float2(min(a.x, b.x), min(a.y, b.y));
}
__device_inline float2 max(float2 a, float2 b)
{
- float2 r = {max(a.x, b.x), max(a.y, b.y)};
- return r;
+ return make_float2(max(a.x, b.x), max(a.y, b.y));
}
__device_inline float2 clamp(float2 a, float2 mn, float2 mx)
@@ -361,112 +340,78 @@ __device_inline float2 interp(float2 a, float2 b, float t)
/* Float3 Vector */
-__device_inline bool is_zero(const float3 a)
-{
- return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f);
-}
-
-__device_inline float average(const float3 a)
-{
- return (a.x + a.y + a.z)*(1.0f/3.0f);
-}
-
#ifndef __KERNEL_OPENCL__
__device_inline float3 operator-(const float3 a)
{
- float3 r = make_float3(-a.x, -a.y, -a.z);
- return r;
+ return make_float3(-a.x, -a.y, -a.z);
}
__device_inline float3 operator*(const float3 a, const float3 b)
{
- float3 r = make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
- return r;
+ return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
}
__device_inline float3 operator*(const float3 a, float f)
{
- float3 r = make_float3(a.x*f, a.y*f, a.z*f);
- return r;
+ return make_float3(a.x*f, a.y*f, a.z*f);
}
__device_inline float3 operator*(float f, const float3 a)
{
- float3 r = make_float3(a.x*f, a.y*f, a.z*f);
- return r;
+ return make_float3(a.x*f, a.y*f, a.z*f);
}
__device_inline float3 operator/(float f, const float3 a)
{
- float3 r = make_float3(f/a.x, f/a.y, f/a.z);
- return r;
+ return make_float3(f/a.x, f/a.y, f/a.z);
}
__device_inline float3 operator/(const float3 a, float f)
{
float invf = 1.0f/f;
- float3 r = make_float3(a.x*invf, a.y*invf, a.z*invf);
- return r;
+ return make_float3(a.x*invf, a.y*invf, a.z*invf);
}
__device_inline float3 operator/(const float3 a, const float3 b)
{
- float3 r = make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
- return r;
+ return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
}
__device_inline float3 operator+(const float3 a, const float3 b)
{
- float3 r = make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
- return r;
+ return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
__device_inline float3 operator-(const float3 a, const float3 b)
{
- float3 r = make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
- return r;
+ return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
}
__device_inline float3 operator+=(float3& a, const float3 b)
{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
- return a;
+ return a = a + b;
}
__device_inline float3 operator*=(float3& a, const float3 b)
{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
- return a;
+ return a = a * b;
}
__device_inline float3 operator*=(float3& a, float f)
{
- a.x *= f;
- a.y *= f;
- a.z *= f;
- return a;
+ return a = a * f;
}
__device_inline float3 operator/=(float3& a, const float3 b)
{
- a.x /= b.x;
- a.y /= b.y;
- a.z /= b.z;
- return a;
+ return a = a / b;
}
__device_inline float3 operator/=(float3& a, float f)
{
float invf = 1.0f/f;
- a.x *= invf;
- a.y *= invf;
- a.z *= invf;
- return a;
+ return a = a * invf;
}
__device_inline float dot(const float3 a, const float3 b)
@@ -506,7 +451,11 @@ __device_inline float3 normalize_len(const float3 a, float *t)
__device_inline bool operator==(const float3 a, const float3 b)
{
+#ifdef __KERNEL_SSE__
+ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
+#else
return (a.x == b.x && a.y == b.y && a.z == b.z);
+#endif
}
__device_inline bool operator!=(const float3 a, const float3 b)
@@ -516,14 +465,20 @@ __device_inline bool operator!=(const float3 a, const float3 b)
__device_inline float3 min(float3 a, float3 b)
{
- float3 r = make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
- return r;
+#ifdef __KERNEL_SSE__
+ return _mm_min_ps(a.m128, b.m128);
+#else
+ return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
+#endif
}
__device_inline float3 max(float3 a, float3 b)
{
- float3 r = make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
- return r;
+#ifdef __KERNEL_SSE__
+ return _mm_max_ps(a.m128, b.m128);
+#else
+ return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
+#endif
}
__device_inline float3 clamp(float3 a, float3 mn, float3 mx)
@@ -533,7 +488,12 @@ __device_inline float3 clamp(float3 a, float3 mn, float3 mx)
__device_inline float3 fabs(float3 a)
{
+#ifdef __KERNEL_SSE__
+ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+ return _mm_and_ps(a.m128, mask);
+#else
return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
+#endif
}
#endif
@@ -555,6 +515,16 @@ __device_inline void print_float3(const char *label, const float3& a)
printf("%s: %.8f %.8f %.8f\n", label, a.x, a.y, a.z);
}
+__device_inline float3 rcp(const float3& a)
+{
+#ifdef __KERNEL_SSE__
+ float4 r = _mm_rcp_ps(a.m128);
+ return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+#else
+ return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z);
+#endif
+}
+
#endif
__device_inline float3 interp(float3 a, float3 b, float t)
@@ -562,122 +532,257 @@ __device_inline float3 interp(float3 a, float3 b, float t)
return a + t*(b - a);
}
+__device_inline bool is_zero(const float3 a)
+{
+#ifdef __KERNEL_SSE__
+ return a == make_float3(0.0f);
+#else
+ return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f);
+#endif
+}
+
+__device_inline float reduce_add(const float3& a)
+{
+#ifdef __KERNEL_SSE__
+ return (a.x + a.y + a.z);
+#else
+ return (a.x + a.y + a.z);
+#endif
+}
+
+__device_inline float average(const float3 a)
+{
+ return reduce_add(a)*(1.0f/3.0f);
+}
+
/* Float4 Vector */
-#ifndef __KERNEL_OPENCL__
+#ifdef __KERNEL_SSE__
-__device_inline bool is_zero(const float4& a)
+template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> __forceinline const float4 shuffle(const float4& b)
{
- return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+ return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)));
}
-__device_inline float average(const float4& a)
+template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b)
{
- return (a.x + a.y + a.z + a.w)*(1.0f/4.0f);
+ return _mm_moveldup_ps(b);
}
+template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b)
+{
+ return _mm_movehdup_ps(b);
+}
+
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b)
+{
+ return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)));
+}
+
+#endif
+
+#ifndef __KERNEL_OPENCL__
+
__device_inline float4 operator-(const float4& a)
{
- float4 r = {-a.x, -a.y, -a.z, -a.w};
- return r;
+#ifdef __KERNEL_SSE__
+ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+ return _mm_xor_ps(a.m128, mask);
+#else
+ return make_float4(-a.x, -a.y, -a.z, -a.w);
+#endif
}
__device_inline float4 operator*(const float4& a, const float4& b)
{
- float4 r = {a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w};
- return r;
+#ifdef __KERNEL_SSE__
+ return _mm_mul_ps(a.m128, b.m128);
+#else
+ return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+#endif
}
__device_inline float4 operator*(const float4& a, float f)
{
- float4 r = {a.x*f, a.y*f, a.z*f, a.w*f};
- return r;
+#ifdef __KERNEL_SSE__
+ return a * make_float4(f);
+#else
+ return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
+#endif
}
__device_inline float4 operator*(float f, const float4& a)
{
- float4 r = {a.x*f, a.y*f, a.z*f, a.w*f};
- return r;
+ return a * f;
+}
+
+__device_inline float4 rcp(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+ float4 r = _mm_rcp_ps(a.m128);
+ return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+#else
+ return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w);
+#endif
}
__device_inline float4 operator/(const float4& a, float f)
{
- float invf = 1.0f/f;
- float4 r = {a.x*invf, a.y*invf, a.z*invf, a.w*invf};
- return r;
+ return a * (1.0f/f);
}
__device_inline float4 operator/(const float4& a, const float4& b)
{
- float4 r = {a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w};
- return r;
+#ifdef __KERNEL_SSE__
+ return a * rcp(b);
+#else
+ return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+#endif
+
}
__device_inline float4 operator+(const float4& a, const float4& b)
{
- float4 r = {a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w};
- return r;
+#ifdef __KERNEL_SSE__
+ return _mm_add_ps(a.m128, b.m128);
+#else
+ return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+#endif
}
__device_inline float4 operator-(const float4& a, const float4& b)
{
- float4 r = {a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w};
- return r;
+#ifdef __KERNEL_SSE__
+ return _mm_sub_ps(a.m128, b.m128);
+#else
+ return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+#endif
}
__device_inline float4 operator+=(float4& a, const float4& b)
{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
- a.w += b.w;
- return a;
+ return a = a + b;
}
__device_inline float4 operator*=(float4& a, const float4& b)
{
- a.x *= b.x;
- a.y *= b.y;
- a.z *= b.z;
- a.w *= b.w;
- return a;
+ return a = a * b;
}
__device_inline float4 operator/=(float4& a, float f)
{
- float invf = 1.0f/f;
- a.x *= invf;
- a.y *= invf;
- a.z *= invf;
- a.w *= invf;
- return a;
+ return a = a / f;
}
-__device_inline float dot(const float4& a, const float4& b)
+__device_inline int4 operator<(const float4& a, const float4& b)
{
- return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
+#ifdef __KERNEL_SSE__
+ return _mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128)); /* todo: avoid cvt */
+#else
+ return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
+#endif
+}
+
+__device_inline int4 operator>=(float4 a, float4 b)
+{
+#ifdef __KERNEL_SSE__
+ return _mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)); /* todo: avoid cvt */
+#else
+ return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
+#endif
+}
+
+__device_inline int4 operator<=(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+ return _mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128)); /* todo: avoid cvt */
+#else
+ return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w);
+#endif
+}
+
+__device_inline bool operator==(const float4 a, const float4 b)
+{
+#ifdef __KERNEL_SSE__
+ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
+#else
+ return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w);
+#endif
}
__device_inline float4 cross(const float4& a, const float4& b)
{
- float4 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f};
- return r;
+#ifdef __KERNEL_SSE__
+ return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b));
+#else
+ return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f);
+#endif
}
__device_inline float4 min(float4 a, float4 b)
{
+#ifdef __KERNEL_SSE__
+ return _mm_min_ps(a.m128, b.m128);
+#else
return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
+#endif
}
__device_inline float4 max(float4 a, float4 b)
{
+#ifdef __KERNEL_SSE__
+ return _mm_max_ps(a.m128, b.m128);
+#else
return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
+#endif
}
#endif
#ifndef __KERNEL_GPU__
+__device_inline float4 select(const int4& mask, const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+ /* blendv is sse4, and apparently broken on vs2008 */
+ return _mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b)); /* todo: avoid cvt */
+#else
+ return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w);
+#endif
+}
+
+__device_inline float4 reduce_min(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+ float4 h = min(shuffle<1,0,3,2>(a), a);
+ return min(shuffle<2,3,0,1>(h), h);
+#else
+ return make_float4(min(min(a.x, a.y), min(a.z, a.w)));
+#endif
+}
+
+__device_inline float4 reduce_max(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+ float4 h = max(shuffle<1,0,3,2>(a), a);
+ return max(shuffle<2,3,0,1>(h), h);
+#else
+ return make_float4(max(max(a.x, a.y), max(a.z, a.w)));
+#endif
+}
+
+#if 0
+__device_inline float4 reduce_add(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+ float4 h = shuffle<1,0,3,2>(a) + a;
+ return shuffle<2,3,0,1>(h) + h;
+#else
+ return make_float4((a.x + a.y) + (a.z + a.w));
+#endif
+}
+#endif
+
__device_inline void print_float4(const char *label, const float4& a)
{
printf("%s: %.8f %.8f %.8f %.8f\n", label, a.x, a.y, a.z, a.w);
@@ -685,26 +790,77 @@ __device_inline void print_float4(const char *label, const float4& a)
#endif
+#ifndef __KERNEL_OPENCL__
+
+__device_inline bool is_zero(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+ return a == make_float4(0.0f);
+#else
+ return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+#endif
+}
+
+__device_inline float reduce_add(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+ float4 h = shuffle<1,0,3,2>(a) + a;
+ return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */
+#else
+ return ((a.x + a.y) + (a.z + a.w));
+#endif
+}
+
+__device_inline float average(const float4& a)
+{
+ return reduce_add(a) * 0.25f;
+}
+
+__device_inline float dot(const float4& a, const float4& b)
+{
+ return reduce_add(a * b);
+}
+
+#endif
+
/* Int3 */
#ifndef __KERNEL_OPENCL__
+__device_inline int3 min(int3 a, int3 b)
+{
+#ifdef __KERNEL_SSE__
+ return _mm_min_epi32(a.m128, b.m128);
+#else
+ return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
+#endif
+}
+
__device_inline int3 max(int3 a, int3 b)
{
- int3 r = {max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)};
- return r;
+#ifdef __KERNEL_SSE__
+ return _mm_max_epi32(a.m128, b.m128);
+#else
+ return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
+#endif
}
__device_inline int3 clamp(const int3& a, int mn, int mx)
{
- int3 r = {clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)};
- return r;
+#ifdef __KERNEL_SSE__
+ return min(max(a, make_int3(mn)), make_int3(mx));
+#else
+ return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx));
+#endif
}
__device_inline int3 clamp(const int3& a, int3& mn, int mx)
{
- int3 r = {clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)};
- return r;
+#ifdef __KERNEL_SSE__
+ return min(max(a, mn), make_int3(mx));
+#else
+ return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx));
+#endif
}
#endif
@@ -720,16 +876,63 @@ __device_inline void print_int3(const char *label, const int3& a)
/* Int4 */
-#ifndef __KERNEL_OPENCL__
+#ifndef __KERNEL_GPU__
-__device_inline int4 operator>=(float4 a, float4 b)
+__device_inline int4 operator+(const int4& a, const int4& b)
{
- return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
+#ifdef __KERNEL_SSE__
+ return _mm_add_epi32(a.m128, b.m128);
+#else
+ return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+#endif
+}
+
+__device_inline int4 operator+=(int4& a, const int4& b)
+{
+ return a = a + b;
}
+__device_inline int4 operator>>(const int4& a, int i)
+{
+#ifdef __KERNEL_SSE__
+ return _mm_srai_epi32(a.m128, i);
+#else
+ return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
#endif
+}
-#ifndef __KERNEL_GPU__
+__device_inline int4 min(int4 a, int4 b)
+{
+#ifdef __KERNEL_SSE__
+ return _mm_min_epi32(a.m128, b.m128);
+#else
+ return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
+#endif
+}
+
+__device_inline int4 max(int4 a, int4 b)
+{
+#ifdef __KERNEL_SSE__
+ return _mm_max_epi32(a.m128, b.m128);
+#else
+ return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
+#endif
+}
+
+__device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx)
+{
+ return min(max(a, mn), mx);
+}
+
+__device_inline int4 select(const int4& mask, const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ __m128 m = _mm_cvtepi32_ps(mask);
+ return _mm_castps_si128(_mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b)))); /* todo: avoid cvt */
+#else
+ return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w);
+#endif
+}
__device_inline void print_int4(const char *label, const int4& a)
{
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
new file mode 100644
index 00000000000..6da9a70ec0c
--- /dev/null
+++ b/intern/cycles/util/util_task.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "util_debug.h"
+#include "util_foreach.h"
+#include "util_system.h"
+#include "util_task.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Task Pool */
+
+TaskPool::TaskPool(const TaskRunFunction& run_)
+{
+ num = 0;
+ num_done = 0;
+
+ do_cancel = false;
+
+ run = run_;
+}
+
+TaskPool::~TaskPool()
+{
+ stop();
+}
+
+void TaskPool::push(Task *task, bool front)
+{
+ TaskScheduler::Entry entry;
+
+ entry.task = task;
+ entry.pool = this;
+
+ TaskScheduler::push(entry, front);
+}
+
+void TaskPool::wait()
+{
+ thread_scoped_lock lock(done_mutex);
+
+ while(num_done != num)
+ done_cond.wait(lock);
+}
+
+void TaskPool::cancel()
+{
+ TaskScheduler::clear(this);
+
+ do_cancel = true;
+ wait();
+ do_cancel = false;
+}
+
+void TaskPool::stop()
+{
+ TaskScheduler::clear(this);
+
+ assert(num_done == num);
+}
+
+bool TaskPool::cancelled()
+{
+ return do_cancel;
+}
+
+/* Add `done` to the number of finished tasks of this pool and wake up every
+ * thread blocked in wait().
+ *
+ * The counter update, the sanity check and the notification are all done
+ * while done_mutex is held. A thread in wait() needs that mutex to re-test
+ * its predicate, so it can only return after the notification was issued,
+ * and the check never reads num_done while another thread is changing it. */
+void TaskPool::done_increase(int done)
+{
+	thread_scoped_lock lock(done_mutex);
+
+	num_done += done;
+	assert(num_done <= num);
+
+	done_cond.notify_all();
+}
+
+/* Task Scheduler */
+
+thread_mutex TaskScheduler::mutex;
+int TaskScheduler::users = 0;
+vector<thread*> TaskScheduler::threads;
+volatile bool TaskScheduler::do_exit = false;
+
+list<TaskScheduler::Entry> TaskScheduler::queue;
+thread_mutex TaskScheduler::queue_mutex;
+thread_condition_variable TaskScheduler::queue_cond;
+
+void TaskScheduler::init(int num_threads)
+{
+ thread_scoped_lock lock(mutex);
+
+ /* multiple cycles instances can use this task scheduler, sharing the same
+ threads, so we keep track of the number of users. */
+ if(users == 0) {
+ do_exit = false;
+
+ /* launch threads that will be waiting for work */
+ if(num_threads == 0)
+ num_threads = system_cpu_thread_count();
+
+ threads.resize(num_threads);
+
+ for(size_t i = 0; i < threads.size(); i++)
+ threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i));
+ }
+
+ users++;
+}
+
+/* Unregister one user of the shared scheduler. When the last user exits, the
+ * worker threads are asked to stop, joined and deleted. */
+void TaskScheduler::exit()
+{
+	thread_scoped_lock lock(mutex);
+
+	users--;
+
+	if(users == 0) {
+		/* stop all waiting threads; do_exit is changed while holding
+		 * queue_mutex because thread_wait_pop() tests the flag under that
+		 * mutex right before waiting. Without the lock a worker could test
+		 * the flag, miss the notification below and then sleep forever,
+		 * which would hang the join. */
+		{
+			thread_scoped_lock queue_lock(queue_mutex);
+			do_exit = true;
+		}
+
+		TaskScheduler::queue_cond.notify_all();
+
+		/* delete threads */
+		foreach(thread *t, threads) {
+			t->join();
+			delete t;
+		}
+
+		threads.clear();
+	}
+}
+
+bool TaskScheduler::thread_wait_pop(Entry& entry)
+{
+ thread_scoped_lock lock(queue_mutex);
+
+ while(queue.empty() && !do_exit)
+ queue_cond.wait(lock);
+
+ if(queue.empty()) {
+ assert(do_exit);
+ return false;
+ }
+
+ entry = queue.front();
+ queue.pop_front();
+
+ return true;
+}
+
+void TaskScheduler::thread_run(int thread_id)
+{
+ Entry entry;
+
+ /* todo: test affinity/denormal mask */
+
+ /* keep popping off tasks */
+ while(thread_wait_pop(entry)) {
+ /* run task */
+ entry.pool->run(entry.task, thread_id);
+
+ /* delete task */
+ delete entry.task;
+
+ /* notify pool task was done */
+ entry.pool->done_increase(1);
+ }
+}
+
+void TaskScheduler::push(Entry& entry, bool front)
+{
+ /* add entry to queue */
+ TaskScheduler::queue_mutex.lock();
+ if(front)
+ TaskScheduler::queue.push_front(entry);
+ else
+ TaskScheduler::queue.push_back(entry);
+ entry.pool->num++;
+ TaskScheduler::queue_mutex.unlock();
+
+ TaskScheduler::queue_cond.notify_one();
+}
+
+void TaskScheduler::clear(TaskPool *pool)
+{
+ thread_scoped_lock lock(TaskScheduler::queue_mutex);
+
+ /* erase all tasks from this pool from the queue */
+ list<TaskScheduler::Entry>::iterator it = TaskScheduler::queue.begin();
+ int done = 0;
+
+ while(it != TaskScheduler::queue.end()) {
+ TaskScheduler::Entry& entry = *it;
+
+ if(entry.pool == pool) {
+ done++;
+ delete entry.task;
+
+ it = TaskScheduler::queue.erase(it);
+ }
+ else
+ it++;
+ }
+
+ /* notify done */
+ pool->done_increase(done);
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h
new file mode 100644
index 00000000000..acdb2cb50a2
--- /dev/null
+++ b/intern/cycles/util/util_task.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_TASK_H__
+#define __UTIL_TASK_H__
+
+#include "util_list.h"
+#include "util_thread.h"
+#include "util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Task;
+class TaskPool;
+class TaskScheduler;
+
+typedef boost::function<void(Task*,int)> TaskRunFunction;
+
+/* Task
+ *
+ * Base class for tasks to be executed in threads. */
+
+class Task
+{
+public:
+ Task() {};
+ virtual ~Task() {}
+};
+
+/* Task Pool
+ *
+ * Pool of tasks that will be executed by the central TaskScheduler. For each
+ * pool, we can wait for all tasks to be done, or cancel them before they are
+ * done.
+ *
+ * The run callback that actually executes the task may be created like this:
+ * function_bind(&MyClass::task_execute, this, _1, _2) */
+
+class TaskPool
+{
+public:
+ TaskPool(const TaskRunFunction& run);
+ ~TaskPool();
+
+ void push(Task *task, bool front = false);
+
+ void wait(); /* wait until all tasks are done */
+ void cancel(); /* cancel all tasks, keep worker threads running */
+ void stop(); /* stop all worker threads */
+
+ bool cancelled(); /* for worker threads, test if cancelled */
+
+protected:
+ friend class TaskScheduler;
+
+ void done_increase(int done);
+
+ TaskRunFunction run;
+
+ thread_mutex done_mutex;
+ thread_condition_variable done_cond;
+
+ volatile int num, num_done;
+ volatile bool do_cancel;
+};
+
+/* Task Scheduler
+ *
+ * Central scheduler that holds running threads ready to execute tasks. A single
+ * queue holds the tasks from all pools. */
+
+class TaskScheduler
+{
+public:
+ static void init(int num_threads = 0);
+ static void exit();
+
+ static int num_threads() { return threads.size(); }
+
+protected:
+ friend class TaskPool;
+
+ struct Entry {
+ Task *task;
+ TaskPool *pool;
+ };
+
+ static thread_mutex mutex;
+ static int users;
+ static vector<thread*> threads;
+ static volatile bool do_exit;
+
+ static list<Entry> queue;
+ static thread_mutex queue_mutex;
+ static thread_condition_variable queue_cond;
+
+ static void thread_run(int thread_id);
+ static bool thread_wait_pop(Entry& entry);
+
+ static void push(Entry& entry, bool front);
+ static void clear(TaskPool *pool);
+};
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index 6836be203f5..3d15b342fe5 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -69,133 +69,6 @@ protected:
bool joined;
};
-/* Thread Safe Queue to pass tasks from one thread to another. Tasks should be
- * pushed into the queue, while the worker thread waits to pop the next task
- * off the queue. Once all tasks are into the queue, calling stop() will stop
- * the worker threads from waiting for more tasks once all tasks are done. */
-
-template<typename T> class ThreadQueue
-{
-public:
- ThreadQueue()
- {
- tot = 0;
- tot_done = 0;
- do_stop = false;
- do_cancel = false;
- }
-
- /* Main thread functions */
-
- /* push a task to be executed */
- void push(const T& value)
- {
- thread_scoped_lock lock(queue_mutex);
- queue.push(value);
- tot++;
- lock.unlock();
-
- queue_cond.notify_one();
- }
-
- /* wait until all tasks are done */
- void wait_done()
- {
- thread_scoped_lock lock(done_mutex);
-
- while(tot_done != tot)
- done_cond.wait(lock);
- }
-
- /* stop all worker threads */
- void stop()
- {
- clear();
- do_stop = true;
- queue_cond.notify_all();
- }
-
- /* cancel all tasks, but keep worker threads running */
- void cancel()
- {
- clear();
- do_cancel = true;
- wait_done();
- do_cancel = false;
- }
-
- /* Worker thread functions
- *
- * while(queue.worker_wait_pop(task)) {
- * for(..) {
- * ... do work ...
- *
- * if(queue.worker_cancel())
- * break;
- * }
- *
- * queue.worker_done();
- * }
- */
-
- bool worker_wait_pop(T& value)
- {
- thread_scoped_lock lock(queue_mutex);
-
- while(queue.empty() && !do_stop)
- queue_cond.wait(lock);
-
- if(queue.empty())
- return false;
-
- value = queue.front();
- queue.pop();
-
- return true;
- }
-
- void worker_done()
- {
- thread_scoped_lock lock(done_mutex);
- tot_done++;
- lock.unlock();
-
- assert(tot_done <= tot);
-
- done_cond.notify_all();
- }
-
- bool worker_cancel()
- {
- return do_cancel;
- }
-
-protected:
- void clear()
- {
- thread_scoped_lock lock(queue_mutex);
-
- while(!queue.empty()) {
- thread_scoped_lock done_lock(done_mutex);
- tot_done++;
- done_lock.unlock();
-
- queue.pop();
- }
-
- done_cond.notify_all();
- }
-
- std::queue<T> queue;
- thread_mutex queue_mutex;
- thread_mutex done_mutex;
- thread_condition_variable queue_cond;
- thread_condition_variable done_cond;
- volatile bool do_stop;
- volatile bool do_cancel;
- volatile int tot, tot_done;
-};
-
/* Thread Local Storage
*
* Boost implementation is a bit slow, and Mac OS X __thread is not supported
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 61bc36ae888..0fd26825911 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -129,23 +129,26 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
Transform transform_inverse(const Transform& tfm)
{
- union { Transform T; float M[4][4]; } R, M;
-
- R.T = transform_identity();
- M.T = tfm;
+ Transform tfmR = transform_identity();
+ float M[4][4], R[4][4];
- if(!transform_matrix4_gj_inverse(R.M, M.M)) {
+ memcpy(R, &tfmR, sizeof(R));
+ memcpy(M, &tfm, sizeof(M));
+
+ if(!transform_matrix4_gj_inverse(R, M)) {
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
never be in this situation, but try to invert it anyway with tweak */
- M.M[0][0] += 1e-8f;
- M.M[1][1] += 1e-8f;
- M.M[2][2] += 1e-8f;
+ M[0][0] += 1e-8f;
+ M[1][1] += 1e-8f;
+ M[2][2] += 1e-8f;
- if(!transform_matrix4_gj_inverse(R.M, M.M))
+ if(!transform_matrix4_gj_inverse(R, M))
return transform_identity();
}
- return R.T;
+ memcpy(&tfmR, R, sizeof(R));
+
+ return tfmR;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index efdda98571a..cf167707e47 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -36,23 +36,37 @@
#define __shared
#define __constant
-#ifdef __GNUC__
-#define __device_inline static inline __attribute__((always_inline))
-#else
+#ifdef _WIN32
#define __device_inline static __forceinline
+#define __align(...) __declspec(align(__VA_ARGS__))
+#else
+#define __device_inline static inline __attribute__((always_inline))
+#define __forceinline inline __attribute__((always_inline))
+#define __align(...) __attribute__((aligned(__VA_ARGS__)))
#endif
#endif
+/* Bitness */
+
+#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
+#define __KERNEL_64_BIT__
+#endif
+
/* SIMD Types */
-/* not needed yet, will be for qbvh
-#ifndef __KERNEL_GPU__
+/* not enabled, globally applying it just gives slowdown,
+ * but useful for testing. */
+//#define __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
-#include <emmintrin.h>
-#include <xmmintrin.h>
+#include <xmmintrin.h> /* SSE 1 */
+#include <emmintrin.h> /* SSE 2 */
+#include <pmmintrin.h> /* SSE 3 */
+#include <tmmintrin.h> /* SSE 3 */
+#include <smmintrin.h> /* SSE 4 */
-#endif*/
+#endif
#ifndef _WIN32
#ifndef __KERNEL_GPU__
@@ -97,6 +111,12 @@ typedef unsigned int uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
+#ifdef __KERNEL_64_BIT__
+typedef int64_t ssize_t;
+#else
+typedef int32_t ssize_t;
+#endif
+
#endif
/* Generic Memory Pointer */
@@ -108,89 +128,137 @@ typedef uint64_t device_ptr;
struct uchar2 {
uchar x, y;
- uchar operator[](int i) const { return *(&x + i); }
- uchar& operator[](int i) { return *(&x + i); }
+ __forceinline uchar operator[](int i) const { return *(&x + i); }
+ __forceinline uchar& operator[](int i) { return *(&x + i); }
};
struct uchar3 {
uchar x, y, z;
- uchar operator[](int i) const { return *(&x + i); }
- uchar& operator[](int i) { return *(&x + i); }
+ __forceinline uchar operator[](int i) const { return *(&x + i); }
+ __forceinline uchar& operator[](int i) { return *(&x + i); }
};
struct uchar4 {
uchar x, y, z, w;
- uchar operator[](int i) const { return *(&x + i); }
- uchar& operator[](int i) { return *(&x + i); }
+ __forceinline uchar operator[](int i) const { return *(&x + i); }
+ __forceinline uchar& operator[](int i) { return *(&x + i); }
};
struct int2 {
int x, y;
- int operator[](int i) const { return *(&x + i); }
- int& operator[](int i) { return *(&x + i); }
+ __forceinline int operator[](int i) const { return *(&x + i); }
+ __forceinline int& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) int3 {
+ union {
+ __m128i m128;
+ struct { int x, y, z, w; };
+ };
+
+ __forceinline int3() {}
+ __forceinline int3(const __m128i a) : m128(a) {}
+ __forceinline operator const __m128i&(void) const { return m128; }
+ __forceinline operator __m128i&(void) { return m128; }
+#else
struct int3 {
- int x, y, z;
+ int x, y, z, w;
+#endif
- int operator[](int i) const { return *(&x + i); }
- int& operator[](int i) { return *(&x + i); }
+ __forceinline int operator[](int i) const { return *(&x + i); }
+ __forceinline int& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) int4 {
+ union {
+ __m128i m128;
+ struct { int x, y, z, w; };
+ };
+
+ __forceinline int4() {}
+ __forceinline int4(const __m128i a) : m128(a) {}
+ __forceinline operator const __m128i&(void) const { return m128; }
+ __forceinline operator __m128i&(void) { return m128; }
+#else
struct int4 {
int x, y, z, w;
+#endif
- int operator[](int i) const { return *(&x + i); }
- int& operator[](int i) { return *(&x + i); }
+ __forceinline int operator[](int i) const { return *(&x + i); }
+ __forceinline int& operator[](int i) { return *(&x + i); }
};
struct uint2 {
uint x, y;
- uint operator[](int i) const { return *(&x + i); }
- uint& operator[](int i) { return *(&x + i); }
+ __forceinline uint operator[](uint i) const { return *(&x + i); }
+ __forceinline uint& operator[](uint i) { return *(&x + i); }
};
struct uint3 {
uint x, y, z;
- uint operator[](int i) const { return *(&x + i); }
- uint& operator[](int i) { return *(&x + i); }
+ __forceinline uint operator[](uint i) const { return *(&x + i); }
+ __forceinline uint& operator[](uint i) { return *(&x + i); }
};
struct uint4 {
uint x, y, z, w;
- uint operator[](int i) const { return *(&x + i); }
- uint& operator[](int i) { return *(&x + i); }
+ __forceinline uint operator[](uint i) const { return *(&x + i); }
+ __forceinline uint& operator[](uint i) { return *(&x + i); }
};
struct float2 {
float x, y;
- float operator[](int i) const { return *(&x + i); }
- float& operator[](int i) { return *(&x + i); }
+ __forceinline float operator[](int i) const { return *(&x + i); }
+ __forceinline float& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) float3 {
+ union {
+ __m128 m128;
+ struct { float x, y, z, w; };
+ };
+
+ __forceinline float3() {}
+ __forceinline float3(const __m128 a) : m128(a) {}
+ __forceinline operator const __m128&(void) const { return m128; }
+ __forceinline operator __m128&(void) { return m128; }
+#else
struct float3 {
- float x, y, z;
-
-#ifdef WITH_OPENCL
- float w;
+ float x, y, z, w;
#endif
- float operator[](int i) const { return *(&x + i); }
- float& operator[](int i) { return *(&x + i); }
+ __forceinline float operator[](int i) const { return *(&x + i); }
+ __forceinline float& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) float4 {
+ union {
+ __m128 m128;
+ struct { float x, y, z, w; };
+ };
+
+ __forceinline float4() {}
+ __forceinline float4(const __m128 a) : m128(a) {}
+ __forceinline operator const __m128&(void) const { return m128; }
+ __forceinline operator __m128&(void) { return m128; }
+#else
struct float4 {
float x, y, z, w;
+#endif
- float operator[](int i) const { return *(&x + i); }
- float& operator[](int i) { return *(&x + i); }
+ __forceinline float operator[](int i) const { return *(&x + i); }
+ __forceinline float& operator[](int i) { return *(&x + i); }
};
#endif
@@ -201,87 +269,179 @@ struct float4 {
*
* OpenCL does not support C++ class, so we use these instead. */
-__device uchar2 make_uchar2(uchar x, uchar y)
+__device_inline uchar2 make_uchar2(uchar x, uchar y)
{
uchar2 a = {x, y};
return a;
}
-__device uchar3 make_uchar3(uchar x, uchar y, uchar z)
+__device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z)
{
uchar3 a = {x, y, z};
return a;
}
-__device uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
+__device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
{
uchar4 a = {x, y, z, w};
return a;
}
-__device int2 make_int2(int x, int y)
+__device_inline int2 make_int2(int x, int y)
{
int2 a = {x, y};
return a;
}
-__device int3 make_int3(int x, int y, int z)
+__device_inline int3 make_int3(int x, int y, int z)
{
- int3 a = {x, y, z};
+#ifdef __KERNEL_SSE__
+ int3 a;
+ a.m128 = _mm_set_epi32(0, z, y, x);
+#else
+ int3 a = {x, y, z, 0};
+#endif
+
return a;
}
-__device int4 make_int4(int x, int y, int z, int w)
+__device_inline int4 make_int4(int x, int y, int z, int w)
{
+#ifdef __KERNEL_SSE__
+ int4 a;
+ a.m128 = _mm_set_epi32(w, z, y, x);
+#else
int4 a = {x, y, z, w};
+#endif
+
return a;
}
-__device uint2 make_uint2(uint x, uint y)
+__device_inline uint2 make_uint2(uint x, uint y)
{
uint2 a = {x, y};
return a;
}
-__device uint3 make_uint3(uint x, uint y, uint z)
+__device_inline uint3 make_uint3(uint x, uint y, uint z)
{
uint3 a = {x, y, z};
return a;
}
-__device uint4 make_uint4(uint x, uint y, uint z, uint w)
+__device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
{
uint4 a = {x, y, z, w};
return a;
}
-__device float2 make_float2(float x, float y)
+__device_inline float2 make_float2(float x, float y)
{
float2 a = {x, y};
return a;
}
-__device float3 make_float3(float x, float y, float z)
+__device_inline float3 make_float3(float x, float y, float z)
{
-#ifdef WITH_OPENCL
- float3 a = {x, y, z, 0.0f};
+#ifdef __KERNEL_SSE__
+ float3 a;
+ a.m128 = _mm_set_ps(0.0f, z, y, x);
#else
- float3 a = {x, y, z};
+ float3 a = {x, y, z, 0.0f};
#endif
+
return a;
}
-__device float4 make_float4(float x, float y, float z, float w)
+__device_inline float4 make_float4(float x, float y, float z, float w)
{
+#ifdef __KERNEL_SSE__
+ float4 a;
+ a.m128 = _mm_set_ps(w, z, y, x);
+#else
float4 a = {x, y, z, w};
+#endif
+
return a;
}
-__device int align_up(int offset, int alignment)
+__device_inline int align_up(int offset, int alignment)
{
return (offset + alignment - 1) & ~(alignment - 1);
}
+__device_inline int3 make_int3(int i)
+{
+#ifdef __KERNEL_SSE__
+ int3 a;
+ a.m128 = _mm_set1_epi32(i);
+#else
+ int3 a = {i, i, i, i};
+#endif
+
+ return a;
+}
+
+__device_inline int4 make_int4(int i)
+{
+#ifdef __KERNEL_SSE__
+ int4 a;
+ a.m128 = _mm_set1_epi32(i);
+#else
+ int4 a = {i, i, i, i};
+#endif
+
+ return a;
+}
+
+__device_inline float3 make_float3(float f)
+{
+#ifdef __KERNEL_SSE__
+ float3 a;
+ a.m128 = _mm_set1_ps(f);
+#else
+ float3 a = {f, f, f, f};
+#endif
+
+ return a;
+}
+
+__device_inline float4 make_float4(float f)
+{
+#ifdef __KERNEL_SSE__
+ float4 a;
+ a.m128 = _mm_set1_ps(f);
+#else
+ float4 a = {f, f, f, f};
+#endif
+
+ return a;
+}
+
+__device_inline float4 make_float4(const int4& i)
+{
+#ifdef __KERNEL_SSE__
+ float4 a;
+ a.m128 = _mm_cvtepi32_ps(i.m128);
+#else
+ float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
+#endif
+
+ return a;
+}
+
+__device_inline int4 make_int4(const float3& f)
+{
+#ifdef __KERNEL_SSE__
+ int4 a;
+ a.m128 = _mm_cvtps_epi32(f.m128);
+#else
+ int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
+#endif
+
+ return a;
+}
+
#endif
CCL_NAMESPACE_END
diff --git a/intern/ghost/GHOST_Rect.h b/intern/ghost/GHOST_Rect.h
index bcbcaded364..30d9d16b701 100644
--- a/intern/ghost/GHOST_Rect.h
+++ b/intern/ghost/GHOST_Rect.h
@@ -241,8 +241,10 @@ inline void GHOST_Rect::wrapPoint(GHOST_TInt32 &x, GHOST_TInt32 &y, GHOST_TInt32
GHOST_TInt32 h= getHeight();
/* highly unlikely but avoid eternal loop */
- if(w-ofs*2 <= 0 || h-ofs*2 <= 0)
+ if (w-ofs*2 <= 0 || h-ofs*2 <= 0) {
return;
+ }
+
while(x-ofs < m_l) x+= w-(ofs*2);
while(y-ofs < m_t) y+= h-(ofs*2);
while(x+ofs > m_r) x-= w-(ofs*2);
diff --git a/intern/guardedalloc/intern/mallocn.c b/intern/guardedalloc/intern/mallocn.c
index 2a6a0df0ff4..bb3a1c66ddc 100644
--- a/intern/guardedalloc/intern/mallocn.c
+++ b/intern/guardedalloc/intern/mallocn.c
@@ -243,7 +243,7 @@ void *MEM_dupallocN(void *vmemh)
MemHead *memh= vmemh;
memh--;
- if(memh->mmap)
+ if (memh->mmap)
newp= MEM_mapallocN(memh->len, "dupli_mapalloc");
else
newp= MEM_mallocN(memh->len, "dupli_alloc");
@@ -265,8 +265,8 @@ void *MEM_reallocN(void *vmemh, size_t len)
memh--;
newp= MEM_mallocN(len, memh->name);
- if(newp) {
- if(len < memh->len)
+ if (newp) {
+ if (len < memh->len)
memcpy(newp, vmemh, len);
else
memcpy(newp, vmemh, memh->len);
@@ -311,14 +311,14 @@ void *MEM_mallocN(size_t len, const char *str)
memh= (MemHead *)malloc(len+sizeof(MemHead)+sizeof(MemTail));
- if(memh) {
+ if (memh) {
make_memhead_header(memh, len, str);
mem_unlock_thread();
- if(malloc_debug_memset && len)
+ if (malloc_debug_memset && len)
memset(memh+1, 255, len);
#ifdef DEBUG_MEMCOUNTER
- if(_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL)
+ if (_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL)
memcount_raise(__func__);
memh->_count= _mallocn_count++;
#endif
@@ -339,11 +339,11 @@ void *MEM_callocN(size_t len, const char *str)
memh= (MemHead *)calloc(len+sizeof(MemHead)+sizeof(MemTail),1);
- if(memh) {
+ if (memh) {
make_memhead_header(memh, len, str);
mem_unlock_thread();
#ifdef DEBUG_MEMCOUNTER
- if(_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL)
+ if (_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL)
memcount_raise(__func__);
memh->_count= _mallocn_count++;
#endif
@@ -366,14 +366,14 @@ void *MEM_mapallocN(size_t len, const char *str)
memh= mmap(NULL, len+sizeof(MemHead)+sizeof(MemTail),
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, 0);
- if(memh!=(MemHead *)-1) {
+ if (memh!=(MemHead *)-1) {
make_memhead_header(memh, len, str);
memh->mmap= 1;
mmap_in_use += len;
peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem;
mem_unlock_thread();
#ifdef DEBUG_MEMCOUNTER
- if(_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL)
+ if (_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL)
memcount_raise(__func__);
memh->_count= _mallocn_count++;
#endif
@@ -406,9 +406,9 @@ static int compare_len(const void *p1, const void *p2)
const MemPrintBlock *pb1= (const MemPrintBlock*)p1;
const MemPrintBlock *pb2= (const MemPrintBlock*)p2;
- if(pb1->len < pb2->len)
+ if (pb1->len < pb2->len)
return 1;
- else if(pb1->len == pb2->len)
+ else if (pb1->len == pb2->len)
return 0;
else
return -1;
@@ -431,7 +431,7 @@ void MEM_printmemlist_stats(void)
membl = membase->first;
if (membl) membl = MEMNEXT(membl);
- while(membl) {
+ while (membl) {
pb->name= membl->name;
pb->len= membl->len;
pb->items= 1;
@@ -439,18 +439,18 @@ void MEM_printmemlist_stats(void)
totpb++;
pb++;
- if(membl->next)
+ if (membl->next)
membl= MEMNEXT(membl->next);
else break;
}
/* sort by name and add together blocks with the same name */
qsort(printblock, totpb, sizeof(MemPrintBlock), compare_name);
- for(a=0, b=0; a<totpb; a++) {
- if(a == b) {
+ for (a = 0, b=0; a<totpb; a++) {
+ if (a == b) {
continue;
}
- else if(strcmp(printblock[a].name, printblock[b].name) == 0) {
+ else if (strcmp(printblock[a].name, printblock[b].name) == 0) {
printblock[b].len += printblock[a].len;
printblock[b].items++;
}
@@ -465,7 +465,7 @@ void MEM_printmemlist_stats(void)
qsort(printblock, totpb, sizeof(MemPrintBlock), compare_len);
printf("\ntotal memory len: %.3f MB\n", (double)mem_in_use/(double)(1024*1024));
printf(" ITEMS TOTAL-MiB AVERAGE-KiB TYPE\n");
- for(a=0, pb=printblock; a<totpb; a++, pb++)
+ for (a = 0, pb=printblock; a<totpb; a++, pb++)
printf("%6d (%8.3f %8.3f) %s\n", pb->items, (double)pb->len/(double)(1024*1024), (double)pb->len/1024.0/(double)pb->items, pb->name);
free(printblock);
@@ -491,7 +491,7 @@ static void MEM_printmemlist_internal( int pydict )
print_error("# membase_debug.py\n");
print_error("membase = [\\\n");
}
- while(membl) {
+ while (membl) {
if (pydict) {
fprintf(stderr, "{'len':" SIZET_FORMAT ", 'name':'''%s''', 'pointer':'%p'},\\\n", SIZET_ARG(membl->len), membl->name, (void *)(membl+1));
} else {
@@ -501,7 +501,7 @@ static void MEM_printmemlist_internal( int pydict )
print_error("%s len: " SIZET_FORMAT " %p\n", membl->name, SIZET_ARG(membl->len), membl+1);
#endif
}
- if(membl->next)
+ if (membl->next)
membl= MEMNEXT(membl->next);
else break;
}
@@ -536,9 +536,9 @@ void MEM_callbackmemlist(void (*func)(void*)) {
membl = membase->first;
if (membl) membl = MEMNEXT(membl);
- while(membl) {
+ while (membl) {
func(membl+1);
- if(membl->next)
+ if (membl->next)
membl= MEMNEXT(membl->next);
else break;
}
@@ -554,13 +554,13 @@ short MEM_testN(void *vmemh) {
membl = membase->first;
if (membl) membl = MEMNEXT(membl);
- while(membl) {
+ while (membl) {
if (vmemh == membl+1) {
mem_unlock_thread();
return 1;
}
- if(membl->next)
+ if (membl->next)
membl= MEMNEXT(membl->next);
else break;
}
@@ -585,13 +585,13 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */
MemHead *memh= vmemh;
const char *name;
- if (memh == NULL){
+ if (memh == NULL) {
MemorY_ErroR("free","attempt to free NULL pointer");
/* print_error(err_stream, "%d\n", (memh+4000)->tag1); */
return(-1);
}
- if(sizeof(intptr_t)==8) {
+ if (sizeof(intptr_t)==8) {
if (((intptr_t) memh) & 0x7) {
MemorY_ErroR("free","attempt to free illegal pointer");
return(-1);
@@ -605,7 +605,7 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */
}
memh--;
- if(memh->tag1 == MEMFREE && memh->tag2 == MEMFREE) {
+ if (memh->tag1 == MEMFREE && memh->tag2 == MEMFREE) {
MemorY_ErroR(memh->name,"double free");
return(-1);
}
@@ -613,7 +613,7 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */
mem_lock_thread();
if ((memh->tag1 == MEMTAG1) && (memh->tag2 == MEMTAG2) && ((memh->len & 0x3) == 0)) {
memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + memh->len);
- if (memt->tag3 == MEMTAG3){
+ if (memt->tag3 == MEMTAG3) {
memh->tag1 = MEMFREE;
memh->tag2 = MEMFREE;
@@ -628,7 +628,7 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */
error = 2;
MemorY_ErroR(memh->name,"end corrupt");
name = check_memlist(memh);
- if (name != NULL){
+ if (name != NULL) {
if (name != memh->name) MemorY_ErroR(name,"is also corrupt");
}
} else{
@@ -694,13 +694,13 @@ static void rem_memblock(MemHead *memh)
totblock--;
mem_in_use -= memh->len;
- if(memh->mmap) {
+ if (memh->mmap) {
mmap_in_use -= memh->len;
if (munmap(memh, memh->len + sizeof(MemHead) + sizeof(MemTail)))
printf("Couldn't unmap memory %s\n", memh->name);
}
else {
- if(malloc_debug_memset && memh->len)
+ if (malloc_debug_memset && memh->len)
memset(memh+1, 255, memh->len);
free(memh);
}
@@ -723,7 +723,7 @@ static const char *check_memlist(MemHead *memh)
forw = membase->first;
if (forw) forw = MEMNEXT(forw);
forwok = NULL;
- while(forw){
+ while (forw) {
if (forw->tag1 != MEMTAG1 || forw->tag2 != MEMTAG2) break;
forwok = forw;
if (forw->next) forw = MEMNEXT(forw->next);
@@ -733,7 +733,7 @@ static const char *check_memlist(MemHead *memh)
back = (MemHead *) membase->last;
if (back) back = MEMNEXT(back);
backok = NULL;
- while(back){
+ while (back) {
if (back->tag1 != MEMTAG1 || back->tag2 != MEMTAG2) break;
backok = back;
if (back->prev) back = MEMNEXT(back->prev);
@@ -742,13 +742,13 @@ static const char *check_memlist(MemHead *memh)
if (forw != back) return ("MORE THAN 1 MEMORYBLOCK CORRUPT");
- if (forw == NULL && back == NULL){
+ if (forw == NULL && back == NULL) {
/* geen foute headers gevonden dan maar op zoek naar memblock*/
forw = membase->first;
if (forw) forw = MEMNEXT(forw);
forwok = NULL;
- while(forw){
+ while (forw) {
if (forw == memh) break;
if (forw->tag1 != MEMTAG1 || forw->tag2 != MEMTAG2) break;
forwok = forw;
@@ -760,7 +760,7 @@ static const char *check_memlist(MemHead *memh)
back = (MemHead *) membase->last;
if (back) back = MEMNEXT(back);
backok = NULL;
- while(back){
+ while (back) {
if (back == memh) break;
if (back->tag1 != MEMTAG1 || back->tag2 != MEMTAG2) break;
backok = back;
@@ -772,10 +772,10 @@ static const char *check_memlist(MemHead *memh)
if (forwok) name = forwok->nextname;
else name = "No name found";
- if (forw == memh){
+ if (forw == memh) {
/* voor alle zekerheid wordt dit block maar uit de lijst gehaald */
- if (forwok){
- if (backok){
+ if (forwok) {
+ if (backok) {
forwok->next = (MemHead *)&backok->next;
backok->prev = (MemHead *)&forwok->next;
forwok->nextname = backok->name;
@@ -785,7 +785,7 @@ static const char *check_memlist(MemHead *memh)
/* membase->last = (struct Link *) &forwok->next; */
}
} else{
- if (backok){
+ if (backok) {
backok->prev = NULL;
membase->first = &backok->next;
} else{
diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c
index 2036e601bcb..24c77c439a7 100644
--- a/intern/mikktspace/mikktspace.c
+++ b/intern/mikktspace/mikktspace.c
@@ -193,7 +193,7 @@ static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1)
// this if is important. Due to floating point precision
// averaging when ts0==ts1 will cause a slight difference
// which results in tangent space splits later on
- if(pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT &&
+ if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT &&
veq(pTS0->vOs,pTS1->vOs) && veq(pTS0->vOt, pTS1->vOt))
{
ts_res.fMagS = pTS0->fMagS;
@@ -207,8 +207,8 @@ static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1)
ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT);
ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs);
ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt);
- if( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs);
- if( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt);
+ if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs);
+ if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt);
}
return ts_res;
@@ -246,7 +246,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f);
// verify all call-backs have been set
- if( pContext->m_pInterface->m_getNumFaces==NULL ||
+ if ( pContext->m_pInterface->m_getNumFaces==NULL ||
pContext->m_pInterface->m_getNumVerticesOfFace==NULL ||
pContext->m_pInterface->m_getPosition==NULL ||
pContext->m_pInterface->m_getNormal==NULL ||
@@ -254,21 +254,21 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
return TFALSE;
// count triangles on supported faces
- for(f=0; f<iNrFaces; f++)
+ for (f=0; f<iNrFaces; f++)
{
const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
- if(verts==3) ++iNrTrianglesIn;
+ if (verts==3) ++iNrTrianglesIn;
else if(verts==4) iNrTrianglesIn += 2;
}
- if(iNrTrianglesIn<=0) return TFALSE;
+ if (iNrTrianglesIn<=0) return TFALSE;
// allocate memory for an index list
piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn);
pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn);
- if(piTriListIn==NULL || pTriInfos==NULL)
+ if (piTriListIn==NULL || pTriInfos==NULL)
{
- if(piTriListIn!=NULL) free(piTriListIn);
- if(pTriInfos!=NULL) free(pTriInfos);
+ if (piTriListIn!=NULL) free(piTriListIn);
+ if (pTriInfos!=NULL) free(pTriInfos);
return TFALSE;
}
@@ -283,7 +283,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
// Mark all degenerate triangles
iTotTris = iNrTrianglesIn;
iDegenTriangles = 0;
- for(t=0; t<iTotTris; t++)
+ for (t=0; t<iTotTris; t++)
{
const int i0 = piTriListIn[t*3+0];
const int i1 = piTriListIn[t*3+1];
@@ -291,7 +291,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
const SVec3 p0 = GetPosition(pContext, i0);
const SVec3 p1 = GetPosition(pContext, i1);
const SVec3 p2 = GetPosition(pContext, i2);
- if(veq(p0,p1) || veq(p0,p2) || veq(p1,p2)) // degenerate
+ if (veq(p0,p1) || veq(p0,p2) || veq(p1,p2)) // degenerate
{
pTriInfos[t].iFlag |= MARK_DEGENERATE;
++iDegenTriangles;
@@ -317,10 +317,10 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
iNrMaxGroups = iNrTrianglesIn*3;
pGroups = (SGroup *) malloc(sizeof(SGroup)*iNrMaxGroups);
piGroupTrianglesBuffer = (int *) malloc(sizeof(int)*iNrTrianglesIn*3);
- if(pGroups==NULL || piGroupTrianglesBuffer==NULL)
+ if (pGroups==NULL || piGroupTrianglesBuffer==NULL)
{
- if(pGroups!=NULL) free(pGroups);
- if(piGroupTrianglesBuffer!=NULL) free(piGroupTrianglesBuffer);
+ if (pGroups!=NULL) free(pGroups);
+ if (piGroupTrianglesBuffer!=NULL) free(piGroupTrianglesBuffer);
free(piTriListIn);
free(pTriInfos);
return TFALSE;
@@ -333,7 +333,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
//
psTspace = (STSpace *) malloc(sizeof(STSpace)*iNrTSPaces);
- if(psTspace==NULL)
+ if (psTspace==NULL)
{
free(piTriListIn);
free(pTriInfos);
@@ -342,7 +342,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
return TFALSE;
}
memset(psTspace, 0, sizeof(STSpace)*iNrTSPaces);
- for(t=0; t<iNrTSPaces; t++)
+ for (t=0; t<iNrTSPaces; t++)
{
psTspace[t].vOs.x=1.0f; psTspace[t].vOs.y=0.0f; psTspace[t].vOs.z=0.0f; psTspace[t].fMagS = 1.0f;
psTspace[t].vOt.x=0.0f; psTspace[t].vOt.y=1.0f; psTspace[t].vOt.z=0.0f; psTspace[t].fMagT = 1.0f;
@@ -359,7 +359,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
free(pGroups);
free(piGroupTrianglesBuffer);
- if(!bRes) // if an allocation in GenerateTSpaces() failed
+ if (!bRes) // if an allocation in GenerateTSpaces() failed
{
// clean up and return false
free(pTriInfos); free(piTriListIn); free(psTspace);
@@ -376,10 +376,10 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
free(pTriInfos); free(piTriListIn);
index = 0;
- for(f=0; f<iNrFaces; f++)
+ for (f=0; f<iNrFaces; f++)
{
const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
- if(verts!=3 && verts!=4) continue;
+ if (verts!=3 && verts!=4) continue;
// I've decided to let degenerate triangles and group-with-anythings
@@ -390,28 +390,28 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre
// (this is already the case for good triangles but not for
// degenerate ones and those with bGroupWithAnything==true)
bool bOrient = psTspace[index].bOrient;
- if(psTspace[index].iCounter == 0) // tspace was not derived from a group
+ if (psTspace[index].iCounter == 0) // tspace was not derived from a group
{
// look for a space created in GenerateTSpaces() by iCounter>0
bool bNotFound = true;
int i=1;
- while(i<verts && bNotFound)
+ while (i<verts && bNotFound)
{
- if(psTspace[index+i].iCounter > 0) bNotFound=false;
+ if (psTspace[index+i].iCounter > 0) bNotFound=false;
else ++i;
}
- if(!bNotFound) bOrient = psTspace[index+i].bOrient;
+ if (!bNotFound) bOrient = psTspace[index+i].bOrient;
}*/
// set data
- for(i=0; i<verts; i++)
+ for (i=0; i<verts; i++)
{
const STSpace * pTSpace = &psTspace[index];
float tang[] = {pTSpace->vOs.x, pTSpace->vOs.y, pTSpace->vOs.z};
float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z};
- if(pContext->m_pInterface->m_setTSpace!=NULL)
+ if (pContext->m_pInterface->m_setTSpace!=NULL)
pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i);
- if(pContext->m_pInterface->m_setTSpaceBasic!=NULL)
+ if (pContext->m_pInterface->m_setTSpaceBasic!=NULL)
pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i);
++index;
@@ -464,23 +464,23 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
int iMaxCount=0;
SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim;
float fMin, fMax;
- for(i=1; i<(iNrTrianglesIn*3); i++)
+ for (i=1; i<(iNrTrianglesIn*3); i++)
{
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
- if(vMin.x > vP.x) vMin.x = vP.x;
+ if (vMin.x > vP.x) vMin.x = vP.x;
else if(vMax.x < vP.x) vMax.x = vP.x;
- if(vMin.y > vP.y) vMin.y = vP.y;
+ if (vMin.y > vP.y) vMin.y = vP.y;
else if(vMax.y < vP.y) vMax.y = vP.y;
- if(vMin.z > vP.z) vMin.z = vP.z;
+ if (vMin.z > vP.z) vMin.z = vP.z;
else if(vMax.z < vP.z) vMax.z = vP.z;
}
vDim = vsub(vMax,vMin);
iChannel = 0;
fMin = vMin.x; fMax=vMax.x;
- if(vDim.y>vDim.x && vDim.y>vDim.z)
+ if (vDim.y>vDim.x && vDim.y>vDim.z)
{
iChannel=1;
fMin = vMin.y, fMax=vMax.y;
@@ -497,12 +497,12 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
piHashOffsets = (int *) malloc(sizeof(int)*g_iCells);
piHashCount2 = (int *) malloc(sizeof(int)*g_iCells);
- if(piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL)
+ if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL)
{
- if(piHashTable!=NULL) free(piHashTable);
- if(piHashCount!=NULL) free(piHashCount);
- if(piHashOffsets!=NULL) free(piHashOffsets);
- if(piHashCount2!=NULL) free(piHashCount2);
+ if (piHashTable!=NULL) free(piHashTable);
+ if (piHashCount!=NULL) free(piHashCount);
+ if (piHashOffsets!=NULL) free(piHashOffsets);
+ if (piHashCount2!=NULL) free(piHashCount2);
GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn);
return;
}
@@ -510,7 +510,7 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
memset(piHashCount2, 0, sizeof(int)*g_iCells);
// count amount of elements in each cell unit
- for(i=0; i<(iNrTrianglesIn*3); i++)
+ for (i=0; i<(iNrTrianglesIn*3); i++)
{
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
@@ -521,11 +521,11 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
// evaluate start index of each cell.
piHashOffsets[0]=0;
- for(k=1; k<g_iCells; k++)
+ for (k=1; k<g_iCells; k++)
piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1];
// insert vertices
- for(i=0; i<(iNrTrianglesIn*3); i++)
+ for (i=0; i<(iNrTrianglesIn*3); i++)
{
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
@@ -538,29 +538,29 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
pTable[piHashCount2[iCell]] = i; // vertex i has been inserted.
++piHashCount2[iCell];
}
- for(k=0; k<g_iCells; k++)
+ for (k=0; k<g_iCells; k++)
assert(piHashCount2[k] == piHashCount[k]); // verify the count
free(piHashCount2);
// find maximum amount of entries in any hash entry
iMaxCount = piHashCount[0];
- for(k=1; k<g_iCells; k++)
- if(iMaxCount<piHashCount[k])
+ for (k=1; k<g_iCells; k++)
+ if (iMaxCount<piHashCount[k])
iMaxCount=piHashCount[k];
pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount);
// complete the merge
- for(k=0; k<g_iCells; k++)
+ for (k=0; k<g_iCells; k++)
{
// extract table of cell k and amount of entries in it
int * pTable = &piHashTable[piHashOffsets[k]];
const int iEntries = piHashCount[k];
- if(iEntries < 2) continue;
+ if (iEntries < 2) continue;
- if(pTmpVert!=NULL)
+ if (pTmpVert!=NULL)
{
- for(e=0; e<iEntries; e++)
+ for (e=0; e<iEntries; e++)
{
int i = pTable[e];
const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]);
@@ -573,7 +573,7 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM
MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries);
}
- if(pTmpVert!=NULL) { free(pTmpVert); }
+ if (pTmpVert!=NULL) { free(pTmpVert); }
free(piHashTable);
free(piHashCount);
free(piHashOffsets);
@@ -585,11 +585,11 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
int c=0, l=0, channel=0;
float fvMin[3], fvMax[3];
float dx=0, dy=0, dz=0, fSep=0;
- for(c=0; c<3; c++)
+ for (c=0; c<3; c++)
{ fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c]; }
- for(l=(iL_in+1); l<=iR_in; l++)
- for(c=0; c<3; c++)
- if(fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
+ for (l=(iL_in+1); l<=iR_in; l++)
+ for (c=0; c<3; c++)
+ if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
else if(fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
dx = fvMax[0]-fvMin[0];
@@ -597,17 +597,17 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
dz = fvMax[2]-fvMin[2];
channel = 0;
- if(dy>dx && dy>dz) channel=1;
+ if (dy>dx && dy>dz) channel=1;
else if(dz>dx) channel=2;
fSep = 0.5f*(fvMax[channel]+fvMin[channel]);
// terminate recursion when the separation/average value
// is no longer strictly between fMin and fMax values.
- if(fSep>=fvMax[channel] || fSep<=fvMin[channel])
+ if (fSep>=fvMax[channel] || fSep<=fvMin[channel])
{
// complete the weld
- for(l=iL_in; l<=iR_in; l++)
+ for (l=iL_in; l<=iR_in; l++)
{
int i = pTmpVert[l].index;
const int index = piTriList_in_and_out[i];
@@ -617,7 +617,7 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
tbool bNotFound = TTRUE;
int l2=iL_in, i2rec=-1;
- while(l2<l && bNotFound)
+ while (l2<l && bNotFound)
{
const int i2 = pTmpVert[l2].index;
const int index2 = piTriList_in_and_out[i2];
@@ -627,7 +627,7 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
i2rec=i2;
//if(vP==vP2 && vN==vN2 && vT==vT2)
- if(vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z &&
+ if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z &&
vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z &&
vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z)
bNotFound = TFALSE;
@@ -636,7 +636,7 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
}
// merge if previously found
- if(!bNotFound)
+ if (!bNotFound)
piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
}
}
@@ -646,24 +646,24 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
assert((iR_in-iL_in)>0); // at least 2 entries
// separate (by fSep) all points between iL_in and iR_in in pTmpVert[]
- while(iL < iR)
+ while (iL < iR)
{
tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE;
- while((!bReadyLeftSwap) && iL<iR)
+ while ((!bReadyLeftSwap) && iL<iR)
{
assert(iL>=iL_in && iL<=iR_in);
bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep);
- if(!bReadyLeftSwap) ++iL;
+ if (!bReadyLeftSwap) ++iL;
}
- while((!bReadyRightSwap) && iL<iR)
+ while ((!bReadyRightSwap) && iL<iR)
{
assert(iR>=iL_in && iR<=iR_in);
bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
- if(!bReadyRightSwap) --iR;
+ if (!bReadyRightSwap) --iR;
}
assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) );
- if(bReadyLeftSwap && bReadyRightSwap)
+ if (bReadyLeftSwap && bReadyRightSwap)
{
const STmpVert sTmp = pTmpVert[iL];
assert(iL<iR);
@@ -674,17 +674,17 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons
}
assert(iL==(iR+1) || (iL==iR));
- if(iL==iR)
+ if (iL==iR)
{
const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
- if(bReadyRightSwap) ++iL;
+ if (bReadyRightSwap) ++iL;
else --iR;
}
// only need to weld when there is more than 1 instance of the (x,y,z)
- if(iL_in < iR)
+ if (iL_in < iR)
MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR); // weld all left of fSep
- if(iL < iR_in)
+ if (iL < iR_in)
MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in); // weld all right of (or equal to) fSep
}
}
@@ -693,7 +693,7 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext
{
// this can be optimized further using a tree structure or more hashing.
int e=0;
- for(e=0; e<iEntries; e++)
+ for (e=0; e<iEntries; e++)
{
int i = pTable[e];
const int index = piTriList_in_and_out[i];
@@ -703,7 +703,7 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext
tbool bNotFound = TTRUE;
int e2=0, i2rec=-1;
- while(e2<e && bNotFound)
+ while (e2<e && bNotFound)
{
const int i2 = pTable[e2];
const int index2 = piTriList_in_and_out[i2];
@@ -712,14 +712,14 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext
const SVec3 vT2 = GetTexCoord(pContext, index2);
i2rec = i2;
- if(veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
+ if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
bNotFound = TFALSE;
else
++e2;
}
// merge if previously found
- if(!bNotFound)
+ if (!bNotFound)
piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
}
}
@@ -727,9 +727,9 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext
static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
{
int iNumUniqueVerts = 0, t=0, i=0;
- for(t=0; t<iNrTrianglesIn; t++)
+ for (t=0; t<iNrTrianglesIn; t++)
{
- for(i=0; i<3; i++)
+ for (i=0; i<3; i++)
{
const int offs = t*3 + i;
const int index = piTriList_in_and_out[offs];
@@ -740,27 +740,27 @@ static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], cons
tbool bFound = TFALSE;
int t2=0, index2rec=-1;
- while(!bFound && t2<=t)
+ while (!bFound && t2<=t)
{
int j=0;
- while(!bFound && j<3)
+ while (!bFound && j<3)
{
const int index2 = piTriList_in_and_out[t2*3 + j];
const SVec3 vP2 = GetPosition(pContext, index2);
const SVec3 vN2 = GetNormal(pContext, index2);
const SVec3 vT2 = GetTexCoord(pContext, index2);
- if(veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
+ if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
bFound = TTRUE;
else
++j;
}
- if(!bFound) ++t2;
+ if (!bFound) ++t2;
}
assert(bFound);
// if we found our own
- if(index2rec == index) { ++iNumUniqueVerts; }
+ if (index2rec == index) { ++iNumUniqueVerts; }
piTriList_in_and_out[offs] = index2rec;
}
@@ -771,15 +771,15 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_
{
int iTSpacesOffs = 0, f=0, t=0;
int iDstTriIndex = 0;
- for(f=0; f<pContext->m_pInterface->m_getNumFaces(pContext); f++)
+ for (f=0; f<pContext->m_pInterface->m_getNumFaces(pContext); f++)
{
const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
- if(verts!=3 && verts!=4) continue;
+ if (verts!=3 && verts!=4) continue;
pTriInfos[iDstTriIndex].iOrgFaceNumber = f;
pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs;
- if(verts==3)
+ if (verts==3)
{
unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num;
pVerts[0]=0; pVerts[1]=1; pVerts[2]=2;
@@ -810,7 +810,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_
const float distSQ_02 = LengthSquared(vsub(T2,T0));
const float distSQ_13 = LengthSquared(vsub(T3,T1));
tbool bQuadDiagIs_02;
- if(distSQ_02<distSQ_13)
+ if (distSQ_02<distSQ_13)
bQuadDiagIs_02 = TTRUE;
else if(distSQ_13<distSQ_02)
bQuadDiagIs_02 = TFALSE;
@@ -826,7 +826,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_
bQuadDiagIs_02 = distSQ_13<distSQ_02 ? TFALSE : TTRUE;
}
- if(bQuadDiagIs_02)
+ if (bQuadDiagIs_02)
{
{
unsigned char * pVerts_A = pTriInfos[iDstTriIndex].vert_num;
@@ -871,7 +871,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_
assert(iDstTriIndex<=iNrTrianglesIn);
}
- for(t=0; t<iNrTrianglesIn; t++)
+ for (t=0; t<iNrTrianglesIn; t++)
pTriInfos[t].iFlag = 0;
// return total amount of tspaces
@@ -946,8 +946,8 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi
// pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function.
// generate neighbor info list
- for(f=0; f<iNrTrianglesIn; f++)
- for(i=0; i<3; i++)
+ for (f=0; f<iNrTrianglesIn; f++)
+ for (i=0; i<3; i++)
{
pTriInfos[f].FaceNeighbors[i] = -1;
pTriInfos[f].AssignedGroup[i] = NULL;
@@ -962,7 +962,7 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi
}
// evaluate first order derivatives
- for(f=0; f<iNrTrianglesIn; f++)
+ for (f=0; f<iNrTrianglesIn; f++)
{
// initial values
const SVec3 v1 = GetPosition(pContext, piTriListIn[f*3+0]);
@@ -986,47 +986,47 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi
pTriInfos[f].iFlag |= (fSignedAreaSTx2>0 ? ORIENT_PRESERVING : 0);
- if( NotZero(fSignedAreaSTx2) )
+ if ( NotZero(fSignedAreaSTx2) )
{
const float fAbsArea = fabsf(fSignedAreaSTx2);
const float fLenOs = Length(vOs);
const float fLenOt = Length(vOt);
const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f;
- if( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs);
- if( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt);
+ if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs);
+ if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt);
// evaluate magnitudes prior to normalization of vOs and vOt
pTriInfos[f].fMagS = fLenOs / fAbsArea;
pTriInfos[f].fMagT = fLenOt / fAbsArea;
// if this is a good triangle
- if( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT))
+ if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT))
pTriInfos[f].iFlag &= (~GROUP_WITH_ANY);
}
}
// force otherwise healthy quads to a fixed orientation
- while(t<(iNrTrianglesIn-1))
+ while (t<(iNrTrianglesIn-1))
{
const int iFO_a = pTriInfos[t].iOrgFaceNumber;
const int iFO_b = pTriInfos[t+1].iOrgFaceNumber;
- if(iFO_a==iFO_b) // this is a quad
+ if (iFO_a==iFO_b) // this is a quad
{
const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
// bad triangles should already have been removed by
// DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false
- if((bIsDeg_a||bIsDeg_b)==TFALSE)
+ if ((bIsDeg_a||bIsDeg_b)==TFALSE)
{
const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
// if this happens the quad has extremely bad mapping!!
- if(bOrientA!=bOrientB)
+ if (bOrientA!=bOrientB)
{
//printf("found quad with bad mapping\n");
tbool bChooseOrientFirstTri = TFALSE;
- if((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE;
+ if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE;
else if( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) )
bChooseOrientFirstTri = TTRUE;
@@ -1048,7 +1048,7 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi
// match up edge pairs
{
SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3);
- if(pEdges==NULL)
+ if (pEdges==NULL)
BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn);
else
{
@@ -1070,12 +1070,12 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT
const int iNrMaxGroups = iNrTrianglesIn*3;
int iNrActiveGroups = 0;
int iOffset = 0, f=0, i=0;
- for(f=0; f<iNrTrianglesIn; f++)
+ for (f=0; f<iNrTrianglesIn; f++)
{
- for(i=0; i<3; i++)
+ for (i=0; i<3; i++)
{
// if not assigned to a group
- if((pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 && pTriInfos[f].AssignedGroup[i]==NULL)
+ if ((pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 && pTriInfos[f].AssignedGroup[i]==NULL)
{
tbool bOrPre;
int neigh_indexL, neigh_indexR;
@@ -1092,7 +1092,7 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT
bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
neigh_indexL = pTriInfos[f].FaceNeighbors[i];
neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2];
- if(neigh_indexL>=0) // neighbor
+ if (neigh_indexL>=0) // neighbor
{
const tbool bAnswer =
AssignRecur(piTriListIn, pTriInfos, neigh_indexL,
@@ -1102,7 +1102,7 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT
const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE;
assert(bAnswer || bDiff);
}
- if(neigh_indexR>=0) // neighbor
+ if (neigh_indexR>=0) // neighbor
{
const tbool bAnswer =
AssignRecur(piTriListIn, pTriInfos, neigh_indexR,
@@ -1141,20 +1141,20 @@ static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[],
const int iVertRep = pGroup->iVertexRepresentitive;
const int * pVerts = &piTriListIn[3*iMyTriIndex+0];
int i=-1;
- if(pVerts[0]==iVertRep) i=0;
+ if (pVerts[0]==iVertRep) i=0;
else if(pVerts[1]==iVertRep) i=1;
else if(pVerts[2]==iVertRep) i=2;
assert(i>=0 && i<3);
// early out
- if(pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE;
+ if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE;
else if(pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE;
- if((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0)
+ if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0)
{
// first to group with a group-with-anything triangle
// determines it's orientation.
// This is the only existing order dependency in the code!!
- if( pMyTriInfo->AssignedGroup[0] == NULL &&
+ if ( pMyTriInfo->AssignedGroup[0] == NULL &&
pMyTriInfo->AssignedGroup[1] == NULL &&
pMyTriInfo->AssignedGroup[2] == NULL )
{
@@ -1164,7 +1164,7 @@ static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[],
}
{
const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
- if(bOrient != pGroup->bOrientPreservering) return TFALSE;
+ if (bOrient != pGroup->bOrientPreservering) return TFALSE;
}
AddTriToGroup(pGroup, iMyTriIndex);
@@ -1173,9 +1173,9 @@ static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[],
{
const int neigh_indexL = pMyTriInfo->FaceNeighbors[i];
const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2];
- if(neigh_indexL>=0)
+ if (neigh_indexL>=0)
AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup);
- if(neigh_indexR>=0)
+ if (neigh_indexR>=0)
AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup);
}
@@ -1199,39 +1199,39 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
SSubGroup * pUniSubGroups = NULL;
int * pTmpMembers = NULL;
int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0;
- for(g=0; g<iNrActiveGroups; g++)
- if(iMaxNrFaces < pGroups[g].iNrFaces)
+ for (g=0; g<iNrActiveGroups; g++)
+ if (iMaxNrFaces < pGroups[g].iNrFaces)
iMaxNrFaces = pGroups[g].iNrFaces;
- if(iMaxNrFaces == 0) return TTRUE;
+ if (iMaxNrFaces == 0) return TTRUE;
// make initial allocations
pSubGroupTspace = (STSpace *) malloc(sizeof(STSpace)*iMaxNrFaces);
pUniSubGroups = (SSubGroup *) malloc(sizeof(SSubGroup)*iMaxNrFaces);
pTmpMembers = (int *) malloc(sizeof(int)*iMaxNrFaces);
- if(pSubGroupTspace==NULL || pUniSubGroups==NULL || pTmpMembers==NULL)
+ if (pSubGroupTspace==NULL || pUniSubGroups==NULL || pTmpMembers==NULL)
{
- if(pSubGroupTspace!=NULL) free(pSubGroupTspace);
- if(pUniSubGroups!=NULL) free(pUniSubGroups);
- if(pTmpMembers!=NULL) free(pTmpMembers);
+ if (pSubGroupTspace!=NULL) free(pSubGroupTspace);
+ if (pUniSubGroups!=NULL) free(pUniSubGroups);
+ if (pTmpMembers!=NULL) free(pTmpMembers);
return TFALSE;
}
iUniqueTspaces = 0;
- for(g=0; g<iNrActiveGroups; g++)
+ for (g=0; g<iNrActiveGroups; g++)
{
const SGroup * pGroup = &pGroups[g];
int iUniqueSubGroups = 0, s=0;
- for(i=0; i<pGroup->iNrFaces; i++) // triangles
+ for (i=0; i<pGroup->iNrFaces; i++) // triangles
{
const int f = pGroup->pFaceIndices[i]; // triangle number
int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0;
SSubGroup tmp_group;
tbool bFound;
SVec3 n, vOs, vOt;
- if(pTriInfos[f].AssignedGroup[0]==pGroup) index=0;
+ if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0;
else if(pTriInfos[f].AssignedGroup[1]==pGroup) index=1;
else if(pTriInfos[f].AssignedGroup[2]==pGroup) index=2;
assert(index>=0 && index<3);
@@ -1245,14 +1245,14 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
// project
vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n));
vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n));
- if( VNotZero(vOs) ) vOs = Normalize(vOs);
- if( VNotZero(vOt) ) vOt = Normalize(vOt);
+ if ( VNotZero(vOs) ) vOs = Normalize(vOs);
+ if ( VNotZero(vOt) ) vOt = Normalize(vOt);
// original face number
iOF_1 = pTriInfos[f].iOrgFaceNumber;
iMembers = 0;
- for(j=0; j<pGroup->iNrFaces; j++)
+ for (j=0; j<pGroup->iNrFaces; j++)
{
const int t = pGroup->pFaceIndices[j]; // triangle number
const int iOF_2 = pTriInfos[t].iOrgFaceNumber;
@@ -1260,8 +1260,8 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
// project
SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n));
SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n));
- if( VNotZero(vOs2) ) vOs2 = Normalize(vOs2);
- if( VNotZero(vOt2) ) vOt2 = Normalize(vOt2);
+ if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2);
+ if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2);
{
const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE;
@@ -1272,7 +1272,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
const float fCosT = vdot(vOt,vOt2);
assert(f!=t || bSameOrgFace); // sanity check
- if(bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos))
+ if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos))
pTmpMembers[iMembers++] = t;
}
}
@@ -1280,7 +1280,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
// sort pTmpMembers
tmp_group.iNrFaces = iMembers;
tmp_group.pTriMembers = pTmpMembers;
- if(iMembers>1)
+ if (iMembers>1)
{
unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed?
QuickSort(pTmpMembers, 0, iMembers-1, uSeed);
@@ -1289,10 +1289,10 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
// look for an existing match
bFound = TFALSE;
l=0;
- while(l<iUniqueSubGroups && !bFound)
+ while (l<iUniqueSubGroups && !bFound)
{
bFound = CompareSubGroups(&tmp_group, &pUniSubGroups[l]);
- if(!bFound) ++l;
+ if (!bFound) ++l;
}
// assign tangent space index
@@ -1300,15 +1300,15 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
//piTempTangIndices[f*3+index] = iUniqueTspaces+l;
// if no match was found we allocate a new subgroup
- if(!bFound)
+ if (!bFound)
{
// insert new subgroup
int * pIndices = (int *) malloc(sizeof(int)*iMembers);
- if(pIndices==NULL)
+ if (pIndices==NULL)
{
// clean up and return false
int s=0;
- for(s=0; s<iUniqueSubGroups; s++)
+ for (s=0; s<iUniqueSubGroups; s++)
free(pUniSubGroups[s].pTriMembers);
free(pUniSubGroups);
free(pTmpMembers);
@@ -1330,7 +1330,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
STSpace * pTS_out = &psTspace[iOffs+iVert];
assert(pTS_out->iCounter<2);
assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering);
- if(pTS_out->iCounter==1)
+ if (pTS_out->iCounter==1)
{
*pTS_out = AvgTSpace(pTS_out, &pSubGroupTspace[l]);
pTS_out->iCounter = 2; // update counter
@@ -1347,7 +1347,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con
}
// clean up and offset iUniqueTspaces
- for(s=0; s<iUniqueSubGroups; s++)
+ for (s=0; s<iUniqueSubGroups; s++)
free(pUniSubGroups[s].pTriMembers);
iUniqueTspaces += iUniqueSubGroups;
}
@@ -1370,17 +1370,17 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL
res.vOt.x=0.0f; res.vOt.y=0.0f; res.vOt.z=0.0f;
res.fMagS = 0; res.fMagT = 0;
- for(face=0; face<iFaces; face++)
+ for (face=0; face<iFaces; face++)
{
const int f = face_indices[face];
// only valid triangles get to add their contribution
- if( (pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 )
+ if ( (pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 )
{
SVec3 n, vOs, vOt, p0, p1, p2, v1, v2;
float fCos, fAngle, fMagS, fMagT;
int i=-1, index=-1, i0=-1, i1=-1, i2=-1;
- if(piTriListIn[3*f+0]==iVertexRepresentitive) i=0;
+ if (piTriListIn[3*f+0]==iVertexRepresentitive) i=0;
else if(piTriListIn[3*f+1]==iVertexRepresentitive) i=1;
else if(piTriListIn[3*f+2]==iVertexRepresentitive) i=2;
assert(i>=0 && i<3);
@@ -1390,8 +1390,8 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL
n = GetNormal(pContext, index);
vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n));
vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n));
- if( VNotZero(vOs) ) vOs = Normalize(vOs);
- if( VNotZero(vOt) ) vOt = Normalize(vOt);
+ if ( VNotZero(vOs) ) vOs = Normalize(vOs);
+ if ( VNotZero(vOt) ) vOt = Normalize(vOt);
i2 = piTriListIn[3*f + (i<2?(i+1):0)];
i1 = piTriListIn[3*f + i];
@@ -1423,9 +1423,9 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL
}
// normalize
- if( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs);
- if( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt);
- if(fAngleSum>0)
+ if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs);
+ if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt);
+ if (fAngleSum>0)
{
res.fMagS /= fAngleSum;
res.fMagT /= fAngleSum;
@@ -1438,11 +1438,11 @@ static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2)
{
tbool bStillSame=TTRUE;
int i=0;
- if(pg1->iNrFaces!=pg2->iNrFaces) return TFALSE;
- while(i<pg1->iNrFaces && bStillSame)
+ if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE;
+ while (i<pg1->iNrFaces && bStillSame)
{
bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE;
- if(bStillSame) ++i;
+ if (bStillSame) ++i;
}
return bStillSame;
}
@@ -1467,12 +1467,12 @@ static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSee
do
{
- while(pSortBuffer[iL] < iMid)
+ while (pSortBuffer[iL] < iMid)
++iL;
- while(pSortBuffer[iR] > iMid)
+ while (pSortBuffer[iR] > iMid)
--iR;
- if(iL <= iR)
+ if (iL <= iR)
{
iTmp = pSortBuffer[iL];
pSortBuffer[iL] = pSortBuffer[iR];
@@ -1480,11 +1480,11 @@ static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSee
++iL; --iR;
}
}
- while(iL <= iR);
+ while (iL <= iR);
- if(iLeft < iR)
+ if (iLeft < iR)
QuickSort(pSortBuffer, iLeft, iR, uSeed);
- if(iL < iRight)
+ if (iL < iRight)
QuickSort(pSortBuffer, iL, iRight, uSeed);
}
@@ -1499,8 +1499,8 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
// build array of edges
unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed?
int iEntries=0, iCurStartIndex=-1, f=0, i=0;
- for(f=0; f<iNrTrianglesIn; f++)
- for(i=0; i<3; i++)
+ for (f=0; f<iNrTrianglesIn; f++)
+ for (i=0; i<3; i++)
{
const int i0 = piTriListIn[f*3+i];
const int i1 = piTriListIn[f*3+(i<2?(i+1):0)];
@@ -1517,9 +1517,9 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
// with i0 as msb in the quicksort call above.
iEntries = iNrTrianglesIn*3;
iCurStartIndex = 0;
- for(i=1; i<iEntries; i++)
+ for (i=1; i<iEntries; i++)
{
- if(pEdges[iCurStartIndex].i0 != pEdges[i].i0)
+ if (pEdges[iCurStartIndex].i0 != pEdges[i].i0)
{
const int iL = iCurStartIndex;
const int iR = i-1;
@@ -1533,9 +1533,9 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
// this step is to remain compliant with BuildNeighborsSlow() when
// more than 2 triangles use the same edge (such as a butterfly topology).
iCurStartIndex = 0;
- for(i=1; i<iEntries; i++)
+ for (i=1; i<iEntries; i++)
{
- if(pEdges[iCurStartIndex].i0 != pEdges[i].i0 || pEdges[iCurStartIndex].i1 != pEdges[i].i1)
+ if (pEdges[iCurStartIndex].i0 != pEdges[i].i0 || pEdges[iCurStartIndex].i1 != pEdges[i].i1)
{
const int iL = iCurStartIndex;
const int iR = i-1;
@@ -1546,7 +1546,7 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
}
// pair up, adjacent triangles
- for(i=0; i<iEntries; i++)
+ for (i=0; i<iEntries; i++)
{
const int i0=pEdges[i].i0;
const int i1=pEdges[i].i1;
@@ -1558,12 +1558,12 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
GetEdge(&i0_A, &i1_A, &edgenum_A, &piTriListIn[f*3], i0, i1); // resolve index ordering and edge_num
bUnassigned_A = pTriInfos[f].FaceNeighbors[edgenum_A] == -1 ? TTRUE : TFALSE;
- if(bUnassigned_A)
+ if (bUnassigned_A)
{
// get true index ordering
int j=i+1, t;
tbool bNotFound = TTRUE;
- while(j<iEntries && i0==pEdges[j].i0 && i1==pEdges[j].i1 && bNotFound)
+ while (j<iEntries && i0==pEdges[j].i0 && i1==pEdges[j].i1 && bNotFound)
{
tbool bUnassigned_B;
int i0_B, i1_B;
@@ -1572,13 +1572,13 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
GetEdge(&i1_B, &i0_B, &edgenum_B, &piTriListIn[t*3], pEdges[j].i0, pEdges[j].i1); // resolve index ordering and edge_num
//assert(!(i0_A==i1_B && i1_A==i0_B));
bUnassigned_B = pTriInfos[t].FaceNeighbors[edgenum_B]==-1 ? TTRUE : TFALSE;
- if(i0_A==i0_B && i1_A==i1_B && bUnassigned_B)
+ if (i0_A==i0_B && i1_A==i1_B && bUnassigned_B)
bNotFound = TFALSE;
else
++j;
}
- if(!bNotFound)
+ if (!bNotFound)
{
int t = pEdges[j].f;
pTriInfos[f].FaceNeighbors[edgenum_A] = t;
@@ -1592,12 +1592,12 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p
static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn)
{
int f=0, i=0;
- for(f=0; f<iNrTrianglesIn; f++)
+ for (f=0; f<iNrTrianglesIn; f++)
{
- for(i=0; i<3; i++)
+ for (i=0; i<3; i++)
{
// if unassigned
- if(pTriInfos[f].FaceNeighbors[i] == -1)
+ if (pTriInfos[f].FaceNeighbors[i] == -1)
{
const int i0_A = piTriListIn[f*3+i];
const int i1_A = piTriListIn[f*3+(i<2?(i+1):0)];
@@ -1605,29 +1605,29 @@ static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], co
// search for a neighbor
tbool bFound = TFALSE;
int t=0, j=0;
- while(!bFound && t<iNrTrianglesIn)
+ while (!bFound && t<iNrTrianglesIn)
{
- if(t!=f)
+ if (t!=f)
{
j=0;
- while(!bFound && j<3)
+ while (!bFound && j<3)
{
// in rev order
const int i1_B = piTriListIn[t*3+j];
const int i0_B = piTriListIn[t*3+(j<2?(j+1):0)];
//assert(!(i0_A==i1_B && i1_A==i0_B));
- if(i0_A==i0_B && i1_A==i1_B)
+ if (i0_A==i0_B && i1_A==i1_B)
bFound = TTRUE;
else
++j;
}
}
- if(!bFound) ++t;
+ if (!bFound) ++t;
}
// assign neighbors
- if(bFound)
+ if (bFound)
{
pTriInfos[f].FaceNeighbors[i] = t;
//assert(pTriInfos[t].FaceNeighbors[j]==-1);
@@ -1646,10 +1646,10 @@ static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int
// early out
SEdge sTmp;
const int iElems = iRight-iLeft+1;
- if(iElems<2) return;
+ if (iElems<2) return;
else if(iElems==2)
{
- if(pSortBuffer[iLeft].array[channel] > pSortBuffer[iRight].array[channel])
+ if (pSortBuffer[iLeft].array[channel] > pSortBuffer[iRight].array[channel])
{
sTmp = pSortBuffer[iLeft];
pSortBuffer[iLeft] = pSortBuffer[iRight];
@@ -1673,12 +1673,12 @@ static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int
do
{
- while(pSortBuffer[iL].array[channel] < iMid)
+ while (pSortBuffer[iL].array[channel] < iMid)
++iL;
- while(pSortBuffer[iR].array[channel] > iMid)
+ while (pSortBuffer[iR].array[channel] > iMid)
--iR;
- if(iL <= iR)
+ if (iL <= iR)
{
sTmp = pSortBuffer[iL];
pSortBuffer[iL] = pSortBuffer[iR];
@@ -1686,11 +1686,11 @@ static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int
++iL; --iR;
}
}
- while(iL <= iR);
+ while (iL <= iR);
- if(iLeft < iR)
+ if (iLeft < iR)
QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed);
- if(iL < iRight)
+ if (iL < iRight)
QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed);
}
@@ -1700,10 +1700,10 @@ static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int ind
*edgenum_out = -1;
// test if first index is on the edge
- if(indices[0]==i0_in || indices[0]==i1_in)
+ if (indices[0]==i0_in || indices[0]==i1_in)
{
// test if second index is on the edge
- if(indices[1]==i0_in || indices[1]==i1_in)
+ if (indices[1]==i0_in || indices[1]==i1_in)
{
edgenum_out[0]=0; // first edge
i0_out[0]=indices[0];
@@ -1736,15 +1736,15 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i
// locate quads with only one good triangle
int t=0;
- while(t<(iTotTris-1))
+ while (t<(iTotTris-1))
{
const int iFO_a = pTriInfos[t].iOrgFaceNumber;
const int iFO_b = pTriInfos[t+1].iOrgFaceNumber;
- if(iFO_a==iFO_b) // this is a quad
+ if (iFO_a==iFO_b) // this is a quad
{
const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
- if((bIsDeg_a^bIsDeg_b)!=0)
+ if ((bIsDeg_a^bIsDeg_b)!=0)
{
pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI;
pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI;
@@ -1760,12 +1760,12 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i
iNextGoodTriangleSearchIndex = 1;
t=0;
bStillFindingGoodOnes = TTRUE;
- while(t<iNrTrianglesIn && bStillFindingGoodOnes)
+ while (t<iNrTrianglesIn && bStillFindingGoodOnes)
{
const tbool bIsGood = (pTriInfos[t].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE;
- if(bIsGood)
+ if (bIsGood)
{
- if(iNextGoodTriangleSearchIndex < (t+2))
+ if (iNextGoodTriangleSearchIndex < (t+2))
iNextGoodTriangleSearchIndex = t+2;
}
else
@@ -1773,10 +1773,10 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i
int t0, t1;
// search for the first good triangle.
tbool bJustADegenerate = TTRUE;
- while(bJustADegenerate && iNextGoodTriangleSearchIndex<iTotTris)
+ while (bJustADegenerate && iNextGoodTriangleSearchIndex<iTotTris)
{
const tbool bIsGood = (pTriInfos[iNextGoodTriangleSearchIndex].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE;
- if(bIsGood) bJustADegenerate=TFALSE;
+ if (bIsGood) bJustADegenerate=TFALSE;
else ++iNextGoodTriangleSearchIndex;
}
@@ -1786,10 +1786,10 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i
assert(iNextGoodTriangleSearchIndex > (t+1));
// swap triangle t0 and t1
- if(!bJustADegenerate)
+ if (!bJustADegenerate)
{
int i=0;
- for(i=0; i<3; i++)
+ for (i=0; i<3; i++)
{
const int index = piTriList_out[t0*3+i];
piTriList_out[t0*3+i] = piTriList_out[t1*3+i];
@@ -1805,7 +1805,7 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i
bStillFindingGoodOnes = TFALSE; // this is not supposed to happen
}
- if(bStillFindingGoodOnes) ++t;
+ if (bStillFindingGoodOnes) ++t;
}
assert(bStillFindingGoodOnes); // code will still work.
@@ -1817,28 +1817,28 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis
int t=0, i=0;
// deal with degenerate triangles
// punishment for degenerate triangles is O(N^2)
- for(t=iNrTrianglesIn; t<iTotTris; t++)
+ for (t=iNrTrianglesIn; t<iTotTris; t++)
{
// degenerate triangles on a quad with one good triangle are skipped
// here but processed in the next loop
const tbool bSkip = (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 ? TTRUE : TFALSE;
- if(!bSkip)
+ if (!bSkip)
{
- for(i=0; i<3; i++)
+ for (i=0; i<3; i++)
{
const int index1 = piTriListIn[t*3+i];
// search through the good triangles
tbool bNotFound = TTRUE;
int j=0;
- while(bNotFound && j<(3*iNrTrianglesIn))
+ while (bNotFound && j<(3*iNrTrianglesIn))
{
const int index2 = piTriListIn[j];
- if(index1==index2) bNotFound=TFALSE;
+ if (index1==index2) bNotFound=TFALSE;
else ++j;
}
- if(!bNotFound)
+ if (!bNotFound)
{
const int iTri = j/3;
const int iVert = j%3;
@@ -1855,11 +1855,11 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis
}
// deal with degenerate quads with one good triangle
- for(t=0; t<iNrTrianglesIn; t++)
+ for (t=0; t<iNrTrianglesIn; t++)
{
// this triangle belongs to a quad where the
// other triangle is degenerate
- if( (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 )
+ if ( (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 )
{
SVec3 vDstP;
int iOrgF=-1, i=0;
@@ -1867,7 +1867,7 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis
unsigned char * pV = pTriInfos[t].vert_num;
int iFlag = (1<<pV[0]) | (1<<pV[1]) | (1<<pV[2]);
int iMissingIndex = 0;
- if((iFlag&2)==0) iMissingIndex=1;
+ if ((iFlag&2)==0) iMissingIndex=1;
else if((iFlag&4)==0) iMissingIndex=2;
else if((iFlag&8)==0) iMissingIndex=3;
@@ -1875,11 +1875,11 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis
vDstP = GetPosition(pContext, MakeIndex(iOrgF, iMissingIndex));
bNotFound = TTRUE;
i=0;
- while(bNotFound && i<3)
+ while (bNotFound && i<3)
{
const int iVert = pV[i];
const SVec3 vSrcP = GetPosition(pContext, MakeIndex(iOrgF, iVert));
- if(veq(vSrcP, vDstP)==TTRUE)
+ if (veq(vSrcP, vDstP)==TTRUE)
{
const int iOffs = pTriInfos[t].iTSpacesOffs;
psTspace[iOffs+iMissingIndex] = psTspace[iOffs+iVert];
diff --git a/intern/smoke/extern/smoke_API.h b/intern/smoke/extern/smoke_API.h
index 9d5dfd98823..a0eb1bf38e0 100644
--- a/intern/smoke/extern/smoke_API.h
+++ b/intern/smoke/extern/smoke_API.h
@@ -41,11 +41,11 @@ struct FLUID_3D;
void smoke_export(struct FLUID_3D *fluid, float *dt, float *dx, float **dens, float **densold, float **heat, float **heatold, float **vx, float **vy, float **vz, float **vxold, float **vyold, float **vzold, unsigned char **obstacles);
// low res
-struct FLUID_3D *smoke_init(int *res, float *p0);
+struct FLUID_3D *smoke_init(int *res, float *p0, float dtdef);
void smoke_free(struct FLUID_3D *fluid);
void smoke_initBlenderRNA(struct FLUID_3D *fluid, float *alpha, float *beta, float *dt_factor, float *vorticity, int *border_colli);
-void smoke_step(struct FLUID_3D *fluid, size_t framenr, float fps);
+void smoke_step(struct FLUID_3D *fluid, float dtSubdiv);
float *smoke_get_density(struct FLUID_3D *fluid);
float *smoke_get_heat(struct FLUID_3D *fluid);
@@ -53,6 +53,9 @@ float *smoke_get_velocity_x(struct FLUID_3D *fluid);
float *smoke_get_velocity_y(struct FLUID_3D *fluid);
float *smoke_get_velocity_z(struct FLUID_3D *fluid);
+/* Moving obstacle velocity provided by blender */
+void smoke_get_ob_velocity(struct FLUID_3D *fluid, float **x, float **y, float **z);
+
float *smoke_get_force_x(struct FLUID_3D *fluid);
float *smoke_get_force_y(struct FLUID_3D *fluid);
float *smoke_get_force_z(struct FLUID_3D *fluid);
diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp
index 9f036cc6d2f..04971f898e9 100644
--- a/intern/smoke/intern/FLUID_3D.cpp
+++ b/intern/smoke/intern/FLUID_3D.cpp
@@ -34,6 +34,8 @@
#include "SPHERE.h"
#include <zlib.h>
+#include "float.h"
+
#if PARALLEL==1
#include <omp.h>
#endif // PARALLEL
@@ -42,11 +44,11 @@
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
-FLUID_3D::FLUID_3D(int *res, float *p0) :
+FLUID_3D::FLUID_3D(int *res, float *p0, float dtdef) :
_xRes(res[0]), _yRes(res[1]), _zRes(res[2]), _res(0.0f)
{
// set simulation consts
- _dt = DT_DEFAULT; // just in case. set in step from a RNA factor
+ _dt = dtdef; // just in case. set in step from a RNA factor
// start point of array
_p0[0] = p0[0];
@@ -81,6 +83,9 @@ FLUID_3D::FLUID_3D(int *res, float *p0) :
_xVelocity = new float[_totalCells];
_yVelocity = new float[_totalCells];
_zVelocity = new float[_totalCells];
+ _xVelocityOb = new float[_totalCells];
+ _yVelocityOb = new float[_totalCells];
+ _zVelocityOb = new float[_totalCells];
_xVelocityOld = new float[_totalCells];
_yVelocityOld = new float[_totalCells];
_zVelocityOld = new float[_totalCells];
@@ -111,6 +116,9 @@ FLUID_3D::FLUID_3D(int *res, float *p0) :
_xVelocity[x] = 0.0f;
_yVelocity[x] = 0.0f;
_zVelocity[x] = 0.0f;
+ _xVelocityOb[x] = 0.0f;
+ _yVelocityOb[x] = 0.0f;
+ _zVelocityOb[x] = 0.0f;
_xVelocityOld[x] = 0.0f;
_yVelocityOld[x] = 0.0f;
_zVelocityOld[x] = 0.0f;
@@ -131,9 +139,15 @@ FLUID_3D::FLUID_3D(int *res, float *p0) :
_colloPrev = 1; // default value
+ setBorderObstacles(); // walls
+
+}
+void FLUID_3D::setBorderObstacles()
+{
+
// set side obstacles
- int index;
+ unsigned int index;
for (int y = 0; y < _yRes; y++)
for (int x = 0; x < _xRes; x++)
{
@@ -169,7 +183,6 @@ FLUID_3D::FLUID_3D(int *res, float *p0) :
index += _xRes - 1;
if(_domainBcRight==1) _obstacles[index] = 1;
}
-
}
FLUID_3D::~FLUID_3D()
@@ -177,6 +190,9 @@ FLUID_3D::~FLUID_3D()
if (_xVelocity) delete[] _xVelocity;
if (_yVelocity) delete[] _yVelocity;
if (_zVelocity) delete[] _zVelocity;
+ if (_xVelocityOb) delete[] _xVelocityOb;
+ if (_yVelocityOb) delete[] _yVelocityOb;
+ if (_zVelocityOb) delete[] _zVelocityOb;
if (_xVelocityOld) delete[] _xVelocityOld;
if (_yVelocityOld) delete[] _yVelocityOld;
if (_zVelocityOld) delete[] _zVelocityOld;
@@ -214,10 +230,18 @@ void FLUID_3D::initBlenderRNA(float *alpha, float *beta, float *dt_factor, float
//////////////////////////////////////////////////////////////////////
void FLUID_3D::step(float dt)
{
+#if 0
// If border rules have been changed
if (_colloPrev != *_borderColli) {
+ printf("Border collisions changed\n");
+
+ // DG TODO: Need to check that no animated obstacle flags are overwritten
setBorderCollisions();
}
+#endif
+
+ // DG: TODO for the moment redo border for every timestep since it's been deleted every time by moving obstacles
+ setBorderCollisions();
// set delta time by dt_factor
@@ -786,6 +810,7 @@ void FLUID_3D::project()
memset(_pressure, 0, sizeof(float)*_totalCells);
memset(_divergence, 0, sizeof(float)*_totalCells);
+ // set velocity and pressure inside of obstacles to zero
setObstacleBoundaries(_pressure, 0, _zRes);
// copy out the boundaries
@@ -798,12 +823,49 @@ void FLUID_3D::project()
if(_domainBcTop == 0) setNeumannZ(_zVelocity, _res, 0, _zRes);
else setZeroZ(_zVelocity, _res, 0, _zRes);
+ /*
+ {
+ float maxx = 0, maxy = 0, maxz = 0;
+ for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++)
+ {
+ if(_xVelocity[i] > maxx)
+ maxx = _xVelocity[i];
+ if(_yVelocity[i] > maxy)
+ maxy = _yVelocity[i];
+ if(_zVelocity[i] > maxz)
+ maxz = _zVelocity[i];
+ }
+ printf("Max velx: %f, vely: %f, velz: %f\n", maxx, maxy, maxz);
+ }
+ */
+
+ /*
+ {
+ float maxvalue = 0;
+ for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++)
+ {
+ if(_heat[i] > maxvalue)
+ maxvalue = _heat[i];
+
+ }
+ printf("Max heat: %f\n", maxvalue);
+ }
+ */
+
// calculate divergence
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
+
+ if(_obstacles[index])
+ {
+ _divergence[index] = 0.0f;
+ continue;
+ }
+
+
float xright = _xVelocity[index + 1];
float xleft = _xVelocity[index - 1];
float yup = _yVelocity[index + _xRes];
@@ -811,26 +873,82 @@ void FLUID_3D::project()
float ztop = _zVelocity[index + _slabSize];
float zbottom = _zVelocity[index - _slabSize];
- if(_obstacles[index+1]) xright = - _xVelocity[index];
+ if(_obstacles[index+1]) xright = - _xVelocity[index]; // DG: +=
if(_obstacles[index-1]) xleft = - _xVelocity[index];
if(_obstacles[index+_xRes]) yup = - _yVelocity[index];
if(_obstacles[index-_xRes]) ydown = - _yVelocity[index];
if(_obstacles[index+_slabSize]) ztop = - _zVelocity[index];
if(_obstacles[index-_slabSize]) zbottom = - _zVelocity[index];
+ if(_obstacles[index+1] & 8) xright += _xVelocityOb[index + 1];
+ if(_obstacles[index-1] & 8) xleft += _xVelocityOb[index - 1];
+ if(_obstacles[index+_xRes] & 8) yup += _yVelocityOb[index + _xRes];
+ if(_obstacles[index-_xRes] & 8) ydown += _yVelocityOb[index - _xRes];
+ if(_obstacles[index+_slabSize] & 8) ztop += _zVelocityOb[index + _slabSize];
+ if(_obstacles[index-_slabSize] & 8) zbottom += _zVelocityOb[index - _slabSize];
+
_divergence[index] = -_dx * 0.5f * (
xright - xleft +
yup - ydown +
ztop - zbottom );
- // DG: commenting this helps CG to get a better start, 10-20% speed improvement
- // _pressure[index] = 0.0f;
+ // Pressure is zero anyway since now a local array is used
+ _pressure[index] = 0.0f;
}
+
+
+ /*
+ {
+ float maxvalue = 0;
+ for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++)
+ {
+ if(_divergence[i] > maxvalue)
+ maxvalue = _divergence[i];
+
+ }
+ printf("Max divergence: %f\n", maxvalue);
+ }
+ */
+
copyBorderAll(_pressure, 0, _zRes);
+ /*
+ {
+ float maxvalue = 0;
+ for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++)
+ {
+ if(_pressure[i] > maxvalue)
+ maxvalue = _pressure[i];
+ }
+ printf("Max pressure BEFORE: %f\n", maxvalue);
+ }
+ */
+
// solve Poisson equation
solvePressurePre(_pressure, _divergence, _obstacles);
+ {
+ float maxvalue = 0;
+ for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++)
+ {
+ if(_pressure[i] > maxvalue)
+ maxvalue = _pressure[i];
+
+ /* HACK: Animated collision object sometimes result in a non converging solvePressurePre() */
+ if(_pressure[i] > _dx * _dt)
+ _pressure[i] = _dx * _dt;
+ else if(_pressure[i] < -_dx * _dt)
+ _pressure[i] = -_dx * _dt;
+
+ // if(_obstacle[i] && _pressure[i] != 0.0)
+ // printf("BAD PRESSURE i\n");
+
+ // if(_pressure[i]>1)
+ // printf("index: %d\n", i);
+ }
+ // printf("Max pressure: %f, dx: %f\n", maxvalue, _dx);
+ }
+
setObstaclePressure(_pressure, 0, _zRes);
// project out solution
@@ -848,12 +966,74 @@ void FLUID_3D::project()
}
}
+ setObstacleVelocity(0, _zRes);
+
if (_pressure) delete[] _pressure;
if (_divergence) delete[] _divergence;
}
+//////////////////////////////////////////////////////////////////////
+// Impose the velocity of moving obstacles on the fluid next to them
+// and clear the density inside obstacle cells.
+//
+// Visits the interior cells of the z-slabs [zBegin, zEnd). The x/y
+// border cells are always skipped; the first/last z-slab is skipped
+// when the range starts at 0 / ends at _zRes.
+//
+// For every free cell, the lower neighbour along each axis
+// (index - 1, index - _xRes, index - _slabSize) is examined. If that
+// neighbour has obstacle bit 8 set (presumably ANIMATED from
+// OBSTACLE_FLAGS - confirm) and its obstacle velocity on that axis is
+// not negligible, the component is copied into both the free cell and
+// the neighbouring obstacle cell. Upper neighbours are not examined.
+//////////////////////////////////////////////////////////////////////
+
+// True when |v| exceeds FLT_EPSILON. Written as an explicit float
+// comparison: an unqualified abs() may resolve to int abs(int) and
+// truncate velocities below 1.0 to zero, depending on visible headers.
+static inline bool obstacleVelocityIsSignificant(float v)
+{
+	return (v > FLT_EPSILON) || (v < -FLT_EPSILON);
+}
+
+void FLUID_3D::setObstacleVelocity(int zBegin, int zEnd)
+{
+	// linear index of the first interior cell (x = 1, y = 1, z = 1)
+	const size_t index_ = _slabSize + _xRes + 1;
+
+	int bb=0;
+	int bt=0;
+
+	// leave out the bottom / top border slab when the range touches it
+	if (zBegin == 0) {bb = 1;}
+	if (zEnd == _zRes) {bt = 1;}
+
+	for (int z = zBegin + bb; z < zEnd - bt; z++)
+	{
+		// first interior cell of slab z
+		size_t index = index_ +(z-1)*_slabSize;
+
+		// index += 2 at the end of a row steps over the two x-border cells
+		for (int y = 1; y < _yRes - 1; y++, index += 2)
+		{
+			for (int x = 1; x < _xRes - 1; x++, index++)
+			{
+				if (_obstacles[index])
+				{
+					// no smoke inside an obstacle cell
+					_density[index] = 0;
+					continue;
+				}
+
+				// x axis: neighbour at index - 1
+				if ((_obstacles[index - 1] & 8) && obstacleVelocityIsSignificant(_xVelocityOb[index - 1]))
+				{
+					_xVelocity[index] = _xVelocityOb[index - 1];
+					_xVelocity[index - 1] = _xVelocityOb[index - 1];
+				}
+
+				// y axis: neighbour at index - _xRes
+				if ((_obstacles[index - _xRes] & 8) && obstacleVelocityIsSignificant(_yVelocityOb[index - _xRes]))
+				{
+					_yVelocity[index] = _yVelocityOb[index - _xRes];
+					_yVelocity[index - _xRes] = _yVelocityOb[index - _xRes];
+				}
+
+				// z axis: neighbour at index - _slabSize
+				if ((_obstacles[index - _slabSize] & 8) && obstacleVelocityIsSignificant(_zVelocityOb[index - _slabSize]))
+				{
+					_zVelocity[index] = _zVelocityOb[index - _slabSize];
+					_zVelocity[index - _slabSize] = _zVelocityOb[index - _slabSize];
+				}
+			} // x loop
+		} // y loop
+	} // z loop
+}
//////////////////////////////////////////////////////////////////////
// diffuse heat
@@ -892,7 +1072,7 @@ void FLUID_3D::addObstacle(OBSTACLE* obstacle)
void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd)
{
- // compleately TODO
+ // completely TODO <-- who wrote this and what is here TODO? DG
const size_t index_ = _slabSize + _xRes + 1;
@@ -914,7 +1094,7 @@ void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd)
for (int x = 1; x < _xRes - 1; x++, index++)
{
// could do cascade of ifs, but they are a pain
- if (_obstacles[index])
+ if (_obstacles[index] /* && !(_obstacles[index] & 8) DG TODO TEST THIS CONDITION */)
{
const int top = _obstacles[index + _slabSize];
const int bottom= _obstacles[index - _slabSize];
@@ -928,9 +1108,11 @@ void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd)
// const bool fully = (up && down);
//const bool fullx = (left && right);
+ /*
_xVelocity[index] =
_yVelocity[index] =
_zVelocity[index] = 0.0f;
+ */
_pressure[index] = 0.0f;
// average pressure neighbors
@@ -1253,7 +1435,35 @@ void FLUID_3D::advectMacCormackEnd2(int zBegin, int zEnd)
setZeroBorder(_density, res, zBegin, zEnd);
setZeroBorder(_heat, res, zBegin, zEnd);
+#if 0
+ {
+ const size_t index_ = _slabSize + _xRes + 1;
+ int bb=0;
+ int bt=0;
+
+ if (zBegin == 0) {bb = 1;}
+ if (zEnd == _zRes) {bt = 1;}
+
+ for (int z = zBegin + bb; z < zEnd - bt; z++)
+ {
+ size_t index = index_ +(z-1)*_slabSize;
+ for (int y = 1; y < _yRes - 1; y++, index += 2)
+ {
+ for (int x = 1; x < _xRes - 1; x++, index++)
+ {
+ // clean custom velocities from moving obstacles again
+ if (_obstacles[index])
+ {
+ _xVelocity[index] =
+ _yVelocity[index] =
+ _zVelocity[index] = 0.0f;
+ }
+ }
+ }
+ }
+ }
+#endif
/*int begin=zBegin * _slabSize;
int end=begin + (zEnd - zBegin) * _slabSize;
diff --git a/intern/smoke/intern/FLUID_3D.h b/intern/smoke/intern/FLUID_3D.h
index c9e18926fb2..5704cba3ed4 100644
--- a/intern/smoke/intern/FLUID_3D.h
+++ b/intern/smoke/intern/FLUID_3D.h
@@ -39,9 +39,6 @@
// #include "WTURBULENCE.h"
#include "VEC3.h"
-// timestep default value for nice appearance
-#define DT_DEFAULT 0.1f;
-
using namespace std;
using namespace BasicVector;
class WTURBULENCE;
@@ -49,7 +46,7 @@ class WTURBULENCE;
class FLUID_3D
{
public:
- FLUID_3D(int *res, /* int amplify, */ float *p0);
+ FLUID_3D(int *res, /* int amplify, */ float *p0, float dtdef);
FLUID_3D() {};
virtual ~FLUID_3D();
@@ -72,7 +69,7 @@ class FLUID_3D
int yRes() const { return _yRes; };
int zRes() const { return _zRes; };
- public:
+ public:
// dimensions
int _xRes, _yRes, _zRes, _maxRes;
Vec3Int _res;
@@ -89,6 +86,8 @@ class FLUID_3D
void artificialDampingSL(int zBegin, int zEnd);
void artificialDampingExactSL(int pos);
+ void setBorderObstacles();
+
// fields
float* _density;
float* _densityOld;
@@ -97,13 +96,17 @@ class FLUID_3D
float* _xVelocity;
float* _yVelocity;
float* _zVelocity;
+ float* _xVelocityOb;
+ float* _yVelocityOb;
+ float* _zVelocityOb;
float* _xVelocityOld;
float* _yVelocityOld;
float* _zVelocityOld;
float* _xForce;
float* _yForce;
float* _zForce;
- unsigned char* _obstacles;
+ unsigned char* _obstacles; /* only used (useful) for static obstacles like domain boundaries */
+ unsigned char* _obstaclesAnim;
// Required for proper threading:
float* _xVelocityTemp;
@@ -137,6 +140,8 @@ class FLUID_3D
// have to recalibrate borders if nothing has changed
void setBorderCollisions();
+ void setObstacleVelocity(int zBegin, int zEnd);
+
// WTURBULENCE object, if active
// WTURBULENCE* _wTurbulence;
diff --git a/intern/smoke/intern/OBSTACLE.h b/intern/smoke/intern/OBSTACLE.h
index 61d47b727f0..da8ec6be024 100644
--- a/intern/smoke/intern/OBSTACLE.h
+++ b/intern/smoke/intern/OBSTACLE.h
@@ -27,9 +27,11 @@
#define OBSTACLE_H
enum OBSTACLE_FLAGS {
- EMPTY = 0,
+ EMPTY = 0,
+ /* 1 is used to flag an object cell */
MARCHED = 2,
- RETIRED = 4
+ RETIRED = 4,
+ ANIMATED = 8,
};
class OBSTACLE
diff --git a/intern/smoke/intern/WTURBULENCE.cpp b/intern/smoke/intern/WTURBULENCE.cpp
index cd18cf7b344..83bec466c9f 100644
--- a/intern/smoke/intern/WTURBULENCE.cpp
+++ b/intern/smoke/intern/WTURBULENCE.cpp
@@ -431,8 +431,11 @@ void WTURBULENCE::decomposeEnergy(float *_energy, float *_highFreqEnergy)
// compute velocity from energies and march into obstacles
// for wavelet decomposition
//////////////////////////////////////////////////////////////////////
-void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* zvel, unsigned char *obstacles)
+void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* zvel, unsigned char *origObstacles)
{
+ unsigned char *obstacles = new unsigned char[_totalCellsSm];
+ memcpy(obstacles, origObstacles, sizeof(unsigned char) * _totalCellsSm);
+
// compute everywhere
for (int x = 0; x < _totalCellsSm; x++)
_energy[x] = 0.5f * (xvel[x] * xvel[x] + yvel[x] * yvel[x] + zvel[x] * zvel[x]);
@@ -506,7 +509,9 @@ void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float*
for (int y = 1; y < _yResSm - 1; y++, index += 2)
for (int x = 1; x < _xResSm - 1; x++, index++)
if (obstacles[index])
- obstacles[index] = 1;
+ obstacles[index] = 1; // DG TODO ? animated obstacle flag?
+
+ // 'obstacles' is the scratch copy created with new[] at the top of this
+ // function, so it must be released with delete[]; calling free() on
+ // new[] memory is undefined behaviour.
+ delete[] obstacles;
}
//////////////////////////////////////////////////////////////////////////////////////////
diff --git a/intern/smoke/intern/smoke_API.cpp b/intern/smoke/intern/smoke_API.cpp
index a2f3c21bbbf..78f7d35360a 100644
--- a/intern/smoke/intern/smoke_API.cpp
+++ b/intern/smoke/intern/smoke_API.cpp
@@ -19,6 +19,7 @@
* All rights reserved.
*
* Contributor(s): Daniel Genrich
+ * Blender Foundation
*
* ***** END GPL LICENSE BLOCK *****
*/
@@ -36,10 +37,10 @@
#include <math.h>
// y in smoke is z in blender
-extern "C" FLUID_3D *smoke_init(int *res, float *p0)
+extern "C" FLUID_3D *smoke_init(int *res, float *p0, float dtdef)
{
// smoke lib uses y as top-bottom/vertical axis where blender uses z
- FLUID_3D *fluid = new FLUID_3D(res, p0);
+ FLUID_3D *fluid = new FLUID_3D(res, p0, dtdef);
// printf("xres: %d, yres: %d, zres: %d\n", res[0], res[1], res[2]);
@@ -78,41 +79,9 @@ extern "C" size_t smoke_get_index2d(int x, int max_x, int y /*, int max_y, int z
return x + y * max_x;
}
-extern "C" void smoke_step(FLUID_3D *fluid, size_t framenr, float fps)
+extern "C" void smoke_step(FLUID_3D *fluid, float dtSubdiv)
{
- /* stability values copied from wturbulence.cpp */
- const int maxSubSteps = 25;
- const float maxVel = 0.5f; /* TODO: maybe 0.5 is still too high, please confirm! -dg */
-
- float dt = DT_DEFAULT;
- float maxVelMag = 0.0f;
- int totalSubsteps;
- int substep = 0;
- float dtSubdiv;
-
- /* get max velocity and lower the dt value if it is too high */
- size_t size= fluid->_xRes * fluid->_yRes * fluid->_zRes;
-
- for(size_t i = 0; i < size; i++)
- {
- float vtemp = (fluid->_xVelocity[i]*fluid->_xVelocity[i]+fluid->_yVelocity[i]*fluid->_yVelocity[i]+fluid->_zVelocity[i]*fluid->_zVelocity[i]);
- if(vtemp > maxVelMag)
- maxVelMag = vtemp;
- }
-
- /* adapt timestep for different framerates, dt = 0.1 is at 25fps */
- dt *= (25.0f / fps);
-
- maxVelMag = sqrt(maxVelMag) * dt * (*(fluid->_dtFactor));
- totalSubsteps = (int)((maxVelMag / maxVel) + 1.0f); /* always round up */
- totalSubsteps = (totalSubsteps < 1) ? 1 : totalSubsteps;
- totalSubsteps = (totalSubsteps > maxSubSteps) ? maxSubSteps : totalSubsteps;
- dtSubdiv = (float)dt / (float)totalSubsteps;
-
- // printf("totalSubsteps: %d, maxVelMag: %f, dt: %f\n", totalSubsteps, maxVelMag, dt);
-
- for(substep = 0; substep < totalSubsteps; substep++)
- fluid->step(dtSubdiv);
+ fluid->step(dtSubdiv);
}
extern "C" void smoke_turbulence_step(WTURBULENCE *wt, FLUID_3D *fluid)
@@ -307,6 +276,18 @@ extern "C" unsigned char *smoke_get_obstacle(FLUID_3D *fluid)
return fluid->_obstacles;
}
+/* Hand out the solver's obstacle velocity fields: on return *x, *y and *z
+ * point at the _xVelocityOb / _yVelocityOb / _zVelocityOb arrays owned by
+ * 'fluid'. The pointers are passed through as-is; nothing is copied. */
+extern "C" void smoke_get_ob_velocity(struct FLUID_3D *fluid, float **x, float **y, float **z)
+{
+	FLUID_3D &solver = *fluid;
+
+	*x = solver._xVelocityOb;
+	*y = solver._yVelocityOb;
+	*z = solver._zVelocityOb;
+}
+
+/* Return the solver's _obstaclesAnim pointer unchanged.
+ * NOTE(review): no allocation or initialisation of _obstaclesAnim is visible
+ * in this change - confirm it is set before callers dereference the result. */
+extern "C" unsigned char *smoke_get_obstacle_anim(FLUID_3D *fluid)
+{
+	unsigned char *animFlags = fluid->_obstaclesAnim;
+	return animFlags;
+}
+
extern "C" void smoke_turbulence_set_noise(WTURBULENCE *wt, int type)
{
wt->setNoise(type);
diff --git a/intern/utfconv/utf_winfunc.c b/intern/utfconv/utf_winfunc.c
index 2e200ea3ad3..68d1d6bb403 100644
--- a/intern/utfconv/utf_winfunc.c
+++ b/intern/utfconv/utf_winfunc.c
@@ -39,7 +39,7 @@ FILE * ufopen(const char * filename, const char * mode)
UTF16_ENCODE(filename);
UTF16_ENCODE (mode);
- if(filename_16 && mode_16) {
+ if (filename_16 && mode_16) {
f = _wfopen(filename_16, mode_16);
}
@@ -81,7 +81,7 @@ int urename(const char *oldname, const char *newname )
UTF16_ENCODE(oldname);
UTF16_ENCODE (newname);
- if(oldname_16 && newname_16) r = _wrename(oldname_16, newname_16);
+ if (oldname_16 && newname_16) r = _wrename(oldname_16, newname_16);
UTF16_UN_ENCODE(newname);
UTF16_UN_ENCODE(oldname);
@@ -94,7 +94,7 @@ int umkdir(const char *pathname)
BOOL r = 0;
UTF16_ENCODE(pathname);
- if(pathname_16) r = CreateDirectoryW(pathname_16, NULL);
+ if (pathname_16) r = CreateDirectoryW(pathname_16, NULL);
UTF16_UN_ENCODE(pathname);
@@ -123,10 +123,10 @@ int uput_getenv(const char *varname, char * value, size_t buffsize)
{
int r = 0;
wchar_t * str;
- if(!buffsize) return r;
+ if (!buffsize) return r;
UTF16_ENCODE(varname);
- if(varname_16) {
+ if (varname_16) {
str = _wgetenv(varname_16);
conv_utf_16_to_8(str, value, buffsize);
r = 1;
@@ -143,7 +143,7 @@ int uputenv(const char *name, const char *value)
int r = -1;
UTF16_ENCODE(name);
UTF16_ENCODE(value);
- if(name_16 && value_16) {
+ if (name_16 && value_16) {
r = (SetEnvironmentVariableW(name_16,value_16)!= 0) ? 0 : -1;
}
UTF16_UN_ENCODE(value);