diff options
Diffstat (limited to 'intern')
59 files changed, 2996 insertions, 1189 deletions
diff --git a/intern/audaspace/intern/AUD_C-API.cpp b/intern/audaspace/intern/AUD_C-API.cpp index 9100a277124..50b47650696 100644 --- a/intern/audaspace/intern/AUD_C-API.cpp +++ b/intern/audaspace/intern/AUD_C-API.cpp @@ -41,6 +41,7 @@ #include <cstdlib> #include <cstring> #include <cmath> +#include <sstream> #include "AUD_NULLDevice.h" #include "AUD_I3DDevice.h" @@ -1236,6 +1237,47 @@ const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int lengt } } +const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate) +{ + try + { + AUD_SequencerFactory* f = dynamic_cast<AUD_SequencerFactory*>(sound->get()); + + f->setSpecs(specs.specs); + + std::vector<AUD_Reference<AUD_IWriter> > writers; + + int channels = specs.channels; + specs.channels = AUD_CHANNELS_MONO; + + for(int i = 0; i < channels; i++) + { + std::stringstream stream; + std::string fn = filename; + size_t index = fn.find_last_of('.'); + size_t index_slash = fn.find_last_of('/'); + size_t index_backslash = fn.find_last_of('\\'); + if((index == std::string::npos) || + ((index < index_slash) && (index_slash != std::string::npos)) || + ((index < index_backslash) && (index_backslash != std::string::npos))) + stream << filename << "_" << (i + 1); + else + stream << fn.substr(0, index) << "_" << (i + 1) << fn.substr(index); + writers.push_back(AUD_FileWriter::createWriter(stream.str(), specs, format, codec, bitrate)); + } + + AUD_Reference<AUD_IReader> reader = f->createQualityReader(); + reader->seek(start); + AUD_FileWriter::writeReader(reader, writers, length, buffersize); + + return NULL; + } + catch(AUD_Exception& e) + { + return e.str; + } +} + AUD_Device* AUD_openMixdownDevice(AUD_DeviceSpecs specs, AUD_Sound* sequencer, float volume, float start) { try diff --git a/intern/audaspace/intern/AUD_C-API.h 
b/intern/audaspace/intern/AUD_C-API.h index 8388af2170d..a52a1fa8369 100644 --- a/intern/audaspace/intern/AUD_C-API.h +++ b/intern/audaspace/intern/AUD_C-API.h @@ -710,6 +710,21 @@ extern void* AUD_getSet(void* set); extern const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate); /** + * Mixes a sound down into multiple files. + * \param sound The sound scene to mix down. + * \param start The start frame. + * \param length The count of frames to write. + * \param buffersize How many samples should be written at once. + * \param filename The file to write to, the channel number and an underscore are added at the beginning. + * \param specs The file's audio specification. + * \param format The file's container format. + * \param codec The codec used for encoding the audio data. + * \param bitrate The bitrate for encoding. + * \return An error message or NULL in case of success. + */ +extern const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate); + +/** * Opens a read device and prepares it for mixdown of the sound scene. * \param specs Output audio specifications. * \param sequencer The sound scene to mix down. 
diff --git a/intern/audaspace/intern/AUD_FileWriter.cpp b/intern/audaspace/intern/AUD_FileWriter.cpp index df76b667e3f..f74021acad1 100644 --- a/intern/audaspace/intern/AUD_FileWriter.cpp +++ b/intern/audaspace/intern/AUD_FileWriter.cpp @@ -93,3 +93,39 @@ void AUD_FileWriter::writeReader(AUD_Reference<AUD_IReader> reader, AUD_Referenc writer->write(len, buf); } } + +void AUD_FileWriter::writeReader(AUD_Reference<AUD_IReader> reader, std::vector<AUD_Reference<AUD_IWriter> >& writers, unsigned int length, unsigned int buffersize) +{ + AUD_Buffer buffer(buffersize * AUD_SAMPLE_SIZE(reader->getSpecs())); + AUD_Buffer buffer2(buffersize * sizeof(sample_t)); + sample_t* buf = buffer.getBuffer(); + sample_t* buf2 = buffer2.getBuffer(); + + int len; + bool eos = false; + int channels = reader->getSpecs().channels; + + for(unsigned int pos = 0; ((pos < length) || (length <= 0)) && !eos; pos += len) + { + len = buffersize; + if((len > length - pos) && (length > 0)) + len = length - pos; + reader->read(len, eos, buf); + + for(int channel = 0; channel < channels; channel++) + { + for(int i = 0; i < len; i++) + { + // clamping! + if(buf[i * channels + channel] > 1) + buf2[i] = 1; + else if(buf[i * channels + channel] < -1) + buf2[i] = -1; + else + buf2[i] = buf[i * channels + channel]; + } + + writers[channel]->write(len, buf2); + } + } +} diff --git a/intern/audaspace/intern/AUD_FileWriter.h b/intern/audaspace/intern/AUD_FileWriter.h index c9ee2b1ee12..385aba5ef45 100644 --- a/intern/audaspace/intern/AUD_FileWriter.h +++ b/intern/audaspace/intern/AUD_FileWriter.h @@ -31,6 +31,7 @@ #define __AUD_FILEWRITER_H__ #include <string> +#include <vector> #include "AUD_Reference.h" @@ -68,6 +69,15 @@ public: * \param buffersize How many samples should be transfered at once. */ static void writeReader(AUD_Reference<AUD_IReader> reader, AUD_Reference<AUD_IWriter> writer, unsigned int length, unsigned int buffersize); + + /** + * Writes a reader to several writers. 
+ * \param reader The reader to read from. + * \param writers The writers to write to. + * \param length How many samples should be transfered. + * \param buffersize How many samples should be transfered at once. + */ + static void writeReader(AUD_Reference<AUD_IReader> reader, std::vector<AUD_Reference<AUD_IWriter> >& writers, unsigned int length, unsigned int buffersize); }; #endif //__AUD_FILEWRITER_H__ diff --git a/intern/audaspace/intern/AUD_Reference.h b/intern/audaspace/intern/AUD_Reference.h index 2e07417154b..0c9f02c0155 100644 --- a/intern/audaspace/intern/AUD_Reference.h +++ b/intern/audaspace/intern/AUD_Reference.h @@ -31,6 +31,7 @@ #include <map> #include <cstddef> +#include <pthread.h> // #define MEM_DEBUG @@ -49,8 +50,13 @@ private: * Saves the reference counts. */ static std::map<void*, unsigned int> m_references; + static pthread_mutex_t m_mutex; + static bool m_mutex_initialised; public: + + static pthread_mutex_t* getMutex(); + /** * Reference increment. * \param reference The reference. 
@@ -108,6 +114,7 @@ public: template <class U> AUD_Reference(U* reference) { + pthread_mutex_lock(AUD_ReferenceHandler::getMutex()); m_original = reference; m_reference = dynamic_cast<T*>(reference); AUD_ReferenceHandler::incref(m_original); @@ -115,6 +122,7 @@ public: if(m_reference != NULL) std::cerr << "+" << typeid(*m_reference).name() << std::endl; #endif + pthread_mutex_unlock(AUD_ReferenceHandler::getMutex()); } AUD_Reference() @@ -129,6 +137,7 @@ public: */ AUD_Reference(const AUD_Reference& ref) { + pthread_mutex_lock(AUD_ReferenceHandler::getMutex()); m_original = ref.m_original; m_reference = ref.m_reference; AUD_ReferenceHandler::incref(m_original); @@ -136,11 +145,13 @@ public: if(m_reference != NULL) std::cerr << "+" << typeid(*m_reference).name() << std::endl; #endif + pthread_mutex_unlock(AUD_ReferenceHandler::getMutex()); } template <class U> explicit AUD_Reference(const AUD_Reference<U>& ref) { + pthread_mutex_lock(AUD_ReferenceHandler::getMutex()); m_original = ref.get(); m_reference = dynamic_cast<T*>(ref.get()); AUD_ReferenceHandler::incref(m_original); @@ -148,6 +159,7 @@ public: if(m_reference != NULL) std::cerr << "+" << typeid(*m_reference).name() << std::endl; #endif + pthread_mutex_unlock(AUD_ReferenceHandler::getMutex()); } /** @@ -156,12 +168,14 @@ public: */ ~AUD_Reference() { + pthread_mutex_lock(AUD_ReferenceHandler::getMutex()); #ifdef MEM_DEBUG if(m_reference != NULL) std::cerr << "-" << typeid(*m_reference).name() << std::endl; #endif if(AUD_ReferenceHandler::decref(m_original)) delete m_reference; + pthread_mutex_unlock(AUD_ReferenceHandler::getMutex()); } /** @@ -173,6 +187,8 @@ public: if(&ref == this) return *this; + pthread_mutex_lock(AUD_ReferenceHandler::getMutex()); + #ifdef MEM_DEBUG if(m_reference != NULL) std::cerr << "-" << typeid(*m_reference).name() << std::endl; @@ -188,6 +204,8 @@ public: std::cerr << "+" << typeid(*m_reference).name() << std::endl; #endif + pthread_mutex_unlock(AUD_ReferenceHandler::getMutex()); + 
return *this; } diff --git a/intern/audaspace/intern/AUD_ReferenceHandler.cpp b/intern/audaspace/intern/AUD_ReferenceHandler.cpp index 24f645df761..3e9f6707262 100644 --- a/intern/audaspace/intern/AUD_ReferenceHandler.cpp +++ b/intern/audaspace/intern/AUD_ReferenceHandler.cpp @@ -29,3 +29,24 @@ #include "AUD_Reference.h" std::map<void*, unsigned int> AUD_ReferenceHandler::m_references; +pthread_mutex_t AUD_ReferenceHandler::m_mutex; +bool AUD_ReferenceHandler::m_mutex_initialised = false; + +pthread_mutex_t *AUD_ReferenceHandler::getMutex() +{ + if(!m_mutex_initialised) + { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + + pthread_mutex_init(&m_mutex, &attr); + + pthread_mutexattr_destroy(&attr); + + m_mutex_initialised = true; + } + + return &m_mutex; +} + diff --git a/intern/container/CTR_Map.h b/intern/container/CTR_Map.h index 8b6d84337c2..9557821d642 100644 --- a/intern/container/CTR_Map.h +++ b/intern/container/CTR_Map.h @@ -63,7 +63,7 @@ public: for (int i = 0; i < m_num_buckets; ++i) { m_buckets[i] = 0; - for(Entry *entry = map.m_buckets[i]; entry; entry=entry->m_next) + for (Entry *entry = map.m_buckets[i]; entry; entry=entry->m_next) insert(entry->m_key, entry->m_value); } } diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index cb99ea3b499..35f97bf629f 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -85,10 +85,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): description="Leave out caustics, resulting in a darker image with less noise", default=False, ) - cls.blur_caustics = FloatProperty( - name="Blur Caustics", - description="Blur caustics to reduce noise", - min=0.0, max=1.0, + cls.blur_glossy = FloatProperty( + name="Filter Glossy", + description="Adaptively blur glossy shaders after blurry bounces, to reduce noise at the cost of accuracy", + min=0.0, max=10.0, 
default=0.0, ) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 4a8b639b390..0ed08589327 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -87,11 +87,11 @@ class CyclesRender_PT_integrator(CyclesButtonsPanel, Panel): sub.prop(cscene, "diffuse_bounces", text="Diffuse") sub.prop(cscene, "glossy_bounces", text="Glossy") sub.prop(cscene, "transmission_bounces", text="Transmission") - sub.prop(cscene, "no_caustics") - #row = col.row() - #row.prop(cscene, "blur_caustics") - #row.active = not cscene.no_caustics + col.separator() + + col.prop(cscene, "no_caustics") + col.prop(cscene, "blur_glossy") class CyclesRender_PT_film(CyclesButtonsPanel, Panel): @@ -178,10 +178,7 @@ class CyclesRender_PT_layers(CyclesButtonsPanel, Panel): col = split.column() col.prop(scene, "layers", text="Scene") - col.label(text="Material:") - col.prop(rl, "material_override", text="") - - col.prop(rl, "use_sky", "Use Environment") + col.prop(rl, "layers_exclude", text="Exclude") col = split.column() col.prop(rl, "layers", text="Layer") @@ -191,6 +188,16 @@ class CyclesRender_PT_layers(CyclesButtonsPanel, Panel): split = layout.split() col = split.column() + col.label(text="Material:") + col.prop(rl, "material_override", text="") + + col = split.column() + col.prop(rl, "samples") + col.prop(rl, "use_sky", "Use Environment") + + split = layout.split() + + col = split.column() col.label(text="Passes:") col.prop(rl, "use_pass_combined") col.prop(rl, "use_pass_z") @@ -783,6 +790,31 @@ class CyclesTexture_PT_colors(CyclesButtonsPanel, Panel): layout.template_color_ramp(mapping, "color_ramp", expand=True) +class CyclesScene_PT_simplify(CyclesButtonsPanel, Panel): + bl_label = "Simplify" + bl_context = "scene" + COMPAT_ENGINES = {'CYCLES'} + + def draw_header(self, context): + rd = context.scene.render + self.layout.prop(rd, "use_simplify", text="") + + def draw(self, context): + layout = self.layout + + rd = 
context.scene.render + + layout.active = rd.use_simplify + + split = layout.split() + + col = split.column() + col.prop(rd, "simplify_subdivision", text="Subdivision") + + col = split.column() + col.prop(rd, "simplify_child_particles", text="Child Particles") + + def draw_device(self, context): scene = context.scene layout = self.layout diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index dc6c69e2904..5ece7aa26e2 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -218,12 +218,13 @@ void BlenderSession::render() scene->film->passes = passes; scene->film->tag_update(scene); - /* update session */ - session->reset(buffer_params, session_params.samples); - /* update scene */ sync->sync_data(b_v3d, b_iter->name().c_str()); + /* update session */ + int samples = sync->get_layer_samples(); + session->reset(buffer_params, (samples == 0)? session_params.samples: samples); + /* render */ session->start(); session->wait(); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 5a286298774..41cd200d003 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -153,6 +153,8 @@ void BlenderSync::sync_integrator() integrator->transparent_shadows = get_boolean(cscene, "use_transparent_shadows"); integrator->no_caustics = get_boolean(cscene, "no_caustics"); + integrator->filter_glossy = get_float(cscene, "blur_glossy"); + integrator->seed = get_int(cscene, "seed"); integrator->layer_flag = render_layer.layer; @@ -208,6 +210,7 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer) render_layer.holdout_layer = 0; render_layer.material_override = PointerRNA_NULL; render_layer.use_background = true; + render_layer.samples = 0; return; } } @@ -220,12 +223,13 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer) for(r.layers.begin(b_rlay); 
b_rlay != r.layers.end(); ++b_rlay) { if((!layer && first_layer) || (layer && b_rlay->name() == layer)) { render_layer.name = b_rlay->name(); - render_layer.scene_layer = get_layer(b_scene.layers()); + render_layer.scene_layer = get_layer(b_scene.layers()) & ~get_layer(b_rlay->layers_exclude()); render_layer.layer = get_layer(b_rlay->layers()); render_layer.holdout_layer = get_layer(b_rlay->layers_zmask()); render_layer.layer |= render_layer.holdout_layer; render_layer.material_override = b_rlay->material_override(); render_layer.use_background = b_rlay->use_sky(); + render_layer.samples = b_rlay->samples(); } first_layer = false; diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index d2550a1ffd7..ab8e4bd8d00 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -57,6 +57,7 @@ public: void sync_data(BL::SpaceView3D b_v3d, const char *layer = 0); void sync_camera(BL::Object b_override, int width, int height); void sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int width, int height); + int get_layer_samples() { return render_layer.samples; } /* get parameters */ static SceneParams get_scene_params(BL::Scene b_scene, bool background); @@ -108,7 +109,8 @@ private: RenderLayerInfo() : scene_layer(0), layer(0), holdout_layer(0), material_override(PointerRNA_NULL), - use_background(true) + use_background(true), + samples(0) {} string name; @@ -117,6 +119,7 @@ private: uint holdout_layer; BL::Material material_override; bool use_background; + int samples; } render_layer; }; diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index decc576fe51..131a7a1f750 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -10,17 +10,21 @@ set(INC set(SRC bvh.cpp + bvh_binning.cpp bvh_build.cpp bvh_node.cpp bvh_sort.cpp + bvh_split.cpp ) set(SRC_HEADERS bvh.h + bvh_binning.h bvh_build.h bvh_node.h bvh_params.h bvh_sort.h + 
bvh_split.h ) include_directories(${INC}) diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index c9bfa964332..15695dddf45 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -530,7 +530,7 @@ void RegularBVH::refit_nodes() { assert(!params.top_level); - BoundBox bbox; + BoundBox bbox = BoundBox::empty; uint visibility = 0; refit_node(0, (pack.is_leaf[0])? true: false, bbox, visibility); } @@ -572,7 +572,7 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility } else { /* refit inner node, set bbox from children */ - BoundBox bbox0, bbox1; + BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty; uint visibility0 = 0, visibility1 = 0; refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0); diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp new file mode 100644 index 00000000000..661541a8d23 --- /dev/null +++ b/intern/cycles/bvh/bvh_binning.cpp @@ -0,0 +1,223 @@ +/* + * Adapted from code copyright 2009-2011 Intel Corporation + * Modifications Copyright 2012, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//#define __KERNEL_SSE__ + +#include <stdlib.h> + +#include "bvh_binning.h" + +#include "util_algorithm.h" +#include "util_boundbox.h" +#include "util_types.h" + +CCL_NAMESPACE_BEGIN + +/* SSE replacements */ + +__forceinline void prefetch_L1 (const void* ptr) { } +__forceinline void prefetch_L2 (const void* ptr) { } +__forceinline void prefetch_L3 (const void* ptr) { } +__forceinline void prefetch_NTA(const void* ptr) { } + +template<size_t src> __forceinline float extract(const int4& b) +{ return b[src]; } +template<size_t dst> __forceinline const float4 insert(const float4& a, const float b) +{ float4 r = a; r[dst] = b; return r; } + +__forceinline int get_best_dimension(const float4& bestSAH) +{ + // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH)); + + float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z)); + + if(bestSAH.x == minSAH) return 0; + else if(bestSAH.y == minSAH) return 1; + else return 2; +} + +/* BVH Object Binning */ + +BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims) +: BVHRange(job), splitSAH(FLT_MAX), dim(0), pos(0) +{ + /* compute number of bins to use and precompute scaling factor for binning */ + num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f*size())); + scale = rcp(cent_bounds().size()) * make_float3((float)num_bins); + + /* initialize binning counter and bounds */ + BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */ + int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */ + + for(size_t i = 0; i < num_bins; i++) { + bin_count[i] = make_int4(0); + bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty; + } + + /* map geometry to bins, unrolled once */ + { + ssize_t i; + + for(i = 0; i < ssize_t(size()) - 1; i += 2) { + prefetch_L2(&prims[start() + i + 8]); + + /* map even and odd primitive to bin */ + BVHReference prim0 = prims[start() + i + 0]; + BVHReference prim1 = prims[start() + i + 1]; + + int4 bin0 = 
get_bin(prim0.bounds()); + int4 bin1 = get_bin(prim1.bounds()); + + /* increase bounds for bins for even primitive */ + int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); + int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); + int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); + + /* increase bounds of bins for odd primitive */ + int b10 = extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds()); + int b11 = extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds()); + int b12 = extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds()); + } + + /* for uneven number of primitives */ + if(i < ssize_t(size())) { + /* map primitive to bin */ + BVHReference prim0 = prims[start() + i]; + int4 bin0 = get_bin(prim0.bounds()); + + /* increase bounds of bins */ + int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); + int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); + int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); + } + } + + /* sweep from right to left and compute parallel prefix of merged bounds */ + float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */ + float4 r_count[MAX_BINS]; /* number of primitives on the right */ + int4 count = make_int4(0); + + BoundBox bx = BoundBox::empty; + BoundBox by = BoundBox::empty; + BoundBox bz = BoundBox::empty; + + for(size_t i = num_bins - 1; i > 0; i--) { + count = count + bin_count[i]; + r_count[i] = blocks(count); + + bx = merge(bx,bin_bounds[i][0]); r_area[i][0] = bx.half_area(); + by = merge(by,bin_bounds[i][1]); r_area[i][1] = by.half_area(); + bz = merge(bz,bin_bounds[i][2]); r_area[i][2] = bz.half_area(); + } + + /* sweep from left to right and compute SAH */ + int4 ii = make_int4(1); + float4 bestSAH = 
make_float4(FLT_MAX); + int4 bestSplit = make_int4(-1); + + count = make_int4(0); + + bx = BoundBox::empty; + by = BoundBox::empty; + bz = BoundBox::empty; + + for(size_t i = 1; i < num_bins; i++, ii += make_int4(1)) { + count = count + bin_count[i-1]; + + bx = merge(bx,bin_bounds[i-1][0]); float Ax = bx.half_area(); + by = merge(by,bin_bounds[i-1][1]); float Ay = by.half_area(); + bz = merge(bz,bin_bounds[i-1][2]); float Az = bz.half_area(); + + float4 lCount = blocks(count); + float4 lArea = make_float4(Ax,Ay,Az,Az); + float4 sah = lArea*lCount + r_area[i]*r_count[i]; + + bestSplit = select(sah < bestSAH,ii,bestSplit); + bestSAH = min(sah,bestSAH); + } + + int4 mask = float3_to_float4(cent_bounds().size()) <= make_float4(0.0f); + bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX); + + /* find best dimension */ + dim = get_best_dimension(bestSAH); + splitSAH = bestSAH[dim]; + pos = bestSplit[dim]; + leafSAH = bounds().half_area() * blocks(size()); +} + +void BVHObjectBinning::split(BVHReference* prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const +{ + size_t N = size(); + + BoundBox lgeom_bounds = BoundBox::empty; + BoundBox rgeom_bounds = BoundBox::empty; + BoundBox lcent_bounds = BoundBox::empty; + BoundBox rcent_bounds = BoundBox::empty; + + ssize_t l = 0, r = N-1; + + while(l <= r) { + prefetch_L2(&prims[start() + l + 8]); + prefetch_L2(&prims[start() + r - 8]); + + BVHReference prim = prims[start() + l]; + float3 center = prim.bounds().center2(); + + if(get_bin(center)[dim] < pos) { + lgeom_bounds.grow(prim.bounds()); + lcent_bounds.grow(center); + l++; + } + else { + rgeom_bounds.grow(prim.bounds()); + rcent_bounds.grow(center); + swap(prims[start()+l],prims[start()+r]); + r--; + } + } + + /* finish */ + if(l != 0 && N-1-r != 0) { + right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims); + left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims); + return; + } 
+ + /* object medium split if we did not make progress, can happen when all + primitives have same centroid */ + lgeom_bounds = BoundBox::empty; + rgeom_bounds = BoundBox::empty; + lcent_bounds = BoundBox::empty; + rcent_bounds = BoundBox::empty; + + for(size_t i = 0; i < N/2; i++) { + lgeom_bounds.grow(prims[start()+i].bounds()); + lcent_bounds.grow(prims[start()+i].bounds().center2()); + } + + for(size_t i = N/2; i < N; i++) { + rgeom_bounds.grow(prims[start()+i].bounds()); + rcent_bounds.grow(prims[start()+i].bounds().center2()); + } + + right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims); + left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N/2), prims); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/bvh/bvh_binning.h b/intern/cycles/bvh/bvh_binning.h new file mode 100644 index 00000000000..60742157055 --- /dev/null +++ b/intern/cycles/bvh/bvh_binning.h @@ -0,0 +1,86 @@ +/* + * Adapted from code copyright 2009-2011 Intel Corporation + * Modifications Copyright 2012, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_BINNING_H__ +#define __BVH_BINNING_H__ + +#include "bvh_params.h" + +#include "util_types.h" + +CCL_NAMESPACE_BEGIN + +/* Single threaded object binner. Finds the split with the best SAH heuristic + * by testing for each dimension multiple partitionings for regular spaced + * partition locations. 
A partitioning for a partition location is computed, + * by putting primitives whose centroid is on the left and right of the split + * location to different sets. The SAH is evaluated by computing the number of + * blocks occupied by the primitives in the partitions. */ + +class BVHObjectBinning : public BVHRange +{ +public: + __forceinline BVHObjectBinning() {} + BVHObjectBinning(const BVHRange& job, BVHReference *prims); + + void split(BVHReference *prims, BVHObjectBinning& left_o, BVHObjectBinning& right_o) const; + + float splitSAH; /* SAH cost of the best split */ + float leafSAH; /* SAH cost of creating a leaf */ + +protected: + int dim; /* best split dimension */ + int pos; /* best split position */ + size_t num_bins; /* actual number of bins to use */ + float3 scale; /* scaling factor to compute bin */ + + enum { MAX_BINS = 32 }; + enum { LOG_BLOCK_SIZE = 2 }; + + /* computes the bin numbers for each dimension for a box. */ + __forceinline int4 get_bin(const BoundBox& box) const + { + int4 a = make_int4((box.center2() - cent_bounds().min)*scale - make_float3(0.5f)); + int4 mn = make_int4(0); + int4 mx = make_int4((int)num_bins-1); + + return clamp(a, mn, mx); + } + + /* computes the bin numbers for each dimension for a point. */ + __forceinline int4 get_bin(const float3& c) const + { + return make_int4((c - cent_bounds().min)*scale - make_float3(0.5f)); + } + + /* compute the number of blocks occupied for each dimension. */ + __forceinline float4 blocks(const int4& a) const + { + return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE); + } + + /* compute the number of blocks occupied in one dimension. 
*/ + __forceinline int blocks(size_t a) const + { + return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE); + } +}; + +CCL_NAMESPACE_END + +#endif + diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index 38674c2c561..c5b4f1d01ae 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -15,22 +15,36 @@ * limitations under the License. */ +#include "bvh_binning.h" #include "bvh_build.h" #include "bvh_node.h" #include "bvh_params.h" -#include "bvh_sort.h" +#include "bvh_split.h" #include "mesh.h" #include "object.h" #include "scene.h" -#include "util_algorithm.h" +#include "util_debug.h" #include "util_foreach.h" #include "util_progress.h" #include "util_time.h" CCL_NAMESPACE_BEGIN +/* BVH Build Task */ + +class BVHBuildTask : public Task { +public: + BVHBuildTask(InnerNode *node_, int child_, BVHObjectBinning& range_, int level_) + : node(node_), child(child_), level(level_), range(range_) {} + + InnerNode *node; + int child; + int level; + BVHObjectBinning range; +}; + /* Constructor / Destructor */ BVHBuild::BVHBuild(const vector<Object*>& objects_, @@ -41,10 +55,10 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_, prim_object(prim_object_), params(params_), progress(progress_), - progress_start_time(0.0) + progress_start_time(0.0), + task_pool(function_bind(&BVHBuild::thread_build_node, this, _1, _2)) { spatial_min_overlap = 0.0f; - progress_num_duplicates = 0; } BVHBuild::~BVHBuild() @@ -53,57 +67,63 @@ BVHBuild::~BVHBuild() /* Adding References */ -void BVHBuild::add_reference_mesh(NodeSpec& root, Mesh *mesh, int i) +void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i) { for(uint j = 0; j < mesh->triangles.size(); j++) { Mesh::Triangle t = mesh->triangles[j]; - Reference ref; + BoundBox bounds = BoundBox::empty; for(int k = 0; k < 3; k++) { float3 pt = mesh->verts[t.v[k]]; - ref.bounds.grow(pt); + bounds.grow(pt); } - if(ref.bounds.valid()) { - 
ref.prim_index = j; - ref.prim_object = i; - - references.push_back(ref); - root.bounds.grow(ref.bounds); + if(bounds.valid()) { + references.push_back(BVHReference(bounds, j, i)); + root.grow(bounds); + center.grow(bounds.center2()); } } } -void BVHBuild::add_reference_object(NodeSpec& root, Object *ob, int i) +void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i) { - Reference ref; - - ref.prim_index = -1; - ref.prim_object = i; - ref.bounds = ob->bounds; - - references.push_back(ref); - root.bounds.grow(ref.bounds); + references.push_back(BVHReference(ob->bounds, -1, i)); + root.grow(ob->bounds); + center.grow(ob->bounds.center2()); } -void BVHBuild::add_references(NodeSpec& root) +void BVHBuild::add_references(BVHRange& root) { - /* init root spec */ - root.num = 0; - root.bounds = BoundBox(); + /* reserve space for references */ + size_t num_alloc_references = 0; + + foreach(Object *ob, objects) { + if(params.top_level) { + if(ob->mesh->transform_applied) + num_alloc_references += ob->mesh->triangles.size(); + else + num_alloc_references++; + } + else + num_alloc_references += ob->mesh->triangles.size(); + } + + references.reserve(num_alloc_references); - /* add objects */ + /* add references from objects */ + BoundBox bounds = BoundBox::empty, center = BoundBox::empty; int i = 0; foreach(Object *ob, objects) { if(params.top_level) { if(ob->mesh->transform_applied) - add_reference_mesh(root, ob->mesh, i); + add_reference_mesh(bounds, center, ob->mesh, i); else - add_reference_object(root, ob, i); + add_reference_object(bounds, center, ob, i); } else - add_reference_mesh(root, ob->mesh, i); + add_reference_mesh(bounds, center, ob->mesh, i); i++; @@ -111,129 +131,213 @@ void BVHBuild::add_references(NodeSpec& root) } /* happens mostly on empty meshes */ - if(!root.bounds.valid()) - root.bounds.grow(make_float3(0.0f, 0.0f, 0.0f)); + if(!bounds.valid()) + bounds.grow(make_float3(0.0f, 0.0f, 0.0f)); - root.num = references.size(); 
+ root = BVHRange(bounds, center, 0, references.size()); } /* Build */ BVHNode* BVHBuild::run() { - NodeSpec root; + BVHRange root; /* add references */ add_references(root); - if(progress.get_cancel()) return NULL; + if(progress.get_cancel()) + return NULL; /* init spatial splits */ if(params.top_level) /* todo: get rid of this */ params.use_spatial_split = false; - spatial_min_overlap = root.bounds.area() * params.spatial_split_alpha; + spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha; spatial_right_bounds.clear(); - spatial_right_bounds.resize(max(root.num, (int)BVHParams::NUM_SPATIAL_BINS) - 1); + spatial_right_bounds.resize(max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1); /* init progress updates */ - progress_num_duplicates = 0; progress_start_time = time_dt(); + progress_count = 0; + progress_total = references.size(); + progress_original_total = progress_total; + + prim_index.resize(references.size()); + prim_object.resize(references.size()); /* build recursively */ - return build_node(root, 0, 0.0f, 1.0f); + BVHNode *rootnode; + + if(params.use_spatial_split) { + /* singlethreaded spatial split build */ + rootnode = build_node(root, 0); + } + else { + /* multithreaded binning build */ + BVHObjectBinning rootbin(root, &references[0]); + rootnode = build_node(rootbin, 0); + task_pool.wait(); + } + + /* delete if we cancelled */ + if(rootnode) { + if(progress.get_cancel()) { + rootnode->deleteSubtree(); + rootnode = NULL; + } + else if(!params.use_spatial_split) { + /*rotate(rootnode, 4, 5);*/ + rootnode->update_visibility(); + } + } + + return rootnode; } -void BVHBuild::progress_update(float progress_start, float progress_end) +void BVHBuild::progress_update() { if(time_dt() - progress_start_time < 0.25f) return; + + double progress_start = (double)progress_count/(double)progress_total; + double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total; - float duplicates = 
(float)progress_num_duplicates/(float)references.size(); string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%", progress_start*100.0f, duplicates*100.0f); progress.set_substatus(msg); - progress_start_time = time_dt(); + progress_start_time = time_dt(); } -BVHNode* BVHBuild::build_node(const NodeSpec& spec, int level, float progress_start, float progress_end) +void BVHBuild::thread_build_node(Task *task_, int thread_id) { - /* progress update */ - progress_update(progress_start, progress_end); - if(progress.get_cancel()) return NULL; + if(progress.get_cancel()) + return; - /* small enough or too deep => create leaf. */ - if(spec.num <= params.min_leaf_size || level >= BVHParams::MAX_DEPTH) - return create_leaf_node(spec); - - /* find split candidates. */ - float area = spec.bounds.area(); - float leafSAH = area * params.triangle_cost(spec.num); - float nodeSAH = area * params.node_cost(2); - ObjectSplit object = find_object_split(spec, nodeSAH); - SpatialSplit spatial; - - if(params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { - BoundBox overlap = object.left_bounds; - overlap.intersect(object.right_bounds); - - if(overlap.area() >= spatial_min_overlap) - spatial = find_spatial_split(spec, nodeSAH); - } + /* build nodes */ + BVHBuildTask *task = (BVHBuildTask*)task_; + BVHNode *node = build_node(task->range, task->level); + + /* set child in inner node */ + task->node->children[task->child] = node; - /* leaf SAH is the lowest => create leaf. */ - float minSAH = min(min(leafSAH, object.sah), spatial.sah); + /* update progress */ + if(task->range.size() < THREAD_TASK_SIZE) { + /*rotate(node, INT_MAX, 5);*/ - if(minSAH == leafSAH && spec.num <= params.max_leaf_size) - return create_leaf_node(spec); + thread_scoped_lock lock(build_mutex); - /* perform split. 
*/ - NodeSpec left, right; + progress_count += task->range.size(); + progress_update(); + } +} + +/* multithreaded binning builder */ +BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level) +{ + size_t size = range.size(); + float leafSAH = params.sah_triangle_cost * range.leafSAH; + float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_triangle_cost * range.splitSAH; - if(params.use_spatial_split && minSAH == spatial.sah) - do_spatial_split(left, right, spec, spatial); - if(!left.num || !right.num) - do_object_split(left, right, spec, object); + /* make leaf node when threshold reached or SAH tells us */ + if(params.small_enough_for_leaf(size, level) || (size <= params.max_leaf_size && leafSAH < splitSAH)) + return create_leaf_node(range); + + /* perform split */ + BVHObjectBinning left, right; + range.split(&references[0], left, right); /* create inner node. */ - progress_num_duplicates += left.num + right.num - spec.num; + InnerNode *inner; - float progress_mid = lerp(progress_start, progress_end, (float)right.num / (float)(left.num + right.num)); + if(range.size() < THREAD_TASK_SIZE) { + /* local build */ + BVHNode *leftnode = build_node(left, level + 1); + BVHNode *rightnode = build_node(right, level + 1); - BVHNode* rightNode = build_node(right, level + 1, progress_start, progress_mid); - if(progress.get_cancel()) { - if(rightNode) rightNode->deleteSubtree(); - return NULL; + inner = new InnerNode(range.bounds(), leftnode, rightnode); } + else { + /* threaded build */ + inner = new InnerNode(range.bounds()); + + task_pool.push(new BVHBuildTask(inner, 0, left, level + 1), true); + task_pool.push(new BVHBuildTask(inner, 1, right, level + 1), true); + } + + return inner; +} - BVHNode* leftNode = build_node(left, level + 1, progress_mid, progress_end); - if(progress.get_cancel()) { - if(leftNode) leftNode->deleteSubtree(); +/* single threaded spatial split builder */ +BVHNode* BVHBuild::build_node(const BVHRange& range, 
int level) +{ + /* progress update */ + progress_update(); + if(progress.get_cancel()) return NULL; + + /* small enough or too deep => create leaf. */ + if(params.small_enough_for_leaf(range.size(), level)) { + progress_count += range.size(); + return create_leaf_node(range); + } + + /* splitting test */ + BVHMixedSplit split(this, range, level); + + if(split.no_split) { + progress_count += range.size(); + return create_leaf_node(range); } + + /* do split */ + BVHRange left, right; + split.split(this, left, right, range); + + progress_total += left.size() + right.size() - range.size(); + size_t total = progress_total; + + /* leaft node */ + BVHNode *leftnode = build_node(left, level + 1); + + /* right node (modify start for splits) */ + right.set_start(right.start() + progress_total - total); + BVHNode *rightnode = build_node(right, level + 1); - return new InnerNode(spec.bounds, leftNode, rightNode); + /* inner node */ + return new InnerNode(range.bounds(), leftnode, rightnode); } -BVHNode *BVHBuild::create_object_leaf_nodes(const Reference *ref, int num) +/* Create Nodes */ + +BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num) { if(num == 0) { - BoundBox bounds; + BoundBox bounds = BoundBox::empty; return new LeafNode(bounds, 0, 0, 0); } else if(num == 1) { - prim_index.push_back(ref[0].prim_index); - prim_object.push_back(ref[0].prim_object); - uint visibility = objects[ref[0].prim_object]->visibility; - return new LeafNode(ref[0].bounds, visibility, prim_index.size()-1, prim_index.size()); + if(start == prim_index.size()) { + assert(params.use_spatial_split); + + prim_index.push_back(ref->prim_index()); + prim_object.push_back(ref->prim_object()); + } + else { + prim_index[start] = ref->prim_index(); + prim_object[start] = ref->prim_object(); + } + + uint visibility = objects[ref->prim_object()]->visibility; + return new LeafNode(ref->bounds(), visibility, start, start+1); } else { int mid = num/2; - BVHNode *leaf0 = 
create_object_leaf_nodes(ref, mid); - BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, num-mid); + BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid); + BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid); - BoundBox bounds; + BoundBox bounds = BoundBox::empty; bounds.grow(leaf0->m_bounds); bounds.grow(leaf1->m_bounds); @@ -241,310 +345,136 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const Reference *ref, int num) } } -BVHNode* BVHBuild::create_leaf_node(const NodeSpec& spec) +BVHNode* BVHBuild::create_leaf_node(const BVHRange& range) { vector<int>& p_index = prim_index; vector<int>& p_object = prim_object; - BoundBox bounds; - int num = 0; + BoundBox bounds = BoundBox::empty; + int num = 0, ob_num = 0; uint visibility = 0; - for(int i = 0; i < spec.num; i++) { - if(references.back().prim_index != -1) { - p_index.push_back(references.back().prim_index); - p_object.push_back(references.back().prim_object); - bounds.grow(references.back().bounds); - visibility |= objects[references.back().prim_object]->visibility; - references.pop_back(); + for(int i = 0; i < range.size(); i++) { + BVHReference& ref = references[range.start() + i]; + + if(ref.prim_index() != -1) { + if(range.start() + num == prim_index.size()) { + assert(params.use_spatial_split); + + p_index.push_back(ref.prim_index()); + p_object.push_back(ref.prim_object()); + } + else { + p_index[range.start() + num] = ref.prim_index(); + p_object[range.start() + num] = ref.prim_object(); + } + + bounds.grow(ref.bounds()); + visibility |= objects[ref.prim_object()]->visibility; num++; } + else { + if(ob_num < i) + references[range.start() + ob_num] = ref; + ob_num++; + } } BVHNode *leaf = NULL; if(num > 0) { - leaf = new LeafNode(bounds, visibility, p_index.size() - num, p_index.size()); + leaf = new LeafNode(bounds, visibility, range.start(), range.start() + num); - if(num == spec.num) + if(num == range.size()) return leaf; } /* while there may be multiple triangles in a leaf, for 
object primitives - * we want them to be the only one, so we */ - int ob_num = spec.num - num; - const Reference *ref = (ob_num)? &references.back() - (ob_num - 1): NULL; - BVHNode *oleaf = create_object_leaf_nodes(ref, ob_num); - for(int i = 0; i < ob_num; i++) - references.pop_back(); + * we want there to be the only one, so we keep splitting */ + const BVHReference *ref = (ob_num)? &references[range.start()]: NULL; + BVHNode *oleaf = create_object_leaf_nodes(ref, range.start() + num, ob_num); if(leaf) - return new InnerNode(spec.bounds, leaf, oleaf); + return new InnerNode(range.bounds(), leaf, oleaf); else return oleaf; } -/* Object Split */ +/* Tree Rotations */ -BVHBuild::ObjectSplit BVHBuild::find_object_split(const NodeSpec& spec, float nodeSAH) +void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations) { - ObjectSplit split; - const Reference *ref_ptr = &references[references.size() - spec.num]; - - for(int dim = 0; dim < 3; dim++) { - /* sort references */ - bvh_reference_sort(references.size() - spec.num, references.size(), &references[0], dim); - - /* sweep right to left and determine bounds. */ - BoundBox right_bounds; - - for(int i = spec.num - 1; i > 0; i--) { - right_bounds.grow(ref_ptr[i].bounds); - spatial_right_bounds[i - 1] = right_bounds; - } - - /* sweep left to right and select lowest SAH. */ - BoundBox left_bounds; - - for(int i = 1; i < spec.num; i++) { - left_bounds.grow(ref_ptr[i - 1].bounds); - right_bounds = spatial_right_bounds[i - 1]; - - float sah = nodeSAH + - left_bounds.area() * params.triangle_cost(i) + - right_bounds.area() * params.triangle_cost(spec.num - i); - - if(sah < split.sah) { - split.sah = sah; - split.dim = dim; - split.num_left = i; - split.left_bounds = left_bounds; - split.right_bounds = right_bounds; - } - } - } - - return split; + /* in tested scenes, this resulted in slightly slower raytracing, so disabled + * it for now. 
could be implementation bug, or depend on the scene */ + if(node) + for(int i = 0; i < iterations; i++) + rotate(node, max_depth); } -void BVHBuild::do_object_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const ObjectSplit& split) +void BVHBuild::rotate(BVHNode *node, int max_depth) { - /* sort references according to split */ - int start = references.size() - spec.num; - int end = references.size(); /* todo: is this right? */ - - bvh_reference_sort(start, end, &references[0], split.dim); - - /* split node specs */ - left.num = split.num_left; - left.bounds = split.left_bounds; - right.num = spec.num - split.num_left; - right.bounds = split.right_bounds; -} - -/* Spatial Split */ - -BVHBuild::SpatialSplit BVHBuild::find_spatial_split(const NodeSpec& spec, float nodeSAH) -{ - /* initialize bins. */ - float3 origin = spec.bounds.min; - float3 binSize = (spec.bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); - float3 invBinSize = 1.0f / binSize; - - for(int dim = 0; dim < 3; dim++) { - for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { - SpatialBin& bin = spatial_bins[dim][i]; - - bin.bounds = BoundBox(); - bin.enter = 0; - bin.exit = 0; - } - } - - /* chop references into bins. */ - for(unsigned int refIdx = references.size() - spec.num; refIdx < references.size(); refIdx++) { - const Reference& ref = references[refIdx]; - float3 firstBinf = (ref.bounds.min - origin) * invBinSize; - float3 lastBinf = (ref.bounds.max - origin) * invBinSize; - int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); - int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); + /* nothing to rotate if we reached a leaf node. 
*/ + if(node->is_leaf() || max_depth < 0) + return; + + InnerNode *parent = (InnerNode*)node; - firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); - lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); + /* rotate all children first */ + for(size_t c = 0; c < 2; c++) + rotate(parent->children[c], max_depth-1); - for(int dim = 0; dim < 3; dim++) { - Reference currRef = ref; + /* compute current area of all children */ + BoundBox bounds0 = parent->children[0]->m_bounds; + BoundBox bounds1 = parent->children[1]->m_bounds; - for(int i = firstBin[dim]; i < lastBin[dim]; i++) { - Reference leftRef, rightRef; + float area0 = bounds0.half_area(); + float area1 = bounds1.half_area(); + float4 child_area = make_float4(area0, area1, 0.0f, 0.0f); - split_reference(leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); - spatial_bins[dim][i].bounds.grow(leftRef.bounds); - currRef = rightRef; - } + /* find best rotation. we pick a target child of a first child, and swap + * this with an other child. we perform the best such swap. */ + float best_cost = FLT_MAX; + int best_child = -1, bets_target = -1, best_other = -1; - spatial_bins[dim][lastBin[dim]].bounds.grow(currRef.bounds); - spatial_bins[dim][firstBin[dim]].enter++; - spatial_bins[dim][lastBin[dim]].exit++; - } - } + for(size_t c = 0; c < 2; c++) { + /* ignore leaf nodes as we cannot descent into */ + if(parent->children[c]->is_leaf()) + continue; - /* select best split plane. */ - SpatialSplit split; + InnerNode *child = (InnerNode*)parent->children[c]; + BoundBox& other = (c == 0)? bounds1: bounds0; - for(int dim = 0; dim < 3; dim++) { - /* sweep right to left and determine bounds. 
*/ - BoundBox right_bounds; + /* transpose child bounds */ + BoundBox target0 = child->children[0]->m_bounds; + BoundBox target1 = child->children[1]->m_bounds; - for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { - right_bounds.grow(spatial_bins[dim][i].bounds); - spatial_right_bounds[i - 1] = right_bounds; - } + /* compute cost for both possible swaps */ + float cost0 = merge(other, target1).half_area() - child_area[c]; + float cost1 = merge(target0, other).half_area() - child_area[c]; - /* sweep left to right and select lowest SAH. */ - BoundBox left_bounds; - int leftNum = 0; - int rightNum = spec.num; + if(min(cost0,cost1) < best_cost) { + best_child = (int)c; + best_other = (int)(1-c); - for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { - left_bounds.grow(spatial_bins[dim][i - 1].bounds); - leftNum += spatial_bins[dim][i - 1].enter; - rightNum -= spatial_bins[dim][i - 1].exit; - - float sah = nodeSAH + - left_bounds.area() * params.triangle_cost(leftNum) + - spatial_right_bounds[i - 1].area() * params.triangle_cost(rightNum); - - if(sah < split.sah) { - split.sah = sah; - split.dim = dim; - split.pos = origin[dim] + binSize[dim] * (float)i; + if(cost0 < cost1) { + best_cost = cost0; + bets_target = 0; + } + else { + best_cost = cost0; + bets_target = 1; } } } - return split; -} - -void BVHBuild::do_spatial_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const SpatialSplit& split) -{ - /* Categorize references and compute bounds. 
- * - * Left-hand side: [left_start, left_end[ - * Uncategorized/split: [left_end, right_start[ - * Right-hand side: [right_start, refs.size()[ */ - - vector<Reference>& refs = references; - int left_start = refs.size() - spec.num; - int left_end = left_start; - int right_start = refs.size(); - - left.bounds = right.bounds = BoundBox(); - - for(int i = left_end; i < right_start; i++) { - if(refs[i].bounds.max[split.dim] <= split.pos) { - /* entirely on the left-hand side */ - left.bounds.grow(refs[i].bounds); - swap(refs[i], refs[left_end++]); - } - else if(refs[i].bounds.min[split.dim] >= split.pos) { - /* entirely on the right-hand side */ - right.bounds.grow(refs[i].bounds); - swap(refs[i--], refs[--right_start]); - } - } - - /* duplicate or unsplit references intersecting both sides. */ - while(left_end < right_start) { - /* split reference. */ - Reference lref, rref; - - split_reference(lref, rref, refs[left_end], split.dim, split.pos); - - /* compute SAH for duplicate/unsplit candidates. */ - BoundBox lub = left.bounds; // Unsplit to left: new left-hand bounds. - BoundBox rub = right.bounds; // Unsplit to right: new right-hand bounds. - BoundBox ldb = left.bounds; // Duplicate: new left-hand bounds. - BoundBox rdb = right.bounds; // Duplicate: new right-hand bounds. 
- - lub.grow(refs[left_end].bounds); - rub.grow(refs[left_end].bounds); - ldb.grow(lref.bounds); - rdb.grow(rref.bounds); - - float lac = params.triangle_cost(left_end - left_start); - float rac = params.triangle_cost(refs.size() - right_start); - float lbc = params.triangle_cost(left_end - left_start + 1); - float rbc = params.triangle_cost(refs.size() - right_start + 1); - - float unsplitLeftSAH = lub.area() * lbc + right.bounds.area() * rac; - float unsplitRightSAH = left.bounds.area() * lac + rub.area() * rbc; - float duplicateSAH = ldb.area() * lbc + rdb.area() * rbc; - float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); - - if(minSAH == unsplitLeftSAH) { - /* unsplit to left */ - left.bounds = lub; - left_end++; - } - else if(minSAH == unsplitRightSAH) { - /* unsplit to right */ - right.bounds = rub; - swap(refs[left_end], refs[--right_start]); - } - else { - /* duplicate */ - left.bounds = ldb; - right.bounds = rdb; - refs[left_end++] = lref; - refs.push_back(rref); - } - } - - left.num = left_end - left_start; - right.num = refs.size() - right_start; -} + /* if we did not find a swap that improves the SAH then do nothing */ + if(best_cost >= 0) + return; -void BVHBuild::split_reference(Reference& left, Reference& right, const Reference& ref, int dim, float pos) -{ - /* initialize references. */ - left.prim_index = right.prim_index = ref.prim_index; - left.prim_object = right.prim_object = ref.prim_object; - left.bounds = right.bounds = BoundBox(); - - /* loop over vertices/edges. */ - Object *ob = objects[ref.prim_object]; - const Mesh *mesh = ob->mesh; - const int *inds = mesh->triangles[ref.prim_index].v; - const float3 *verts = &mesh->verts[0]; - const float3* v1 = &verts[inds[2]]; - - for(int i = 0; i < 3; i++) { - const float3* v0 = v1; - int vindex = inds[i]; - v1 = &verts[vindex]; - float v0p = (*v0)[dim]; - float v1p = (*v1)[dim]; - - /* insert vertex to the boxes it belongs to. 
*/ - if(v0p <= pos) - left.bounds.grow(*v0); - - if(v0p >= pos) - right.bounds.grow(*v0); - - /* edge intersects the plane => insert intersection to both boxes. */ - if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { - float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); - left.bounds.grow(t); - right.bounds.grow(t); - } - } + /* perform the best found tree rotation */ + InnerNode *child = (InnerNode*)parent->children[best_child]; - /* intersect with original bounds. */ - left.bounds.max[dim] = pos; - right.bounds.min[dim] = pos; - left.bounds.intersect(ref.bounds); - right.bounds.intersect(ref.bounds); + swap(parent->children[best_other], child->children[bets_target]); + child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds); } CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h index 1fa1951d7f2..84e14632b4b 100644 --- a/intern/cycles/bvh/bvh_build.h +++ b/intern/cycles/bvh/bvh_build.h @@ -21,8 +21,10 @@ #include <float.h> #include "bvh.h" +#include "bvh_binning.h" #include "util_boundbox.h" +#include "util_task.h" #include "util_vector.h" CCL_NAMESPACE_BEGIN @@ -37,28 +39,7 @@ class Progress; class BVHBuild { public: - struct Reference - { - int prim_index; - int prim_object; - BoundBox bounds; - - Reference() - { - } - }; - - struct NodeSpec - { - int num; - BoundBox bounds; - - NodeSpec() - { - num = 0; - } - }; - + /* Constructor/Destructor */ BVHBuild( const vector<Object*>& objects, vector<int>& prim_index, @@ -70,63 +51,37 @@ public: BVHNode *run(); protected: + friend class BVHMixedSplit; + friend class BVHObjectSplit; + friend class BVHSpatialSplit; + /* adding references */ - void add_reference_mesh(NodeSpec& root, Mesh *mesh, int i); - void add_reference_object(NodeSpec& root, Object *ob, int i); - void add_references(NodeSpec& root); + void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i); + void add_reference_object(BoundBox& root, 
BoundBox& center, Object *ob, int i); + void add_references(BVHRange& root); /* building */ - BVHNode *build_node(const NodeSpec& spec, int level, float progress_start, float progress_end); - BVHNode *create_leaf_node(const NodeSpec& spec); - BVHNode *create_object_leaf_nodes(const Reference *ref, int num); - - void progress_update(float progress_start, float progress_end); - - /* object splits */ - struct ObjectSplit - { - float sah; - int dim; - int num_left; - BoundBox left_bounds; - BoundBox right_bounds; - - ObjectSplit() - : sah(FLT_MAX), dim(0), num_left(0) - { - } - }; - - ObjectSplit find_object_split(const NodeSpec& spec, float nodeSAH); - void do_object_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const ObjectSplit& split); - - /* spatial splits */ - struct SpatialSplit - { - float sah; - int dim; - float pos; - - SpatialSplit() - : sah(FLT_MAX), dim(0), pos(0.0f) - { - } - }; - - struct SpatialBin - { - BoundBox bounds; - int enter; - int exit; - }; - - SpatialSplit find_spatial_split(const NodeSpec& spec, float nodeSAH); - void do_spatial_split(NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const SpatialSplit& split); - void split_reference(Reference& left, Reference& right, const Reference& ref, int dim, float pos); + BVHNode *build_node(const BVHRange& range, int level); + BVHNode *build_node(const BVHObjectBinning& range, int level); + BVHNode *create_leaf_node(const BVHRange& range); + BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num); + + /* threads */ + enum { THREAD_TASK_SIZE = 4096 }; + void thread_build_node(Task *task_, int thread_id); + thread_mutex build_mutex; + + /* progress */ + void progress_update(); + + /* tree rotations */ + void rotate(BVHNode *node, int max_depth); + void rotate(BVHNode *node, int max_depth, int iterations); /* objects and primitive references */ vector<Object*> objects; - vector<Reference> references; + vector<BVHReference> references; + int 
num_original_references; /* output primitive indexes and objects */ vector<int>& prim_index; @@ -138,12 +93,17 @@ protected: /* progress reporting */ Progress& progress; double progress_start_time; - int progress_num_duplicates; + size_t progress_count; + size_t progress_total; + size_t progress_original_total; /* spatial splitting */ float spatial_min_overlap; vector<BoundBox> spatial_right_bounds; - SpatialBin spatial_bins[3][BVHParams::NUM_SPATIAL_BINS]; + BVHSpatialBin spatial_bins[3][BVHParams::NUM_SPATIAL_BINS]; + + /* threads */ + TaskPool task_pool; }; CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp index 63683bae4a3..4edfb4b70a4 100644 --- a/intern/cycles/bvh/bvh_node.cpp +++ b/intern/cycles/bvh/bvh_node.cpp @@ -24,6 +24,8 @@ CCL_NAMESPACE_BEGIN +/* BVH Node */ + int BVHNode::getSubtreeSize(BVH_STAT stat) const { int cnt = 0; @@ -59,7 +61,8 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const void BVHNode::deleteSubtree() { for(int i=0;i<num_children();i++) - get_child(i)->deleteSubtree(); + if(get_child(i)) + get_child(i)->deleteSubtree(); delete this; } @@ -70,12 +73,27 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons for(int i=0;i<num_children();i++) { BVHNode *child = get_child(i); - SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.area()/m_bounds.area()); + SAH += child->computeSubtreeSAHCost(p, probability * child->m_bounds.safe_area()/m_bounds.safe_area()); } return SAH; } +uint BVHNode::update_visibility() +{ + if(!is_leaf() && m_visibility == 0) { + InnerNode *inner = (InnerNode*)this; + BVHNode *child0 = inner->children[0]; + BVHNode *child1 = inner->children[1]; + + m_visibility = child0->update_visibility()|child1->update_visibility(); + } + + return m_visibility; +} + +/* Inner Node */ + void InnerNode::print(int depth) const { for(int i = 0; i < depth; i++) diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h index 
5e0a17a1193..5c00f7b7a38 100644 --- a/intern/cycles/bvh/bvh_node.h +++ b/intern/cycles/bvh/bvh_node.h @@ -49,8 +49,6 @@ public: virtual int num_triangles() const { return 0; } virtual void print(int depth = 0) const = 0; - float getArea() const { return m_bounds.area(); } - BoundBox m_bounds; uint m_visibility; @@ -58,6 +56,8 @@ public: int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const; float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const; void deleteSubtree(); + + uint update_visibility(); }; class InnerNode : public BVHNode @@ -66,9 +66,21 @@ public: InnerNode(const BoundBox& bounds, BVHNode* child0, BVHNode* child1) { m_bounds = bounds; - m_visibility = child0->m_visibility|child1->m_visibility; children[0] = child0; children[1] = child1; + + if(child0 && child1) + m_visibility = child0->m_visibility|child1->m_visibility; + else + m_visibility = 0; /* happens on build cancel */ + } + + InnerNode(const BoundBox& bounds) + { + m_bounds = bounds; + m_visibility = 0; + children[0] = NULL; + children[1] = NULL; } bool is_leaf() const { return false; } diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h index 38093438500..0cf5e905fea 100644 --- a/intern/cycles/bvh/bvh_params.h +++ b/intern/cycles/bvh/bvh_params.h @@ -18,6 +18,8 @@ #ifndef __BVH_PARAMS_H__ #define __BVH_PARAMS_H__ +#include "util_boundbox.h" + CCL_NAMESPACE_BEGIN /* BVH Parameters */ @@ -73,14 +75,97 @@ public: } /* SAH costs */ - float cost(int num_nodes, int num_tris) const + __forceinline float cost(int num_nodes, int num_tris) const { return node_cost(num_nodes) + triangle_cost(num_tris); } - float triangle_cost(int n) const + __forceinline float triangle_cost(int n) const { return n*sah_triangle_cost; } - float node_cost(int n) const + __forceinline float node_cost(int n) const { return n*sah_node_cost; } + + __forceinline bool small_enough_for_leaf(int size, int level) + { return (size <= min_leaf_size || level >= MAX_DEPTH); } +}; + 
+/* BVH Reference + * + * Reference to a primitive. Primitive index and object are sneakily packed + * into BoundBox to reduce memory usage and align nicely */ + +class BVHReference +{ +public: + __forceinline BVHReference() {} + + __forceinline BVHReference(const BoundBox& bounds_, int prim_index, int prim_object) + : rbounds(bounds_) + { + rbounds.min.w = __int_as_float(prim_index); + rbounds.max.w = __int_as_float(prim_object); + } + + __forceinline const BoundBox& bounds() const { return rbounds; } + __forceinline int prim_index() const { return __float_as_int(rbounds.min.w); } + __forceinline int prim_object() const { return __float_as_int(rbounds.max.w); } + +protected: + BoundBox rbounds; +}; + +/* BVH Range + * + * Build range used during construction, to indicate the bounds and place in + * the reference array of a subset of pirmitives Again uses trickery to pack + * integers into BoundBox for alignment purposes. */ + +class BVHRange +{ +public: + __forceinline BVHRange() + { + rbounds.min.w = __int_as_float(0); + rbounds.max.w = __int_as_float(0); + } + + __forceinline BVHRange(const BoundBox& bounds_, int start_, int size_) + : rbounds(bounds_) + { + rbounds.min.w = __int_as_float(start_); + rbounds.max.w = __int_as_float(size_); + } + + __forceinline BVHRange(const BoundBox& bounds_, const BoundBox& cbounds_, int start_, int size_) + : rbounds(bounds_), cbounds(cbounds_) + { + rbounds.min.w = __int_as_float(start_); + rbounds.max.w = __int_as_float(size_); + } + + __forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); } + + __forceinline const BoundBox& bounds() const { return rbounds; } + __forceinline const BoundBox& cent_bounds() const { return cbounds; } + __forceinline int start() const { return __float_as_int(rbounds.min.w); } + __forceinline int size() const { return __float_as_int(rbounds.max.w); } + __forceinline int end() const { return start() + size(); } + +protected: + BoundBox rbounds; + BoundBox cbounds; +}; + 
+/* BVH Spatial Bin */ + +struct BVHSpatialBin +{ + BoundBox bounds; + int enter; + int exit; + + __forceinline BVHSpatialBin() + { + } }; CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp index ee4531a4843..bef384be592 100644 --- a/intern/cycles/bvh/bvh_sort.cpp +++ b/intern/cycles/bvh/bvh_sort.cpp @@ -32,23 +32,23 @@ public: dim = dim_; } - bool operator()(const BVHBuild::Reference& ra, const BVHBuild::Reference& rb) + bool operator()(const BVHReference& ra, const BVHReference& rb) { - float ca = ra.bounds.min[dim] + ra.bounds.max[dim]; - float cb = rb.bounds.min[dim] + rb.bounds.max[dim]; + float ca = ra.bounds().min[dim] + ra.bounds().max[dim]; + float cb = rb.bounds().min[dim] + rb.bounds().max[dim]; if(ca < cb) return true; else if(ca > cb) return false; - else if(ra.prim_object < rb.prim_object) return true; - else if(ra.prim_object > rb.prim_object) return false; - else if(ra.prim_index < rb.prim_index) return true; - else if(ra.prim_index > rb.prim_index) return false; + else if(ra.prim_object() < rb.prim_object()) return true; + else if(ra.prim_object() > rb.prim_object()) return false; + else if(ra.prim_index() < rb.prim_index()) return true; + else if(ra.prim_index() > rb.prim_index()) return false; return false; } }; -void bvh_reference_sort(int start, int end, BVHBuild::Reference *data, int dim) +void bvh_reference_sort(int start, int end, BVHReference *data, int dim) { sort(data+start, data+end, BVHReferenceCompare(dim)); } diff --git a/intern/cycles/bvh/bvh_sort.h b/intern/cycles/bvh/bvh_sort.h index f0676948146..ba35ba3fae7 100644 --- a/intern/cycles/bvh/bvh_sort.h +++ b/intern/cycles/bvh/bvh_sort.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN -void bvh_reference_sort(int start, int end, BVHBuild::Reference *data, int dim); +void bvh_reference_sort(int start, int end, BVHReference *data, int dim); CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp new file mode 
100644 index 00000000000..263c5834428 --- /dev/null +++ b/intern/cycles/bvh/bvh_split.cpp @@ -0,0 +1,293 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh_build.h" +#include "bvh_split.h" +#include "bvh_sort.h" + +#include "mesh.h" +#include "object.h" + +#include "util_algorithm.h" + +CCL_NAMESPACE_BEGIN + +/* Object Split */ + +BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH) +: sah(FLT_MAX), dim(0), num_left(0), left_bounds(BoundBox::empty), right_bounds(BoundBox::empty) +{ + const BVHReference *ref_ptr = &builder->references[range.start()]; + float min_sah = FLT_MAX; + + for(int dim = 0; dim < 3; dim++) { + /* sort references */ + bvh_reference_sort(range.start(), range.end(), &builder->references[0], dim); + + /* sweep right to left and determine bounds. */ + BoundBox right_bounds = BoundBox::empty; + + for(int i = range.size() - 1; i > 0; i--) { + right_bounds.grow(ref_ptr[i].bounds()); + builder->spatial_right_bounds[i - 1] = right_bounds; + } + + /* sweep left to right and select lowest SAH. 
*/ + BoundBox left_bounds = BoundBox::empty; + + for(int i = 1; i < range.size(); i++) { + left_bounds.grow(ref_ptr[i - 1].bounds()); + right_bounds = builder->spatial_right_bounds[i - 1]; + + float sah = nodeSAH + + left_bounds.safe_area() * builder->params.triangle_cost(i) + + right_bounds.safe_area() * builder->params.triangle_cost(range.size() - i); + + if(sah < min_sah) { + min_sah = sah; + + this->sah = sah; + this->dim = dim; + this->num_left = i; + this->left_bounds = left_bounds; + this->right_bounds = right_bounds; + } + } + } +} + +void BVHObjectSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) +{ + /* sort references according to split */ + bvh_reference_sort(range.start(), range.end(), &builder->references[0], this->dim); + + /* split node ranges */ + left = BVHRange(this->left_bounds, range.start(), this->num_left); + right = BVHRange(this->right_bounds, left.end(), range.size() - this->num_left); + +} + +/* Spatial Split */ + +BVHSpatialSplit::BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH) +: sah(FLT_MAX), dim(0), pos(0.0f) +{ + /* initialize bins. */ + float3 origin = range.bounds().min; + float3 binSize = (range.bounds().max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); + float3 invBinSize = 1.0f / binSize; + + for(int dim = 0; dim < 3; dim++) { + for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { + BVHSpatialBin& bin = builder->spatial_bins[dim][i]; + + bin.bounds = BoundBox::empty; + bin.enter = 0; + bin.exit = 0; + } + } + + /* chop references into bins. 
*/ + for(unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) { + const BVHReference& ref = builder->references[refIdx]; + float3 firstBinf = (ref.bounds().min - origin) * invBinSize; + float3 lastBinf = (ref.bounds().max - origin) * invBinSize; + int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); + int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); + + firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); + lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); + + for(int dim = 0; dim < 3; dim++) { + BVHReference currRef = ref; + + for(int i = firstBin[dim]; i < lastBin[dim]; i++) { + BVHReference leftRef, rightRef; + + split_reference(builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); + builder->spatial_bins[dim][i].bounds.grow(leftRef.bounds()); + currRef = rightRef; + } + + builder->spatial_bins[dim][lastBin[dim]].bounds.grow(currRef.bounds()); + builder->spatial_bins[dim][firstBin[dim]].enter++; + builder->spatial_bins[dim][lastBin[dim]].exit++; + } + } + + /* select best split plane. */ + for(int dim = 0; dim < 3; dim++) { + /* sweep right to left and determine bounds. */ + BoundBox right_bounds = BoundBox::empty; + + for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { + right_bounds.grow(builder->spatial_bins[dim][i].bounds); + builder->spatial_right_bounds[i - 1] = right_bounds; + } + + /* sweep left to right and select lowest SAH. 
*/ + BoundBox left_bounds = BoundBox::empty; + int leftNum = 0; + int rightNum = range.size(); + + for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { + left_bounds.grow(builder->spatial_bins[dim][i - 1].bounds); + leftNum += builder->spatial_bins[dim][i - 1].enter; + rightNum -= builder->spatial_bins[dim][i - 1].exit; + + float sah = nodeSAH + + left_bounds.safe_area() * builder->params.triangle_cost(leftNum) + + builder->spatial_right_bounds[i - 1].safe_area() * builder->params.triangle_cost(rightNum); + + if(sah < this->sah) { + this->sah = sah; + this->dim = dim; + this->pos = origin[dim] + binSize[dim] * (float)i; + } + } + } +} + +void BVHSpatialSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) +{ + /* Categorize references and compute bounds. + * + * Left-hand side: [left_start, left_end[ + * Uncategorized/split: [left_end, right_start[ + * Right-hand side: [right_start, refs.size()[ */ + + vector<BVHReference>& refs = builder->references; + int left_start = range.start(); + int left_end = left_start; + int right_start = range.end(); + int right_end = range.end(); + BoundBox left_bounds = BoundBox::empty; + BoundBox right_bounds = BoundBox::empty; + + for(int i = left_end; i < right_start; i++) { + if(refs[i].bounds().max[this->dim] <= this->pos) { + /* entirely on the left-hand side */ + left_bounds.grow(refs[i].bounds()); + swap(refs[i], refs[left_end++]); + } + else if(refs[i].bounds().min[this->dim] >= this->pos) { + /* entirely on the right-hand side */ + right_bounds.grow(refs[i].bounds()); + swap(refs[i--], refs[--right_start]); + } + } + + /* duplicate or unsplit references intersecting both sides. */ + while(left_end < right_start) { + /* split reference. */ + BVHReference lref, rref; + + split_reference(builder, lref, rref, refs[left_end], this->dim, this->pos); + + /* compute SAH for duplicate/unsplit candidates. */ + BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds. 
+ BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds. + BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds. + BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds. + + lub.grow(refs[left_end].bounds()); + rub.grow(refs[left_end].bounds()); + ldb.grow(lref.bounds()); + rdb.grow(rref.bounds()); + + float lac = builder->params.triangle_cost(left_end - left_start); + float rac = builder->params.triangle_cost(right_end - right_start); + float lbc = builder->params.triangle_cost(left_end - left_start + 1); + float rbc = builder->params.triangle_cost(right_end - right_start + 1); + + float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac; + float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc; + float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc; + float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); + + if(minSAH == unsplitLeftSAH) { + /* unsplit to left */ + left_bounds = lub; + left_end++; + } + else if(minSAH == unsplitRightSAH) { + /* unsplit to right */ + right_bounds = rub; + swap(refs[left_end], refs[--right_start]); + } + else { + /* duplicate */ + left_bounds = ldb; + right_bounds = rdb; + refs[left_end++] = lref; + refs.insert(refs.begin() + right_end, rref); + right_end++; + } + } + + left = BVHRange(left_bounds, left_start, left_end - left_start); + right = BVHRange(right_bounds, right_start, right_end - right_start); +} + +void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVHReference& right, const BVHReference& ref, int dim, float pos) +{ + /* initialize boundboxes */ + BoundBox left_bounds = BoundBox::empty; + BoundBox right_bounds = BoundBox::empty; + + /* loop over vertices/edges. 
*/ + Object *ob = builder->objects[ref.prim_object()]; + const Mesh *mesh = ob->mesh; + const int *inds = mesh->triangles[ref.prim_index()].v; + const float3 *verts = &mesh->verts[0]; + const float3* v1 = &verts[inds[2]]; + + for(int i = 0; i < 3; i++) { + const float3* v0 = v1; + int vindex = inds[i]; + v1 = &verts[vindex]; + float v0p = (*v0)[dim]; + float v1p = (*v1)[dim]; + + /* insert vertex to the boxes it belongs to. */ + if(v0p <= pos) + left_bounds.grow(*v0); + + if(v0p >= pos) + right_bounds.grow(*v0); + + /* edge intersects the plane => insert intersection to both boxes. */ + if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { + float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); + left_bounds.grow(t); + right_bounds.grow(t); + } + } + + /* intersect with original bounds. */ + left_bounds.max[dim] = pos; + right_bounds.min[dim] = pos; + left_bounds.intersect(ref.bounds()); + right_bounds.intersect(ref.bounds()); + + /* set references */ + left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object()); + right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object()); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h new file mode 100644 index 00000000000..1f4befbe8e2 --- /dev/null +++ b/intern/cycles/bvh/bvh_split.h @@ -0,0 +1,110 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_SPLIT_H__ +#define __BVH_SPLIT_H__ + +#include "bvh_build.h" +#include "bvh_params.h" + +CCL_NAMESPACE_BEGIN + +class BVHBuild; + +/* Object Split */ + +class BVHObjectSplit +{ +public: + float sah; + int dim; + int num_left; + BoundBox left_bounds; + BoundBox right_bounds; + + BVHObjectSplit() {} + BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH); + + void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range); +}; + +/* Spatial Split */ + +class BVHSpatialSplit +{ +public: + float sah; + int dim; + float pos; + + BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f) {} + BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float nodeSAH); + + void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range); + void split_reference(BVHBuild *builder, BVHReference& left, BVHReference& right, const BVHReference& ref, int dim, float pos); +}; + +/* Mixed Object-Spatial Split */ + +class BVHMixedSplit +{ +public: + BVHObjectSplit object; + BVHSpatialSplit spatial; + + float leafSAH; + float nodeSAH; + float minSAH; + + bool no_split; + + __forceinline BVHMixedSplit(BVHBuild *builder, const BVHRange& range, int level) + { + /* find split candidates. */ + float area = range.bounds().safe_area(); + + leafSAH = area * builder->params.triangle_cost(range.size()); + nodeSAH = area * builder->params.node_cost(2); + + object = BVHObjectSplit(builder, range, nodeSAH); + + if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { + BoundBox overlap = object.left_bounds; + overlap.intersect(object.right_bounds); + + if(overlap.safe_area() >= builder->spatial_min_overlap) + spatial = BVHSpatialSplit(builder, range, nodeSAH); + } + + /* leaf SAH is the lowest => create leaf. 
*/ + minSAH = min(min(leafSAH, object.sah), spatial.sah); + no_split = (minSAH == leafSAH && range.size() <= builder->params.max_leaf_size); + } + + __forceinline void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) + { + if(builder->params.use_spatial_split && minSAH == spatial.sah) + spatial.split(builder, left, right, range); + if(!left.size() || !right.size()) + object.split(builder, left, right, range); + } +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_SPLIT_H__ */ + diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index cceec8b8e5c..42dda1180c7 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -58,15 +58,6 @@ void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size) split(tasks, num); } -void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num) -{ - list<DeviceTask> tasks; - split(tasks, num); - - foreach(DeviceTask& task, tasks) - queue.push(task); -} - void DeviceTask::split(list<DeviceTask>& tasks, int num) { if(type == SHADER) { diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index af2567498d9..87f255e54e7 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -25,6 +25,7 @@ #include "util_list.h" #include "util_string.h" +#include "util_task.h" #include "util_thread.h" #include "util_types.h" #include "util_vector.h" @@ -66,7 +67,7 @@ public: /* Device Task */ -class DeviceTask { +class DeviceTask : public Task { public: typedef enum { PATH_TRACE, TONEMAP, SHADER } Type; Type type; @@ -87,7 +88,6 @@ public: DeviceTask(Type type = PATH_TRACE); void split(list<DeviceTask>& tasks, int num); - void split(ThreadQueue<DeviceTask>& tasks, int num); void split_max_size(list<DeviceTask>& tasks, int max_size); }; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index da977ed8472..ec84047c44f 100644 --- a/intern/cycles/device/device_cpu.cpp +++ 
b/intern/cycles/device/device_cpu.cpp @@ -40,35 +40,21 @@ CCL_NAMESPACE_BEGIN class CPUDevice : public Device { public: - vector<thread*> threads; - ThreadQueue<DeviceTask> tasks; + TaskPool task_pool; KernelGlobals *kg; CPUDevice(int threads_num) + : task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2)) { kg = kernel_globals_create(); /* do now to avoid thread issues */ system_cpu_support_optimized(); - - if(threads_num == 0) - threads_num = system_cpu_thread_count(); - - threads.resize(threads_num); - - for(size_t i = 0; i < threads.size(); i++) - threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i)); } ~CPUDevice() { - tasks.stop(); - - foreach(thread *t, threads) { - t->join(); - delete t; - } - + task_pool.stop(); kernel_globals_free(kg); } @@ -127,25 +113,21 @@ public: #endif } - void thread_run(int t) + void thread_run(Task *task_, int thread_id) { - DeviceTask task; - - while(tasks.worker_wait_pop(task)) { - if(task.type == DeviceTask::PATH_TRACE) - thread_path_trace(task); - else if(task.type == DeviceTask::TONEMAP) - thread_tonemap(task); - else if(task.type == DeviceTask::SHADER) - thread_shader(task); - - tasks.worker_done(); - } + DeviceTask *task = (DeviceTask*)task_; + + if(task->type == DeviceTask::PATH_TRACE) + thread_path_trace(*task); + else if(task->type == DeviceTask::TONEMAP) + thread_tonemap(*task); + else if(task->type == DeviceTask::SHADER) + thread_shader(*task); } void thread_path_trace(DeviceTask& task) { - if(tasks.worker_cancel()) + if(task_pool.cancelled()) return; #ifdef WITH_OSL @@ -160,7 +142,7 @@ public: kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y, task.offset, task.stride); - if(tasks.worker_cancel()) + if(task_pool.cancelled()) break; } } @@ -172,7 +154,7 @@ public: kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y, task.offset, task.stride); - if(tasks.worker_cancel()) + 
if(task_pool.cancelled()) break; } } @@ -214,7 +196,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(tasks.worker_cancel()) + if(task_pool.cancelled()) break; } } @@ -224,7 +206,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(tasks.worker_cancel()) + if(task_pool.cancelled()) break; } } @@ -239,17 +221,22 @@ public: { /* split task into smaller ones, more than number of threads for uneven workloads where some parts of the image render slower than others */ - task.split(tasks, threads.size()*10); + list<DeviceTask> tasks; + + task.split(tasks, TaskScheduler::num_threads()*10); + + foreach(DeviceTask& task, tasks) + task_pool.push(new DeviceTask(task)); } void task_wait() { - tasks.wait_done(); + task_pool.wait(); } void task_cancel() { - tasks.cancel(); + task_pool.cancel(); } }; diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 1f69f2c53fa..9f7d65e640b 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -257,13 +257,14 @@ public: void task_add(DeviceTask& task) { - ThreadQueue<DeviceTask> tasks; + list<DeviceTask> tasks; task.split(tasks, devices.size()); foreach(SubDevice& sub, devices) { - DeviceTask subtask; + if(!tasks.empty()) { + DeviceTask subtask = tasks.front(); + tasks.pop_front(); - if(tasks.worker_wait_pop(subtask)) { if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer]; if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state]; if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba]; diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 9a52531eec0..6c3ade1c531 100644 --- 
a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -266,7 +266,7 @@ __device_inline void path_radiance_accum_background(PathRadiance *L, float3 thro #endif } -__device_inline float3 path_radiance_sum(PathRadiance *L) +__device_inline float3 path_radiance_sum(KernelGlobals *kg, PathRadiance *L) { #ifdef __PASSES__ if(L->use_light_pass) { @@ -283,9 +283,14 @@ __device_inline float3 path_radiance_sum(PathRadiance *L) L->indirect_glossy *= L->indirect; L->indirect_transmission *= L->indirect; - return L->emission + L->background + float3 L_sum = L->emission + L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission; + + if(!kernel_data.background.transparent) + L_sum += L->background; + + return L_sum; } else return L->emission; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index ff12e85375c..8ebac177277 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -223,6 +223,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R path_radiance_init(&L, kernel_data.film.use_light_pass); + float min_ray_pdf = FLT_MAX; float ray_pdf = 0.0f; PathState state; int rng_offset = PRNG_BASE_NUM; @@ -239,13 +240,17 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R /* eval background shader if nothing hit */ if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) { L_transparent += average(throughput); + +#ifdef __PASSES__ + if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) +#endif + break; } + #ifdef __BACKGROUND__ - else { - /* sample background shader */ - float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf); - path_radiance_accum_background(&L, throughput, L_background, state.bounce); - } + /* sample background shader */ + float3 L_background = indirect_background(kg, &ray, state.flag, 
ray_pdf); + path_radiance_accum_background(&L, throughput, L_background, state.bounce); #endif break; @@ -259,6 +264,18 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput); + /* blurring of bsdf after bounces, for rays that have a small likelihood + of following this particular path (diffuse, rough glossy) */ + if(kernel_data.integrator.filter_glossy != FLT_MAX) { + float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf; + + if(blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f; + shader_bsdf_blur(kg, &sd, blur_roughness); + } + } + + /* holdout */ #ifdef __HOLDOUT__ if((sd.flag & SD_HOLDOUT) && (state.flag & PATH_RAY_CAMERA)) { float3 holdout_weight = shader_holdout_eval(kg, &sd); @@ -378,8 +395,10 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R path_radiance_bsdf_bounce(&L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label); /* set labels */ - if(!(label & LABEL_TRANSPARENT)) + if(!(label & LABEL_TRANSPARENT)) { ray_pdf = bsdf_pdf; + min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf); + } /* update path state */ path_state_next(kg, &state, label); @@ -394,7 +413,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R #endif } - float3 L_sum = path_radiance_sum(&L); + float3 L_sum = path_radiance_sum(kg, &L); #ifdef __CLAMP_SAMPLE__ path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 391dcd12dad..102a2bb036d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -516,6 +516,7 @@ typedef struct KernelIntegrator { /* caustics */ int no_caustics; + float filter_glossy; /* seed */ int seed; @@ -525,9 +526,6 @@ typedef struct KernelIntegrator { /* clamp */ float sample_clamp; - - /* padding */ - int pad; } 
KernelIntegrator; typedef struct KernelBVH { diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index f494b6d66e1..98f8734aed2 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -40,6 +40,15 @@ __device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, float *stack data = sd->P; break; } + case NODE_TEXCO_NORMAL: { + if(sd->object != ~0) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + data = transform_direction(&tfm, sd->N); + } + else + data = sd->N; + break; + } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; @@ -85,6 +94,15 @@ __device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, floa data = sd->P + sd->dP.dx; break; } + case NODE_TEXCO_NORMAL: { + if(sd->object != ~0) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + data = transform_direction(&tfm, sd->N); + } + else + data = sd->N; + break; + } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; @@ -133,6 +151,15 @@ __device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, floa data = sd->P + sd->dP.dy; break; } + case NODE_TEXCO_NORMAL: { + if(sd->object != ~0) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + data = normalize(transform_direction(&tfm, sd->N)); + } + else + data = sd->N; + break; + } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 68eb39bdd29..fa7c211b5f9 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -119,6 +119,7 @@ typedef enum NodeLightPath { } NodeLightPath; typedef enum NodeTexCoord { + NODE_TEXCO_NORMAL, NODE_TEXCO_OBJECT, NODE_TEXCO_CAMERA, NODE_TEXCO_WINDOW, diff --git a/intern/cycles/render/integrator.cpp 
b/intern/cycles/render/integrator.cpp index 6e6d30f3879..c1f066df10c 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -41,6 +41,7 @@ Integrator::Integrator() transparent_shadows = false; no_caustics = false; + filter_glossy = 0.0f; seed = 0; layer_flag = ~0; sample_clamp = 0.0f; @@ -81,6 +82,8 @@ void Integrator::device_update(Device *device, DeviceScene *dscene) kintegrator->transparent_shadows = transparent_shadows; kintegrator->no_caustics = no_caustics; + kintegrator->filter_glossy = (filter_glossy == 0.0f)? FLT_MAX: 1.0f/filter_glossy; + kintegrator->seed = hash_int(seed); kintegrator->layer_flag = layer_flag << PATH_RAY_LAYER_SHIFT; @@ -119,6 +122,7 @@ bool Integrator::modified(const Integrator& integrator) transparent_probalistic == integrator.transparent_probalistic && transparent_shadows == integrator.transparent_shadows && no_caustics == integrator.no_caustics && + filter_glossy == integrator.filter_glossy && layer_flag == integrator.layer_flag && seed == integrator.seed && sample_clamp == integrator.sample_clamp); diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index abbbaca894c..0817fcaa457 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -41,6 +41,7 @@ public: bool transparent_shadows; bool no_caustics; + float filter_glossy; int seed; int layer_flag; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index a7eb365f983..0ce16e65621 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -43,6 +43,7 @@ Mesh::Mesh() transform_applied = false; transform_negative_scaled = false; displacement_method = DISPLACE_BUMP; + bounds = BoundBox::empty; bvh = NULL; @@ -96,7 +97,7 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_) void Mesh::compute_bounds() { - BoundBox bnds; + BoundBox bnds = BoundBox::empty; size_t verts_size = verts.size(); for(size_t i = 0; i < 
verts_size; i++) @@ -697,6 +698,8 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen progress.set_status(msg, "Building BVH"); mesh->compute_bvh(&scene->params, progress); + + i++; } if(progress.get_cancel()) return; @@ -704,8 +707,6 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen mesh->need_update = false; mesh->need_update_rebuild = false; } - - i++; } foreach(Shader *shader, scene->shaders) diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index db696993737..d71438ebae1 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -1503,6 +1503,7 @@ TextureCoordinateNode::TextureCoordinateNode() { add_input("Normal", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL, true); add_output("Generated", SHADER_SOCKET_POINT); + add_output("Normal", SHADER_SOCKET_NORMAL); add_output("UV", SHADER_SOCKET_POINT); add_output("Object", SHADER_SOCKET_POINT); add_output("Camera", SHADER_SOCKET_POINT); @@ -1551,6 +1552,12 @@ void TextureCoordinateNode::compile(SVMCompiler& compiler) } } + out = output("Normal"); + if(!out->links.empty()) { + compiler.stack_assign(out); + compiler.add_node(texco_node, NODE_TEXCO_NORMAL, out->stack_offset); + } + out = output("UV"); if(!out->links.empty()) { int attr = compiler.attribute(Attribute::STD_UV); diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 5f7a5810c09..28645d856a8 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -37,6 +37,7 @@ Object::Object() tfm = transform_identity(); visibility = ~0; pass_id = 0; + bounds = BoundBox::empty; } Object::~Object() diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 676f42be790..34a0c0ff877 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -27,6 +27,7 @@ #include "util_foreach.h" #include "util_function.h" +#include "util_task.h" #include 
"util_time.h" CCL_NAMESPACE_BEGIN @@ -37,6 +38,8 @@ Session::Session(const SessionParams& params_) { device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background); + TaskScheduler::init(params.threads); + device = Device::create(params.device, params.background, params.threads); buffers = new RenderBuffers(device); display = new DisplayBuffer(device); @@ -88,6 +91,8 @@ Session::~Session() delete display; delete scene; delete device; + + TaskScheduler::exit(); } void Session::start() diff --git a/intern/cycles/subd/subd_patch.cpp b/intern/cycles/subd/subd_patch.cpp index ff477296c7e..f6acc358959 100644 --- a/intern/cycles/subd/subd_patch.cpp +++ b/intern/cycles/subd/subd_patch.cpp @@ -93,7 +93,7 @@ void LinearQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, float BoundBox LinearQuadPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 4; i++) bbox.grow(hull[i]); @@ -115,7 +115,7 @@ void LinearTrianglePatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, f BoundBox LinearTrianglePatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 3; i++) bbox.grow(hull[i]); @@ -132,7 +132,7 @@ void BicubicPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, float v) BoundBox BicubicPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 16; i++) bbox.grow(hull[i]); @@ -152,7 +152,7 @@ void BicubicTangentPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, f BoundBox BicubicTangentPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 16; i++) bbox.grow(hull[i]); @@ -205,7 +205,7 @@ void GregoryQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, floa BoundBox GregoryQuadPatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 20; i++) bbox.grow(hull[i]); @@ -276,7 +276,7 @@ void GregoryTrianglePatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float u, BoundBox 
GregoryTrianglePatch::bound() { - BoundBox bbox; + BoundBox bbox = BoundBox::empty; for(int i = 0; i < 20; i++) bbox.grow(hull[i]); diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 9182ee4cbe1..87bd84b4e0f 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRC util_path.cpp util_string.cpp util_system.cpp + util_task.cpp util_time.cpp util_transform.cpp ) @@ -50,6 +51,7 @@ set(SRC_HEADERS util_set.h util_string.h util_system.h + util_task.h util_thread.h util_time.h util_transform.h diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h index bb1df0b220f..9511b48e103 100644 --- a/intern/cycles/util/util_boundbox.h +++ b/intern/cycles/util/util_boundbox.h @@ -23,6 +23,7 @@ #include <float.h> #include "util_math.h" +#include "util_string.h" #include "util_transform.h" #include "util_types.h" @@ -35,45 +36,81 @@ class BoundBox public: float3 min, max; - BoundBox(void) + __forceinline BoundBox() { - min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX); - max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); } - BoundBox(const float3& min_, const float3& max_) + __forceinline BoundBox(const float3& pt) + : min(pt), max(pt) + { + } + + __forceinline BoundBox(const float3& min_, const float3& max_) : min(min_), max(max_) { } - void grow(const float3& pt) + static struct empty_t {} empty; + + __forceinline BoundBox(empty_t) + : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) + { + } + + __forceinline void grow(const float3& pt) { min = ccl::min(min, pt); max = ccl::max(max, pt); } - void grow(const BoundBox& bbox) + __forceinline void grow(const BoundBox& bbox) { grow(bbox.min); grow(bbox.max); } - void intersect(const BoundBox& bbox) + __forceinline void intersect(const BoundBox& bbox) { min = ccl::max(min, bbox.min); max = ccl::min(max, bbox.max); } - float area(void) const + /* todo: avoid using this */ + 
__forceinline float safe_area() const { - if(!valid()) + if(!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) return 0.0f; + return area(); + } + + __forceinline float area() const + { + return half_area()*2.0f; + } + + __forceinline float half_area() const + { float3 d = max - min; - return dot(d, d)*2.0f; + return (d.x*d.z + d.y*d.z + d.x*d.y); + } + + __forceinline float3 center() const + { + return 0.5f*(min + max); } - bool valid(void) const + __forceinline float3 center2() const + { + return min + max; + } + + __forceinline float3 size() const + { + return max - min; + } + + __forceinline bool valid() const { return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && @@ -82,7 +119,7 @@ public: BoundBox transformed(const Transform *tfm) { - BoundBox result; + BoundBox result = BoundBox::empty; for(int i = 0; i < 8; i++) { float3 p; @@ -98,6 +135,31 @@ public: } }; +__forceinline BoundBox merge(const BoundBox& bbox, const float3& pt) +{ + return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); +} + +__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b) +{ + return BoundBox(min(a.min, b.min), max(a.max, b.max)); +} + +__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b, const BoundBox& c, const BoundBox& d) +{ + return merge(merge(a, b), merge(c, d)); +} + +__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b) +{ + return BoundBox(max(a.min, b.min), min(a.max, b.max)); +} + +__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b, const BoundBox& c) +{ + return intersect(a, intersect(b, c)); +} + CCL_NAMESPACE_END #endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 019dede07fa..33e351c74e9 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -182,93 +182,74 @@ __device_inline float average(const float2 a) __device_inline float2 
operator-(const float2 a) { - float2 r = {-a.x, -a.y}; - return r; + return make_float2(-a.x, -a.y); } __device_inline float2 operator*(const float2 a, const float2 b) { - float2 r = {a.x*b.x, a.y*b.y}; - return r; + return make_float2(a.x*b.x, a.y*b.y); } __device_inline float2 operator*(const float2 a, float f) { - float2 r = {a.x*f, a.y*f}; - return r; + return make_float2(a.x*f, a.y*f); } __device_inline float2 operator*(float f, const float2 a) { - float2 r = {a.x*f, a.y*f}; - return r; + return make_float2(a.x*f, a.y*f); } __device_inline float2 operator/(float f, const float2 a) { - float2 r = {f/a.x, f/a.y}; - return r; + return make_float2(f/a.x, f/a.y); } __device_inline float2 operator/(const float2 a, float f) { float invf = 1.0f/f; - float2 r = {a.x*invf, a.y*invf}; - return r; + return make_float2(a.x*invf, a.y*invf); } __device_inline float2 operator/(const float2 a, const float2 b) { - float2 r = {a.x/b.x, a.y/b.y}; - return r; + return make_float2(a.x/b.x, a.y/b.y); } __device_inline float2 operator+(const float2 a, const float2 b) { - float2 r = {a.x+b.x, a.y+b.y}; - return r; + return make_float2(a.x+b.x, a.y+b.y); } __device_inline float2 operator-(const float2 a, const float2 b) { - float2 r = {a.x-b.x, a.y-b.y}; - return r; + return make_float2(a.x-b.x, a.y-b.y); } __device_inline float2 operator+=(float2& a, const float2 b) { - a.x += b.x; - a.y += b.y; - return a; + return a = a + b; } __device_inline float2 operator*=(float2& a, const float2 b) { - a.x *= b.x; - a.y *= b.y; - return a; + return a = a * b; } __device_inline float2 operator*=(float2& a, float f) { - a.x *= f; - a.y *= f; - return a; + return a = a * f; } __device_inline float2 operator/=(float2& a, const float2 b) { - a.x /= b.x; - a.y /= b.y; - return a; + return a = a / b; } __device_inline float2 operator/=(float2& a, float f) { float invf = 1.0f/f; - a.x *= invf; - a.y *= invf; - return a; + return a = a * invf; } @@ -314,14 +295,12 @@ __device_inline bool 
operator!=(const float2 a, const float2 b) __device_inline float2 min(float2 a, float2 b) { - float2 r = {min(a.x, b.x), min(a.y, b.y)}; - return r; + return make_float2(min(a.x, b.x), min(a.y, b.y)); } __device_inline float2 max(float2 a, float2 b) { - float2 r = {max(a.x, b.x), max(a.y, b.y)}; - return r; + return make_float2(max(a.x, b.x), max(a.y, b.y)); } __device_inline float2 clamp(float2 a, float2 mn, float2 mx) @@ -361,112 +340,78 @@ __device_inline float2 interp(float2 a, float2 b, float t) /* Float3 Vector */ -__device_inline bool is_zero(const float3 a) -{ - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); -} - -__device_inline float average(const float3 a) -{ - return (a.x + a.y + a.z)*(1.0f/3.0f); -} - #ifndef __KERNEL_OPENCL__ __device_inline float3 operator-(const float3 a) { - float3 r = make_float3(-a.x, -a.y, -a.z); - return r; + return make_float3(-a.x, -a.y, -a.z); } __device_inline float3 operator*(const float3 a, const float3 b) { - float3 r = make_float3(a.x*b.x, a.y*b.y, a.z*b.z); - return r; + return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); } __device_inline float3 operator*(const float3 a, float f) { - float3 r = make_float3(a.x*f, a.y*f, a.z*f); - return r; + return make_float3(a.x*f, a.y*f, a.z*f); } __device_inline float3 operator*(float f, const float3 a) { - float3 r = make_float3(a.x*f, a.y*f, a.z*f); - return r; + return make_float3(a.x*f, a.y*f, a.z*f); } __device_inline float3 operator/(float f, const float3 a) { - float3 r = make_float3(f/a.x, f/a.y, f/a.z); - return r; + return make_float3(f/a.x, f/a.y, f/a.z); } __device_inline float3 operator/(const float3 a, float f) { float invf = 1.0f/f; - float3 r = make_float3(a.x*invf, a.y*invf, a.z*invf); - return r; + return make_float3(a.x*invf, a.y*invf, a.z*invf); } __device_inline float3 operator/(const float3 a, const float3 b) { - float3 r = make_float3(a.x/b.x, a.y/b.y, a.z/b.z); - return r; + return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); } __device_inline float3 
operator+(const float3 a, const float3 b) { - float3 r = make_float3(a.x+b.x, a.y+b.y, a.z+b.z); - return r; + return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); } __device_inline float3 operator-(const float3 a, const float3 b) { - float3 r = make_float3(a.x-b.x, a.y-b.y, a.z-b.z); - return r; + return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); } __device_inline float3 operator+=(float3& a, const float3 b) { - a.x += b.x; - a.y += b.y; - a.z += b.z; - return a; + return a = a + b; } __device_inline float3 operator*=(float3& a, const float3 b) { - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - return a; + return a = a * b; } __device_inline float3 operator*=(float3& a, float f) { - a.x *= f; - a.y *= f; - a.z *= f; - return a; + return a = a * f; } __device_inline float3 operator/=(float3& a, const float3 b) { - a.x /= b.x; - a.y /= b.y; - a.z /= b.z; - return a; + return a = a / b; } __device_inline float3 operator/=(float3& a, float f) { float invf = 1.0f/f; - a.x *= invf; - a.y *= invf; - a.z *= invf; - return a; + return a = a * invf; } __device_inline float dot(const float3 a, const float3 b) @@ -506,7 +451,11 @@ __device_inline float3 normalize_len(const float3 a, float *t) __device_inline bool operator==(const float3 a, const float3 b) { +#ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; +#else return (a.x == b.x && a.y == b.y && a.z == b.z); +#endif } __device_inline bool operator!=(const float3 a, const float3 b) @@ -516,14 +465,20 @@ __device_inline bool operator!=(const float3 a, const float3 b) __device_inline float3 min(float3 a, float3 b) { - float3 r = make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); - return r; +#ifdef __KERNEL_SSE__ + return _mm_min_ps(a.m128, b.m128); +#else + return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif } __device_inline float3 max(float3 a, float3 b) { - float3 r = make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); - return r; +#ifdef __KERNEL_SSE__ + return 
_mm_max_ps(a.m128, b.m128); +#else + return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif } __device_inline float3 clamp(float3 a, float3 mn, float3 mx) @@ -533,7 +488,12 @@ __device_inline float3 clamp(float3 a, float3 mn, float3 mx) __device_inline float3 fabs(float3 a) { +#ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + return _mm_and_ps(a.m128, mask); +#else return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); +#endif } #endif @@ -555,6 +515,16 @@ __device_inline void print_float3(const char *label, const float3& a) printf("%s: %.8f %.8f %.8f\n", label, a.x, a.y, a.z); } +__device_inline float3 rcp(const float3& a) +{ +#ifdef __KERNEL_SSE__ + float4 r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +#else + return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); +#endif +} + #endif __device_inline float3 interp(float3 a, float3 b, float t) @@ -562,122 +532,257 @@ __device_inline float3 interp(float3 a, float3 b, float t) return a + t*(b - a); } +__device_inline bool is_zero(const float3 a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float3(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); +#endif +} + +__device_inline float reduce_add(const float3& a) +{ +#ifdef __KERNEL_SSE__ + return (a.x + a.y + a.z); +#else + return (a.x + a.y + a.z); +#endif +} + +__device_inline float average(const float3 a) +{ + return reduce_add(a)*(1.0f/3.0f); +} + /* Float4 Vector */ -#ifndef __KERNEL_OPENCL__ +#ifdef __KERNEL_SSE__ -__device_inline bool is_zero(const float4& a) +template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> __forceinline const float4 shuffle(const float4& b) { - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); + return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0))); } -__device_inline float average(const float4& a) +template<> __forceinline const 
float4 shuffle<0, 0, 2, 2>(const float4& b) { - return (a.x + a.y + a.z + a.w)*(1.0f/4.0f); + return _mm_moveldup_ps(b); } +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) +{ + return _mm_movehdup_ps(b); +} + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) +{ + return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))); +} + +#endif + +#ifndef __KERNEL_OPENCL__ + __device_inline float4 operator-(const float4& a) { - float4 r = {-a.x, -a.y, -a.z, -a.w}; - return r; +#ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + return _mm_xor_ps(a.m128, mask); +#else + return make_float4(-a.x, -a.y, -a.z, -a.w); +#endif } __device_inline float4 operator*(const float4& a, const float4& b) { - float4 r = {a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_mul_ps(a.m128, b.m128); +#else + return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); +#endif } __device_inline float4 operator*(const float4& a, float f) { - float4 r = {a.x*f, a.y*f, a.z*f, a.w*f}; - return r; +#ifdef __KERNEL_SSE__ + return a * make_float4(f); +#else + return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); +#endif } __device_inline float4 operator*(float f, const float4& a) { - float4 r = {a.x*f, a.y*f, a.z*f, a.w*f}; - return r; + return a * f; +} + +__device_inline float4 rcp(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +#else + return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); +#endif } __device_inline float4 operator/(const float4& a, float f) { - float invf = 1.0f/f; - float4 r = {a.x*invf, a.y*invf, a.z*invf, a.w*invf}; - return r; + return a * (1.0f/f); } __device_inline float4 operator/(const float4& a, const float4& b) { - float4 r = {a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w}; - return r; +#ifdef __KERNEL_SSE__ + return a * rcp(b); +#else + return make_float4(a.x/b.x, a.y/b.y, 
a.z/b.z, a.w/b.w); +#endif + } __device_inline float4 operator+(const float4& a, const float4& b) { - float4 r = {a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_add_ps(a.m128, b.m128); +#else + return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); +#endif } __device_inline float4 operator-(const float4& a, const float4& b) { - float4 r = {a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_sub_ps(a.m128, b.m128); +#else + return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); +#endif } __device_inline float4 operator+=(float4& a, const float4& b) { - a.x += b.x; - a.y += b.y; - a.z += b.z; - a.w += b.w; - return a; + return a = a + b; } __device_inline float4 operator*=(float4& a, const float4& b) { - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - a.w *= b.w; - return a; + return a = a * b; } __device_inline float4 operator/=(float4& a, float f) { - float invf = 1.0f/f; - a.x *= invf; - a.y *= invf; - a.z *= invf; - a.w *= invf; - return a; + return a = a / f; } -__device_inline float dot(const float4& a, const float4& b) +__device_inline int4 operator<(const float4& a, const float4& b) { - return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; +#ifdef __KERNEL_SSE__ + return _mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128)); /* todo: avoid cvt */ +#else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +#endif +} + +__device_inline int4 operator>=(float4 a, float4 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)); /* todo: avoid cvt */ +#else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +#endif +} + +__device_inline int4 operator<=(const float4& a, const float4& b) +{ +#ifdef __KERNEL_SSE__ + return _mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128)); /* todo: avoid cvt */ +#else + return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); +#endif +} + +__device_inline bool operator==(const float4 a, const float4 b) +{ +#ifdef 
__KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; +#else + return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); +#endif } __device_inline float4 cross(const float4& a, const float4& b) { - float4 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f}; - return r; +#ifdef __KERNEL_SSE__ + return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); +#else + return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f); +#endif } __device_inline float4 min(float4 a, float4 b) { +#ifdef __KERNEL_SSE__ + return _mm_min_ps(a.m128, b.m128); +#else return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +#endif } __device_inline float4 max(float4 a, float4 b) { +#ifdef __KERNEL_SSE__ + return _mm_max_ps(a.m128, b.m128); +#else return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +#endif } #endif #ifndef __KERNEL_GPU__ +__device_inline float4 select(const int4& mask, const float4& a, const float4& b) +{ +#ifdef __KERNEL_SSE__ + /* blendv is sse4, and apparently broken on vs2008 */ + return _mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b)); /* todo: avoid cvt */ +#else + return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); +#endif +} + +__device_inline float4 reduce_min(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = min(shuffle<1,0,3,2>(a), a); + return min(shuffle<2,3,0,1>(h), h); +#else + return make_float4(min(min(a.x, a.y), min(a.z, a.w))); +#endif +} + +__device_inline float4 reduce_max(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = max(shuffle<1,0,3,2>(a), a); + return max(shuffle<2,3,0,1>(h), h); +#else + return make_float4(max(max(a.x, a.y), max(a.z, a.w))); +#endif +} + +#if 0 +__device_inline float4 reduce_add(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = shuffle<1,0,3,2>(a) + a; + return shuffle<2,3,0,1>(h) + h; +#else + return make_float4((a.x + a.y) + (a.z + a.w)); +#endif +} +#endif + __device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, a.x, a.y, a.z, a.w); @@ -685,26 +790,77 @@ __device_inline void print_float4(const char *label, const float4& a) #endif +#ifndef __KERNEL_OPENCL__ + +__device_inline bool is_zero(const float4& a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float4(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); +#endif +} + +__device_inline float reduce_add(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = shuffle<1,0,3,2>(a) + a; + return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? 
*/ +#else + return ((a.x + a.y) + (a.z + a.w)); +#endif +} + +__device_inline float average(const float4& a) +{ + return reduce_add(a) * 0.25f; +} + +__device_inline float dot(const float4& a, const float4& b) +{ + return reduce_add(a * b); +} + +#endif + /* Int3 */ #ifndef __KERNEL_OPENCL__ +__device_inline int3 min(int3 a, int3 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_min_epi32(a.m128, b.m128); +#else + return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif +} + __device_inline int3 max(int3 a, int3 b) { - int3 r = {max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)}; - return r; +#ifdef __KERNEL_SSE__ + return _mm_max_epi32(a.m128, b.m128); +#else + return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif } __device_inline int3 clamp(const int3& a, int mn, int mx) { - int3 r = {clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)}; - return r; +#ifdef __KERNEL_SSE__ + return min(max(a, make_int3(mn)), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); +#endif } __device_inline int3 clamp(const int3& a, int3& mn, int mx) { - int3 r = {clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)}; - return r; +#ifdef __KERNEL_SSE__ + return min(max(a, mn), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); +#endif } #endif @@ -720,16 +876,63 @@ __device_inline void print_int3(const char *label, const int3& a) /* Int4 */ -#ifndef __KERNEL_OPENCL__ +#ifndef __KERNEL_GPU__ -__device_inline int4 operator>=(float4 a, float4 b) +__device_inline int4 operator+(const int4& a, const int4& b) { - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +#ifdef __KERNEL_SSE__ + return _mm_add_epi32(a.m128, b.m128); +#else + return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); +#endif +} + +__device_inline int4 operator+=(int4& a, const int4& b) +{ + return a = a + b; } +__device_inline int4 operator>>(const int4& a, int i) 
+{ +#ifdef __KERNEL_SSE__ + return _mm_srai_epi32(a.m128, i); +#else + return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); #endif +} -#ifndef __KERNEL_GPU__ +__device_inline int4 min(int4 a, int4 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_min_epi32(a.m128, b.m128); +#else + return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +#endif +} + +__device_inline int4 max(int4 a, int4 b) +{ +#ifdef __KERNEL_SSE__ + return _mm_max_epi32(a.m128, b.m128); +#else + return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +#endif +} + +__device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx) +{ + return min(max(a, mn), mx); +} + +__device_inline int4 select(const int4& mask, const int4& a, const int4& b) +{ +#ifdef __KERNEL_SSE__ + __m128 m = _mm_cvtepi32_ps(mask); + return _mm_castps_si128(_mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b)))); /* todo: avoid cvt */ +#else + return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w); +#endif +} __device_inline void print_int4(const char *label, const int4& a) { diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp new file mode 100644 index 00000000000..6da9a70ec0c --- /dev/null +++ b/intern/cycles/util/util_task.cpp @@ -0,0 +1,223 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "util_debug.h" +#include "util_foreach.h" +#include "util_system.h" +#include "util_task.h" + +CCL_NAMESPACE_BEGIN + +/* Task Pool */ + +TaskPool::TaskPool(const TaskRunFunction& run_) +{ + num = 0; + num_done = 0; + + do_cancel = false; + + run = run_; +} + +TaskPool::~TaskPool() +{ + stop(); +} + +void TaskPool::push(Task *task, bool front) +{ + TaskScheduler::Entry entry; + + entry.task = task; + entry.pool = this; + + TaskScheduler::push(entry, front); +} + +void TaskPool::wait() +{ + thread_scoped_lock lock(done_mutex); + + while(num_done != num) + done_cond.wait(lock); +} + +void TaskPool::cancel() +{ + TaskScheduler::clear(this); + + do_cancel = true; + wait(); + do_cancel = false; +} + +void TaskPool::stop() +{ + TaskScheduler::clear(this); + + assert(num_done == num); +} + +bool TaskPool::cancelled() +{ + return do_cancel; +} + +void TaskPool::done_increase(int done) +{ + done_mutex.lock(); + num_done += done; + done_mutex.unlock(); + + assert(num_done <= num); + done_cond.notify_all(); +} + +/* Task Scheduler */ + +thread_mutex TaskScheduler::mutex; +int TaskScheduler::users = 0; +vector<thread*> TaskScheduler::threads; +volatile bool TaskScheduler::do_exit = false; + +list<TaskScheduler::Entry> TaskScheduler::queue; +thread_mutex TaskScheduler::queue_mutex; +thread_condition_variable TaskScheduler::queue_cond; + +void TaskScheduler::init(int num_threads) +{ + thread_scoped_lock lock(mutex); + + /* multiple cycles instances can use this task scheduler, sharing the same + threads, so we keep track of the number of users. 
*/ + if(users == 0) { + do_exit = false; + + /* launch threads that will be waiting for work */ + if(num_threads == 0) + num_threads = system_cpu_thread_count(); + + threads.resize(num_threads); + + for(size_t i = 0; i < threads.size(); i++) + threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i)); + } + + users++; +} + +void TaskScheduler::exit() +{ + thread_scoped_lock lock(mutex); + + users--; + + if(users == 0) { + /* stop all waiting threads */ + do_exit = true; + TaskScheduler::queue_cond.notify_all(); + + /* delete threads */ + foreach(thread *t, threads) { + t->join(); + delete t; + } + + threads.clear(); + } +} + +bool TaskScheduler::thread_wait_pop(Entry& entry) +{ + thread_scoped_lock lock(queue_mutex); + + while(queue.empty() && !do_exit) + queue_cond.wait(lock); + + if(queue.empty()) { + assert(do_exit); + return false; + } + + entry = queue.front(); + queue.pop_front(); + + return true; +} + +void TaskScheduler::thread_run(int thread_id) +{ + Entry entry; + + /* todo: test affinity/denormal mask */ + + /* keep popping off tasks */ + while(thread_wait_pop(entry)) { + /* run task */ + entry.pool->run(entry.task, thread_id); + + /* delete task */ + delete entry.task; + + /* notify pool task was done */ + entry.pool->done_increase(1); + } +} + +void TaskScheduler::push(Entry& entry, bool front) +{ + /* add entry to queue */ + TaskScheduler::queue_mutex.lock(); + if(front) + TaskScheduler::queue.push_front(entry); + else + TaskScheduler::queue.push_back(entry); + entry.pool->num++; + TaskScheduler::queue_mutex.unlock(); + + TaskScheduler::queue_cond.notify_one(); +} + +void TaskScheduler::clear(TaskPool *pool) +{ + thread_scoped_lock lock(TaskScheduler::queue_mutex); + + /* erase all tasks from this pool from the queue */ + list<TaskScheduler::Entry>::iterator it = TaskScheduler::queue.begin(); + int done = 0; + + while(it != TaskScheduler::queue.end()) { + TaskScheduler::Entry& entry = *it; + + if(entry.pool == pool) { + done++; + delete 
entry.task; + + it = TaskScheduler::queue.erase(it); + } + else + it++; + } + + /* notify done */ + pool->done_increase(done); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h new file mode 100644 index 00000000000..acdb2cb50a2 --- /dev/null +++ b/intern/cycles/util/util_task.h @@ -0,0 +1,122 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __UTIL_TASK_H__ +#define __UTIL_TASK_H__ + +#include "util_list.h" +#include "util_thread.h" +#include "util_vector.h" + +CCL_NAMESPACE_BEGIN + +class Task; +class TaskPool; +class TaskScheduler; + +typedef boost::function<void(Task*,int)> TaskRunFunction; + +/* Task + * + * Base class for tasks to be executed in threads. */ + +class Task +{ +public: + Task() {}; + virtual ~Task() {} +}; + +/* Task Pool + * + * Pool of tasks that will be executed by the central TaskScheduler.For each + * pool, we can wait for all tasks to be done, or cancel them before they are + * done. 
+ * + * The run callback that actually executes the task may be create like this: + * function_bind(&MyClass::task_execute, this, _1, _2) */ + +class TaskPool +{ +public: + TaskPool(const TaskRunFunction& run); + ~TaskPool(); + + void push(Task *task, bool front = false); + + void wait(); /* wait until all tasks are done */ + void cancel(); /* cancel all tasks, keep worker threads running */ + void stop(); /* stop all worker threads */ + + bool cancelled(); /* for worker threads, test if cancelled */ + +protected: + friend class TaskScheduler; + + void done_increase(int done); + + TaskRunFunction run; + + thread_mutex done_mutex; + thread_condition_variable done_cond; + + volatile int num, num_done; + volatile bool do_cancel; +}; + +/* Task Scheduler + * + * Central scheduler that holds running threads ready to execute tasks. A singe + * queue holds the task from all pools. */ + +class TaskScheduler +{ +public: + static void init(int num_threads = 0); + static void exit(); + + static int num_threads() { return threads.size(); } + +protected: + friend class TaskPool; + + struct Entry { + Task *task; + TaskPool *pool; + }; + + static thread_mutex mutex; + static int users; + static vector<thread*> threads; + static volatile bool do_exit; + + static list<Entry> queue; + static thread_mutex queue_mutex; + static thread_condition_variable queue_cond; + + static void thread_run(int thread_id); + static bool thread_wait_pop(Entry& entry); + + static void push(Entry& entry, bool front); + static void clear(TaskPool *pool); +}; + +CCL_NAMESPACE_END + +#endif + diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index 6836be203f5..3d15b342fe5 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -69,133 +69,6 @@ protected: bool joined; }; -/* Thread Safe Queue to pass tasks from one thread to another. Tasks should be - * pushed into the queue, while the worker thread waits to pop the next task - * off the queue. 
Once all tasks are into the queue, calling stop() will stop - * the worker threads from waiting for more tasks once all tasks are done. */ - -template<typename T> class ThreadQueue -{ -public: - ThreadQueue() - { - tot = 0; - tot_done = 0; - do_stop = false; - do_cancel = false; - } - - /* Main thread functions */ - - /* push a task to be executed */ - void push(const T& value) - { - thread_scoped_lock lock(queue_mutex); - queue.push(value); - tot++; - lock.unlock(); - - queue_cond.notify_one(); - } - - /* wait until all tasks are done */ - void wait_done() - { - thread_scoped_lock lock(done_mutex); - - while(tot_done != tot) - done_cond.wait(lock); - } - - /* stop all worker threads */ - void stop() - { - clear(); - do_stop = true; - queue_cond.notify_all(); - } - - /* cancel all tasks, but keep worker threads running */ - void cancel() - { - clear(); - do_cancel = true; - wait_done(); - do_cancel = false; - } - - /* Worker thread functions - * - * while(queue.worker_wait_pop(task)) { - * for(..) { - * ... do work ... 
- * - * if(queue.worker_cancel()) - * break; - * } - * - * queue.worker_done(); - * } - */ - - bool worker_wait_pop(T& value) - { - thread_scoped_lock lock(queue_mutex); - - while(queue.empty() && !do_stop) - queue_cond.wait(lock); - - if(queue.empty()) - return false; - - value = queue.front(); - queue.pop(); - - return true; - } - - void worker_done() - { - thread_scoped_lock lock(done_mutex); - tot_done++; - lock.unlock(); - - assert(tot_done <= tot); - - done_cond.notify_all(); - } - - bool worker_cancel() - { - return do_cancel; - } - -protected: - void clear() - { - thread_scoped_lock lock(queue_mutex); - - while(!queue.empty()) { - thread_scoped_lock done_lock(done_mutex); - tot_done++; - done_lock.unlock(); - - queue.pop(); - } - - done_cond.notify_all(); - } - - std::queue<T> queue; - thread_mutex queue_mutex; - thread_mutex done_mutex; - thread_condition_variable queue_cond; - thread_condition_variable done_cond; - volatile bool do_stop; - volatile bool do_cancel; - volatile int tot, tot_done; -}; - /* Thread Local Storage * * Boost implementation is a bit slow, and Mac OS X __thread is not supported diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 61bc36ae888..0fd26825911 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -129,23 +129,26 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) Transform transform_inverse(const Transform& tfm) { - union { Transform T; float M[4][4]; } R, M; - - R.T = transform_identity(); - M.T = tfm; + Transform tfmR = transform_identity(); + float M[4][4], R[4][4]; - if(!transform_matrix4_gj_inverse(R.M, M.M)) { + memcpy(R, &tfmR, sizeof(R)); + memcpy(M, &tfm, sizeof(M)); + + if(!transform_matrix4_gj_inverse(R, M)) { /* matrix is degenerate (e.g. 
0 scale on some axis), ideally we should never be in this situation, but try to invert it anyway with tweak */ - M.M[0][0] += 1e-8f; - M.M[1][1] += 1e-8f; - M.M[2][2] += 1e-8f; + M[0][0] += 1e-8f; + M[1][1] += 1e-8f; + M[2][2] += 1e-8f; - if(!transform_matrix4_gj_inverse(R.M, M.M)) + if(!transform_matrix4_gj_inverse(R, M)) return transform_identity(); } - return R.T; + memcpy(&tfmR, R, sizeof(R)); + + return tfmR; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index efdda98571a..cf167707e47 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -36,23 +36,37 @@ #define __shared #define __constant -#ifdef __GNUC__ -#define __device_inline static inline __attribute__((always_inline)) -#else +#ifdef _WIN32 #define __device_inline static __forceinline +#define __align(...) __declspec(align(__VA_ARGS__)) +#else +#define __device_inline static inline __attribute__((always_inline)) +#define __forceinline inline __attribute__((always_inline)) +#define __align(...) __attribute__((aligned(__VA_ARGS__))) #endif #endif +/* Bitness */ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) +#define __KERNEL_64_BIT__ +#endif + /* SIMD Types */ -/* not needed yet, will be for qbvh -#ifndef __KERNEL_GPU__ +/* not enabled, globally applying it just gives slowdown, + * but useful for testing. 
*/ +//#define __KERNEL_SSE__ +#ifdef __KERNEL_SSE__ -#include <emmintrin.h> -#include <xmmintrin.h> +#include <xmmintrin.h> /* SSE 1 */ +#include <emmintrin.h> /* SSE 2 */ +#include <pmmintrin.h> /* SSE 3 */ +#include <tmmintrin.h> /* SSE 3 */ +#include <smmintrin.h> /* SSE 4 */ -#endif*/ +#endif #ifndef _WIN32 #ifndef __KERNEL_GPU__ @@ -97,6 +111,12 @@ typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; +#ifdef __KERNEL_64_BIT__ +typedef int64_t ssize_t; +#else +typedef int32_t ssize_t; +#endif + #endif /* Generic Memory Pointer */ @@ -108,89 +128,137 @@ typedef uint64_t device_ptr; struct uchar2 { uchar x, y; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct uchar3 { uchar x, y, z; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct uchar4 { uchar x, y, z, w; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct int2 { int x, y; - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) int3 { + union { + __m128i m128; + struct { int x, y, z, w; }; + }; + + __forceinline int3() {} + __forceinline int3(const __m128i a) : m128(a) {} + __forceinline operator const __m128i&(void) const { return m128; } + __forceinline operator __m128i&(void) { return 
m128; } +#else struct int3 { - int x, y, z; + int x, y, z, w; +#endif - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) int4 { + union { + __m128i m128; + struct { int x, y, z, w; }; + }; + + __forceinline int4() {} + __forceinline int4(const __m128i a) : m128(a) {} + __forceinline operator const __m128i&(void) const { return m128; } + __forceinline operator __m128i&(void) { return m128; } +#else struct int4 { int x, y, z, w; +#endif - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; struct uint2 { uint x, y; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct uint3 { uint x, y, z; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct uint4 { uint x, y, z, w; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct float2 { float x, y; - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) float3 { + 
union { + __m128 m128; + struct { float x, y, z, w; }; + }; + + __forceinline float3() {} + __forceinline float3(const __m128 a) : m128(a) {} + __forceinline operator const __m128&(void) const { return m128; } + __forceinline operator __m128&(void) { return m128; } +#else struct float3 { - float x, y, z; - -#ifdef WITH_OPENCL - float w; + float x, y, z, w; #endif - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) float4 { + union { + __m128 m128; + struct { float x, y, z, w; }; + }; + + __forceinline float4() {} + __forceinline float4(const __m128 a) : m128(a) {} + __forceinline operator const __m128&(void) const { return m128; } + __forceinline operator __m128&(void) { return m128; } +#else struct float4 { float x, y, z, w; +#endif - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; #endif @@ -201,87 +269,179 @@ struct float4 { * * OpenCL does not support C++ class, so we use these instead. 
*/ -__device uchar2 make_uchar2(uchar x, uchar y) +__device_inline uchar2 make_uchar2(uchar x, uchar y) { uchar2 a = {x, y}; return a; } -__device uchar3 make_uchar3(uchar x, uchar y, uchar z) +__device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) { uchar3 a = {x, y, z}; return a; } -__device uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) +__device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) { uchar4 a = {x, y, z, w}; return a; } -__device int2 make_int2(int x, int y) +__device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } -__device int3 make_int3(int x, int y, int z) +__device_inline int3 make_int3(int x, int y, int z) { - int3 a = {x, y, z}; +#ifdef __KERNEL_SSE__ + int3 a; + a.m128 = _mm_set_epi32(0, z, y, x); +#else + int3 a = {x, y, z, 0}; +#endif + return a; } -__device int4 make_int4(int x, int y, int z, int w) +__device_inline int4 make_int4(int x, int y, int z, int w) { +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_set_epi32(w, z, y, x); +#else int4 a = {x, y, z, w}; +#endif + return a; } -__device uint2 make_uint2(uint x, uint y) +__device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } -__device uint3 make_uint3(uint x, uint y, uint z) +__device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } -__device uint4 make_uint4(uint x, uint y, uint z, uint w) +__device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } -__device float2 make_float2(float x, float y) +__device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } -__device float3 make_float3(float x, float y, float z) +__device_inline float3 make_float3(float x, float y, float z) { -#ifdef WITH_OPENCL - float3 a = {x, y, z, 0.0f}; +#ifdef __KERNEL_SSE__ + float3 a; + a.m128 = _mm_set_ps(0.0f, z, y, x); #else - float3 a = {x, y, z}; + float3 a = {x, y, z, 0.0f}; #endif + return a; } -__device float4 make_float4(float x, 
float y, float z, float w) +__device_inline float4 make_float4(float x, float y, float z, float w) { +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_set_ps(w, z, y, x); +#else float4 a = {x, y, z, w}; +#endif + return a; } -__device int align_up(int offset, int alignment) +__device_inline int align_up(int offset, int alignment) { return (offset + alignment - 1) & ~(alignment - 1); } +__device_inline int3 make_int3(int i) +{ +#ifdef __KERNEL_SSE__ + int3 a; + a.m128 = _mm_set1_epi32(i); +#else + int3 a = {i, i, i, i}; +#endif + + return a; +} + +__device_inline int4 make_int4(int i) +{ +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_set1_epi32(i); +#else + int4 a = {i, i, i, i}; +#endif + + return a; +} + +__device_inline float3 make_float3(float f) +{ +#ifdef __KERNEL_SSE__ + float3 a; + a.m128 = _mm_set1_ps(f); +#else + float3 a = {f, f, f, f}; +#endif + + return a; +} + +__device_inline float4 make_float4(float f) +{ +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_set1_ps(f); +#else + float4 a = {f, f, f, f}; +#endif + + return a; +} + +__device_inline float4 make_float4(const int4& i) +{ +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_cvtepi32_ps(i.m128); +#else + float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; +#endif + + return a; +} + +__device_inline int4 make_int4(const float3& f) +{ +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_cvtps_epi32(f.m128); +#else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +#endif + + return a; +} + #endif CCL_NAMESPACE_END diff --git a/intern/ghost/GHOST_Rect.h b/intern/ghost/GHOST_Rect.h index bcbcaded364..30d9d16b701 100644 --- a/intern/ghost/GHOST_Rect.h +++ b/intern/ghost/GHOST_Rect.h @@ -241,8 +241,10 @@ inline void GHOST_Rect::wrapPoint(GHOST_TInt32 &x, GHOST_TInt32 &y, GHOST_TInt32 GHOST_TInt32 h= getHeight(); /* highly unlikely but avoid eternal loop */ - if(w-ofs*2 <= 0 || h-ofs*2 <= 0) + if (w-ofs*2 <= 0 || h-ofs*2 <= 0) { return; + } + while(x-ofs < m_l) x+= w-(ofs*2); while(y-ofs < m_t) 
y+= h-(ofs*2); while(x+ofs > m_r) x-= w-(ofs*2); diff --git a/intern/guardedalloc/intern/mallocn.c b/intern/guardedalloc/intern/mallocn.c index 2a6a0df0ff4..bb3a1c66ddc 100644 --- a/intern/guardedalloc/intern/mallocn.c +++ b/intern/guardedalloc/intern/mallocn.c @@ -243,7 +243,7 @@ void *MEM_dupallocN(void *vmemh) MemHead *memh= vmemh; memh--; - if(memh->mmap) + if (memh->mmap) newp= MEM_mapallocN(memh->len, "dupli_mapalloc"); else newp= MEM_mallocN(memh->len, "dupli_alloc"); @@ -265,8 +265,8 @@ void *MEM_reallocN(void *vmemh, size_t len) memh--; newp= MEM_mallocN(len, memh->name); - if(newp) { - if(len < memh->len) + if (newp) { + if (len < memh->len) memcpy(newp, vmemh, len); else memcpy(newp, vmemh, memh->len); @@ -311,14 +311,14 @@ void *MEM_mallocN(size_t len, const char *str) memh= (MemHead *)malloc(len+sizeof(MemHead)+sizeof(MemTail)); - if(memh) { + if (memh) { make_memhead_header(memh, len, str); mem_unlock_thread(); - if(malloc_debug_memset && len) + if (malloc_debug_memset && len) memset(memh+1, 255, len); #ifdef DEBUG_MEMCOUNTER - if(_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL) + if (_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL) memcount_raise(__func__); memh->_count= _mallocn_count++; #endif @@ -339,11 +339,11 @@ void *MEM_callocN(size_t len, const char *str) memh= (MemHead *)calloc(len+sizeof(MemHead)+sizeof(MemTail),1); - if(memh) { + if (memh) { make_memhead_header(memh, len, str); mem_unlock_thread(); #ifdef DEBUG_MEMCOUNTER - if(_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL) + if (_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL) memcount_raise(__func__); memh->_count= _mallocn_count++; #endif @@ -366,14 +366,14 @@ void *MEM_mapallocN(size_t len, const char *str) memh= mmap(NULL, len+sizeof(MemHead)+sizeof(MemTail), PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, 0); - if(memh!=(MemHead *)-1) { + if (memh!=(MemHead *)-1) { make_memhead_header(memh, len, str); memh->mmap= 1; mmap_in_use += len; peak_mem = mmap_in_use > peak_mem ? 
mmap_in_use : peak_mem; mem_unlock_thread(); #ifdef DEBUG_MEMCOUNTER - if(_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL) + if (_mallocn_count==DEBUG_MEMCOUNTER_ERROR_VAL) memcount_raise(__func__); memh->_count= _mallocn_count++; #endif @@ -406,9 +406,9 @@ static int compare_len(const void *p1, const void *p2) const MemPrintBlock *pb1= (const MemPrintBlock*)p1; const MemPrintBlock *pb2= (const MemPrintBlock*)p2; - if(pb1->len < pb2->len) + if (pb1->len < pb2->len) return 1; - else if(pb1->len == pb2->len) + else if (pb1->len == pb2->len) return 0; else return -1; @@ -431,7 +431,7 @@ void MEM_printmemlist_stats(void) membl = membase->first; if (membl) membl = MEMNEXT(membl); - while(membl) { + while (membl) { pb->name= membl->name; pb->len= membl->len; pb->items= 1; @@ -439,18 +439,18 @@ void MEM_printmemlist_stats(void) totpb++; pb++; - if(membl->next) + if (membl->next) membl= MEMNEXT(membl->next); else break; } /* sort by name and add together blocks with the same name */ qsort(printblock, totpb, sizeof(MemPrintBlock), compare_name); - for(a=0, b=0; a<totpb; a++) { - if(a == b) { + for (a = 0, b=0; a<totpb; a++) { + if (a == b) { continue; } - else if(strcmp(printblock[a].name, printblock[b].name) == 0) { + else if (strcmp(printblock[a].name, printblock[b].name) == 0) { printblock[b].len += printblock[a].len; printblock[b].items++; } @@ -465,7 +465,7 @@ void MEM_printmemlist_stats(void) qsort(printblock, totpb, sizeof(MemPrintBlock), compare_len); printf("\ntotal memory len: %.3f MB\n", (double)mem_in_use/(double)(1024*1024)); printf(" ITEMS TOTAL-MiB AVERAGE-KiB TYPE\n"); - for(a=0, pb=printblock; a<totpb; a++, pb++) + for (a = 0, pb=printblock; a<totpb; a++, pb++) printf("%6d (%8.3f %8.3f) %s\n", pb->items, (double)pb->len/(double)(1024*1024), (double)pb->len/1024.0/(double)pb->items, pb->name); free(printblock); @@ -491,7 +491,7 @@ static void MEM_printmemlist_internal( int pydict ) print_error("# membase_debug.py\n"); print_error("membase = [\\\n"); } - 
while(membl) { + while (membl) { if (pydict) { fprintf(stderr, "{'len':" SIZET_FORMAT ", 'name':'''%s''', 'pointer':'%p'},\\\n", SIZET_ARG(membl->len), membl->name, (void *)(membl+1)); } else { @@ -501,7 +501,7 @@ static void MEM_printmemlist_internal( int pydict ) print_error("%s len: " SIZET_FORMAT " %p\n", membl->name, SIZET_ARG(membl->len), membl+1); #endif } - if(membl->next) + if (membl->next) membl= MEMNEXT(membl->next); else break; } @@ -536,9 +536,9 @@ void MEM_callbackmemlist(void (*func)(void*)) { membl = membase->first; if (membl) membl = MEMNEXT(membl); - while(membl) { + while (membl) { func(membl+1); - if(membl->next) + if (membl->next) membl= MEMNEXT(membl->next); else break; } @@ -554,13 +554,13 @@ short MEM_testN(void *vmemh) { membl = membase->first; if (membl) membl = MEMNEXT(membl); - while(membl) { + while (membl) { if (vmemh == membl+1) { mem_unlock_thread(); return 1; } - if(membl->next) + if (membl->next) membl= MEMNEXT(membl->next); else break; } @@ -585,13 +585,13 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */ MemHead *memh= vmemh; const char *name; - if (memh == NULL){ + if (memh == NULL) { MemorY_ErroR("free","attempt to free NULL pointer"); /* print_error(err_stream, "%d\n", (memh+4000)->tag1); */ return(-1); } - if(sizeof(intptr_t)==8) { + if (sizeof(intptr_t)==8) { if (((intptr_t) memh) & 0x7) { MemorY_ErroR("free","attempt to free illegal pointer"); return(-1); @@ -605,7 +605,7 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */ } memh--; - if(memh->tag1 == MEMFREE && memh->tag2 == MEMFREE) { + if (memh->tag1 == MEMFREE && memh->tag2 == MEMFREE) { MemorY_ErroR(memh->name,"double free"); return(-1); } @@ -613,7 +613,7 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */ mem_lock_thread(); if ((memh->tag1 == MEMTAG1) && (memh->tag2 == MEMTAG2) && ((memh->len & 0x3) == 0)) { memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + memh->len); - if (memt->tag3 == MEMTAG3){ + if 
(memt->tag3 == MEMTAG3) { memh->tag1 = MEMFREE; memh->tag2 = MEMFREE; @@ -628,7 +628,7 @@ short MEM_freeN(void *vmemh) /* anders compileertie niet meer */ error = 2; MemorY_ErroR(memh->name,"end corrupt"); name = check_memlist(memh); - if (name != NULL){ + if (name != NULL) { if (name != memh->name) MemorY_ErroR(name,"is also corrupt"); } } else{ @@ -694,13 +694,13 @@ static void rem_memblock(MemHead *memh) totblock--; mem_in_use -= memh->len; - if(memh->mmap) { + if (memh->mmap) { mmap_in_use -= memh->len; if (munmap(memh, memh->len + sizeof(MemHead) + sizeof(MemTail))) printf("Couldn't unmap memory %s\n", memh->name); } else { - if(malloc_debug_memset && memh->len) + if (malloc_debug_memset && memh->len) memset(memh+1, 255, memh->len); free(memh); } @@ -723,7 +723,7 @@ static const char *check_memlist(MemHead *memh) forw = membase->first; if (forw) forw = MEMNEXT(forw); forwok = NULL; - while(forw){ + while (forw) { if (forw->tag1 != MEMTAG1 || forw->tag2 != MEMTAG2) break; forwok = forw; if (forw->next) forw = MEMNEXT(forw->next); @@ -733,7 +733,7 @@ static const char *check_memlist(MemHead *memh) back = (MemHead *) membase->last; if (back) back = MEMNEXT(back); backok = NULL; - while(back){ + while (back) { if (back->tag1 != MEMTAG1 || back->tag2 != MEMTAG2) break; backok = back; if (back->prev) back = MEMNEXT(back->prev); @@ -742,13 +742,13 @@ static const char *check_memlist(MemHead *memh) if (forw != back) return ("MORE THAN 1 MEMORYBLOCK CORRUPT"); - if (forw == NULL && back == NULL){ + if (forw == NULL && back == NULL) { /* geen foute headers gevonden dan maar op zoek naar memblock*/ forw = membase->first; if (forw) forw = MEMNEXT(forw); forwok = NULL; - while(forw){ + while (forw) { if (forw == memh) break; if (forw->tag1 != MEMTAG1 || forw->tag2 != MEMTAG2) break; forwok = forw; @@ -760,7 +760,7 @@ static const char *check_memlist(MemHead *memh) back = (MemHead *) membase->last; if (back) back = MEMNEXT(back); backok = NULL; - while(back){ + while (back) 
{ if (back == memh) break; if (back->tag1 != MEMTAG1 || back->tag2 != MEMTAG2) break; backok = back; @@ -772,10 +772,10 @@ static const char *check_memlist(MemHead *memh) if (forwok) name = forwok->nextname; else name = "No name found"; - if (forw == memh){ + if (forw == memh) { /* voor alle zekerheid wordt dit block maar uit de lijst gehaald */ - if (forwok){ - if (backok){ + if (forwok) { + if (backok) { forwok->next = (MemHead *)&backok->next; backok->prev = (MemHead *)&forwok->next; forwok->nextname = backok->name; @@ -785,7 +785,7 @@ static const char *check_memlist(MemHead *memh) /* membase->last = (struct Link *) &forwok->next; */ } } else{ - if (backok){ + if (backok) { backok->prev = NULL; membase->first = &backok->next; } else{ diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c index 2036e601bcb..24c77c439a7 100644 --- a/intern/mikktspace/mikktspace.c +++ b/intern/mikktspace/mikktspace.c @@ -193,7 +193,7 @@ static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1) // this if is important. 
Due to floating point precision // averaging when ts0==ts1 will cause a slight difference // which results in tangent space splits later on - if(pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT && + if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT && veq(pTS0->vOs,pTS1->vOs) && veq(pTS0->vOt, pTS1->vOt)) { ts_res.fMagS = pTS0->fMagS; @@ -207,8 +207,8 @@ static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1) ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT); ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs); ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt); - if( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs); - if( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt); + if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs); + if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt); } return ts_res; @@ -246,7 +246,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f); // verify all call-backs have been set - if( pContext->m_pInterface->m_getNumFaces==NULL || + if ( pContext->m_pInterface->m_getNumFaces==NULL || pContext->m_pInterface->m_getNumVerticesOfFace==NULL || pContext->m_pInterface->m_getPosition==NULL || pContext->m_pInterface->m_getNormal==NULL || @@ -254,21 +254,21 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre return TFALSE; // count triangles on supported faces - for(f=0; f<iNrFaces; f++) + for (f=0; f<iNrFaces; f++) { const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); - if(verts==3) ++iNrTrianglesIn; + if (verts==3) ++iNrTrianglesIn; else if(verts==4) iNrTrianglesIn += 2; } - if(iNrTrianglesIn<=0) return TFALSE; + if (iNrTrianglesIn<=0) return TFALSE; // allocate memory for an index list piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn); pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn); - if(piTriListIn==NULL || 
pTriInfos==NULL) + if (piTriListIn==NULL || pTriInfos==NULL) { - if(piTriListIn!=NULL) free(piTriListIn); - if(pTriInfos!=NULL) free(pTriInfos); + if (piTriListIn!=NULL) free(piTriListIn); + if (pTriInfos!=NULL) free(pTriInfos); return TFALSE; } @@ -283,7 +283,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre // Mark all degenerate triangles iTotTris = iNrTrianglesIn; iDegenTriangles = 0; - for(t=0; t<iTotTris; t++) + for (t=0; t<iTotTris; t++) { const int i0 = piTriListIn[t*3+0]; const int i1 = piTriListIn[t*3+1]; @@ -291,7 +291,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre const SVec3 p0 = GetPosition(pContext, i0); const SVec3 p1 = GetPosition(pContext, i1); const SVec3 p2 = GetPosition(pContext, i2); - if(veq(p0,p1) || veq(p0,p2) || veq(p1,p2)) // degenerate + if (veq(p0,p1) || veq(p0,p2) || veq(p1,p2)) // degenerate { pTriInfos[t].iFlag |= MARK_DEGENERATE; ++iDegenTriangles; @@ -317,10 +317,10 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre iNrMaxGroups = iNrTrianglesIn*3; pGroups = (SGroup *) malloc(sizeof(SGroup)*iNrMaxGroups); piGroupTrianglesBuffer = (int *) malloc(sizeof(int)*iNrTrianglesIn*3); - if(pGroups==NULL || piGroupTrianglesBuffer==NULL) + if (pGroups==NULL || piGroupTrianglesBuffer==NULL) { - if(pGroups!=NULL) free(pGroups); - if(piGroupTrianglesBuffer!=NULL) free(piGroupTrianglesBuffer); + if (pGroups!=NULL) free(pGroups); + if (piGroupTrianglesBuffer!=NULL) free(piGroupTrianglesBuffer); free(piTriListIn); free(pTriInfos); return TFALSE; @@ -333,7 +333,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre // psTspace = (STSpace *) malloc(sizeof(STSpace)*iNrTSPaces); - if(psTspace==NULL) + if (psTspace==NULL) { free(piTriListIn); free(pTriInfos); @@ -342,7 +342,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre return TFALSE; } memset(psTspace, 0, 
sizeof(STSpace)*iNrTSPaces); - for(t=0; t<iNrTSPaces; t++) + for (t=0; t<iNrTSPaces; t++) { psTspace[t].vOs.x=1.0f; psTspace[t].vOs.y=0.0f; psTspace[t].vOs.z=0.0f; psTspace[t].fMagS = 1.0f; psTspace[t].vOt.x=0.0f; psTspace[t].vOt.y=1.0f; psTspace[t].vOt.z=0.0f; psTspace[t].fMagT = 1.0f; @@ -359,7 +359,7 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre free(pGroups); free(piGroupTrianglesBuffer); - if(!bRes) // if an allocation in GenerateTSpaces() failed + if (!bRes) // if an allocation in GenerateTSpaces() failed { // clean up and return false free(pTriInfos); free(piTriListIn); free(psTspace); @@ -376,10 +376,10 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre free(pTriInfos); free(piTriListIn); index = 0; - for(f=0; f<iNrFaces; f++) + for (f=0; f<iNrFaces; f++) { const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); - if(verts!=3 && verts!=4) continue; + if (verts!=3 && verts!=4) continue; // I've decided to let degenerate triangles and group-with-anythings @@ -390,28 +390,28 @@ tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThre // (this is already the case for good triangles but not for // degenerate ones and those with bGroupWithAnything==true) bool bOrient = psTspace[index].bOrient; - if(psTspace[index].iCounter == 0) // tspace was not derived from a group + if (psTspace[index].iCounter == 0) // tspace was not derived from a group { // look for a space created in GenerateTSpaces() by iCounter>0 bool bNotFound = true; int i=1; - while(i<verts && bNotFound) + while (i<verts && bNotFound) { - if(psTspace[index+i].iCounter > 0) bNotFound=false; + if (psTspace[index+i].iCounter > 0) bNotFound=false; else ++i; } - if(!bNotFound) bOrient = psTspace[index+i].bOrient; + if (!bNotFound) bOrient = psTspace[index+i].bOrient; }*/ // set data - for(i=0; i<verts; i++) + for (i=0; i<verts; i++) { const STSpace * pTSpace = &psTspace[index]; float tang[] 
= {pTSpace->vOs.x, pTSpace->vOs.y, pTSpace->vOs.z}; float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z}; - if(pContext->m_pInterface->m_setTSpace!=NULL) + if (pContext->m_pInterface->m_setTSpace!=NULL) pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i); - if(pContext->m_pInterface->m_setTSpaceBasic!=NULL) + if (pContext->m_pInterface->m_setTSpaceBasic!=NULL) pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i); ++index; @@ -464,23 +464,23 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM int iMaxCount=0; SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; float fMin, fMax; - for(i=1; i<(iNrTrianglesIn*3); i++) + for (i=1; i<(iNrTrianglesIn*3); i++) { const int index = piTriList_in_and_out[i]; const SVec3 vP = GetPosition(pContext, index); - if(vMin.x > vP.x) vMin.x = vP.x; + if (vMin.x > vP.x) vMin.x = vP.x; else if(vMax.x < vP.x) vMax.x = vP.x; - if(vMin.y > vP.y) vMin.y = vP.y; + if (vMin.y > vP.y) vMin.y = vP.y; else if(vMax.y < vP.y) vMax.y = vP.y; - if(vMin.z > vP.z) vMin.z = vP.z; + if (vMin.z > vP.z) vMin.z = vP.z; else if(vMax.z < vP.z) vMax.z = vP.z; } vDim = vsub(vMax,vMin); iChannel = 0; fMin = vMin.x; fMax=vMax.x; - if(vDim.y>vDim.x && vDim.y>vDim.z) + if (vDim.y>vDim.x && vDim.y>vDim.z) { iChannel=1; fMin = vMin.y, fMax=vMax.y; @@ -497,12 +497,12 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); - if(piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) + if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) { - if(piHashTable!=NULL) free(piHashTable); - if(piHashCount!=NULL) free(piHashCount); - if(piHashOffsets!=NULL) free(piHashOffsets); - if(piHashCount2!=NULL) 
free(piHashCount2); + if (piHashTable!=NULL) free(piHashTable); + if (piHashCount!=NULL) free(piHashCount); + if (piHashOffsets!=NULL) free(piHashOffsets); + if (piHashCount2!=NULL) free(piHashCount2); GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); return; } @@ -510,7 +510,7 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM memset(piHashCount2, 0, sizeof(int)*g_iCells); // count amount of elements in each cell unit - for(i=0; i<(iNrTrianglesIn*3); i++) + for (i=0; i<(iNrTrianglesIn*3); i++) { const int index = piTriList_in_and_out[i]; const SVec3 vP = GetPosition(pContext, index); @@ -521,11 +521,11 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM // evaluate start index of each cell. piHashOffsets[0]=0; - for(k=1; k<g_iCells; k++) + for (k=1; k<g_iCells; k++) piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1]; // insert vertices - for(i=0; i<(iNrTrianglesIn*3); i++) + for (i=0; i<(iNrTrianglesIn*3); i++) { const int index = piTriList_in_and_out[i]; const SVec3 vP = GetPosition(pContext, index); @@ -538,29 +538,29 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM pTable[piHashCount2[iCell]] = i; // vertex i has been inserted. 
++piHashCount2[iCell]; } - for(k=0; k<g_iCells; k++) + for (k=0; k<g_iCells; k++) assert(piHashCount2[k] == piHashCount[k]); // verify the count free(piHashCount2); // find maximum amount of entries in any hash entry iMaxCount = piHashCount[0]; - for(k=1; k<g_iCells; k++) - if(iMaxCount<piHashCount[k]) + for (k=1; k<g_iCells; k++) + if (iMaxCount<piHashCount[k]) iMaxCount=piHashCount[k]; pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount); // complete the merge - for(k=0; k<g_iCells; k++) + for (k=0; k<g_iCells; k++) { // extract table of cell k and amount of entries in it int * pTable = &piHashTable[piHashOffsets[k]]; const int iEntries = piHashCount[k]; - if(iEntries < 2) continue; + if (iEntries < 2) continue; - if(pTmpVert!=NULL) + if (pTmpVert!=NULL) { - for(e=0; e<iEntries; e++) + for (e=0; e<iEntries; e++) { int i = pTable[e]; const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]); @@ -573,7 +573,7 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SM MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries); } - if(pTmpVert!=NULL) { free(pTmpVert); } + if (pTmpVert!=NULL) { free(pTmpVert); } free(piHashTable); free(piHashCount); free(piHashOffsets); @@ -585,11 +585,11 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons int c=0, l=0, channel=0; float fvMin[3], fvMax[3]; float dx=0, dy=0, dz=0, fSep=0; - for(c=0; c<3; c++) + for (c=0; c<3; c++) { fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c]; } - for(l=(iL_in+1); l<=iR_in; l++) - for(c=0; c<3; c++) - if(fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; + for (l=(iL_in+1); l<=iR_in; l++) + for (c=0; c<3; c++) + if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; else if(fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c]; dx = fvMax[0]-fvMin[0]; @@ -597,17 +597,17 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons dz = fvMax[2]-fvMin[2]; channel = 0; - 
if(dy>dx && dy>dz) channel=1; + if (dy>dx && dy>dz) channel=1; else if(dz>dx) channel=2; fSep = 0.5f*(fvMax[channel]+fvMin[channel]); // terminate recursion when the separation/average value // is no longer strictly between fMin and fMax values. - if(fSep>=fvMax[channel] || fSep<=fvMin[channel]) + if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) { // complete the weld - for(l=iL_in; l<=iR_in; l++) + for (l=iL_in; l<=iR_in; l++) { int i = pTmpVert[l].index; const int index = piTriList_in_and_out[i]; @@ -617,7 +617,7 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons tbool bNotFound = TTRUE; int l2=iL_in, i2rec=-1; - while(l2<l && bNotFound) + while (l2<l && bNotFound) { const int i2 = pTmpVert[l2].index; const int index2 = piTriList_in_and_out[i2]; @@ -627,7 +627,7 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons i2rec=i2; //if(vP==vP2 && vN==vN2 && vT==vT2) - if(vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z && + if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z && vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z && vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z) bNotFound = TFALSE; @@ -636,7 +636,7 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons } // merge if previously found - if(!bNotFound) + if (!bNotFound) piTriList_in_and_out[i] = piTriList_in_and_out[i2rec]; } } @@ -646,24 +646,24 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons assert((iR_in-iL_in)>0); // at least 2 entries // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] - while(iL < iR) + while (iL < iR) { tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; - while((!bReadyLeftSwap) && iL<iR) + while ((!bReadyLeftSwap) && iL<iR) { assert(iL>=iL_in && iL<=iR_in); bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep); - if(!bReadyLeftSwap) ++iL; + if (!bReadyLeftSwap) ++iL; } - while((!bReadyRightSwap) && iL<iR) + while ((!bReadyRightSwap) && iL<iR) { 
assert(iR>=iL_in && iR<=iR_in); bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep; - if(!bReadyRightSwap) --iR; + if (!bReadyRightSwap) --iR; } assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) ); - if(bReadyLeftSwap && bReadyRightSwap) + if (bReadyLeftSwap && bReadyRightSwap) { const STmpVert sTmp = pTmpVert[iL]; assert(iL<iR); @@ -674,17 +674,17 @@ static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], cons } assert(iL==(iR+1) || (iL==iR)); - if(iL==iR) + if (iL==iR) { const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep; - if(bReadyRightSwap) ++iL; + if (bReadyRightSwap) ++iL; else --iR; } // only need to weld when there is more than 1 instance of the (x,y,z) - if(iL_in < iR) + if (iL_in < iR) MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR); // weld all left of fSep - if(iL < iR_in) + if (iL < iR_in) MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in); // weld all right of (or equal to) fSep } } @@ -693,7 +693,7 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext { // this can be optimized further using a tree structure or more hashing. 
int e=0; - for(e=0; e<iEntries; e++) + for (e=0; e<iEntries; e++) { int i = pTable[e]; const int index = piTriList_in_and_out[i]; @@ -703,7 +703,7 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext tbool bNotFound = TTRUE; int e2=0, i2rec=-1; - while(e2<e && bNotFound) + while (e2<e && bNotFound) { const int i2 = pTable[e2]; const int index2 = piTriList_in_and_out[i2]; @@ -712,14 +712,14 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext const SVec3 vT2 = GetTexCoord(pContext, index2); i2rec = i2; - if(veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2)) + if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2)) bNotFound = TFALSE; else ++e2; } // merge if previously found - if(!bNotFound) + if (!bNotFound) piTriList_in_and_out[i] = piTriList_in_and_out[i2rec]; } } @@ -727,9 +727,9 @@ static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) { int iNumUniqueVerts = 0, t=0, i=0; - for(t=0; t<iNrTrianglesIn; t++) + for (t=0; t<iNrTrianglesIn; t++) { - for(i=0; i<3; i++) + for (i=0; i<3; i++) { const int offs = t*3 + i; const int index = piTriList_in_and_out[offs]; @@ -740,27 +740,27 @@ static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], cons tbool bFound = TFALSE; int t2=0, index2rec=-1; - while(!bFound && t2<=t) + while (!bFound && t2<=t) { int j=0; - while(!bFound && j<3) + while (!bFound && j<3) { const int index2 = piTriList_in_and_out[t2*3 + j]; const SVec3 vP2 = GetPosition(pContext, index2); const SVec3 vN2 = GetNormal(pContext, index2); const SVec3 vT2 = GetTexCoord(pContext, index2); - if(veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2)) + if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2)) bFound = TTRUE; else ++j; } - if(!bFound) ++t2; + if (!bFound) ++t2; } assert(bFound); // if we found our own - if(index2rec == index) { ++iNumUniqueVerts; } 
+ if (index2rec == index) { ++iNumUniqueVerts; } piTriList_in_and_out[offs] = index2rec; } @@ -771,15 +771,15 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_ { int iTSpacesOffs = 0, f=0, t=0; int iDstTriIndex = 0; - for(f=0; f<pContext->m_pInterface->m_getNumFaces(pContext); f++) + for (f=0; f<pContext->m_pInterface->m_getNumFaces(pContext); f++) { const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); - if(verts!=3 && verts!=4) continue; + if (verts!=3 && verts!=4) continue; pTriInfos[iDstTriIndex].iOrgFaceNumber = f; pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs; - if(verts==3) + if (verts==3) { unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num; pVerts[0]=0; pVerts[1]=1; pVerts[2]=2; @@ -810,7 +810,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_ const float distSQ_02 = LengthSquared(vsub(T2,T0)); const float distSQ_13 = LengthSquared(vsub(T3,T1)); tbool bQuadDiagIs_02; - if(distSQ_02<distSQ_13) + if (distSQ_02<distSQ_13) bQuadDiagIs_02 = TTRUE; else if(distSQ_13<distSQ_02) bQuadDiagIs_02 = TFALSE; @@ -826,7 +826,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_ bQuadDiagIs_02 = distSQ_13<distSQ_02 ? TFALSE : TTRUE; } - if(bQuadDiagIs_02) + if (bQuadDiagIs_02) { { unsigned char * pVerts_A = pTriInfos[iDstTriIndex].vert_num; @@ -871,7 +871,7 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_ assert(iDstTriIndex<=iNrTrianglesIn); } - for(t=0; t<iNrTrianglesIn; t++) + for (t=0; t<iNrTrianglesIn; t++) pTriInfos[t].iFlag = 0; // return total amount of tspaces @@ -946,8 +946,8 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi // pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function. 
// generate neighbor info list - for(f=0; f<iNrTrianglesIn; f++) - for(i=0; i<3; i++) + for (f=0; f<iNrTrianglesIn; f++) + for (i=0; i<3; i++) { pTriInfos[f].FaceNeighbors[i] = -1; pTriInfos[f].AssignedGroup[i] = NULL; @@ -962,7 +962,7 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi } // evaluate first order derivatives - for(f=0; f<iNrTrianglesIn; f++) + for (f=0; f<iNrTrianglesIn; f++) { // initial values const SVec3 v1 = GetPosition(pContext, piTriListIn[f*3+0]); @@ -986,47 +986,47 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi pTriInfos[f].iFlag |= (fSignedAreaSTx2>0 ? ORIENT_PRESERVING : 0); - if( NotZero(fSignedAreaSTx2) ) + if ( NotZero(fSignedAreaSTx2) ) { const float fAbsArea = fabsf(fSignedAreaSTx2); const float fLenOs = Length(vOs); const float fLenOt = Length(vOt); const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f; - if( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs); - if( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt); + if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs); + if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt); // evaluate magnitudes prior to normalization of vOs and vOt pTriInfos[f].fMagS = fLenOs / fAbsArea; pTriInfos[f].fMagT = fLenOt / fAbsArea; // if this is a good triangle - if( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT)) + if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT)) pTriInfos[f].iFlag &= (~GROUP_WITH_ANY); } } // force otherwise healthy quads to a fixed orientation - while(t<(iNrTrianglesIn-1)) + while (t<(iNrTrianglesIn-1)) { const int iFO_a = pTriInfos[t].iOrgFaceNumber; const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; - if(iFO_a==iFO_b) // this is a quad + if (iFO_a==iFO_b) // this is a quad { const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? 
TTRUE : TFALSE; // bad triangles should already have been removed by // DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false - if((bIsDeg_a||bIsDeg_b)==TFALSE) + if ((bIsDeg_a||bIsDeg_b)==TFALSE) { const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; // if this happens the quad has extremely bad mapping!! - if(bOrientA!=bOrientB) + if (bOrientA!=bOrientB) { //printf("found quad with bad mapping\n"); tbool bChooseOrientFirstTri = TFALSE; - if((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE; + if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE; else if( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) ) bChooseOrientFirstTri = TTRUE; @@ -1048,7 +1048,7 @@ static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMi // match up edge pairs { SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3); - if(pEdges==NULL) + if (pEdges==NULL) BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn); else { @@ -1070,12 +1070,12 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT const int iNrMaxGroups = iNrTrianglesIn*3; int iNrActiveGroups = 0; int iOffset = 0, f=0, i=0; - for(f=0; f<iNrTrianglesIn; f++) + for (f=0; f<iNrTrianglesIn; f++) { - for(i=0; i<3; i++) + for (i=0; i<3; i++) { // if not assigned to a group - if((pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 && pTriInfos[f].AssignedGroup[i]==NULL) + if ((pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 && pTriInfos[f].AssignedGroup[i]==NULL) { tbool bOrPre; int neigh_indexL, neigh_indexR; @@ -1092,7 +1092,7 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? 
TTRUE : TFALSE; neigh_indexL = pTriInfos[f].FaceNeighbors[i]; neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2]; - if(neigh_indexL>=0) // neighbor + if (neigh_indexL>=0) // neighbor { const tbool bAnswer = AssignRecur(piTriListIn, pTriInfos, neigh_indexL, @@ -1102,7 +1102,7 @@ static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupT const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; assert(bAnswer || bDiff); } - if(neigh_indexR>=0) // neighbor + if (neigh_indexR>=0) // neighbor { const tbool bAnswer = AssignRecur(piTriListIn, pTriInfos, neigh_indexR, @@ -1141,20 +1141,20 @@ static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iVertRep = pGroup->iVertexRepresentitive; const int * pVerts = &piTriListIn[3*iMyTriIndex+0]; int i=-1; - if(pVerts[0]==iVertRep) i=0; + if (pVerts[0]==iVertRep) i=0; else if(pVerts[1]==iVertRep) i=1; else if(pVerts[2]==iVertRep) i=2; assert(i>=0 && i<3); // early out - if(pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE; + if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE; else if(pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE; - if((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0) + if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0) { // first to group with a group-with-anything triangle // determines it's orientation. // This is the only existing order dependency in the code!! - if( pMyTriInfo->AssignedGroup[0] == NULL && + if ( pMyTriInfo->AssignedGroup[0] == NULL && pMyTriInfo->AssignedGroup[1] == NULL && pMyTriInfo->AssignedGroup[2] == NULL ) { @@ -1164,7 +1164,7 @@ static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], } { const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? 
TTRUE : TFALSE; - if(bOrient != pGroup->bOrientPreservering) return TFALSE; + if (bOrient != pGroup->bOrientPreservering) return TFALSE; } AddTriToGroup(pGroup, iMyTriIndex); @@ -1173,9 +1173,9 @@ static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], { const int neigh_indexL = pMyTriInfo->FaceNeighbors[i]; const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2]; - if(neigh_indexL>=0) + if (neigh_indexL>=0) AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup); - if(neigh_indexR>=0) + if (neigh_indexR>=0) AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup); } @@ -1199,39 +1199,39 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con SSubGroup * pUniSubGroups = NULL; int * pTmpMembers = NULL; int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0; - for(g=0; g<iNrActiveGroups; g++) - if(iMaxNrFaces < pGroups[g].iNrFaces) + for (g=0; g<iNrActiveGroups; g++) + if (iMaxNrFaces < pGroups[g].iNrFaces) iMaxNrFaces = pGroups[g].iNrFaces; - if(iMaxNrFaces == 0) return TTRUE; + if (iMaxNrFaces == 0) return TTRUE; // make initial allocations pSubGroupTspace = (STSpace *) malloc(sizeof(STSpace)*iMaxNrFaces); pUniSubGroups = (SSubGroup *) malloc(sizeof(SSubGroup)*iMaxNrFaces); pTmpMembers = (int *) malloc(sizeof(int)*iMaxNrFaces); - if(pSubGroupTspace==NULL || pUniSubGroups==NULL || pTmpMembers==NULL) + if (pSubGroupTspace==NULL || pUniSubGroups==NULL || pTmpMembers==NULL) { - if(pSubGroupTspace!=NULL) free(pSubGroupTspace); - if(pUniSubGroups!=NULL) free(pUniSubGroups); - if(pTmpMembers!=NULL) free(pTmpMembers); + if (pSubGroupTspace!=NULL) free(pSubGroupTspace); + if (pUniSubGroups!=NULL) free(pUniSubGroups); + if (pTmpMembers!=NULL) free(pTmpMembers); return TFALSE; } iUniqueTspaces = 0; - for(g=0; g<iNrActiveGroups; g++) + for (g=0; g<iNrActiveGroups; g++) { const SGroup * pGroup = &pGroups[g]; int iUniqueSubGroups = 0, s=0; - for(i=0; i<pGroup->iNrFaces; i++) // triangles + for (i=0; i<pGroup->iNrFaces; i++) // 
triangles { const int f = pGroup->pFaceIndices[i]; // triangle number int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0; SSubGroup tmp_group; tbool bFound; SVec3 n, vOs, vOt; - if(pTriInfos[f].AssignedGroup[0]==pGroup) index=0; + if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0; else if(pTriInfos[f].AssignedGroup[1]==pGroup) index=1; else if(pTriInfos[f].AssignedGroup[2]==pGroup) index=2; assert(index>=0 && index<3); @@ -1245,14 +1245,14 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con // project vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); - if( VNotZero(vOs) ) vOs = Normalize(vOs); - if( VNotZero(vOt) ) vOt = Normalize(vOt); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); // original face number iOF_1 = pTriInfos[f].iOrgFaceNumber; iMembers = 0; - for(j=0; j<pGroup->iNrFaces; j++) + for (j=0; j<pGroup->iNrFaces; j++) { const int t = pGroup->pFaceIndices[j]; // triangle number const int iOF_2 = pTriInfos[t].iOrgFaceNumber; @@ -1260,8 +1260,8 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con // project SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n)); SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n)); - if( VNotZero(vOs2) ) vOs2 = Normalize(vOs2); - if( VNotZero(vOt2) ) vOt2 = Normalize(vOt2); + if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2); + if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2); { const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? 
TTRUE : TFALSE; @@ -1272,7 +1272,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con const float fCosT = vdot(vOt,vOt2); assert(f!=t || bSameOrgFace); // sanity check - if(bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos)) + if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos)) pTmpMembers[iMembers++] = t; } } @@ -1280,7 +1280,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con // sort pTmpMembers tmp_group.iNrFaces = iMembers; tmp_group.pTriMembers = pTmpMembers; - if(iMembers>1) + if (iMembers>1) { unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? QuickSort(pTmpMembers, 0, iMembers-1, uSeed); @@ -1289,10 +1289,10 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con // look for an existing match bFound = TFALSE; l=0; - while(l<iUniqueSubGroups && !bFound) + while (l<iUniqueSubGroups && !bFound) { bFound = CompareSubGroups(&tmp_group, &pUniSubGroups[l]); - if(!bFound) ++l; + if (!bFound) ++l; } // assign tangent space index @@ -1300,15 +1300,15 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con //piTempTangIndices[f*3+index] = iUniqueTspaces+l; // if no match was found we allocate a new subgroup - if(!bFound) + if (!bFound) { // insert new subgroup int * pIndices = (int *) malloc(sizeof(int)*iMembers); - if(pIndices==NULL) + if (pIndices==NULL) { // clean up and return false int s=0; - for(s=0; s<iUniqueSubGroups; s++) + for (s=0; s<iUniqueSubGroups; s++) free(pUniSubGroups[s].pTriMembers); free(pUniSubGroups); free(pTmpMembers); @@ -1330,7 +1330,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con STSpace * pTS_out = &psTspace[iOffs+iVert]; assert(pTS_out->iCounter<2); assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering); - if(pTS_out->iCounter==1) + if (pTS_out->iCounter==1) { *pTS_out = AvgTSpace(pTS_out, 
&pSubGroupTspace[l]); pTS_out->iCounter = 2; // update counter @@ -1347,7 +1347,7 @@ static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], con } // clean up and offset iUniqueTspaces - for(s=0; s<iUniqueSubGroups; s++) + for (s=0; s<iUniqueSubGroups; s++) free(pUniSubGroups[s].pTriMembers); iUniqueTspaces += iUniqueSubGroups; } @@ -1370,17 +1370,17 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL res.vOt.x=0.0f; res.vOt.y=0.0f; res.vOt.z=0.0f; res.fMagS = 0; res.fMagT = 0; - for(face=0; face<iFaces; face++) + for (face=0; face<iFaces; face++) { const int f = face_indices[face]; // only valid triangles get to add their contribution - if( (pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 ) + if ( (pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 ) { SVec3 n, vOs, vOt, p0, p1, p2, v1, v2; float fCos, fAngle, fMagS, fMagT; int i=-1, index=-1, i0=-1, i1=-1, i2=-1; - if(piTriListIn[3*f+0]==iVertexRepresentitive) i=0; + if (piTriListIn[3*f+0]==iVertexRepresentitive) i=0; else if(piTriListIn[3*f+1]==iVertexRepresentitive) i=1; else if(piTriListIn[3*f+2]==iVertexRepresentitive) i=2; assert(i>=0 && i<3); @@ -1390,8 +1390,8 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL n = GetNormal(pContext, index); vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); - if( VNotZero(vOs) ) vOs = Normalize(vOs); - if( VNotZero(vOt) ) vOt = Normalize(vOt); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); i2 = piTriListIn[3*f + (i<2?(i+1):0)]; i1 = piTriListIn[3*f + i]; @@ -1423,9 +1423,9 @@ static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriL } // normalize - if( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs); - if( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt); - if(fAngleSum>0) + if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs); + if ( VNotZero(res.vOt) ) 
res.vOt = Normalize(res.vOt); + if (fAngleSum>0) { res.fMagS /= fAngleSum; res.fMagT /= fAngleSum; @@ -1438,11 +1438,11 @@ static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2) { tbool bStillSame=TTRUE; int i=0; - if(pg1->iNrFaces!=pg2->iNrFaces) return TFALSE; - while(i<pg1->iNrFaces && bStillSame) + if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE; + while (i<pg1->iNrFaces && bStillSame) { bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE; - if(bStillSame) ++i; + if (bStillSame) ++i; } return bStillSame; } @@ -1467,12 +1467,12 @@ static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSee do { - while(pSortBuffer[iL] < iMid) + while (pSortBuffer[iL] < iMid) ++iL; - while(pSortBuffer[iR] > iMid) + while (pSortBuffer[iR] > iMid) --iR; - if(iL <= iR) + if (iL <= iR) { iTmp = pSortBuffer[iL]; pSortBuffer[iL] = pSortBuffer[iR]; @@ -1480,11 +1480,11 @@ static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSee ++iL; --iR; } } - while(iL <= iR); + while (iL <= iR); - if(iLeft < iR) + if (iLeft < iR) QuickSort(pSortBuffer, iLeft, iR, uSeed); - if(iL < iRight) + if (iL < iRight) QuickSort(pSortBuffer, iL, iRight, uSeed); } @@ -1499,8 +1499,8 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p // build array of edges unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? int iEntries=0, iCurStartIndex=-1, f=0, i=0; - for(f=0; f<iNrTrianglesIn; f++) - for(i=0; i<3; i++) + for (f=0; f<iNrTrianglesIn; f++) + for (i=0; i<3; i++) { const int i0 = piTriListIn[f*3+i]; const int i1 = piTriListIn[f*3+(i<2?(i+1):0)]; @@ -1517,9 +1517,9 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p // with i0 as msb in the quicksort call above. 
iEntries = iNrTrianglesIn*3; iCurStartIndex = 0; - for(i=1; i<iEntries; i++) + for (i=1; i<iEntries; i++) { - if(pEdges[iCurStartIndex].i0 != pEdges[i].i0) + if (pEdges[iCurStartIndex].i0 != pEdges[i].i0) { const int iL = iCurStartIndex; const int iR = i-1; @@ -1533,9 +1533,9 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p // this step is to remain compliant with BuildNeighborsSlow() when // more than 2 triangles use the same edge (such as a butterfly topology). iCurStartIndex = 0; - for(i=1; i<iEntries; i++) + for (i=1; i<iEntries; i++) { - if(pEdges[iCurStartIndex].i0 != pEdges[i].i0 || pEdges[iCurStartIndex].i1 != pEdges[i].i1) + if (pEdges[iCurStartIndex].i0 != pEdges[i].i0 || pEdges[iCurStartIndex].i1 != pEdges[i].i1) { const int iL = iCurStartIndex; const int iR = i-1; @@ -1546,7 +1546,7 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p } // pair up, adjacent triangles - for(i=0; i<iEntries; i++) + for (i=0; i<iEntries; i++) { const int i0=pEdges[i].i0; const int i1=pEdges[i].i1; @@ -1558,12 +1558,12 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p GetEdge(&i0_A, &i1_A, &edgenum_A, &piTriListIn[f*3], i0, i1); // resolve index ordering and edge_num bUnassigned_A = pTriInfos[f].FaceNeighbors[edgenum_A] == -1 ? TTRUE : TFALSE; - if(bUnassigned_A) + if (bUnassigned_A) { // get true index ordering int j=i+1, t; tbool bNotFound = TTRUE; - while(j<iEntries && i0==pEdges[j].i0 && i1==pEdges[j].i1 && bNotFound) + while (j<iEntries && i0==pEdges[j].i0 && i1==pEdges[j].i1 && bNotFound) { tbool bUnassigned_B; int i0_B, i1_B; @@ -1572,13 +1572,13 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p GetEdge(&i1_B, &i0_B, &edgenum_B, &piTriListIn[t*3], pEdges[j].i0, pEdges[j].i1); // resolve index ordering and edge_num //assert(!(i0_A==i1_B && i1_A==i0_B)); bUnassigned_B = pTriInfos[t].FaceNeighbors[edgenum_B]==-1 ? 
TTRUE : TFALSE; - if(i0_A==i0_B && i1_A==i1_B && bUnassigned_B) + if (i0_A==i0_B && i1_A==i1_B && bUnassigned_B) bNotFound = TFALSE; else ++j; } - if(!bNotFound) + if (!bNotFound) { int t = pEdges[j].f; pTriInfos[f].FaceNeighbors[edgenum_A] = t; @@ -1592,12 +1592,12 @@ static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int p static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn) { int f=0, i=0; - for(f=0; f<iNrTrianglesIn; f++) + for (f=0; f<iNrTrianglesIn; f++) { - for(i=0; i<3; i++) + for (i=0; i<3; i++) { // if unassigned - if(pTriInfos[f].FaceNeighbors[i] == -1) + if (pTriInfos[f].FaceNeighbors[i] == -1) { const int i0_A = piTriListIn[f*3+i]; const int i1_A = piTriListIn[f*3+(i<2?(i+1):0)]; @@ -1605,29 +1605,29 @@ static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], co // search for a neighbor tbool bFound = TFALSE; int t=0, j=0; - while(!bFound && t<iNrTrianglesIn) + while (!bFound && t<iNrTrianglesIn) { - if(t!=f) + if (t!=f) { j=0; - while(!bFound && j<3) + while (!bFound && j<3) { // in rev order const int i1_B = piTriListIn[t*3+j]; const int i0_B = piTriListIn[t*3+(j<2?(j+1):0)]; //assert(!(i0_A==i1_B && i1_A==i0_B)); - if(i0_A==i0_B && i1_A==i1_B) + if (i0_A==i0_B && i1_A==i1_B) bFound = TTRUE; else ++j; } } - if(!bFound) ++t; + if (!bFound) ++t; } // assign neighbors - if(bFound) + if (bFound) { pTriInfos[f].FaceNeighbors[i] = t; //assert(pTriInfos[t].FaceNeighbors[j]==-1); @@ -1646,10 +1646,10 @@ static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int // early out SEdge sTmp; const int iElems = iRight-iLeft+1; - if(iElems<2) return; + if (iElems<2) return; else if(iElems==2) { - if(pSortBuffer[iLeft].array[channel] > pSortBuffer[iRight].array[channel]) + if (pSortBuffer[iLeft].array[channel] > pSortBuffer[iRight].array[channel]) { sTmp = pSortBuffer[iLeft]; pSortBuffer[iLeft] = pSortBuffer[iRight]; @@ -1673,12 +1673,12 @@ static 
void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int do { - while(pSortBuffer[iL].array[channel] < iMid) + while (pSortBuffer[iL].array[channel] < iMid) ++iL; - while(pSortBuffer[iR].array[channel] > iMid) + while (pSortBuffer[iR].array[channel] > iMid) --iR; - if(iL <= iR) + if (iL <= iR) { sTmp = pSortBuffer[iL]; pSortBuffer[iL] = pSortBuffer[iR]; @@ -1686,11 +1686,11 @@ static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int ++iL; --iR; } } - while(iL <= iR); + while (iL <= iR); - if(iLeft < iR) + if (iLeft < iR) QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed); - if(iL < iRight) + if (iL < iRight) QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed); } @@ -1700,10 +1700,10 @@ static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int ind *edgenum_out = -1; // test if first index is on the edge - if(indices[0]==i0_in || indices[0]==i1_in) + if (indices[0]==i0_in || indices[0]==i1_in) { // test if second index is on the edge - if(indices[1]==i0_in || indices[1]==i1_in) + if (indices[1]==i0_in || indices[1]==i1_in) { edgenum_out[0]=0; // first edge i0_out[0]=indices[0]; @@ -1736,15 +1736,15 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i // locate quads with only one good triangle int t=0; - while(t<(iTotTris-1)) + while (t<(iTotTris-1)) { const int iFO_a = pTriInfos[t].iOrgFaceNumber; const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; - if(iFO_a==iFO_b) // this is a quad + if (iFO_a==iFO_b) // this is a quad { const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? 
TTRUE : TFALSE; - if((bIsDeg_a^bIsDeg_b)!=0) + if ((bIsDeg_a^bIsDeg_b)!=0) { pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI; pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI; @@ -1760,12 +1760,12 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i iNextGoodTriangleSearchIndex = 1; t=0; bStillFindingGoodOnes = TTRUE; - while(t<iNrTrianglesIn && bStillFindingGoodOnes) + while (t<iNrTrianglesIn && bStillFindingGoodOnes) { const tbool bIsGood = (pTriInfos[t].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE; - if(bIsGood) + if (bIsGood) { - if(iNextGoodTriangleSearchIndex < (t+2)) + if (iNextGoodTriangleSearchIndex < (t+2)) iNextGoodTriangleSearchIndex = t+2; } else @@ -1773,10 +1773,10 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i int t0, t1; // search for the first good triangle. tbool bJustADegenerate = TTRUE; - while(bJustADegenerate && iNextGoodTriangleSearchIndex<iTotTris) + while (bJustADegenerate && iNextGoodTriangleSearchIndex<iTotTris) { const tbool bIsGood = (pTriInfos[iNextGoodTriangleSearchIndex].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE; - if(bIsGood) bJustADegenerate=TFALSE; + if (bIsGood) bJustADegenerate=TFALSE; else ++iNextGoodTriangleSearchIndex; } @@ -1786,10 +1786,10 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i assert(iNextGoodTriangleSearchIndex > (t+1)); // swap triangle t0 and t1 - if(!bJustADegenerate) + if (!bJustADegenerate) { int i=0; - for(i=0; i<3; i++) + for (i=0; i<3; i++) { const int index = piTriList_out[t0*3+i]; piTriList_out[t0*3+i] = piTriList_out[t1*3+i]; @@ -1805,7 +1805,7 @@ static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int i bStillFindingGoodOnes = TFALSE; // this is not supposed to happen } - if(bStillFindingGoodOnes) ++t; + if (bStillFindingGoodOnes) ++t; } assert(bStillFindingGoodOnes); // code will still work. 
@@ -1817,28 +1817,28 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis int t=0, i=0; // deal with degenerate triangles // punishment for degenerate triangles is O(N^2) - for(t=iNrTrianglesIn; t<iTotTris; t++) + for (t=iNrTrianglesIn; t<iTotTris; t++) { // degenerate triangles on a quad with one good triangle are skipped // here but processed in the next loop const tbool bSkip = (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 ? TTRUE : TFALSE; - if(!bSkip) + if (!bSkip) { - for(i=0; i<3; i++) + for (i=0; i<3; i++) { const int index1 = piTriListIn[t*3+i]; // search through the good triangles tbool bNotFound = TTRUE; int j=0; - while(bNotFound && j<(3*iNrTrianglesIn)) + while (bNotFound && j<(3*iNrTrianglesIn)) { const int index2 = piTriListIn[j]; - if(index1==index2) bNotFound=TFALSE; + if (index1==index2) bNotFound=TFALSE; else ++j; } - if(!bNotFound) + if (!bNotFound) { const int iTri = j/3; const int iVert = j%3; @@ -1855,11 +1855,11 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis } // deal with degenerate quads with one good triangle - for(t=0; t<iNrTrianglesIn; t++) + for (t=0; t<iNrTrianglesIn; t++) { // this triangle belongs to a quad where the // other triangle is degenerate - if( (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 ) + if ( (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 ) { SVec3 vDstP; int iOrgF=-1, i=0; @@ -1867,7 +1867,7 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis unsigned char * pV = pTriInfos[t].vert_num; int iFlag = (1<<pV[0]) | (1<<pV[1]) | (1<<pV[2]); int iMissingIndex = 0; - if((iFlag&2)==0) iMissingIndex=1; + if ((iFlag&2)==0) iMissingIndex=1; else if((iFlag&4)==0) iMissingIndex=2; else if((iFlag&8)==0) iMissingIndex=3; @@ -1875,11 +1875,11 @@ static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriLis vDstP = GetPosition(pContext, MakeIndex(iOrgF, iMissingIndex)); bNotFound = TTRUE; i=0; - while(bNotFound && i<3) + while 
(bNotFound && i<3) { const int iVert = pV[i]; const SVec3 vSrcP = GetPosition(pContext, MakeIndex(iOrgF, iVert)); - if(veq(vSrcP, vDstP)==TTRUE) + if (veq(vSrcP, vDstP)==TTRUE) { const int iOffs = pTriInfos[t].iTSpacesOffs; psTspace[iOffs+iMissingIndex] = psTspace[iOffs+iVert]; diff --git a/intern/smoke/extern/smoke_API.h b/intern/smoke/extern/smoke_API.h index 9d5dfd98823..a0eb1bf38e0 100644 --- a/intern/smoke/extern/smoke_API.h +++ b/intern/smoke/extern/smoke_API.h @@ -41,11 +41,11 @@ struct FLUID_3D; void smoke_export(struct FLUID_3D *fluid, float *dt, float *dx, float **dens, float **densold, float **heat, float **heatold, float **vx, float **vy, float **vz, float **vxold, float **vyold, float **vzold, unsigned char **obstacles); // low res -struct FLUID_3D *smoke_init(int *res, float *p0); +struct FLUID_3D *smoke_init(int *res, float *p0, float dtdef); void smoke_free(struct FLUID_3D *fluid); void smoke_initBlenderRNA(struct FLUID_3D *fluid, float *alpha, float *beta, float *dt_factor, float *vorticity, int *border_colli); -void smoke_step(struct FLUID_3D *fluid, size_t framenr, float fps); +void smoke_step(struct FLUID_3D *fluid, float dtSubdiv); float *smoke_get_density(struct FLUID_3D *fluid); float *smoke_get_heat(struct FLUID_3D *fluid); @@ -53,6 +53,9 @@ float *smoke_get_velocity_x(struct FLUID_3D *fluid); float *smoke_get_velocity_y(struct FLUID_3D *fluid); float *smoke_get_velocity_z(struct FLUID_3D *fluid); +/* Moving obstacle velocity provided by blender */ +void smoke_get_ob_velocity(struct FLUID_3D *fluid, float **x, float **y, float **z); + float *smoke_get_force_x(struct FLUID_3D *fluid); float *smoke_get_force_y(struct FLUID_3D *fluid); float *smoke_get_force_z(struct FLUID_3D *fluid); diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp index 9f036cc6d2f..04971f898e9 100644 --- a/intern/smoke/intern/FLUID_3D.cpp +++ b/intern/smoke/intern/FLUID_3D.cpp @@ -34,6 +34,8 @@ #include "SPHERE.h" #include <zlib.h> +#include 
"float.h" + #if PARALLEL==1 #include <omp.h> #endif // PARALLEL @@ -42,11 +44,11 @@ // Construction/Destruction ////////////////////////////////////////////////////////////////////// -FLUID_3D::FLUID_3D(int *res, float *p0) : +FLUID_3D::FLUID_3D(int *res, float *p0, float dtdef) : _xRes(res[0]), _yRes(res[1]), _zRes(res[2]), _res(0.0f) { // set simulation consts - _dt = DT_DEFAULT; // just in case. set in step from a RNA factor + _dt = dtdef; // just in case. set in step from a RNA factor // start point of array _p0[0] = p0[0]; @@ -81,6 +83,9 @@ FLUID_3D::FLUID_3D(int *res, float *p0) : _xVelocity = new float[_totalCells]; _yVelocity = new float[_totalCells]; _zVelocity = new float[_totalCells]; + _xVelocityOb = new float[_totalCells]; + _yVelocityOb = new float[_totalCells]; + _zVelocityOb = new float[_totalCells]; _xVelocityOld = new float[_totalCells]; _yVelocityOld = new float[_totalCells]; _zVelocityOld = new float[_totalCells]; @@ -111,6 +116,9 @@ FLUID_3D::FLUID_3D(int *res, float *p0) : _xVelocity[x] = 0.0f; _yVelocity[x] = 0.0f; _zVelocity[x] = 0.0f; + _xVelocityOb[x] = 0.0f; + _yVelocityOb[x] = 0.0f; + _zVelocityOb[x] = 0.0f; _xVelocityOld[x] = 0.0f; _yVelocityOld[x] = 0.0f; _zVelocityOld[x] = 0.0f; @@ -131,9 +139,15 @@ FLUID_3D::FLUID_3D(int *res, float *p0) : _colloPrev = 1; // default value + setBorderObstacles(); // walls + +} +void FLUID_3D::setBorderObstacles() +{ + // set side obstacles - int index; + unsigned int index; for (int y = 0; y < _yRes; y++) for (int x = 0; x < _xRes; x++) { @@ -169,7 +183,6 @@ FLUID_3D::FLUID_3D(int *res, float *p0) : index += _xRes - 1; if(_domainBcRight==1) _obstacles[index] = 1; } - } FLUID_3D::~FLUID_3D() @@ -177,6 +190,9 @@ FLUID_3D::~FLUID_3D() if (_xVelocity) delete[] _xVelocity; if (_yVelocity) delete[] _yVelocity; if (_zVelocity) delete[] _zVelocity; + if (_xVelocityOb) delete[] _xVelocityOb; + if (_yVelocityOb) delete[] _yVelocityOb; + if (_zVelocityOb) delete[] _zVelocityOb; if (_xVelocityOld) delete[] 
_xVelocityOld; if (_yVelocityOld) delete[] _yVelocityOld; if (_zVelocityOld) delete[] _zVelocityOld; @@ -214,10 +230,18 @@ void FLUID_3D::initBlenderRNA(float *alpha, float *beta, float *dt_factor, float ////////////////////////////////////////////////////////////////////// void FLUID_3D::step(float dt) { +#if 0 // If border rules have been changed if (_colloPrev != *_borderColli) { + printf("Border collisions changed\n"); + + // DG TODO: Need to check that no animated obstacle flags are overwritten setBorderCollisions(); } +#endif + + // DG: TODO for the moment redo border for every timestep since it's been deleted every time by moving obstacles + setBorderCollisions(); // set delta time by dt_factor @@ -786,6 +810,7 @@ void FLUID_3D::project() memset(_pressure, 0, sizeof(float)*_totalCells); memset(_divergence, 0, sizeof(float)*_totalCells); + // set velocity and pressure inside of obstacles to zero setObstacleBoundaries(_pressure, 0, _zRes); // copy out the boundaries @@ -798,12 +823,49 @@ void FLUID_3D::project() if(_domainBcTop == 0) setNeumannZ(_zVelocity, _res, 0, _zRes); else setZeroZ(_zVelocity, _res, 0, _zRes); + /* + { + float maxx = 0, maxy = 0, maxz = 0; + for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++) + { + if(_xVelocity[i] > maxx) + maxx = _xVelocity[i]; + if(_yVelocity[i] > maxy) + maxy = _yVelocity[i]; + if(_zVelocity[i] > maxz) + maxz = _zVelocity[i]; + } + printf("Max velx: %f, vely: %f, velz: %f\n", maxx, maxy, maxz); + } + */ + + /* + { + float maxvalue = 0; + for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++) + { + if(_heat[i] > maxvalue) + maxvalue = _heat[i]; + + } + printf("Max heat: %f\n", maxvalue); + } + */ + // calculate divergence index = _slabSize + _xRes + 1; for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) for (y = 1; y < _yRes - 1; y++, index += 2) for (x = 1; x < _xRes - 1; x++, index++) { + + if(_obstacles[index]) + { + _divergence[index] = 0.0f; + continue; + } + + float xright = _xVelocity[index + 1]; float 
xleft = _xVelocity[index - 1]; float yup = _yVelocity[index + _xRes]; @@ -811,26 +873,82 @@ void FLUID_3D::project() float ztop = _zVelocity[index + _slabSize]; float zbottom = _zVelocity[index - _slabSize]; - if(_obstacles[index+1]) xright = - _xVelocity[index]; + if(_obstacles[index+1]) xright = - _xVelocity[index]; // DG: += if(_obstacles[index-1]) xleft = - _xVelocity[index]; if(_obstacles[index+_xRes]) yup = - _yVelocity[index]; if(_obstacles[index-_xRes]) ydown = - _yVelocity[index]; if(_obstacles[index+_slabSize]) ztop = - _zVelocity[index]; if(_obstacles[index-_slabSize]) zbottom = - _zVelocity[index]; + if(_obstacles[index+1] & 8) xright += _xVelocityOb[index + 1]; + if(_obstacles[index-1] & 8) xleft += _xVelocityOb[index - 1]; + if(_obstacles[index+_xRes] & 8) yup += _yVelocityOb[index + _xRes]; + if(_obstacles[index-_xRes] & 8) ydown += _yVelocityOb[index - _xRes]; + if(_obstacles[index+_slabSize] & 8) ztop += _zVelocityOb[index + _slabSize]; + if(_obstacles[index-_slabSize] & 8) zbottom += _zVelocityOb[index - _slabSize]; + _divergence[index] = -_dx * 0.5f * ( xright - xleft + yup - ydown + ztop - zbottom ); - // DG: commenting this helps CG to get a better start, 10-20% speed improvement - // _pressure[index] = 0.0f; + // Pressure is zero anyway since now a local array is used + _pressure[index] = 0.0f; } + + + /* + { + float maxvalue = 0; + for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++) + { + if(_divergence[i] > maxvalue) + maxvalue = _divergence[i]; + + } + printf("Max divergence: %f\n", maxvalue); + } + */ + copyBorderAll(_pressure, 0, _zRes); + /* + { + float maxvalue = 0; + for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++) + { + if(_pressure[i] > maxvalue) + maxvalue = _pressure[i]; + } + printf("Max pressure BEFORE: %f\n", maxvalue); + } + */ + // solve Poisson equation solvePressurePre(_pressure, _divergence, _obstacles); + { + float maxvalue = 0; + for(unsigned int i = 0; i < _xRes * _yRes * _zRes; i++) + { + if(_pressure[i] > 
maxvalue) + maxvalue = _pressure[i]; + + /* HACK: Animated collision object sometimes result in a non converging solvePressurePre() */ + if(_pressure[i] > _dx * _dt) + _pressure[i] = _dx * _dt; + else if(_pressure[i] < -_dx * _dt) + _pressure[i] = -_dx * _dt; + + // if(_obstacle[i] && _pressure[i] != 0.0) + // printf("BAD PRESSURE i\n"); + + // if(_pressure[i]>1) + // printf("index: %d\n", i); + } + // printf("Max pressure: %f, dx: %f\n", maxvalue, _dx); + } + setObstaclePressure(_pressure, 0, _zRes); // project out solution @@ -848,12 +966,74 @@ void FLUID_3D::project() } } + setObstacleVelocity(0, _zRes); + if (_pressure) delete[] _pressure; if (_divergence) delete[] _divergence; } +////////////////////////////////////////////////////////////////////// +// calculate the obstacle velocity at boundary +////////////////////////////////////////////////////////////////////// +void FLUID_3D::setObstacleVelocity(int zBegin, int zEnd) +{ + + // completely TODO <-- who wrote this and what is here TODO? 
DG + const size_t index_ = _slabSize + _xRes + 1; + + //int vIndex=_slabSize + _xRes + 1; + + int bb=0; + int bt=0; + + if (zBegin == 0) {bb = 1;} + if (zEnd == _zRes) {bt = 1;} + // tag remaining obstacle blocks + for (int z = zBegin + bb; z < zEnd - bt; z++) + { + size_t index = index_ +(z-1)*_slabSize; + + for (int y = 1; y < _yRes - 1; y++, index += 2) + { + for (int x = 1; x < _xRes - 1; x++, index++) + { + if (!_obstacles[index]) + { + // if(_obstacles[index+1]) xright = - _xVelocityOb[index]; + if((_obstacles[index - 1] & 8) && abs(_xVelocityOb[index - 1]) > FLT_EPSILON ) + { + // printf("velocity x!\n"); + _xVelocity[index] = _xVelocityOb[index - 1]; + _xVelocity[index - 1] = _xVelocityOb[index - 1]; + } + // if(_obstacles[index+_xRes]) yup = - _yVelocityOb[index]; + if((_obstacles[index - _xRes] & 8) && abs(_yVelocityOb[index - _xRes]) > FLT_EPSILON) + { + // printf("velocity y!\n"); + _yVelocity[index] = _yVelocityOb[index - _xRes]; + _yVelocity[index - _xRes] = _yVelocityOb[index - _xRes]; + } + // if(_obstacles[index+_slabSize]) ztop = - _zVelocityOb[index]; + if((_obstacles[index - _slabSize] & 8) && abs(_zVelocityOb[index - _slabSize]) > FLT_EPSILON) + { + // printf("velocity z!\n"); + _zVelocity[index] = _zVelocityOb[index - _slabSize]; + _zVelocity[index - _slabSize] = _zVelocityOb[index - _slabSize]; + } + } + else + { + _density[index] = 0; + } + //vIndex++; + } // x loop + //vIndex += 2; + } // y loop + //vIndex += 2 * _xRes; + } // z loop +} ////////////////////////////////////////////////////////////////////// // diffuse heat @@ -892,7 +1072,7 @@ void FLUID_3D::addObstacle(OBSTACLE* obstacle) void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd) { - // compleately TODO + // completely TODO <-- who wrote this and what is here TODO? 
DG const size_t index_ = _slabSize + _xRes + 1; @@ -914,7 +1094,7 @@ void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd) for (int x = 1; x < _xRes - 1; x++, index++) { // could do cascade of ifs, but they are a pain - if (_obstacles[index]) + if (_obstacles[index] /* && !(_obstacles[index] & 8) DG TODO TEST THIS CONDITION */) { const int top = _obstacles[index + _slabSize]; const int bottom= _obstacles[index - _slabSize]; @@ -928,9 +1108,11 @@ void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd) // const bool fully = (up && down); //const bool fullx = (left && right); + /* _xVelocity[index] = _yVelocity[index] = _zVelocity[index] = 0.0f; + */ _pressure[index] = 0.0f; // average pressure neighbors @@ -1253,7 +1435,35 @@ void FLUID_3D::advectMacCormackEnd2(int zBegin, int zEnd) setZeroBorder(_density, res, zBegin, zEnd); setZeroBorder(_heat, res, zBegin, zEnd); +#if 0 + { + const size_t index_ = _slabSize + _xRes + 1; + int bb=0; + int bt=0; + + if (zBegin == 0) {bb = 1;} + if (zEnd == _zRes) {bt = 1;} + + for (int z = zBegin + bb; z < zEnd - bt; z++) + { + size_t index = index_ +(z-1)*_slabSize; + for (int y = 1; y < _yRes - 1; y++, index += 2) + { + for (int x = 1; x < _xRes - 1; x++, index++) + { + // clean custom velocities from moving obstacles again + if (_obstacles[index]) + { + _xVelocity[index] = + _yVelocity[index] = + _zVelocity[index] = 0.0f; + } + } + } + } + } +#endif /*int begin=zBegin * _slabSize; int end=begin + (zEnd - zBegin) * _slabSize; diff --git a/intern/smoke/intern/FLUID_3D.h b/intern/smoke/intern/FLUID_3D.h index c9e18926fb2..5704cba3ed4 100644 --- a/intern/smoke/intern/FLUID_3D.h +++ b/intern/smoke/intern/FLUID_3D.h @@ -39,9 +39,6 @@ // #include "WTURBULENCE.h" #include "VEC3.h" -// timestep default value for nice appearance -#define DT_DEFAULT 0.1f; - using namespace std; using namespace BasicVector; class WTURBULENCE; @@ -49,7 +46,7 @@ class WTURBULENCE; class FLUID_3D { public: - FLUID_3D(int 
*res, /* int amplify, */ float *p0); + FLUID_3D(int *res, /* int amplify, */ float *p0, float dtdef); FLUID_3D() {}; virtual ~FLUID_3D(); @@ -72,7 +69,7 @@ class FLUID_3D int yRes() const { return _yRes; }; int zRes() const { return _zRes; }; - public: + public: // dimensions int _xRes, _yRes, _zRes, _maxRes; Vec3Int _res; @@ -89,6 +86,8 @@ class FLUID_3D void artificialDampingSL(int zBegin, int zEnd); void artificialDampingExactSL(int pos); + void setBorderObstacles(); + // fields float* _density; float* _densityOld; @@ -97,13 +96,17 @@ class FLUID_3D float* _xVelocity; float* _yVelocity; float* _zVelocity; + float* _xVelocityOb; + float* _yVelocityOb; + float* _zVelocityOb; float* _xVelocityOld; float* _yVelocityOld; float* _zVelocityOld; float* _xForce; float* _yForce; float* _zForce; - unsigned char* _obstacles; + unsigned char* _obstacles; /* only used (usefull) for static obstacles like domain boundaries */ + unsigned char* _obstaclesAnim; // Required for proper threading: float* _xVelocityTemp; @@ -137,6 +140,8 @@ class FLUID_3D // have to recalibrate borders if nothing has changed void setBorderCollisions(); + void setObstacleVelocity(int zBegin, int zEnd); + // WTURBULENCE object, if active // WTURBULENCE* _wTurbulence; diff --git a/intern/smoke/intern/OBSTACLE.h b/intern/smoke/intern/OBSTACLE.h index 61d47b727f0..da8ec6be024 100644 --- a/intern/smoke/intern/OBSTACLE.h +++ b/intern/smoke/intern/OBSTACLE.h @@ -27,9 +27,11 @@ #define OBSTACLE_H enum OBSTACLE_FLAGS { - EMPTY = 0, + EMPTY = 0, + /* 1 is used to flag an object cell */ MARCHED = 2, - RETIRED = 4 + RETIRED = 4, + ANIMATED = 8, }; class OBSTACLE diff --git a/intern/smoke/intern/WTURBULENCE.cpp b/intern/smoke/intern/WTURBULENCE.cpp index cd18cf7b344..83bec466c9f 100644 --- a/intern/smoke/intern/WTURBULENCE.cpp +++ b/intern/smoke/intern/WTURBULENCE.cpp @@ -431,8 +431,11 @@ void WTURBULENCE::decomposeEnergy(float *_energy, float *_highFreqEnergy) // compute velocity from energies and march into 
obstacles // for wavelet decomposition ////////////////////////////////////////////////////////////////////// -void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* zvel, unsigned char *obstacles) +void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* zvel, unsigned char *origObstacles) { + unsigned char *obstacles = new unsigned char[_totalCellsSm]; + memcpy(obstacles, origObstacles, sizeof(unsigned char) * _totalCellsSm); + // compute everywhere for (int x = 0; x < _totalCellsSm; x++) _energy[x] = 0.5f * (xvel[x] * xvel[x] + yvel[x] * yvel[x] + zvel[x] * zvel[x]); @@ -506,7 +509,9 @@ void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* for (int y = 1; y < _yResSm - 1; y++, index += 2) for (int x = 1; x < _xResSm - 1; x++, index++) if (obstacles[index]) - obstacles[index] = 1; + obstacles[index] = 1; // DG TODO ? animated obstacle flag? + + free(obstacles); } ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/intern/smoke/intern/smoke_API.cpp b/intern/smoke/intern/smoke_API.cpp index a2f3c21bbbf..78f7d35360a 100644 --- a/intern/smoke/intern/smoke_API.cpp +++ b/intern/smoke/intern/smoke_API.cpp @@ -19,6 +19,7 @@ * All rights reserved. 
* * Contributor(s): Daniel Genrich + * Blender Foundation * * ***** END GPL LICENSE BLOCK ***** */ @@ -36,10 +37,10 @@ #include <math.h> // y in smoke is z in blender -extern "C" FLUID_3D *smoke_init(int *res, float *p0) +extern "C" FLUID_3D *smoke_init(int *res, float *p0, float dtdef) { // smoke lib uses y as top-bottom/vertical axis where blender uses z - FLUID_3D *fluid = new FLUID_3D(res, p0); + FLUID_3D *fluid = new FLUID_3D(res, p0, dtdef); // printf("xres: %d, yres: %d, zres: %d\n", res[0], res[1], res[2]); @@ -78,41 +79,9 @@ extern "C" size_t smoke_get_index2d(int x, int max_x, int y /*, int max_y, int z return x + y * max_x; } -extern "C" void smoke_step(FLUID_3D *fluid, size_t framenr, float fps) +extern "C" void smoke_step(FLUID_3D *fluid, float dtSubdiv) { - /* stability values copied from wturbulence.cpp */ - const int maxSubSteps = 25; - const float maxVel = 0.5f; /* TODO: maybe 0.5 is still too high, please confirm! -dg */ - - float dt = DT_DEFAULT; - float maxVelMag = 0.0f; - int totalSubsteps; - int substep = 0; - float dtSubdiv; - - /* get max velocity and lower the dt value if it is too high */ - size_t size= fluid->_xRes * fluid->_yRes * fluid->_zRes; - - for(size_t i = 0; i < size; i++) - { - float vtemp = (fluid->_xVelocity[i]*fluid->_xVelocity[i]+fluid->_yVelocity[i]*fluid->_yVelocity[i]+fluid->_zVelocity[i]*fluid->_zVelocity[i]); - if(vtemp > maxVelMag) - maxVelMag = vtemp; - } - - /* adapt timestep for different framerates, dt = 0.1 is at 25fps */ - dt *= (25.0f / fps); - - maxVelMag = sqrt(maxVelMag) * dt * (*(fluid->_dtFactor)); - totalSubsteps = (int)((maxVelMag / maxVel) + 1.0f); /* always round up */ - totalSubsteps = (totalSubsteps < 1) ? 1 : totalSubsteps; - totalSubsteps = (totalSubsteps > maxSubSteps) ? 
maxSubSteps : totalSubsteps; - dtSubdiv = (float)dt / (float)totalSubsteps; - - // printf("totalSubsteps: %d, maxVelMag: %f, dt: %f\n", totalSubsteps, maxVelMag, dt); - - for(substep = 0; substep < totalSubsteps; substep++) - fluid->step(dtSubdiv); + fluid->step(dtSubdiv); } extern "C" void smoke_turbulence_step(WTURBULENCE *wt, FLUID_3D *fluid) @@ -307,6 +276,18 @@ extern "C" unsigned char *smoke_get_obstacle(FLUID_3D *fluid) return fluid->_obstacles; } +extern "C" void smoke_get_ob_velocity(struct FLUID_3D *fluid, float **x, float **y, float **z) +{ + *x = fluid->_xVelocityOb; + *y = fluid->_yVelocityOb; + *z = fluid->_zVelocityOb; +} + +extern "C" unsigned char *smoke_get_obstacle_anim(FLUID_3D *fluid) +{ + return fluid->_obstaclesAnim; +} + extern "C" void smoke_turbulence_set_noise(WTURBULENCE *wt, int type) { wt->setNoise(type); diff --git a/intern/utfconv/utf_winfunc.c b/intern/utfconv/utf_winfunc.c index 2e200ea3ad3..68d1d6bb403 100644 --- a/intern/utfconv/utf_winfunc.c +++ b/intern/utfconv/utf_winfunc.c @@ -39,7 +39,7 @@ FILE * ufopen(const char * filename, const char * mode) UTF16_ENCODE(filename); UTF16_ENCODE (mode); - if(filename_16 && mode_16) { + if (filename_16 && mode_16) { f = _wfopen(filename_16, mode_16); } @@ -81,7 +81,7 @@ int urename(const char *oldname, const char *newname ) UTF16_ENCODE(oldname); UTF16_ENCODE (newname); - if(oldname_16 && newname_16) r = _wrename(oldname_16, newname_16); + if (oldname_16 && newname_16) r = _wrename(oldname_16, newname_16); UTF16_UN_ENCODE(newname); UTF16_UN_ENCODE(oldname); @@ -94,7 +94,7 @@ int umkdir(const char *pathname) BOOL r = 0; UTF16_ENCODE(pathname); - if(pathname_16) r = CreateDirectoryW(pathname_16, NULL); + if (pathname_16) r = CreateDirectoryW(pathname_16, NULL); UTF16_UN_ENCODE(pathname); @@ -123,10 +123,10 @@ int uput_getenv(const char *varname, char * value, size_t buffsize) { int r = 0; wchar_t * str; - if(!buffsize) return r; + if (!buffsize) return r; UTF16_ENCODE(varname); - 
if(varname_16) { + if (varname_16) { str = _wgetenv(varname_16); conv_utf_16_to_8(str, value, buffsize); r = 1; @@ -143,7 +143,7 @@ int uputenv(const char *name, const char *value) int r = -1; UTF16_ENCODE(name); UTF16_ENCODE(value); - if(name_16 && value_16) { + if (name_16 && value_16) { r = (SetEnvironmentVariableW(name_16,value_16)!= 0) ? 0 : -1; } UTF16_UN_ENCODE(value); |