Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Erwin Coumans <blender@erwincoumans.com> 2008-09-13 11:06:43 +0400
committer: Erwin Coumans <blender@erwincoumans.com> 2008-09-13 11:06:43 +0400
commit: 7f293488d12b5d5076b4bbf3d6c9248867c447a0 (patch)
tree: 977ac9f1063de48615e8f294bfbcadb2a3b645f6 /extern/bullet2/src/BulletCollision/BroadphaseCollision
parent: 206cfe7955683ac166201e417977e933fd98f7b3 (diff)
Upgrade to the latest Bullet trunk, which is in sync with Blender/extern/bullet2 (except for one define, 'WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER').
In case someone reads these SVN logs: you can enable some extra broadphase SSE optimizations by replacing WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER with WIN32 in extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h. Thanks to Benoit Bolsee for the upstream patch/contribution. Removed some obsolete files; they were just intended for comparison/testing.
Diffstat (limited to 'extern/bullet2/src/BulletCollision/BroadphaseCollision')
-rw-r--r-- extern/bullet2/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h 85
-rw-r--r-- extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.cpp 91
-rw-r--r-- extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h 198
-rw-r--r-- extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp 66
-rw-r--r-- extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h 21
5 files changed, 211 insertions, 250 deletions
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h
index e7c5fb5b6cf..d0ad09a385a 100644
--- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h
+++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h
@@ -27,6 +27,7 @@
#include "btOverlappingPairCallback.h"
//#define DEBUG_BROADPHASE 1
+#define USE_OVERLAP_TEST_ON_REMOVES 1
/// The internal templace class btAxisSweep3Internal implements the sweep and prune broadphase.
/// It uses quantized integers to represent the begin and end points for each of the 3 axis.
@@ -52,9 +53,7 @@ public:
};
public:
- //This breaks the Intel compiler, see http://softwarecommunity.intel.com/isn/Community/en-US/forums/thread/30253577.aspx
- class Handle : public btBroadphaseProxy
- //ATTRIBUTE_ALIGNED16(class) Handle : public btBroadphaseProxy
+ class Handle : public btBroadphaseProxy
{
public:
BT_DECLARE_ALIGNED_ALLOCATOR();
@@ -80,7 +79,7 @@ protected:
BP_FP_INT_TYPE m_numHandles; // number of active handles
BP_FP_INT_TYPE m_maxHandles; // max number of handles
Handle* m_pHandles; // handles pool
- void* m_pHandlesRawPtr;
+
BP_FP_INT_TYPE m_firstFreeHandle; // free handles list
Edge* m_pEdges[3]; // edge arrays for the 3 axes (each array has m_maxHandles * 2 + 2 sentinel entries)
@@ -100,7 +99,7 @@ protected:
void freeHandle(BP_FP_INT_TYPE handle);
- bool testOverlap(int ignoreAxis,const Handle* pHandleA, const Handle* pHandleB);
+ bool testOverlap2D(const Handle* pHandleA, const Handle* pHandleB,int axis0,int axis1);
#ifdef DEBUG_BROADPHASE
void debugPrintAxis(int axis,bool checkCardinality=true);
@@ -273,10 +272,9 @@ m_invalidPair(0)
m_quantize = btVector3(btScalar(maxInt),btScalar(maxInt),btScalar(maxInt)) / aabbSize;
- // allocate handles buffer and put all handles on free list
- m_pHandlesRawPtr = btAlignedAlloc(sizeof(Handle)*maxHandles,16);
- m_pHandles = new(m_pHandlesRawPtr) Handle[maxHandles];
-
+ // allocate handles buffer, using btAlignedAlloc, and put all handles on free list
+ m_pHandles = new Handle[maxHandles];
+
m_maxHandles = maxHandles;
m_numHandles = 0;
@@ -327,7 +325,7 @@ btAxisSweep3Internal<BP_FP_INT_TYPE>::~btAxisSweep3Internal()
{
btAlignedFree(m_pEdgesRawPtr[i]);
}
- btAlignedFree(m_pHandlesRawPtr);
+ delete [] m_pHandles;
if (m_ownsPairCache)
{
@@ -603,34 +601,17 @@ bool btAxisSweep3Internal<BP_FP_INT_TYPE>::testAabbOverlap(btBroadphaseProxy* pr
}
template <typename BP_FP_INT_TYPE>
-bool btAxisSweep3Internal<BP_FP_INT_TYPE>::testOverlap(int ignoreAxis,const Handle* pHandleA, const Handle* pHandleB)
+bool btAxisSweep3Internal<BP_FP_INT_TYPE>::testOverlap2D(const Handle* pHandleA, const Handle* pHandleB,int axis0,int axis1)
{
//optimization 1: check the array index (memory address), instead of the m_pos
- for (int axis = 0; axis < 3; axis++)
+ if (pHandleA->m_maxEdges[axis0] < pHandleB->m_minEdges[axis0] ||
+ pHandleB->m_maxEdges[axis0] < pHandleA->m_minEdges[axis0] ||
+ pHandleA->m_maxEdges[axis1] < pHandleB->m_minEdges[axis1] ||
+ pHandleB->m_maxEdges[axis1] < pHandleA->m_minEdges[axis1])
{
- if (axis != ignoreAxis)
- {
- if (pHandleA->m_maxEdges[axis] < pHandleB->m_minEdges[axis] ||
- pHandleB->m_maxEdges[axis] < pHandleA->m_minEdges[axis])
- {
- return false;
- }
- }
+ return false;
}
-
- //optimization 2: only 2 axis need to be tested (conflicts with 'delayed removal' optimization)
-
- /*for (int axis = 0; axis < 3; axis++)
- {
- if (m_pEdges[axis][pHandleA->m_maxEdges[axis]].m_pos < m_pEdges[axis][pHandleB->m_minEdges[axis]].m_pos ||
- m_pEdges[axis][pHandleB->m_maxEdges[axis]].m_pos < m_pEdges[axis][pHandleA->m_minEdges[axis]].m_pos)
- {
- return false;
- }
- }
- */
-
return true;
}
@@ -700,7 +681,9 @@ void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMinDown(int axis, BP_FP_INT_TYPE
if (pPrev->IsMax())
{
// if previous edge is a maximum check the bounds and add an overlap if necessary
- if (updateOverlaps && testOverlap(axis,pHandleEdge, pHandlePrev))
+ const int axis1 = (1 << axis) & 3;
+ const int axis2 = (1 << axis1) & 3;
+ if (updateOverlaps && testOverlap2D(pHandleEdge, pHandlePrev,axis1,axis2))
{
m_pairCache->addOverlappingPair(pHandleEdge,pHandlePrev);
if (m_userPairCallback)
@@ -748,12 +731,19 @@ void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMinUp(int axis, BP_FP_INT_TYPE ed
if (pNext->IsMax())
{
-
+ Handle* handle0 = getHandle(pEdge->m_handle);
+ Handle* handle1 = getHandle(pNext->m_handle);
+ const int axis1 = (1 << axis) & 3;
+ const int axis2 = (1 << axis1) & 3;
+
// if next edge is maximum remove any overlap between the two handles
- if (updateOverlaps)
+ if (updateOverlaps
+#ifdef USE_OVERLAP_TEST_ON_REMOVES
+ && testOverlap2D(handle0,handle1,axis1,axis2)
+#endif //USE_OVERLAP_TEST_ON_REMOVES
+ )
{
- Handle* handle0 = getHandle(pEdge->m_handle);
- Handle* handle1 = getHandle(pNext->m_handle);
+
m_pairCache->removeOverlappingPair(handle0,handle1,dispatcher);
if (m_userPairCallback)
@@ -799,12 +789,20 @@ void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMaxDown(int axis, BP_FP_INT_TYPE
if (!pPrev->IsMax())
{
// if previous edge was a minimum remove any overlap between the two handles
- if (updateOverlaps)
+ Handle* handle0 = getHandle(pEdge->m_handle);
+ Handle* handle1 = getHandle(pPrev->m_handle);
+ const int axis1 = (1 << axis) & 3;
+ const int axis2 = (1 << axis1) & 3;
+
+ if (updateOverlaps
+#ifdef USE_OVERLAP_TEST_ON_REMOVES
+ && testOverlap2D(handle0,handle1,axis1,axis2)
+#endif //USE_OVERLAP_TEST_ON_REMOVES
+ )
{
//this is done during the overlappingpairarray iteration/narrowphase collision
- Handle* handle0 = getHandle(pEdge->m_handle);
- Handle* handle1 = getHandle(pPrev->m_handle);
+
m_pairCache->removeOverlappingPair(handle0,handle1,dispatcher);
if (m_userPairCallback)
m_userPairCallback->removeOverlappingPair(handle0,handle1,dispatcher);
@@ -850,10 +848,13 @@ void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMaxUp(int axis, BP_FP_INT_TYPE ed
{
Handle* pHandleNext = getHandle(pNext->m_handle);
+ const int axis1 = (1 << axis) & 3;
+ const int axis2 = (1 << axis1) & 3;
+
if (!pNext->IsMax())
{
// if next edge is a minimum check the bounds and add an overlap if necessary
- if (updateOverlaps && testOverlap(axis, pHandleEdge, pHandleNext))
+ if (updateOverlaps && testOverlap2D(pHandleEdge, pHandleNext,axis1,axis2))
{
Handle* handle0 = getHandle(pEdge->m_handle);
Handle* handle1 = getHandle(pNext->m_handle);
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.cpp b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.cpp
index fade71179e6..7c41c8d8f71 100644
--- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.cpp
+++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.cpp
@@ -663,24 +663,23 @@ Benchmarking dbvt...
Leaves: 8192
sizeof(btDbvtVolume): 32 bytes
sizeof(btDbvtNode): 44 bytes
-[1] btDbvtVolume intersections: 3537 ms (0%)
-[2] btDbvtVolume merges: 1945 ms (0%)
-[3] btDbvt::collideTT: 6646 ms (0%)
-[4] btDbvt::collideTT self: 3389 ms (0%)
-[5] btDbvt::collideTT xform: 7505 ms (0%)
-[6] btDbvt::collideTT xform,self: 7480 ms (0%)
-[7] btDbvt::collideRAY: 6307 ms (0%),(332511 r/s)
-[8] insert/remove: 2105 ms (-3%),(996271 ir/s)
-[9] updates (teleport): 1943 ms (0%),(1079337 u/s)
-[10] updates (jitter): 1301 ms (0%),(1611953 u/s)
-[11] optimize (incremental): 2510 ms (0%),(1671000 o/s)
-[12] btDbvtVolume notequal: 3677 ms (0%)
-[13] culling(OCL+fullsort): 2231 ms (0%),(458 t/s)
-[14] culling(OCL+qsort): 3500 ms (0%),(2340 t/s)
-[15] culling(KDOP+qsort): 1151 ms (0%),(7117 t/s)
-[16] insert/remove batch(256): 5138 ms (0%),(816330 bir/s)
-[17] btDbvtVolume proximity: 2842 ms (0%)
-[18] btDbvtVolume select: 3390 ms (0%)
+[1] btDbvtVolume intersections: 3499 ms (-1%)
+[2] btDbvtVolume merges: 1934 ms (0%)
+[3] btDbvt::collideTT: 5485 ms (-21%)
+[4] btDbvt::collideTT self: 2814 ms (-20%)
+[5] btDbvt::collideTT xform: 7379 ms (-1%)
+[6] btDbvt::collideTT xform,self: 7270 ms (-2%)
+[7] btDbvt::collideRAY: 6314 ms (0%),(332143 r/s)
+[8] insert/remove: 2093 ms (0%),(1001983 ir/s)
+[9] updates (teleport): 1879 ms (-3%),(1116100 u/s)
+[10] updates (jitter): 1244 ms (-4%),(1685813 u/s)
+[11] optimize (incremental): 2514 ms (0%),(1668000 o/s)
+[12] btDbvtVolume notequal: 3659 ms (0%)
+[13] culling(OCL+fullsort): 2218 ms (0%),(461 t/s)
+[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
+[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
+[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
+[17] btDbvtVolume select: 3419 ms (0%)
*/
struct btDbvtBenchmark
@@ -787,7 +786,7 @@ static const bool cfgEnable = true;
//[1] btDbvtVolume intersections
bool cfgBenchmark1_Enable = cfgEnable;
static const int cfgBenchmark1_Iterations = 8;
-static const int cfgBenchmark1_Reference = 3537;
+static const int cfgBenchmark1_Reference = 3499;
//[2] btDbvtVolume merges
bool cfgBenchmark2_Enable = cfgEnable;
static const int cfgBenchmark2_Iterations = 4;
@@ -795,21 +794,21 @@ static const int cfgBenchmark2_Reference = 1945;
//[3] btDbvt::collideTT
bool cfgBenchmark3_Enable = cfgEnable;
static const int cfgBenchmark3_Iterations = 512;
-static const int cfgBenchmark3_Reference = 6646;
+static const int cfgBenchmark3_Reference = 5485;
//[4] btDbvt::collideTT self
bool cfgBenchmark4_Enable = cfgEnable;
static const int cfgBenchmark4_Iterations = 512;
-static const int cfgBenchmark4_Reference = 3389;
+static const int cfgBenchmark4_Reference = 2814;
//[5] btDbvt::collideTT xform
bool cfgBenchmark5_Enable = cfgEnable;
static const int cfgBenchmark5_Iterations = 512;
static const btScalar cfgBenchmark5_OffsetScale = 2;
-static const int cfgBenchmark5_Reference = 7505;
+static const int cfgBenchmark5_Reference = 7379;
//[6] btDbvt::collideTT xform,self
bool cfgBenchmark6_Enable = cfgEnable;
static const int cfgBenchmark6_Iterations = 512;
static const btScalar cfgBenchmark6_OffsetScale = 2;
-static const int cfgBenchmark6_Reference = 7480;
+static const int cfgBenchmark6_Reference = 7270;
//[7] btDbvt::collideRAY
bool cfgBenchmark7_Enable = cfgEnable;
static const int cfgBenchmark7_Passes = 32;
@@ -824,13 +823,13 @@ static const int cfgBenchmark8_Reference = 2105;
bool cfgBenchmark9_Enable = cfgEnable;
static const int cfgBenchmark9_Passes = 32;
static const int cfgBenchmark9_Iterations = 65536;
-static const int cfgBenchmark9_Reference = 1943;
+static const int cfgBenchmark9_Reference = 1879;
//[10] updates (jitter)
bool cfgBenchmark10_Enable = cfgEnable;
static const btScalar cfgBenchmark10_Scale = cfgVolumeCenterScale/10000;
static const int cfgBenchmark10_Passes = 32;
static const int cfgBenchmark10_Iterations = 65536;
-static const int cfgBenchmark10_Reference = 1301;
+static const int cfgBenchmark10_Reference = 1244;
//[11] optimize (incremental)
bool cfgBenchmark11_Enable = cfgEnable;
static const int cfgBenchmark11_Passes = 64;
@@ -857,14 +856,10 @@ bool cfgBenchmark16_Enable = cfgEnable;
static const int cfgBenchmark16_BatchCount = 256;
static const int cfgBenchmark16_Passes = 16384;
static const int cfgBenchmark16_Reference = 5138;
-//[17] proximity
+//[17] select
bool cfgBenchmark17_Enable = cfgEnable;
-static const int cfgBenchmark17_Iterations = 8;
-static const int cfgBenchmark17_Reference = 2842;
-//[18] select
-bool cfgBenchmark18_Enable = cfgEnable;
-static const int cfgBenchmark18_Iterations = 4;
-static const int cfgBenchmark18_Reference = 3390;
+static const int cfgBenchmark17_Iterations = 4;
+static const int cfgBenchmark17_Reference = 3390;
btClock wallclock;
printf("Benchmarking dbvt...\r\n");
@@ -1259,32 +1254,6 @@ if(cfgBenchmark17_Enable)
{// Benchmark 17
srand(380843);
btAlignedObjectArray<btDbvtVolume> volumes;
- btAlignedObjectArray<btScalar> results;
- volumes.resize(cfgLeaves);
- results.resize(cfgLeaves);
- for(int i=0;i<cfgLeaves;++i)
- {
- volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
- }
- printf("[17] btDbvtVolume proximity: ");
- wallclock.reset();
- for(int i=0;i<cfgBenchmark17_Iterations;++i)
- {
- for(int j=0;j<cfgLeaves;++j)
- {
- for(int k=0;k<cfgLeaves;++k)
- {
- results[k]=Proximity(volumes[j],volumes[k]);
- }
- }
- }
- const int time=(int)wallclock.getTimeMilliseconds();
- printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
- }
-if(cfgBenchmark18_Enable)
- {// Benchmark 18
- srand(380843);
- btAlignedObjectArray<btDbvtVolume> volumes;
btAlignedObjectArray<int> results;
btAlignedObjectArray<int> indices;
volumes.resize(cfgLeaves);
@@ -1299,9 +1268,9 @@ if(cfgBenchmark18_Enable)
{
btSwap(indices[i],indices[rand()%cfgLeaves]);
}
- printf("[18] btDbvtVolume select: ");
+ printf("[17] btDbvtVolume select: ");
wallclock.reset();
- for(int i=0;i<cfgBenchmark18_Iterations;++i)
+ for(int i=0;i<cfgBenchmark17_Iterations;++i)
{
for(int j=0;j<cfgLeaves;++j)
{
@@ -1313,7 +1282,7 @@ if(cfgBenchmark18_Enable)
}
}
const int time=(int)wallclock.getTimeMilliseconds();
- printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark18_Reference)*100/time);
+ printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
}
printf("\r\n\r\n");
}
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h
index 10f94627c37..da296445e81 100644
--- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h
+++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h
@@ -31,7 +31,7 @@ subject to the following restrictions:
#define DBVT_IMPL_SSE 1 // SSE
// Template implementation of ICollide
-#ifdef WIN32_AVOID_WHEN_EMBEDDED_INSIDE_BLENDER
+#ifdef WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER //there is always some weird compiler that breaks SSE builds
#if (defined (_MSC_VER) && _MSC_VER >= 1400)
#define DBVT_USE_TEMPLATE 1
#else
@@ -41,6 +41,9 @@ subject to the following restrictions:
#define DBVT_USE_TEMPLATE 0
#endif
+// Use only intrinsics instead of inline asm
+#define DBVT_USE_INTRINSIC_SSE 1
+
// Using memmov for collideOCL
#define DBVT_USE_MEMMOVE 1
@@ -57,14 +60,21 @@ subject to the following restrictions:
#endif
// Specific methods implementation
-#ifdef WIN32_AVOID_WHEN_EMBEDDED_INSIDE_BLENDER
-#define DBVT_PROXIMITY_IMPL DBVT_IMPL_SSE
+
+#ifdef WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER //there is always some weird compiler that breaks SSE builds
#define DBVT_SELECT_IMPL DBVT_IMPL_SSE
#define DBVT_MERGE_IMPL DBVT_IMPL_SSE
+#define DBVT_INT0_IMPL DBVT_IMPL_SSE
#else
-#define DBVT_PROXIMITY_IMPL DBVT_IMPL_GENERIC
#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC
#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC
+#define DBVT_INT0_IMPL DBVT_IMPL_GENERIC
+#endif
+
+#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)|| \
+ (DBVT_MERGE_IMPL==DBVT_IMPL_SSE)|| \
+ (DBVT_INT0_IMPL==DBVT_IMPL_SSE)
+#include <emmintrin.h>
#endif
//
@@ -104,10 +114,6 @@ subject to the following restrictions:
#error "DBVT_ENABLE_BENCHMARK undefined"
#endif
-#ifndef DBVT_PROXIMITY_IMPL
-#error "DBVT_PROXIMITY_IMPL undefined"
-#endif
-
#ifndef DBVT_SELECT_IMPL
#error "DBVT_SELECT_IMPL undefined"
#endif
@@ -116,6 +122,10 @@ subject to the following restrictions:
#error "DBVT_MERGE_IMPL undefined"
#endif
+#ifndef DBVT_INT0_IMPL
+#error "DBVT_INT0_IMPL undefined"
+#endif
+
//
// Defaults volumes
//
@@ -133,8 +143,8 @@ static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r);
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
-DBVT_INLINE void Expand(const btVector3 e);
-DBVT_INLINE void SignedExpand(const btVector3 e);
+DBVT_INLINE void Expand(const btVector3& e);
+DBVT_INLINE void SignedExpand(const btVector3& e);
DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const;
DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const;
DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
@@ -173,12 +183,12 @@ struct btDbvtNode
{
btDbvtVolume volume;
btDbvtNode* parent;
- bool isleaf() const { return(childs[1]==0); }
- bool isinternal() const { return(!isleaf()); }
+ DBVT_INLINE bool isleaf() const { return(childs[1]==0); }
+ DBVT_INLINE bool isinternal() const { return(!isleaf()); }
union {
- btDbvtNode* childs[2];
- void* data;
- };
+ btDbvtNode* childs[2];
+ void* data;
+ };
};
///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
@@ -186,8 +196,6 @@ struct btDbvtNode
///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
struct btDbvt
{
-
-
/* Stack element */
struct sStkNN
{
@@ -250,8 +258,8 @@ struct btDbvt
};
// Fields
- btDbvtNode* m_root;
- btDbvtNode* m_free;
+ btDbvtNode* m_root;
+ btDbvtNode* m_free;
int m_lkhd;
int m_leaves;
unsigned m_opath;
@@ -408,17 +416,17 @@ return(box);
}
//
-DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e)
+DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e)
{
mi-=e;mx+=e;
}
//
-DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e)
+DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e)
{
-if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x());
-if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y());
-if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z());
+if(e.x()>0) mx.setX(mx.x()+e[0]); else mi.setX(mi.x()+e[0]);
+if(e.y()>0) mx.setY(mx.y()+e[1]); else mi.setY(mi.y()+e[1]);
+if(e.z()>0) mx.setZ(mx.z()+e[2]); else mi.setZ(mi.z()+e[2]);
}
//
@@ -486,12 +494,19 @@ for(int i=0;i<3;++i)
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
const btDbvtAabbMm& b)
{
+#if DBVT_INT0_IMPL == DBVT_IMPL_SSE
+const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),
+ _mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));
+const __int32* pu((const __int32*)&rt);
+return((pu[0]|pu[1]|pu[2])==0);
+#else
return( (a.mi.x()<=b.mx.x())&&
(a.mx.x()>=b.mi.x())&&
(a.mi.y()<=b.mx.y())&&
(a.mx.y()>=b.mi.y())&&
(a.mi.z()<=b.mx.z())&&
(a.mx.z()>=b.mi.z()));
+#endif
}
//
@@ -558,32 +573,8 @@ return(txmax>0);
DBVT_INLINE btScalar Proximity( const btDbvtAabbMm& a,
const btDbvtAabbMm& b)
{
-#if DBVT_PROXIMITY_IMPL == DBVT_IMPL_SSE
-DBVT_ALIGN btScalar r[1];
-static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
-__asm
- {
- mov eax,a
- mov ecx,b
- movaps xmm0,[eax]
- movaps xmm2,[ecx]
- movaps xmm1,[eax+16]
- movaps xmm3,[ecx+16]
- addps xmm0,xmm1
- addps xmm2,xmm3
- subps xmm0,xmm2
- andps xmm0,mask
- movhlps xmm1,xmm0
- addps xmm0,xmm1
- pshufd xmm1,xmm0,1
- addss xmm0,xmm1
- movss r,xmm0
- }
-return(r[0]);
-#else
const btVector3 d=(a.mi+a.mx)-(b.mi+b.mx);
return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z()));
-#endif
}
//
@@ -592,36 +583,57 @@ DBVT_INLINE int Select( const btDbvtAabbMm& o,
const btDbvtAabbMm& b)
{
#if DBVT_SELECT_IMPL == DBVT_IMPL_SSE
-DBVT_ALIGN __int32 r[1];
static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
-__asm
- {
- mov eax,o
- mov ecx,a
- mov edx,b
- movaps xmm0,[eax]
- movaps xmm5,mask
- addps xmm0,[eax+16]
- movaps xmm1,[ecx]
- movaps xmm2,[edx]
- addps xmm1,[ecx+16]
- addps xmm2,[edx+16]
- subps xmm1,xmm0
- subps xmm2,xmm0
- andps xmm1,xmm5
- andps xmm2,xmm5
- movhlps xmm3,xmm1
- movhlps xmm4,xmm2
- addps xmm1,xmm3
- addps xmm2,xmm4
- pshufd xmm3,xmm1,1
- pshufd xmm4,xmm2,1
- addss xmm1,xmm3
- addss xmm2,xmm4
- cmpless xmm2,xmm1
- movss r,xmm2
- }
-return(r[0]&1);
+ // TODO: the intrinsic version is 11% slower
+ #if DBVT_USE_INTRINSIC_SSE
+ __m128 omi(_mm_load_ps(o.mi));
+ omi=_mm_add_ps(omi,_mm_load_ps(o.mx));
+ __m128 ami(_mm_load_ps(a.mi));
+ ami=_mm_add_ps(ami,_mm_load_ps(a.mx));
+ ami=_mm_sub_ps(ami,omi);
+ ami=_mm_and_ps(ami,_mm_load_ps((const float*)mask));
+ __m128 bmi(_mm_load_ps(b.mi));
+ bmi=_mm_add_ps(bmi,_mm_load_ps(b.mx));
+ bmi=_mm_sub_ps(bmi,omi);
+ bmi=_mm_and_ps(bmi,_mm_load_ps((const float*)mask));
+ __m128 t0(_mm_movehl_ps(ami,ami));
+ ami=_mm_add_ps(ami,t0);
+ ami=_mm_add_ss(ami,_mm_shuffle_ps(ami,ami,1));
+ __m128 t1(_mm_movehl_ps(bmi,bmi));
+ bmi=_mm_add_ps(bmi,t1);
+ bmi=_mm_add_ss(bmi,_mm_shuffle_ps(bmi,bmi,1));
+ return(_mm_cmple_ss(bmi,ami).m128_u32[0]&1);
+ #else
+ DBVT_ALIGN __int32 r[1];
+ __asm
+ {
+ mov eax,o
+ mov ecx,a
+ mov edx,b
+ movaps xmm0,[eax]
+ movaps xmm5,mask
+ addps xmm0,[eax+16]
+ movaps xmm1,[ecx]
+ movaps xmm2,[edx]
+ addps xmm1,[ecx+16]
+ addps xmm2,[edx+16]
+ subps xmm1,xmm0
+ subps xmm2,xmm0
+ andps xmm1,xmm5
+ andps xmm2,xmm5
+ movhlps xmm3,xmm1
+ movhlps xmm4,xmm2
+ addps xmm1,xmm3
+ addps xmm2,xmm4
+ pshufd xmm3,xmm1,1
+ pshufd xmm4,xmm2,1
+ addss xmm1,xmm3
+ addss xmm2,xmm4
+ cmpless xmm2,xmm1
+ movss r,xmm2
+ }
+ return(r[0]&1);
+ #endif
#else
return(Proximity(o,a)<Proximity(o,b)?0:1);
#endif
@@ -633,20 +645,14 @@ DBVT_INLINE void Merge( const btDbvtAabbMm& a,
btDbvtAabbMm& r)
{
#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
-__asm
- {
- mov eax,a
- mov edx,b
- mov ecx,r
- movaps xmm0,[eax+0]
- movaps xmm1,[edx+0]
- movaps xmm2,[eax+16]
- movaps xmm3,[edx+16]
- minps xmm0,xmm1
- maxps xmm2,xmm3
- movaps [ecx+0],xmm0
- movaps [ecx+16],xmm2
- }
+__m128 ami(_mm_load_ps(a.mi));
+__m128 amx(_mm_load_ps(a.mx));
+__m128 bmi(_mm_load_ps(b.mi));
+__m128 bmx(_mm_load_ps(b.mx));
+ami=_mm_min_ps(ami,bmi);
+amx=_mm_max_ps(amx,bmx);
+_mm_store_ps(r.mi,ami);
+_mm_store_ps(r.mx,amx);
#else
for(int i=0;i<3;++i)
{
@@ -717,7 +723,7 @@ if(root0&&root1)
int treshold=DOUBLE_STACKSIZE-4;
stack.resize(DOUBLE_STACKSIZE);
stack[0]=sStkNN(root0,root1);
- do {
+ do {
sStkNN p=stack[--depth];
if(depth>treshold)
{
@@ -838,12 +844,13 @@ collideTT(root0,root1,xform,policy);
//
DBVT_PREFIX
inline void btDbvt::collideTV( const btDbvtNode* root,
- const btDbvtVolume& volume,
+ const btDbvtVolume& vol,
DBVT_IPOLICY)
{
DBVT_CHECKTYPE
if(root)
{
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) volume(vol);
btAlignedObjectArray<const btDbvtNode*> stack;
stack.reserve(SIMPLE_STACKSIZE);
stack.push_back(root);
@@ -1095,7 +1102,10 @@ if(root)
#undef DBVT_IPOLICY
#undef DBVT_CHECKTYPE
#undef DBVT_IMPL_GENERIC
-#undef DBVT_IMPL_FPU0x86
#undef DBVT_IMPL_SSE
+#undef DBVT_USE_INTRINSIC_SSE
+#undef DBVT_SELECT_IMPL
+#undef DBVT_MERGE_IMPL
+#undef DBVT_INT0_IMPL
#endif
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
index 2d27f22567f..a57952ffa06 100644
--- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
+++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
@@ -55,17 +55,15 @@ btSimpleBroadphase::btSimpleBroadphase(int maxProxies, btOverlappingPairCache* o
m_maxHandles = maxProxies;
m_numHandles = 0;
m_firstFreeHandle = 0;
- m_firstAllocatedHandle = -1;
+
{
for (int i = m_firstFreeHandle; i < maxProxies; i++)
{
m_pHandles[i].SetNextFree(i + 1);
m_pHandles[i].m_uniqueId = i+2;//any UID will do, we just avoid too trivial values (0,1) for debugging purposes
- m_pHandles[i].SetNextAllocated(-1);
}
m_pHandles[maxProxies - 1].SetNextFree(0);
- m_pHandles[maxProxies - 1].SetNextAllocated(-1);
}
@@ -179,31 +177,29 @@ void btSimpleBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
//first check for new overlapping pairs
int i,j;
- if (m_firstAllocatedHandle >= 0)
+ if (m_numHandles >= 0)
{
- btSimpleBroadphaseProxy* proxy0 = &m_pHandles[m_firstAllocatedHandle];
-
for (i=0;i<m_numHandles;i++)
{
- btSimpleBroadphaseProxy* proxy1 = &m_pHandles[m_firstAllocatedHandle];
+ btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
- for (j=0;j<m_numHandles;j++)
+ for (j=i+1;j<m_numHandles;j++)
{
-
- if (proxy0 != proxy1)
- {
- btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
- btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
+ btSimpleBroadphaseProxy* proxy1 = &m_pHandles[j];
+ btAssert(proxy0 != proxy1);
- if (aabbOverlap(p0,p1))
- {
- if ( !m_pairCache->findPair(proxy0,proxy1))
- {
- m_pairCache->addOverlappingPair(proxy0,proxy1);
- }
- } else
+ btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
+ btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
+
+ if (aabbOverlap(p0,p1))
+ {
+ if ( !m_pairCache->findPair(proxy0,proxy1))
{
+ m_pairCache->addOverlappingPair(proxy0,proxy1);
+ }
+ } else
+ {
if (!m_pairCache->hasDeferredRemoval())
{
if ( m_pairCache->findPair(proxy0,proxy1))
@@ -211,19 +207,13 @@ void btSimpleBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
}
}
-
- }
}
- proxy1 = &m_pHandles[proxy1->GetNextAllocated()];
-
}
- proxy0 = &m_pHandles[proxy0->GetNextAllocated()];
-
}
if (m_ownsPairCache && m_pairCache->hasDeferredRemoval())
{
-
+
btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray();
//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
@@ -237,11 +227,11 @@ void btSimpleBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
previousPair.m_pProxy0 = 0;
previousPair.m_pProxy1 = 0;
previousPair.m_algorithm = 0;
-
-
+
+
for (i=0;i<overlappingPairArray.size();i++)
{
-
+
btBroadphasePair& pair = overlappingPairArray[i];
bool isDuplicate = (pair == previousPair);
@@ -268,31 +258,31 @@ void btSimpleBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
//should have no algorithm
btAssert(!pair.m_algorithm);
}
-
+
if (needsRemoval)
{
m_pairCache->cleanOverlappingPair(pair,dispatcher);
- // m_overlappingPairArray.swap(i,m_overlappingPairArray.size()-1);
- // m_overlappingPairArray.pop_back();
+ // m_overlappingPairArray.swap(i,m_overlappingPairArray.size()-1);
+ // m_overlappingPairArray.pop_back();
pair.m_pProxy0 = 0;
pair.m_pProxy1 = 0;
m_invalidPair++;
gOverlappingPairs--;
}
-
+
}
- ///if you don't like to skip the invalid pairs in the array, execute following code:
- #define CLEAN_INVALID_PAIRS 1
- #ifdef CLEAN_INVALID_PAIRS
+ ///if you don't like to skip the invalid pairs in the array, execute following code:
+#define CLEAN_INVALID_PAIRS 1
+#ifdef CLEAN_INVALID_PAIRS
//perform a sort, to sort 'invalid' pairs to the end
overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
m_invalidPair = 0;
- #endif//CLEAN_INVALID_PAIRS
+#endif//CLEAN_INVALID_PAIRS
}
}
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
index 49dfeb84900..e2ebb825725 100644
--- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
+++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
@@ -25,7 +25,7 @@ struct btSimpleBroadphaseProxy : public btBroadphaseProxy
btVector3 m_min;
btVector3 m_max;
int m_nextFree;
- int m_nextAllocated;
+
// int m_handleId;
@@ -42,8 +42,7 @@ struct btSimpleBroadphaseProxy : public btBroadphaseProxy
SIMD_FORCE_INLINE void SetNextFree(int next) {m_nextFree = next;}
SIMD_FORCE_INLINE int GetNextFree() const {return m_nextFree;}
- SIMD_FORCE_INLINE void SetNextAllocated(int next) {m_nextAllocated = next;}
- SIMD_FORCE_INLINE int GetNextAllocated() const {return m_nextAllocated;}
+
};
@@ -57,22 +56,18 @@ protected:
int m_numHandles; // number of active handles
int m_maxHandles; // max number of handles
+
btSimpleBroadphaseProxy* m_pHandles; // handles pool
+
void* m_pHandlesRawPtr;
int m_firstFreeHandle; // free handles list
- int m_firstAllocatedHandle;
-
+
int allocHandle()
{
-
+ btAssert(m_numHandles < m_maxHandles);
int freeHandle = m_firstFreeHandle;
m_firstFreeHandle = m_pHandles[freeHandle].GetNextFree();
-
- m_pHandles[freeHandle].SetNextAllocated(m_firstAllocatedHandle);
- m_firstAllocatedHandle = freeHandle;
-
m_numHandles++;
-
return freeHandle;
}
@@ -84,13 +79,9 @@ protected:
proxy->SetNextFree(m_firstFreeHandle);
m_firstFreeHandle = handle;
- m_firstAllocatedHandle = proxy->GetNextAllocated();
- proxy->SetNextAllocated(-1);
-
m_numHandles--;
}
-
btOverlappingPairCache* m_pairCache;
bool m_ownsPairCache;