git.blender.org/blender.git

Diffstat (limited to 'extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h')
-rw-r--r--  extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h | 198
1 file changed, 104 insertions(+), 94 deletions(-)
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h
index 10f94627c37..da296445e81 100644
--- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h
+++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h
@@ -31,7 +31,7 @@ subject to the following restrictions:
#define DBVT_IMPL_SSE 1 // SSE
// Template implementation of ICollide
-#ifdef WIN32_AVOID_WHEN_EMBEDDED_INSIDE_BLENDER
+#ifdef WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER //there is always some weird compiler that breaks SSE builds
#if (defined (_MSC_VER) && _MSC_VER >= 1400)
#define DBVT_USE_TEMPLATE 1
#else
@@ -41,6 +41,9 @@ subject to the following restrictions:
#define DBVT_USE_TEMPLATE 0
#endif
+// Use only intrinsics instead of inline asm
+#define DBVT_USE_INTRINSIC_SSE 1
+
// Using memmov for collideOCL
#define DBVT_USE_MEMMOVE 1
@@ -57,14 +60,21 @@ subject to the following restrictions:
#endif
// Specific methods implementation
-#ifdef WIN32_AVOID_WHEN_EMBEDDED_INSIDE_BLENDER
-#define DBVT_PROXIMITY_IMPL DBVT_IMPL_SSE
+
+#ifdef WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER //there is always some weird compiler that breaks SSE builds
#define DBVT_SELECT_IMPL DBVT_IMPL_SSE
#define DBVT_MERGE_IMPL DBVT_IMPL_SSE
+#define DBVT_INT0_IMPL DBVT_IMPL_SSE
#else
-#define DBVT_PROXIMITY_IMPL DBVT_IMPL_GENERIC
#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC
#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC
+#define DBVT_INT0_IMPL DBVT_IMPL_GENERIC
+#endif
+
+#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)|| \
+ (DBVT_MERGE_IMPL==DBVT_IMPL_SSE)|| \
+ (DBVT_INT0_IMPL==DBVT_IMPL_SSE)
+#include <emmintrin.h>
#endif
//
@@ -104,10 +114,6 @@ subject to the following restrictions:
#error "DBVT_ENABLE_BENCHMARK undefined"
#endif
-#ifndef DBVT_PROXIMITY_IMPL
-#error "DBVT_PROXIMITY_IMPL undefined"
-#endif
-
#ifndef DBVT_SELECT_IMPL
#error "DBVT_SELECT_IMPL undefined"
#endif
@@ -116,6 +122,10 @@ subject to the following restrictions:
#error "DBVT_MERGE_IMPL undefined"
#endif
+#ifndef DBVT_INT0_IMPL
+#error "DBVT_INT0_IMPL undefined"
+#endif
+
//
// Defaults volumes
//
@@ -133,8 +143,8 @@ static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r);
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
-DBVT_INLINE void Expand(const btVector3 e);
-DBVT_INLINE void SignedExpand(const btVector3 e);
+DBVT_INLINE void Expand(const btVector3& e);
+DBVT_INLINE void SignedExpand(const btVector3& e);
DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const;
DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const;
DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
@@ -173,12 +183,12 @@ struct btDbvtNode
{
btDbvtVolume volume;
btDbvtNode* parent;
- bool isleaf() const { return(childs[1]==0); }
- bool isinternal() const { return(!isleaf()); }
+ DBVT_INLINE bool isleaf() const { return(childs[1]==0); }
+ DBVT_INLINE bool isinternal() const { return(!isleaf()); }
union {
- btDbvtNode* childs[2];
- void* data;
- };
+ btDbvtNode* childs[2];
+ void* data;
+ };
};
///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
@@ -186,8 +196,6 @@ struct btDbvtNode
///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
struct btDbvt
{
-
-
/* Stack element */
struct sStkNN
{
@@ -250,8 +258,8 @@ struct btDbvt
};
// Fields
- btDbvtNode* m_root;
- btDbvtNode* m_free;
+ btDbvtNode* m_root;
+ btDbvtNode* m_free;
int m_lkhd;
int m_leaves;
unsigned m_opath;
@@ -408,17 +416,17 @@ return(box);
}
//
-DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e)
+DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e)
{
mi-=e;mx+=e;
}
//
-DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e)
+DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e)
{
-if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x());
-if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y());
-if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z());
+if(e.x()>0) mx.setX(mx.x()+e[0]); else mi.setX(mi.x()+e[0]);
+if(e.y()>0) mx.setY(mx.y()+e[1]); else mi.setY(mi.y()+e[1]);
+if(e.z()>0) mx.setZ(mx.z()+e[2]); else mi.setZ(mi.z()+e[2]);
}
//
@@ -486,12 +494,19 @@ for(int i=0;i<3;++i)
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
const btDbvtAabbMm& b)
{
+#if DBVT_INT0_IMPL == DBVT_IMPL_SSE
+const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),
+ _mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));
+const __int32* pu((const __int32*)&rt);
+return((pu[0]|pu[1]|pu[2])==0);
+#else
return( (a.mi.x()<=b.mx.x())&&
(a.mx.x()>=b.mi.x())&&
(a.mi.y()<=b.mx.y())&&
(a.mx.y()>=b.mi.y())&&
(a.mi.z()<=b.mx.z())&&
(a.mx.z()>=b.mi.z()));
+#endif
}
//
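[Editor's note] The new DBVT_INT0_IMPL SSE path above relies on a sign-bit trick: the boxes are disjoint if b.mx < a.mi or a.mx < b.mi on any axis. A minimal standalone sketch of the same test, with illustrative names and unaligned loads so no alignment attribute is needed (not part of the patch):

#include <emmintrin.h>

struct AabbMm { float mi[4]; float mx[4]; };   // hypothetical stand-in for btDbvtAabbMm

static inline bool aabbOverlap(const AabbMm& a, const AabbMm& b)
{
	// Disjoint if b.mx < a.mi or a.mx < b.mi on any of the x/y/z axes.
	const __m128 lt1 = _mm_cmplt_ps(_mm_loadu_ps(b.mx), _mm_loadu_ps(a.mi));
	const __m128 lt2 = _mm_cmplt_ps(_mm_loadu_ps(a.mx), _mm_loadu_ps(b.mi));
	// _mm_movemask_ps gathers the per-lane sign bits; ignore the 4th (w) lane.
	return (_mm_movemask_ps(_mm_or_ps(lt1, lt2)) & 0x7) == 0;
}

The patch itself keeps _mm_load_ps and reads the comparison result through an __int32 pointer, which requires the volumes to be 16-byte aligned.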
@@ -558,32 +573,8 @@ return(txmax>0);
DBVT_INLINE btScalar Proximity( const btDbvtAabbMm& a,
const btDbvtAabbMm& b)
{
-#if DBVT_PROXIMITY_IMPL == DBVT_IMPL_SSE
-DBVT_ALIGN btScalar r[1];
-static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
-__asm
- {
- mov eax,a
- mov ecx,b
- movaps xmm0,[eax]
- movaps xmm2,[ecx]
- movaps xmm1,[eax+16]
- movaps xmm3,[ecx+16]
- addps xmm0,xmm1
- addps xmm2,xmm3
- subps xmm0,xmm2
- andps xmm0,mask
- movhlps xmm1,xmm0
- addps xmm0,xmm1
- pshufd xmm1,xmm0,1
- addss xmm0,xmm1
- movss r,xmm0
- }
-return(r[0]);
-#else
const btVector3 d=(a.mi+a.mx)-(b.mi+b.mx);
return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z()));
-#endif
}
//
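[Editor's note] The deleted assembly computed the Manhattan distance between the two box centres (scaled by two), i.e. |(a.mi+a.mx)-(b.mi+b.mx)| summed over x, y, z, which is exactly what the remaining generic branch still returns. Should an intrinsic variant ever be wanted again, a sketch along these lines would match it (hypothetical helper, assumes the unused w lanes are zero; not part of the patch):

#include <emmintrin.h>

// Hypothetical intrinsic equivalent of the removed Proximity assembly.
static inline float proximitySse(const float* ami, const float* amx,
                                 const float* bmi, const float* bmx)
{
	static const unsigned int absMask[4] = {0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
	__m128 d = _mm_sub_ps(_mm_add_ps(_mm_loadu_ps(ami), _mm_loadu_ps(amx)),
	                      _mm_add_ps(_mm_loadu_ps(bmi), _mm_loadu_ps(bmx)));
	d = _mm_and_ps(d, _mm_loadu_ps((const float*)absMask));   // per-lane fabs
	__m128 s = _mm_add_ps(d, _mm_movehl_ps(d, d));             // lane0=x+z, lane1=y+w
	s = _mm_add_ss(s, _mm_shuffle_ps(s, s, 1));                // lane0=x+y+z+w
	float r;
	_mm_store_ss(&r, s);
	return r;                                                  // w lanes assumed zero
}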
@@ -592,36 +583,57 @@ DBVT_INLINE int Select( const btDbvtAabbMm& o,
const btDbvtAabbMm& b)
{
#if DBVT_SELECT_IMPL == DBVT_IMPL_SSE
-DBVT_ALIGN __int32 r[1];
static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
-__asm
- {
- mov eax,o
- mov ecx,a
- mov edx,b
- movaps xmm0,[eax]
- movaps xmm5,mask
- addps xmm0,[eax+16]
- movaps xmm1,[ecx]
- movaps xmm2,[edx]
- addps xmm1,[ecx+16]
- addps xmm2,[edx+16]
- subps xmm1,xmm0
- subps xmm2,xmm0
- andps xmm1,xmm5
- andps xmm2,xmm5
- movhlps xmm3,xmm1
- movhlps xmm4,xmm2
- addps xmm1,xmm3
- addps xmm2,xmm4
- pshufd xmm3,xmm1,1
- pshufd xmm4,xmm2,1
- addss xmm1,xmm3
- addss xmm2,xmm4
- cmpless xmm2,xmm1
- movss r,xmm2
- }
-return(r[0]&1);
+ // TODO: the intrinsic version is 11% slower
+ #if DBVT_USE_INTRINSIC_SSE
+ __m128 omi(_mm_load_ps(o.mi));
+ omi=_mm_add_ps(omi,_mm_load_ps(o.mx));
+ __m128 ami(_mm_load_ps(a.mi));
+ ami=_mm_add_ps(ami,_mm_load_ps(a.mx));
+ ami=_mm_sub_ps(ami,omi);
+ ami=_mm_and_ps(ami,_mm_load_ps((const float*)mask));
+ __m128 bmi(_mm_load_ps(b.mi));
+ bmi=_mm_add_ps(bmi,_mm_load_ps(b.mx));
+ bmi=_mm_sub_ps(bmi,omi);
+ bmi=_mm_and_ps(bmi,_mm_load_ps((const float*)mask));
+ __m128 t0(_mm_movehl_ps(ami,ami));
+ ami=_mm_add_ps(ami,t0);
+ ami=_mm_add_ss(ami,_mm_shuffle_ps(ami,ami,1));
+ __m128 t1(_mm_movehl_ps(bmi,bmi));
+ bmi=_mm_add_ps(bmi,t1);
+ bmi=_mm_add_ss(bmi,_mm_shuffle_ps(bmi,bmi,1));
+ return(_mm_cmple_ss(bmi,ami).m128_u32[0]&1);
+ #else
+ DBVT_ALIGN __int32 r[1];
+ __asm
+ {
+ mov eax,o
+ mov ecx,a
+ mov edx,b
+ movaps xmm0,[eax]
+ movaps xmm5,mask
+ addps xmm0,[eax+16]
+ movaps xmm1,[ecx]
+ movaps xmm2,[edx]
+ addps xmm1,[ecx+16]
+ addps xmm2,[edx+16]
+ subps xmm1,xmm0
+ subps xmm2,xmm0
+ andps xmm1,xmm5
+ andps xmm2,xmm5
+ movhlps xmm3,xmm1
+ movhlps xmm4,xmm2
+ addps xmm1,xmm3
+ addps xmm2,xmm4
+ pshufd xmm3,xmm1,1
+ pshufd xmm4,xmm2,1
+ addss xmm1,xmm3
+ addss xmm2,xmm4
+ cmpless xmm2,xmm1
+ movss r,xmm2
+ }
+ return(r[0]&1);
+ #endif
#else
return(Proximity(o,a)<Proximity(o,b)?0:1);
#endif
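[Editor's note] The _mm_cmple_ss(...).m128_u32[0] access above uses the MSVC-specific union members of __m128, which is one reason the intrinsic path is gated behind DBVT_USE_INTRINSIC_SSE. On other compilers the same bit can be pulled out with a movemask; a small portability sketch (hypothetical helper, not part of the patch):

#include <emmintrin.h>

// Hypothetical compiler-neutral extraction of the cmple result used by Select().
static inline int selectLowBit(__m128 bSum, __m128 aSum)
{
	// _mm_cmple_ss sets lane 0 to all-ones when bSum <= aSum; movemask reads its sign bit.
	return _mm_movemask_ps(_mm_cmple_ss(bSum, aSum)) & 1;
}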
@@ -633,20 +645,14 @@ DBVT_INLINE void Merge( const btDbvtAabbMm& a,
btDbvtAabbMm& r)
{
#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
-__asm
- {
- mov eax,a
- mov edx,b
- mov ecx,r
- movaps xmm0,[eax+0]
- movaps xmm1,[edx+0]
- movaps xmm2,[eax+16]
- movaps xmm3,[edx+16]
- minps xmm0,xmm1
- maxps xmm2,xmm3
- movaps [ecx+0],xmm0
- movaps [ecx+16],xmm2
- }
+__m128 ami(_mm_load_ps(a.mi));
+__m128 amx(_mm_load_ps(a.mx));
+__m128 bmi(_mm_load_ps(b.mi));
+__m128 bmx(_mm_load_ps(b.mx));
+ami=_mm_min_ps(ami,bmi);
+amx=_mm_max_ps(amx,bmx);
+_mm_store_ps(r.mi,ami);
+_mm_store_ps(r.mx,amx);
#else
for(int i=0;i<3;++i)
{
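[Editor's note] The intrinsic Merge above uses aligned loads and stores (_mm_load_ps/_mm_store_ps), so a.mi, a.mx, b.mi, b.mx and r must all sit on 16-byte boundaries. An unaligned-safe sketch of the same min/max merge (hypothetical names, not part of the patch):

#include <emmintrin.h>

// Hypothetical unaligned-safe merge: r = [min(a.mi,b.mi), max(a.mx,b.mx)].
static inline void mergeAabb(const float* ami, const float* amx,
                             const float* bmi, const float* bmx,
                             float* rmi, float* rmx)
{
	_mm_storeu_ps(rmi, _mm_min_ps(_mm_loadu_ps(ami), _mm_loadu_ps(bmi)));
	_mm_storeu_ps(rmx, _mm_max_ps(_mm_loadu_ps(amx), _mm_loadu_ps(bmx)));
}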
@@ -717,7 +723,7 @@ if(root0&&root1)
int treshold=DOUBLE_STACKSIZE-4;
stack.resize(DOUBLE_STACKSIZE);
stack[0]=sStkNN(root0,root1);
- do {
+ do {
sStkNN p=stack[--depth];
if(depth>treshold)
{
@@ -838,12 +844,13 @@ collideTT(root0,root1,xform,policy);
//
DBVT_PREFIX
inline void btDbvt::collideTV( const btDbvtNode* root,
- const btDbvtVolume& volume,
+ const btDbvtVolume& vol,
DBVT_IPOLICY)
{
DBVT_CHECKTYPE
if(root)
{
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) volume(vol);
btAlignedObjectArray<const btDbvtNode*> stack;
stack.reserve(SIMPLE_STACKSIZE);
stack.push_back(root);
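[Editor's note] The added ATTRIBUTE_ALIGNED16 local copy exists because the SSE paths use aligned loads, and a reference parameter cannot guarantee 16-byte alignment. A standalone illustration of the idea (hypothetical types, with C++11 alignas standing in for Bullet's ATTRIBUTE_ALIGNED16 macro; not part of the patch):

#include <xmmintrin.h>

struct Box { float mi[4]; float mx[4]; };        // hypothetical type with no alignment guarantee

void processVolume(const Box& vol)
{
	alignas(16) Box local = vol;                 // explicitly aligned local copy, as in collideTV
	__m128 lo = _mm_load_ps(local.mi);           // aligned load is now safe
	__m128 hi = _mm_load_ps(local.mx);
	(void)lo; (void)hi;
}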
@@ -1095,7 +1102,10 @@ if(root)
#undef DBVT_IPOLICY
#undef DBVT_CHECKTYPE
#undef DBVT_IMPL_GENERIC
-#undef DBVT_IMPL_FPU0x86
#undef DBVT_IMPL_SSE
+#undef DBVT_USE_INTRINSIC_SSE
+#undef DBVT_SELECT_IMPL
+#undef DBVT_MERGE_IMPL
+#undef DBVT_INT0_IMPL
#endif