diff options
Diffstat (limited to 'extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h')
-rw-r--r-- | extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h | 198 |
1 files changed, 104 insertions, 94 deletions
diff --git a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h index 10f94627c37..da296445e81 100644 --- a/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h +++ b/extern/bullet2/src/BulletCollision/BroadphaseCollision/btDbvt.h @@ -31,7 +31,7 @@ subject to the following restrictions: #define DBVT_IMPL_SSE 1 // SSE // Template implementation of ICollide -#ifdef WIN32_AVOID_WHEN_EMBEDDED_INSIDE_BLENDER +#ifdef WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER //there is always some weird compiler that breaks SSE builds #if (defined (_MSC_VER) && _MSC_VER >= 1400) #define DBVT_USE_TEMPLATE 1 #else @@ -41,6 +41,9 @@ subject to the following restrictions: #define DBVT_USE_TEMPLATE 0 #endif +// Use only intrinsics instead of inline asm +#define DBVT_USE_INTRINSIC_SSE 1 + // Using memmov for collideOCL #define DBVT_USE_MEMMOVE 1 @@ -57,14 +60,21 @@ subject to the following restrictions: #endif // Specific methods implementation -#ifdef WIN32_AVOID_WHEN_EMBEDDED_INSIDE_BLENDER -#define DBVT_PROXIMITY_IMPL DBVT_IMPL_SSE + +#ifdef WIN32_AVOID_SSE_WHEN_EMBEDDED_INSIDE_BLENDER //there is always some weird compiler that breaks SSE builds #define DBVT_SELECT_IMPL DBVT_IMPL_SSE #define DBVT_MERGE_IMPL DBVT_IMPL_SSE +#define DBVT_INT0_IMPL DBVT_IMPL_SSE #else -#define DBVT_PROXIMITY_IMPL DBVT_IMPL_GENERIC #define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC #define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC +#define DBVT_INT0_IMPL DBVT_IMPL_GENERIC +#endif + +#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)|| \ + (DBVT_MERGE_IMPL==DBVT_IMPL_SSE)|| \ + (DBVT_INT0_IMPL==DBVT_IMPL_SSE) +#include <emmintrin.h> #endif // @@ -104,10 +114,6 @@ subject to the following restrictions: #error "DBVT_ENABLE_BENCHMARK undefined" #endif -#ifndef DBVT_PROXIMITY_IMPL -#error "DBVT_PROXIMITY_IMPL undefined" -#endif - #ifndef DBVT_SELECT_IMPL #error "DBVT_SELECT_IMPL undefined" #endif @@ -116,6 +122,10 @@ subject to the following restrictions: #error "DBVT_MERGE_IMPL undefined" #endif +#ifndef DBVT_INT0_IMPL +#error "DBVT_INT0_IMPL undefined" +#endif + // // Defaults volumes // @@ -133,8 +143,8 @@ static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r); static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx); static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n); static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n); -DBVT_INLINE void Expand(const btVector3 e); -DBVT_INLINE void SignedExpand(const btVector3 e); +DBVT_INLINE void Expand(const btVector3& e); +DBVT_INLINE void SignedExpand(const btVector3& e); DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const; DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const; DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const; @@ -173,12 +183,12 @@ struct btDbvtNode { btDbvtVolume volume; btDbvtNode* parent; - bool isleaf() const { return(childs[1]==0); } - bool isinternal() const { return(!isleaf()); } + DBVT_INLINE bool isleaf() const { return(childs[1]==0); } + DBVT_INLINE bool isinternal() const { return(!isleaf()); } union { - btDbvtNode* childs[2]; - void* data; - }; + btDbvtNode* childs[2]; + void* data; + }; }; ///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree). @@ -186,8 +196,6 @@ struct btDbvtNode ///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure. struct btDbvt { - - /* Stack element */ struct sStkNN { @@ -250,8 +258,8 @@ struct btDbvt }; // Fields - btDbvtNode* m_root; - btDbvtNode* m_free; + btDbvtNode* m_root; + btDbvtNode* m_free; int m_lkhd; int m_leaves; unsigned m_opath; @@ -408,17 +416,17 @@ return(box); } // -DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e) +DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e) { mi-=e;mx+=e; } // -DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e) +DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e) { -if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x()); -if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y()); -if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z()); +if(e.x()>0) mx.setX(mx.x()+e[0]); else mi.setX(mi.x()+e[0]); +if(e.y()>0) mx.setY(mx.y()+e[1]); else mi.setY(mi.y()+e[1]); +if(e.z()>0) mx.setZ(mx.z()+e[2]); else mi.setZ(mi.z()+e[2]); } // @@ -486,12 +494,19 @@ for(int i=0;i<3;++i) DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, const btDbvtAabbMm& b) { +#if DBVT_INT0_IMPL == DBVT_IMPL_SSE +const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)), + _mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi)))); +const __int32* pu((const __int32*)&rt); +return((pu[0]|pu[1]|pu[2])==0); +#else return( (a.mi.x()<=b.mx.x())&& (a.mx.x()>=b.mi.x())&& (a.mi.y()<=b.mx.y())&& (a.mx.y()>=b.mi.y())&& (a.mi.z()<=b.mx.z())&& (a.mx.z()>=b.mi.z())); +#endif } // @@ -558,32 +573,8 @@ return(txmax>0); DBVT_INLINE btScalar Proximity( const btDbvtAabbMm& a, const btDbvtAabbMm& b) { -#if DBVT_PROXIMITY_IMPL == DBVT_IMPL_SSE -DBVT_ALIGN btScalar r[1]; -static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff}; -__asm - { - mov eax,a - mov ecx,b - movaps xmm0,[eax] - movaps xmm2,[ecx] - movaps xmm1,[eax+16] - movaps xmm3,[ecx+16] - addps xmm0,xmm1 - addps xmm2,xmm3 - subps xmm0,xmm2 - andps xmm0,mask - movhlps xmm1,xmm0 - addps xmm0,xmm1 - pshufd xmm1,xmm0,1 - addss xmm0,xmm1 - movss r,xmm0 - } -return(r[0]); -#else const btVector3 d=(a.mi+a.mx)-(b.mi+b.mx); return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z())); -#endif } // @@ -592,36 +583,57 @@ DBVT_INLINE int Select( const btDbvtAabbMm& o, const btDbvtAabbMm& b) { #if DBVT_SELECT_IMPL == DBVT_IMPL_SSE -DBVT_ALIGN __int32 r[1]; static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff}; -__asm - { - mov eax,o - mov ecx,a - mov edx,b - movaps xmm0,[eax] - movaps xmm5,mask - addps xmm0,[eax+16] - movaps xmm1,[ecx] - movaps xmm2,[edx] - addps xmm1,[ecx+16] - addps xmm2,[edx+16] - subps xmm1,xmm0 - subps xmm2,xmm0 - andps xmm1,xmm5 - andps xmm2,xmm5 - movhlps xmm3,xmm1 - movhlps xmm4,xmm2 - addps xmm1,xmm3 - addps xmm2,xmm4 - pshufd xmm3,xmm1,1 - pshufd xmm4,xmm2,1 - addss xmm1,xmm3 - addss xmm2,xmm4 - cmpless xmm2,xmm1 - movss r,xmm2 - } -return(r[0]&1); + // TODO: the intrinsic version is 11% slower + #if DBVT_USE_INTRINSIC_SSE + __m128 omi(_mm_load_ps(o.mi)); + omi=_mm_add_ps(omi,_mm_load_ps(o.mx)); + __m128 ami(_mm_load_ps(a.mi)); + ami=_mm_add_ps(ami,_mm_load_ps(a.mx)); + ami=_mm_sub_ps(ami,omi); + ami=_mm_and_ps(ami,_mm_load_ps((const float*)mask)); + __m128 bmi(_mm_load_ps(b.mi)); + bmi=_mm_add_ps(bmi,_mm_load_ps(b.mx)); + bmi=_mm_sub_ps(bmi,omi); + bmi=_mm_and_ps(bmi,_mm_load_ps((const float*)mask)); + __m128 t0(_mm_movehl_ps(ami,ami)); + ami=_mm_add_ps(ami,t0); + ami=_mm_add_ss(ami,_mm_shuffle_ps(ami,ami,1)); + __m128 t1(_mm_movehl_ps(bmi,bmi)); + bmi=_mm_add_ps(bmi,t1); + bmi=_mm_add_ss(bmi,_mm_shuffle_ps(bmi,bmi,1)); + return(_mm_cmple_ss(bmi,ami).m128_u32[0]&1); + #else + DBVT_ALIGN __int32 r[1]; + __asm + { + mov eax,o + mov ecx,a + mov edx,b + movaps xmm0,[eax] + movaps xmm5,mask + addps xmm0,[eax+16] + movaps xmm1,[ecx] + movaps xmm2,[edx] + addps xmm1,[ecx+16] + addps xmm2,[edx+16] + subps xmm1,xmm0 + subps xmm2,xmm0 + andps xmm1,xmm5 + andps xmm2,xmm5 + movhlps xmm3,xmm1 + movhlps xmm4,xmm2 + addps xmm1,xmm3 + addps xmm2,xmm4 + pshufd xmm3,xmm1,1 + pshufd xmm4,xmm2,1 + addss xmm1,xmm3 + addss xmm2,xmm4 + cmpless xmm2,xmm1 + movss r,xmm2 + } + return(r[0]&1); + #endif #else return(Proximity(o,a)<Proximity(o,b)?0:1); #endif @@ -633,20 +645,14 @@ DBVT_INLINE void Merge( const btDbvtAabbMm& a, btDbvtAabbMm& r) { #if DBVT_MERGE_IMPL==DBVT_IMPL_SSE -__asm - { - mov eax,a - mov edx,b - mov ecx,r - movaps xmm0,[eax+0] - movaps xmm1,[edx+0] - movaps xmm2,[eax+16] - movaps xmm3,[edx+16] - minps xmm0,xmm1 - maxps xmm2,xmm3 - movaps [ecx+0],xmm0 - movaps [ecx+16],xmm2 - } +__m128 ami(_mm_load_ps(a.mi)); +__m128 amx(_mm_load_ps(a.mx)); +__m128 bmi(_mm_load_ps(b.mi)); +__m128 bmx(_mm_load_ps(b.mx)); +ami=_mm_min_ps(ami,bmi); +amx=_mm_max_ps(amx,bmx); +_mm_store_ps(r.mi,ami); +_mm_store_ps(r.mx,amx); #else for(int i=0;i<3;++i) { @@ -717,7 +723,7 @@ if(root0&&root1) int treshold=DOUBLE_STACKSIZE-4; stack.resize(DOUBLE_STACKSIZE); stack[0]=sStkNN(root0,root1); - do { + do { sStkNN p=stack[--depth]; if(depth>treshold) { @@ -838,12 +844,13 @@ collideTT(root0,root1,xform,policy); // DBVT_PREFIX inline void btDbvt::collideTV( const btDbvtNode* root, - const btDbvtVolume& volume, + const btDbvtVolume& vol, DBVT_IPOLICY) { DBVT_CHECKTYPE if(root) { + ATTRIBUTE_ALIGNED16(btDbvtVolume) volume(vol); btAlignedObjectArray<const btDbvtNode*> stack; stack.reserve(SIMPLE_STACKSIZE); stack.push_back(root); @@ -1095,7 +1102,10 @@ if(root) #undef DBVT_IPOLICY #undef DBVT_CHECKTYPE #undef DBVT_IMPL_GENERIC -#undef DBVT_IMPL_FPU0x86 #undef DBVT_IMPL_SSE +#undef DBVT_USE_INTRINSIC_SSE +#undef DBVT_SELECT_IMPL +#undef DBVT_MERGE_IMPL +#undef DBVT_INT0_IMPL #endif |