Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Leung <aligorith@gmail.com>2010-06-17 06:42:43 +0400
committerJoshua Leung <aligorith@gmail.com>2010-06-17 06:42:43 +0400
commited59822857de7e7b41b33b79c306f5e9b8755c62 (patch)
treeaa15011ba1df16a98d023acf2a276b265bd3ceda /extern/bullet2/BulletMultiThreaded
parent22bca493b23904c4dee7130f5737005f8558a26d (diff)
== SoC Bullet - Bullet Upgrade to 2.76 ==
Updated Blender's Bullet to 2.76 in this branch only. This update was done by: 1) deleting the contents of the existing extern/bullet2/src directory (leaving the .svn folder in place), 2) copy/pasting the contents of the bullet/src directory (from unzipped Bullet archive) into this newly cleared folder. Hopefully there aren't any patches that are still needed from the Bullet we had in source. --- Note: I didn't use Moguri's patch, since that was giving me compile errors with headers not being able to be found. [[Split portion of a mixed commit.]]
Diffstat (limited to 'extern/bullet2/BulletMultiThreaded')
-rw-r--r--extern/bullet2/BulletMultiThreaded/CMakeLists.txt92
-rw-r--r--extern/bullet2/BulletMultiThreaded/Makefile.original187
-rw-r--r--extern/bullet2/BulletMultiThreaded/MiniCL.cpp517
-rw-r--r--extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp74
-rw-r--r--extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h62
-rw-r--r--extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp519
-rw-r--r--extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h194
-rw-r--r--extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h84
-rw-r--r--extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp249
-rw-r--r--extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h124
-rw-r--r--extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h20
-rw-r--r--extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp93
-rw-r--r--extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h92
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp48
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h40
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp318
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h163
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp69
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h120
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h110
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp211
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuFakeDma.h135
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp251
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h72
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp257
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h180
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h172
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp302
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h126
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp242
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h106
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h51
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp1381
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h140
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h19
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp348
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h48
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h70
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp1155
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h66
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt1
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp214
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h54
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt1
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp222
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h153
-rw-r--r--extern/bullet2/BulletMultiThreaded/SpuSync.h148
-rw-r--r--extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp262
-rw-r--r--extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h132
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp590
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h138
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h430
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h61
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h67
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpuDefines.h211
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h55
-rw-r--r--extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h52
-rw-r--r--extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp74
-rw-r--r--extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h42
-rw-r--r--extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp22
-rw-r--r--extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h50
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h225
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h343
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h1630
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h433
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h1426
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h1872
-rw-r--r--extern/bullet2/BulletMultiThreaded/vectormath2bullet.h80
68 files changed, 17495 insertions, 0 deletions
diff --git a/extern/bullet2/BulletMultiThreaded/CMakeLists.txt b/extern/bullet2/BulletMultiThreaded/CMakeLists.txt
new file mode 100644
index 00000000000..90f970afbfd
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/CMakeLists.txt
@@ -0,0 +1,92 @@
+# Header search paths: the Bullet source root and the scalar C++ vectormath headers.
+INCLUDE_DIRECTORIES(
+ ${BULLET_PHYSICS_SOURCE_DIR}/src
+ ${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/vectormath/scalar/cpp
+)
+
+# BulletMultiThreaded library target. The source list is grouped as: platform/DMA
+# helpers and thread-support back ends, SPU collision task code, the parallel
+# constraint solver, the narrow-phase collision task, the GPU 3D-grid broadphase,
+# and MiniCL.
+ADD_LIBRARY(BulletMultiThreaded
+ PlatformDefinitions.h
+ SpuFakeDma.cpp
+ SpuFakeDma.h
+ SpuDoubleBuffer.h
+ SpuLibspe2Support.cpp
+ SpuLibspe2Support.h
+ btThreadSupportInterface.cpp
+ btThreadSupportInterface.h
+
+ Win32ThreadSupport.cpp
+ Win32ThreadSupport.h
+ PosixThreadSupport.cpp
+ PosixThreadSupport.h
+ SequentialThreadSupport.cpp
+ SequentialThreadSupport.h
+ SpuSampleTaskProcess.h
+ SpuSampleTaskProcess.cpp
+
+ SpuCollisionObjectWrapper.cpp
+ SpuCollisionObjectWrapper.h
+ SpuCollisionTaskProcess.h
+ SpuCollisionTaskProcess.cpp
+ SpuGatheringCollisionDispatcher.h
+ SpuGatheringCollisionDispatcher.cpp
+ SpuContactManifoldCollisionAlgorithm.cpp
+ SpuContactManifoldCollisionAlgorithm.h
+
+ btParallelConstraintSolver.cpp
+ btParallelConstraintSolver.h
+
+ SpuNarrowPhaseCollisionTask/Box.h
+ SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
+ SpuNarrowPhaseCollisionTask/boxBoxDistance.h
+ SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
+ SpuNarrowPhaseCollisionTask/SpuContactResult.h
+ SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
+ SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
+ SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
+ SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
+ SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
+ SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
+ SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
+ SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
+
+
+#Some GPU related stuff, mainly CUDA and perhaps OpenCL
+ btGpu3DGridBroadphase.cpp
+ btGpu3DGridBroadphase.h
+ btGpu3DGridBroadphaseSharedCode.h
+ btGpu3DGridBroadphaseSharedDefs.h
+ btGpu3DGridBroadphaseSharedTypes.h
+ btGpuDefines.h
+ btGpuUtilsSharedCode.h
+ btGpuUtilsSharedDefs.h
+
+#MiniCL provides a small subset of OpenCL
+ MiniCL.cpp
+ MiniCLTaskScheduler.cpp
+ MiniCLTaskScheduler.h
+ MiniCLTask/MiniCLTask.cpp
+ MiniCLTask/MiniCLTask.h
+ ../MiniCL/cl.h
+ ../MiniCL/cl_gl.h
+ ../MiniCL/cl_platform.h
+ ../MiniCL/cl_MiniCL_Defs.h
+)
+
+# Link against BulletCollision only when building shared libraries.
+IF (BUILD_SHARED_LIBS)
+ TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletCollision)
+ENDIF (BUILD_SHARED_LIBS)
+
+# Install rules. They only take effect when INSTALL_LIBS and INSTALL_EXTRA_LIBS
+# are set and INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES is not.
+IF (INSTALL_LIBS)
+ IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ #INSTALL of other files requires CMake 2.6
+ # NOTE(review): GREATER compares "major.minor" as a real number, so a
+ # two-digit minor version (e.g. 2.10 read as 2.1) would fail this test.
+ IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+ IF(INSTALL_EXTRA_LIBS)
+ # Apple framework builds install the target into the prefix root;
+ # every other configuration installs into lib/ and copies the headers.
+ IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+ INSTALL(TARGETS BulletMultiThreaded DESTINATION .)
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+ INSTALL(TARGETS BulletMultiThreaded DESTINATION lib)
+ INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
+ ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+ ENDIF (INSTALL_EXTRA_LIBS)
+ ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+ ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS) \ No newline at end of file
diff --git a/extern/bullet2/BulletMultiThreaded/Makefile.original b/extern/bullet2/BulletMultiThreaded/Makefile.original
new file mode 100644
index 00000000000..1edc9811f8d
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/Makefile.original
@@ -0,0 +1,187 @@
+# Word size of the PPU build (32 or 64). It selects the -m flag, the PPU compiler
+# and whether USE_ADDR64 is defined.
+__ARCH_BITS__ := 32
+
+# define macros
+NARROWPHASEDIR=./SpuNarrowPhaseCollisionTask
+SPU_TASKFILE=$(NARROWPHASEDIR)/SpuGatheringCollisionTask
+
+# "3.0" when /opt/cell exists, empty otherwise; picks the SDK directory layout below.
+IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
+
+ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
+ CELL_TOP ?= /opt/cell/sdk
+ CELL_SYSROOT := /opt/cell/sysroot
+else
+ CELL_TOP ?= /opt/ibm/cell-sdk/prototype
+ CELL_SYSROOT := $(CELL_TOP)/sysroot
+endif
+
+
+# Common tools and directories.
+USE_CCACHE=ccache
+RM=rm -f
+OUTDIR=./out
+DEBUGFLAG=-DNDEBUG
+LIBOUTDIR=../../lib/ibmsdk
+COLLISIONDIR=../../src/BulletCollision
+MATHDIR=../../src/LinearMath
+ARCHITECTUREFLAG=-m$(__ARCH_BITS__)
+# SPU preprocessor defines; USE_ADDR64 is added for 64-bit builds only.
+ifeq "$(__ARCH_BITS__)" "64"
+ SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ -DUSE_ADDR64
+else
+ SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__
+endif
+
+SPU_DEFFLAGS+=-DUSE_PE_BOX_BOX
+
+# SPU toolchain settings.
+SPU_GCC=$(USE_CCACHE) /usr/bin/spu-gcc
+SPU_INCLUDEDIR= -Ivectormath/scalar/cpp -I. -I$(CELL_SYSROOT)/usr/spu/include -I../../src -I$(NARROWPHASEDIR)
+#SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -Os -c -include spu_intrinsics.h -include stdbool.h
+SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -O3 -mbranch-hints -fomit-frame-pointer -ftree-vectorize -finline-functions -ftree-vect-loop-version -ftree-loop-optimize -ffast-math -fno-rtti -fno-exceptions -c -include spu_intrinsics.h -include stdbool.h
+
+SPU_LFLAGS= -Wl,-N
+SPU_LIBRARIES=-lstdc++
+SPU_EMBED=/usr/bin/ppu-embedspu
+SPU_AR=/usr/bin/ar
+# Symbol name passed to $(SPU_EMBED) by the spu-embed target.
+SYMBOLNAME=spu_program
+
+# PPU toolchain settings; the compiler binary depends on the word size.
+ifeq "$(__ARCH_BITS__)" "64"
+ PPU_DEFFLAGS= -DUSE_LIBSPE2 -DUSE_ADDR64
+ PPU_GCC=$(USE_CCACHE) /usr/bin/ppu-gcc
+else
+ PPU_DEFFLAGS= -DUSE_LIBSPE2
+ PPU_GCC=$(USE_CCACHE) /usr/bin/ppu32-gcc
+endif
+
+PPU_CFLAGS= $(ARCHITECTUREFLAG) $(DEBUGFLAG) -W -Wall -Winline -O3 -c -mabi=altivec -maltivec -include altivec.h -include stdbool.h
+PPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/include -I../../src -I$(NARROWPHASEDIR)
+PPU_LFLAGS= $(ARCHITECTUREFLAG) -Wl,-m,elf$(__ARCH_BITS__)ppc
+PPU_LIBRARIES= -lstdc++ -lsupc++ -lgcc -lgcov -lspe2 -lpthread -L../../lib/ibmsdk -lbulletcollision -lbulletdynamics -lbulletmath -L$(CELL_SYSROOT)/usr/lib$(__ARCH_BITS__) -R$(CELL_SYSROOT)/usr/lib
+PPU_AR=/usr/bin/ar
+
+# Default target and common prerequisite of every compile rule: it only prints the
+# usage line. NOTE(review): the mkdir below is commented out, so $(OUTDIR) has to
+# exist already or each "-o $(OUTDIR)/..." step will fail.
+MakeOut :
+# rm -f -R $(OUTDIR) ; mkdir $(OUTDIR)
+ @echo "usage: make spu, make ppu, make all, or make clean"
+# SPU
+# Each rule below compiles one translation unit with the SPU compiler into
+# $(OUTDIR)/<target>.o. No rule names a file prerequisite, so every object is
+# rebuilt on each invocation.
+SpuTaskFile : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/SpuTaskFile.o $(SPU_TASKFILE).cpp
+
+boxBoxDistance : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+SpuFakeDma : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+# SPU build of SpuContactManifoldCollisionAlgorithm.cpp; the _spu suffix keeps its
+# object apart from the PPU build of the same source.
+SpuContactManifoldCollisionAlgorithm_spu : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuContactManifoldCollisionAlgorithm.cpp
+
+SpuCollisionShapes : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+SpuContactResult : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+#SpuGatheringCollisionTask : MakeOut
+# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+SpuGjkPairDetector: MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+SpuMinkowskiPenetrationDepthSolver : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+SpuVoronoiSimplexSolver : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
+
+#SpuLibspe2Support_spu : MakeOut
+# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuLibspe2Support.cpp
+
+## SPU-Bullet
+# Bullet core sources (from $(COLLISIONDIR) and $(MATHDIR)) compiled for the SPU.
+btPersistentManifold : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/NarrowPhaseCollision/$@.cpp
+
+btOptimizedBvh : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
+
+btCollisionObject : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionDispatch/$@.cpp
+
+btTriangleCallback : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
+
+btTriangleIndexVertexArray : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
+
+btStridingMeshInterface : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
+
+btAlignedAllocator : MakeOut
+ $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(MATHDIR)/$@.cpp
+
+
+# PPU
+# Each rule compiles <target>.cpp from this directory with the PPU compiler into
+# $(OUTDIR)/<target>.o.
+SpuGatheringCollisionDispatcher : MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+# NOTE(review): the "ppu" target neither depends on nor archives this object.
+SequentialThreadSupport: MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+SpuLibspe2Support: MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+btThreadSupportInterface: MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+SpuCollisionTaskProcess : MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+SpuContactManifoldCollisionAlgorithm : MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+SpuSampleTaskProcess : MakeOut
+ $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
+
+
+
+# Link all SPU objects into $(OUTDIR)/spuCollision.elf.
+# NOTE(review): btOptimizedBvh and btCollisionObject are prerequisites here, but
+# their objects are missing from the link line below - confirm that is intended.
+spu : boxBoxDistance SpuFakeDma SpuContactManifoldCollisionAlgorithm_spu SpuContactResult SpuTaskFile \
+ SpuGjkPairDetector SpuMinkowskiPenetrationDepthSolver SpuVoronoiSimplexSolver SpuCollisionShapes \
+ btPersistentManifold btOptimizedBvh btCollisionObject btTriangleCallback btTriangleIndexVertexArray \
+ btStridingMeshInterface btAlignedAllocator
+ $(SPU_GCC) -o $(OUTDIR)/spuCollision.elf \
+ $(OUTDIR)/SpuTaskFile.o \
+ $(OUTDIR)/SpuFakeDma.o \
+ $(OUTDIR)/boxBoxDistance.o \
+ $(OUTDIR)/SpuContactManifoldCollisionAlgorithm_spu.o \
+ $(OUTDIR)/SpuContactResult.o \
+ $(OUTDIR)/SpuCollisionShapes.o \
+ $(OUTDIR)/SpuGjkPairDetector.o \
+ $(OUTDIR)/SpuMinkowskiPenetrationDepthSolver.o \
+ $(OUTDIR)/SpuVoronoiSimplexSolver.o \
+ $(OUTDIR)/btPersistentManifold.o \
+ $(OUTDIR)/btTriangleCallback.o \
+ $(OUTDIR)/btTriangleIndexVertexArray.o \
+ $(OUTDIR)/btStridingMeshInterface.o \
+ $(OUTDIR)/btAlignedAllocator.o \
+ $(SPU_LFLAGS) $(SPU_LIBRARIES)
+
+# Run $(SPU_EMBED) on the SPU ELF to produce $(OUTDIR)/spu-embed.o under the symbol
+# $(SYMBOLNAME), then add that object to $(LIBOUTDIR)/libspu.a.
+spu-embed : spu
+ $(SPU_EMBED) $(ARCHITECTUREFLAG) $(SYMBOLNAME) $(OUTDIR)/spuCollision.elf $(OUTDIR)/$@.o
+ $(SPU_AR) -qcs $(LIBOUTDIR)/libspu.a $(OUTDIR)/$@.o
+
+
+
+# Archive the PPU objects into $(LIBOUTDIR)/bulletmultithreaded.a.
+ppu : SpuGatheringCollisionDispatcher SpuCollisionTaskProcess btThreadSupportInterface \
+ SpuLibspe2Support SpuContactManifoldCollisionAlgorithm SpuSampleTaskProcess
+ $(PPU_AR) -qcs $(LIBOUTDIR)/bulletmultithreaded.a \
+ $(OUTDIR)/SpuCollisionTaskProcess.o \
+ $(OUTDIR)/SpuSampleTaskProcess.o \
+ $(OUTDIR)/SpuGatheringCollisionDispatcher.o \
+ $(OUTDIR)/SpuLibspe2Support.o \
+ $(OUTDIR)/btThreadSupportInterface.o \
+ $(OUTDIR)/SpuContactManifoldCollisionAlgorithm.o
+
+# Build both the embedded SPU library and the PPU archive.
+all : spu-embed ppu
+
+# Remove everything in $(OUTDIR) plus the two archives produced above.
+clean:
+ $(RM) $(OUTDIR)/* ; $(RM) $(LIBOUTDIR)/libspu.a ; $(RM) $(LIBOUTDIR)/bulletmultithreaded.a
+
+
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCL.cpp b/extern/bullet2/BulletMultiThreaded/MiniCL.cpp
new file mode 100644
index 00000000000..b7f5a699312
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/MiniCL.cpp
@@ -0,0 +1,517 @@
+/*
+ Copyright (C) 2010 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "MiniCL/cl.h"
+#define __PHYSICS_COMMON_H__ 1
+#ifdef _WIN32
+#include "BulletMultiThreaded/Win32ThreadSupport.h"
+#endif
+
+#include "BulletMultiThreaded/SequentialThreadSupport.h"
+#include "MiniCLTaskScheduler.h"
+#include "MiniCLTask/MiniCLTask.h"
+#include "LinearMath/btMinMax.h"
+
+//#define DEBUG_MINICL_KERNELS 1
+
+
+
+
+/// Reports fixed properties of the single MiniCL device.
+///
+/// Supported queries and the value written to param_value:
+///  - CL_DEVICE_NAME: the NUL-terminated string "CPU"
+///  - CL_DEVICE_TYPE: CL_DEVICE_TYPE_CPU
+///  - CL_DEVICE_MAX_COMPUTE_UNITS: 4
+///  - CL_DEVICE_MAX_WORK_ITEM_SIZES: three size_t values, 64 / 24 / 16
+///  - CL_DEVICE_MAX_CLOCK_FREQUENCY: 3*1024
+///
+/// If param_value_size is too small for the requested value, or param_name is not
+/// supported, a message is printed and param_value is left untouched. The function
+/// always returns 0; param_value_size_ret is never written.
+CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
+	cl_device_id            /* device */,
+	cl_device_info          param_name ,
+	size_t                  param_value_size ,
+	void *                  param_value ,
+	size_t *                /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+
+	switch (param_name)
+	{
+	case CL_DEVICE_NAME:
+		{
+			const char deviceName[] = "CPU";
+			const size_t nameLen = sizeof(deviceName); // includes the terminating NUL
+			assert(param_value_size>strlen(deviceName));
+			// a buffer of exactly nameLen bytes is large enough
+			if (param_value_size >= nameLen)
+			{
+				sprintf((char*)param_value,"%s",deviceName);
+			} else
+			{
+				// size_t is cast explicitly: passing it to %d is undefined where size_t is wider than int
+				printf("error: param_value_size should be at least %d, but it is %d\n",(int)nameLen,(int)param_value_size);
+			}
+			break;
+		}
+	case CL_DEVICE_TYPE:
+		{
+			if (param_value_size>=sizeof(cl_device_type))
+			{
+				cl_device_type* deviceType = (cl_device_type*)param_value;
+				*deviceType = CL_DEVICE_TYPE_CPU;
+			} else
+			{
+				printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_device_type));
+			}
+			break;
+		}
+	case CL_DEVICE_MAX_COMPUTE_UNITS:
+		{
+			if (param_value_size>=sizeof(cl_uint))
+			{
+				cl_uint* numUnits = (cl_uint*)param_value;
+				*numUnits= 4;
+			} else
+			{
+				printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_uint));
+			}
+
+			break;
+		}
+	case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+		{
+			const size_t requiredSize = 3 * sizeof(size_t);
+
+			if (param_value_size>=requiredSize)
+			{
+				size_t* workItemSize = (size_t*)param_value;
+				workItemSize[0] = 64;
+				workItemSize[1] = 24;
+				workItemSize[2] = 16;
+			} else
+			{
+				// report the size this query really needs (three size_t values)
+				printf("error: param_value_size should be at least %d\n",(int)requiredSize);
+			}
+			break;
+		}
+	case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+		{
+			// same size check as the other queries, so a short buffer is not overrun
+			if (param_value_size>=sizeof(cl_uint))
+			{
+				cl_uint* clock_frequency = (cl_uint*)param_value;
+				*clock_frequency = 3*1024;
+			} else
+			{
+				printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_uint));
+			}
+			break;
+		}
+	default:
+		{
+			printf("error: unsupported param_name:%d\n",param_name);
+		}
+	}
+
+
+	return 0;
+}
+
+/// No-op: the memory object is ignored and 0 is always returned.
+/// NOTE(review): clCreateBuffer in this file allocates buffers with malloc() and
+/// nothing here frees them, so buffer storage appears to be leaked - confirm.
+CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
+{
+ return 0;
+}
+
+
+
+/// No-op: the command queue is ignored and 0 is always returned.
+CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
+{
+ return 0;
+}
+
+/// No-op: the program is ignored and 0 is always returned.
+CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
+{
+ return 0;
+}
+
+/// No-op: the kernel is ignored and 0 is always returned.
+/// NOTE(review): clCreateKernel in this file allocates a MiniCLKernel with new and
+/// nothing here deletes it, so kernel objects appear to be leaked - confirm.
+CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
+{
+ return 0;
+}
+
+
+// Enqueued Commands APIs
+/// Copies cb bytes out of the buffer, starting offset bytes in, to the host memory at ptr.
+/// The command queue is used as the MiniCLTaskScheduler and flushed before the copy.
+/// The blocking flag and all event arguments are ignored. Always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
+	cl_mem buffer ,
+	cl_bool /* blocking_read */,
+	size_t offset ,
+	size_t cb ,
+	void * ptr ,
+	cl_uint /* num_events_in_wait_list */,
+	const cl_event * /* event_wait_list */,
+	cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	///wait for all work items to be completed before reading the results back
+	((MiniCLTaskScheduler*) command_queue)->flush();
+
+	char* deviceBytes = (char*)buffer + offset;
+	memcpy(ptr, deviceBytes, cb);
+	return 0;
+}
+
+
+/// Stub: no build information is available. Every argument is ignored, nothing is
+/// written to param_value or param_value_size_ret, and 0 is always returned.
+/// NOTE(review): unlike its neighbours this definition carries no CL_API_CALL -
+/// confirm it matches the declaration in MiniCL/cl.h.
+CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */,
+ cl_device_id /* device */,
+ cl_program_build_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+
+ return 0;
+}
+
+
+// Program Object APIs
+/// Returns the context itself, cast to cl_program; the source strings are ignored.
+/// errcode_ret is optional (it may be NULL); when given it is set to CL_SUCCESS.
+CL_API_ENTRY cl_program
+clCreateProgramWithSource(cl_context context ,
+	cl_uint /* count */,
+	const char ** /* strings */,
+	const size_t * /* lengths */,
+	cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// OpenCL allows a NULL errcode_ret, so only write through a valid pointer
+	if (errcode_ret)
+	{
+		*errcode_ret = CL_SUCCESS;
+	}
+	return (cl_program)context;
+}
+
+/// Copies cb bytes from the host memory at ptr into the buffer, starting offset bytes in.
+/// The command queue is used as the MiniCLTaskScheduler and flushed before the copy.
+/// The blocking flag and all event arguments are ignored. Always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue ,
+	cl_mem buffer ,
+	cl_bool /* blocking_read */,
+	size_t offset,
+	size_t cb ,
+	const void * ptr ,
+	cl_uint /* num_events_in_wait_list */,
+	const cl_event * /* event_wait_list */,
+	cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	///wait for all work items to be completed before overwriting the buffer
+	((MiniCLTaskScheduler*) command_queue)->flush();
+
+	char* deviceBytes = (char*)buffer + offset;
+	memcpy(deviceBytes, ptr, cb);
+	return 0;
+}
+
+/// Flushes the MiniCLTaskScheduler behind the command queue. Always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue)
+{
+	///wait for all work items to be completed
+	((MiniCLTaskScheduler*) command_queue)->flush();
+	return 0;
+}
+
+
+/// Splits the work items of every dimension into contiguous index ranges and issues
+/// one scheduler task per range.
+///
+/// For each dimension the range length is global_work_size[dim] divided by the
+/// scheduler's getMaxNumOutstandingTasks(), with a minimum of 1. The command queue,
+/// the offsets, the local work size and all event arguments are ignored. Always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+	cl_kernel clKernel ,
+	cl_uint work_dim ,
+	const size_t * /* global_work_offset */,
+	const size_t * global_work_size ,
+	const size_t * /* local_work_size */,
+	cl_uint /* num_events_in_wait_list */,
+	const cl_event * /* event_wait_list */,
+	cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
+
+	for (unsigned int dim = 0; dim < work_dim; dim++)
+	{
+		int taskLimit = kernel->m_scheduler->getMaxNumOutstandingTasks();
+		int itemCount = global_work_size[dim];
+
+		//Performance Hint: tweak the range length during benchmarking
+		int itemsPerTask = itemCount / taskLimit;
+		if (itemsPerTask == 0)
+		{
+			itemsPerTask = 1;
+		}
+
+		int first = 0;
+		while (first < itemCount)
+		{
+			// clamp the final range to the number of work items
+			int last = first + itemsPerTask;
+			if (!(last < itemCount))
+			{
+				last = itemCount;
+			}
+			kernel->m_scheduler->issueTask(first, last, kernel);
+			first = last;
+		}
+	}
+
+	return 0;
+}
+
+// Capacity of the __local scratch buffer, in 16-byte units.
+#define LOCAL_BUF_SIZE 32768
+// Backing store: LOCAL_BUF_SIZE units of four ints each, plus slack for alignment.
+static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
+static int* spLocalBufCurr = NULL;
+// Units handed out since the last reset.
+static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
+
+/// Bump allocator for __local kernel arguments.
+///
+/// Returns a 16-byte aligned block of at least size bytes taken from sLocalMemBuf.
+/// Blocks are never freed one by one: when a request does not fit into what is
+/// left, the allocator starts again from the beginning of the buffer, so blocks
+/// returned earlier get reused. Returns NULL when size is negative or larger than
+/// the whole buffer.
+static void* localBufMalloc(int size)
+{
+	if (size < 0)
+	{
+		return NULL;
+	}
+	int size16 = (size + 15) >> 4; // in 16-byte units
+	if (size16 > LOCAL_BUF_SIZE)
+	{
+		// can never fit, not even right after a reset
+		return NULL;
+	}
+	if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
+	{ // reset
+		spLocalBufCurr = sLocalMemBuf;
+		// size_t instead of long: long is narrower than a pointer on 64-bit Windows
+		while((size_t)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
+		sLocalBufUsed = 0;
+	}
+	void* ret = spLocalBufCurr;
+	spLocalBufCurr += size16 * 4; // four ints per 16-byte unit
+	sLocalBufUsed += size16; // counted in the same units as LOCAL_BUF_SIZE
+	return ret;
+}
+
+
+
+/// Stores one kernel argument in the MiniCLKernel behind clKernel.
+///
+/// A NULL arg_value marks a __local argument: arg_size bytes of scratch memory are
+/// taken from localBufMalloc and the resulting pointer becomes the argument.
+/// Otherwise arg_size bytes are copied from arg_value into the argument slot.
+/// When arg_index is at or beyond the current argument count, the count grows to
+/// arg_index + 1 and the kernel launcher is updated.
+///
+/// An out-of-range arg_index or an arg_size above MINICL_MAX_ARGLENGTH prints an
+/// error and leaves the kernel unchanged. Always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
+	cl_uint arg_index ,
+	size_t arg_size ,
+	const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
+	btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
+	// NOTE(review): if the argument arrays hold exactly MINI_CL_MAX_ARG entries this
+	// test should be >= ; confirm against the MiniCLKernel declaration.
+	if (arg_index>MINI_CL_MAX_ARG)
+	{
+		printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
+	} else
+	{
+		// Only reject arguments that are too large. The previous test (!=) also
+		// rejected every smaller argument, contradicting the assert above.
+		if (arg_size > MINICL_MAX_ARGLENGTH)
+		{
+			printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",(int)arg_size,(int)MINICL_MAX_ARGLENGTH);
+		}
+		else
+		{
+			if(arg_value == NULL)
+			{	// this is only for __local memory qualifier
+				void* ptr = localBufMalloc(arg_size);
+				kernel->m_argData[arg_index] = ptr;
+			}
+			else
+			{
+				memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
+			}
+			kernel->m_argSizes[arg_index] = arg_size;
+			if(arg_index >= kernel->m_numArgs)
+			{
+				kernel->m_numArgs = arg_index + 1;
+				kernel->updateLauncher();
+			}
+		}
+	}
+	return 0;
+}
+
+// Kernel Object APIs
+/// Creates a MiniCLKernel named kernel_name, bound to the scheduler behind program.
+///
+/// Returns the new kernel cast to cl_kernel, or NULL with CL_INVALID_KERNEL_NAME
+/// when kernel_name is NULL, is MINI_CL_MAX_KERNEL_NAME characters or longer, or
+/// registerSelf() fails. errcode_ret is optional (it may be NULL).
+CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
+	const char * kernel_name ,
+	cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// OpenCL allows a NULL errcode_ret; point it at a local so the code below can always write
+	cl_int ignoredStatus = CL_SUCCESS;
+	if (errcode_ret == NULL)
+	{
+		errcode_ret = &ignoredStatus;
+	}
+
+	// validate the name before allocating, so the early return cannot leak a kernel
+	if (kernel_name == NULL)
+	{
+		*errcode_ret = CL_INVALID_KERNEL_NAME;
+		return NULL;
+	}
+	int nameLen = strlen(kernel_name);
+	if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
+	{
+		*errcode_ret = CL_INVALID_KERNEL_NAME;
+		return NULL;
+	}
+
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
+	MiniCLKernel* kernel = new MiniCLKernel();
+	strcpy(kernel->m_name, kernel_name);
+	kernel->m_numArgs = 0;
+	kernel->m_scheduler = scheduler;
+
+	if(kernel->registerSelf() == NULL)
+	{
+		// registration failed: nothing else refers to the kernel, so release it here
+		delete kernel;
+		*errcode_ret = CL_INVALID_KERNEL_NAME;
+		return NULL;
+	}
+
+	*errcode_ret = CL_SUCCESS;
+	return (cl_kernel)kernel;
+
+}
+
+
+/// Stub: nothing is built. Every argument is ignored, pfn_notify is never called,
+/// and CL_SUCCESS is always returned.
+CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
+{
+ return CL_SUCCESS;
+}
+
+/// Returns the context itself, cast to cl_program; the binaries are ignored and
+/// binary_status is not written. errcode_ret is optional (it may be NULL); when
+/// given it is set to CL_SUCCESS so callers never read an uninitialised status.
+CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
+	cl_uint /* num_devices */,
+	const cl_device_id * /* device_list */,
+	const size_t * /* lengths */,
+	const unsigned char ** /* binaries */,
+	cl_int * /* binary_status */,
+	cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	if (errcode_ret)
+	{
+		*errcode_ret = CL_SUCCESS;
+	}
+	return (cl_program)context;
+}
+
+
+// Memory Object APIs
+/// Allocates size bytes with malloc() and returns them as the memory object.
+///
+/// When flags contains CL_MEM_COPY_HOST_PTR and host_ptr is not NULL, size bytes
+/// are copied from host_ptr into the new buffer; other flags are ignored.
+/// If the allocation fails for a non-zero size, NULL is returned and the status is
+/// CL_OUT_OF_HOST_MEMORY. errcode_ret is optional (it may be NULL).
+CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
+	cl_mem_flags flags ,
+	size_t size,
+	void * host_ptr ,
+	cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	cl_mem buf = (cl_mem)malloc(size);
+	// malloc(0) may legitimately return NULL, so only a non-zero request counts as a failure
+	if (buf == NULL && size != 0)
+	{
+		if (errcode_ret)
+		{
+			*errcode_ret = CL_OUT_OF_HOST_MEMORY;
+		}
+		return NULL;
+	}
+	if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr && buf)
+	{
+		memcpy(buf,host_ptr,size);
+	}
+	if (errcode_ret)
+	{
+		*errcode_ret = 0;
+	}
+	return buf;
+}
+
+// Command Queue APIs
+/// Returns the context itself, cast to cl_command_queue; device and properties are
+/// ignored. errcode_ret is optional (it may be NULL); when given it is set to 0.
+CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
+	cl_device_id /* device */,
+	cl_command_queue_properties /* properties */,
+	cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// OpenCL allows a NULL errcode_ret, so only write through a valid pointer
+	if (errcode_ret)
+	{
+		*errcode_ret = 0;
+	}
+	return (cl_command_queue) context;
+}
+
+/// Answers CL_CONTEXT_DEVICES with the placeholder string "MiniCL_Test."
+/// (13 bytes including the terminating NUL).
+///
+/// - param_value_size_ret, when not NULL, receives the required size (13).
+/// - A param_value_size of 0 is a pure size query; param_value is not touched.
+/// - Otherwise the string is copied to param_value. CL_INVALID_VALUE is returned
+///   if param_value is NULL or param_value_size is below 13.
+/// Any other param_name prints "unsupported". Returns 0 in all remaining cases.
+extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
+	cl_context_info param_name ,
+	size_t param_value_size ,
+	void * param_value,
+	size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+
+	switch (param_name)
+	{
+	case CL_CONTEXT_DEVICES:
+		{
+			const char testName[] = "MiniCL_Test.";
+			const size_t requiredSize = sizeof(testName); // 13, includes the NUL
+
+			// OpenCL allows a NULL param_value_size_ret, so only write through a valid pointer
+			if (param_value_size_ret)
+			{
+				*param_value_size_ret = requiredSize;
+			}
+			if (param_value_size)
+			{
+				// refuse to write past the end of a buffer that is too small
+				if (param_value == NULL || param_value_size < requiredSize)
+				{
+					printf("error: param_value_size should be at least %d\n",(int)requiredSize);
+					return CL_INVALID_VALUE;
+				}
+				sprintf((char*)param_value,"%s",testName);
+			}
+			break;
+		}
+	default:
+		{
+			printf("unsupported\n");
+		}
+	}
+
+	return 0;
+}
+
+/// Creates the MiniCL "context": a MiniCLTaskScheduler on top of a thread-support
+/// object, returned cast to cl_context.
+///
+/// The thread support is a Win32ThreadSupport on _WIN32, and a
+/// SequentialThreadSupport elsewhere or when DEBUG_MINICL_KERNELS is defined.
+/// The scheduler is created for 4 outstanding tasks, and the same number is stored
+/// in gMiniCLNumOutstandingTasks. properties, device_type, pfn_notify and user_data
+/// are ignored. errcode_ret is optional (it may be NULL); when given it is set to 0.
+CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
+	cl_device_type /* device_type */,
+	void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+	void * /* user_data */,
+	cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	int maxNumOutstandingTasks = 4;
+//	int maxNumOutstandingTasks = 2;
+//	int maxNumOutstandingTasks = 1;
+	gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
+	const int maxNumOfThreadSupports = 8;
+	static int sUniqueThreadSupportIndex = 0;
+	static char* sUniqueThreadSupportName[maxNumOfThreadSupports] =
+	{
+		"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7"
+	};
+
+#ifdef DEBUG_MINICL_KERNELS
+	SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
+	SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
+#else
+
+#if _WIN32
+	// NOTE(review): only an assert guards the name table, so in builds where
+	// btAssert compiles away a ninth context would index past its end.
+	btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
+	Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
+//		"MiniCL",
+		sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
+		processMiniCLTask, //processCollisionTask,
+		createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
+		maxNumOutstandingTasks));
+#else
+	///todo: add posix thread support for other platforms
+	SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
+	SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
+#endif
+
+#endif //DEBUG_MINICL_KERNELS
+
+
+	MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
+
+	// OpenCL allows a NULL errcode_ret, so only write through a valid pointer
+	if (errcode_ret)
+	{
+		*errcode_ret = 0;
+	}
+	return (cl_context)scheduler;
+}
+
+/// Destroys the MiniCLTaskScheduler behind the context, followed by the
+/// thread-support object it was using. Always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLTaskScheduler* taskScheduler = (MiniCLTaskScheduler*) context;
+
+	// fetch the thread support first: it is only reachable through the scheduler
+	btThreadSupportInterface* threads = taskScheduler->getThreadSupportInterface();
+
+	delete taskScheduler;
+	delete threads;
+
+	return 0;
+}
+/// Blocks until all work issued on the command queue has completed, by flushing the
+/// MiniCLTaskScheduler behind it (the same wait clFlush performs in this file).
+/// Returns CL_SUCCESS.
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
+	///wait for all work items to be completed
+	scheduler->flush();
+	return CL_SUCCESS;
+}
+
+
+/// Answers CL_KERNEL_WORK_GROUP_SIZE with the kernel scheduler's
+/// getMaxNumOutstandingTasks(), written to ptr as an int.
+///
+/// Returns CL_INVALID_VALUE for any other query, when sz is not sizeof(int), or when
+/// ptr is NULL; CL_SUCCESS otherwise. device and param_value_size_ret are ignored.
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel kernel ,
+	cl_device_id /* device */,
+	cl_kernel_work_group_info wgi/* param_name */,
+	size_t sz /* param_value_size */,
+	void * ptr /* param_value */,
+	size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+	// reject everything except a well-formed work-group-size query
+	if((wgi != CL_KERNEL_WORK_GROUP_SIZE)
+		||(sz != sizeof(int))
+		||(ptr == NULL))
+	{
+		return CL_INVALID_VALUE;
+	}
+
+	MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
+	int* result = (int*)ptr;
+	*result = miniCLKernel->m_scheduler->getMaxNumOutstandingTasks();
+	return CL_SUCCESS;
+}
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
new file mode 100644
index 00000000000..babb1d24af5
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
@@ -0,0 +1,74 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "MiniCLTask.h"
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "BulletMultiThreaded/SpuFakeDma.h"
+#include "LinearMath/btMinMax.h"
+#include "MiniCLTask.h"
+#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
+
+
+#ifdef __SPU__
+#include <spu_printf.h>
+#else
+#include <stdio.h>
+#define spu_printf printf
+#endif
+
+int gMiniCLNumOutstandingTasks = 0;
+
+/// Local store handed to processMiniCLTask(); it currently carries no data.
+struct MiniCLTask_LocalStoreMemory
+{
+};
+
+
+//-- MAIN METHOD
+/// Runs every work item in [m_firstWorkUnit, m_lastWorkUnit) of one task through the kernel's launcher.
+/// userPtr points at the MiniCLTaskDesc describing the task; lsMemory is not used.
+void processMiniCLTask(void* userPtr, void* lsMemory)
+{
+	(void)lsMemory;
+
+	MiniCLTaskDesc* task = (MiniCLTaskDesc*)userPtr;
+
+	unsigned int workItem = task->m_firstWorkUnit;
+	while (workItem < task->m_lastWorkUnit)
+	{
+		task->m_kernel->m_launcher(task, workItem);
+		++workItem;
+	}
+
+//	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
+
+}
+
+
+#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
+
+// Cell builds hand out the address of one statically allocated local store.
+ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory gLocalStoreMemory);
+
+void* createMiniCLLocalStoreMemory()
+{
+	void* store = &gLocalStoreMemory;
+	return store;
+}
+#else
+// Other builds allocate a new local store on the heap for every call.
+void* createMiniCLLocalStoreMemory()
+{
+	MiniCLTask_LocalStoreMemory* store = new MiniCLTask_LocalStoreMemory;
+	return store;
+}
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
new file mode 100644
index 00000000000..7e78be0855e
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
@@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef MINICL__TASK_H
+#define MINICL__TASK_H
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+
+#define MINICL_MAX_ARGLENGTH (sizeof(void*))
+#define MINI_CL_MAX_ARG 16
+#define MINI_CL_MAX_KERNEL_NAME 256
+
+struct MiniCLKernel;
+
+/// Description of one scheduled task: the half-open range of work items
+/// [m_firstWorkUnit, m_lastWorkUnit) to run through m_kernel, plus a copy of the kernel's argument table.
+ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	/// Puts every field in a defined state, so a descriptor that has not been filled in yet
+	/// never exposes indeterminate ids, pointers or sizes.
+	MiniCLTaskDesc()
+		:m_taskId(0),
+		m_firstWorkUnit(0),
+		m_lastWorkUnit(0),
+		m_kernel(0)
+	{
+		for (int i=0;i<MINI_CL_MAX_ARG;i++)
+		{
+			m_argData[i]=0;
+			m_argSizes[i]=0;
+		}
+	}
+
+	/// Index of the scheduler slot this descriptor belongs to.
+	uint32_t m_taskId;
+
+	/// First work item to execute (inclusive).
+	uint32_t m_firstWorkUnit;
+	/// End of the work item range (exclusive).
+	uint32_t m_lastWorkUnit;
+
+	/// Kernel whose launcher is invoked for every work item.
+	MiniCLKernel* m_kernel;
+
+	/// Argument pointers and their sizes; an entry with size 0 carries no argument.
+	void* m_argData[MINI_CL_MAX_ARG];
+	int m_argSizes[MINI_CL_MAX_ARG];
+};
+
+extern "C" int gMiniCLNumOutstandingTasks;
+
+
+void processMiniCLTask(void* userPtr, void* lsMemory);
+void* createMiniCLLocalStoreMemory();
+
+
+#endif //MINICL__TASK_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp
new file mode 100644
index 00000000000..7adee88d245
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp
@@ -0,0 +1,519 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define __CELLOS_LV2__ 1
+#define __BT_SKIP_UINT64_H 1
+
+#define USE_SAMPLE_PROCESS 1
+#ifdef USE_SAMPLE_PROCESS
+
+
+#include "MiniCLTaskScheduler.h"
+#include <stdio.h>
+
+#ifdef __SPU__
+
+
+
+/// SPU-side placeholder task: ignores both arguments and only prints a greeting.
+void SampleThreadFunc(void* userPtr,void* lsMemory)
+{
+	(void)userPtr;
+	(void)lsMemory;
+	printf("hello world\n");
+}
+
+
+/// SPU-side placeholder: no local store is created, a null pointer is returned instead.
+void* SamplelsMemoryFunc()
+{
+	void* noLocalStore = 0;
+	return noLocalStore;
+}
+
+
+#else
+
+
+#include "BulletMultiThreaded/btThreadSupportInterface.h"
+
+//# include "SPUAssert.h"
+#include <string.h>
+
+#include "MiniCL/cl_platform.h"
+
+extern "C" {
+ extern char SPU_SAMPLE_ELF_SYMBOL[];
+}
+
+
+/// Sizes the per-task bookkeeping for maxNumOutstandingTasks slots, marks every slot idle
+/// and calls startSPU() on the thread support interface.
+MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
+:m_threadInterface(threadInterface),
+m_maxNumOutstandingTasks(maxNumOutstandingTasks)
+{
+	m_numBusyTasks = 0;
+	m_currentTask = 0;
+	m_initialized = false;
+
+	m_kernels.resize(0);
+	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
+	m_taskBusy.resize(m_maxNumOutstandingTasks);
+
+	// No task has been issued yet, so every slot starts out free.
+	int slot = m_maxNumOutstandingTasks;
+	while (slot-- > 0)
+	{
+		m_taskBusy[slot] = false;
+	}
+
+	m_threadInterface->startSPU();
+}
+
+/// Calls stopSPU() on the thread support interface; the interface object itself is not deleted here.
+MiniCLTaskScheduler::~MiniCLTaskScheduler()
+{
+	btThreadSupportInterface* workers = m_threadInterface;
+	workers->stopSPU();
+}
+
+
+
+/// Marks every task slot idle, resets the busy counter and the current slot, and sets m_initialized.
+void MiniCLTaskScheduler::initialize()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("MiniCLTaskScheduler::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_currentTask = 0;
+	m_numBusyTasks = 0;
+
+	int slot = 0;
+	while (slot < m_maxNumOutstandingTasks)
+	{
+		m_taskBusy[slot++] = false;
+	}
+
+	m_initialized = true;
+}
+
+
+/// Queues the work items [firstWorkUnit, lastWorkUnit) of 'kernel' as one task in the current slot.
+/// If that leaves every slot busy, blocks until one task completes. Finally selects the first
+/// free slot as the current slot for the next call.
+void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel)
+{
+
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	// The format must end in ")\n"; a misplaced backslash ("\)n") prints a stray 'n' and no line break.
+	printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d)\n", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_taskBusy[m_currentTask] = true;
+	m_numBusyTasks++;
+
+	MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
+	{
+		// send task description in event message
+		taskDesc.m_firstWorkUnit = firstWorkUnit;
+		taskDesc.m_lastWorkUnit = lastWorkUnit;
+		taskDesc.m_kernel = kernel;
+		//some bookkeeping to recognize finished tasks
+		taskDesc.m_taskId = m_currentTask;
+
+		// Copy the kernel's argument table; the pointer is copied only for arguments with a non-zero size.
+		for (unsigned int i=0; i < kernel->m_numArgs; i++)
+		{
+			taskDesc.m_argSizes[i] = kernel->m_argSizes[i];
+			if (taskDesc.m_argSizes[i])
+			{
+				taskDesc.m_argData[i] = kernel->m_argData[i];
+			}
+		}
+	}
+
+
+	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
+
+	// if all tasks busy, wait for spu event to clear the task.
+
+	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+	{
+		// Both values start defined; taskId is then seeded with the first busy slot
+		// before being handed to waitForResponse().
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+	// find new task buffer
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		if (!m_taskBusy[i])
+		{
+			m_currentTask = i;
+			break;
+		}
+	}
+}
+
+
+///Optional PPU-side post processing for each finished task; currently there is nothing to do.
+void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
+{
+	(void)taskId;
+	(void)outputSize;
+}
+
+
+/// Blocks until every task that is still marked busy has reported completion.
+void MiniCLTaskScheduler::flush()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	// The trace names this class, so it can be matched with the other MiniCLTaskScheduler traces.
+	printf("\nMiniCLTaskScheduler::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+
+	// all tasks are issued, wait for all tasks to be complete
+	while(m_numBusyTasks > 0)
+	{
+		// Both values start defined; taskId is then seeded with the first busy slot
+		// before being handed to waitForResponse().
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+
+}
+
+
+
+// Kernel entry point signatures, one per argument count (0..16).
+// Every kernel receives its arguments as void* values followed by the work item id as the final int.
+typedef void (*MiniCLKernelLauncher0)(int);
+typedef void (*MiniCLKernelLauncher1)(void*, int);
+typedef void (*MiniCLKernelLauncher2)(void*, void*, int);
+typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+
+
+/// Launcher for kernels without arguments: calls the kernel entry point with the work item id only.
+static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid)
+{
+	// The kernel code lives in m_pCode, exactly as in every other launcher. m_launcher holds the
+	// launcher thunk (this very function for a zero-argument kernel), so calling it with a
+	// one-int signature would never reach the kernel.
+	((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_pCode))(guid);
+}
+/// Calls the kernel's 1-argument entry point for work item 'guid'.
+static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher1 entry = (MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], guid);
+}
+/// Calls the kernel's 2-argument entry point for work item 'guid'.
+static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher2 entry = (MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], guid);
+}
+/// Calls the kernel's 3-argument entry point for work item 'guid'.
+static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher3 entry = (MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], guid);
+}
+/// Calls the kernel's 4-argument entry point for work item 'guid'.
+static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher4 entry = (MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], guid);
+}
+/// Calls the kernel's 5-argument entry point for work item 'guid'.
+static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher5 entry = (MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], guid);
+}
+/// Calls the kernel's 6-argument entry point for work item 'guid'.
+static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher6 entry = (MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], guid);
+}
+/// Calls the kernel's 7-argument entry point for work item 'guid'.
+static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher7 entry = (MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], guid);
+}
+/// Calls the kernel's 8-argument entry point for work item 'guid'.
+static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher8 entry = (MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], guid);
+}
+/// Calls the kernel's 9-argument entry point for work item 'guid'.
+static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher9 entry = (MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], guid);
+}
+/// Calls the kernel's 10-argument entry point for work item 'guid'.
+static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher10 entry = (MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], guid);
+}
+/// Calls the kernel's 11-argument entry point for work item 'guid'.
+static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher11 entry = (MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], guid);
+}
+/// Calls the kernel's 12-argument entry point for work item 'guid'.
+static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher12 entry = (MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], guid);
+}
+/// Calls the kernel's 13-argument entry point for work item 'guid'.
+static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher13 entry = (MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], a[12], guid);
+}
+/// Calls the kernel's 14-argument entry point for work item 'guid'.
+static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher14 entry = (MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], a[12], a[13], guid);
+}
+/// Calls the kernel's 15-argument entry point for work item 'guid'.
+static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher15 entry = (MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], a[12], a[13], a[14], guid);
+}
+/// Calls the kernel's 16-argument entry point for work item 'guid'.
+static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid)
+{
+	MiniCLKernelLauncher16 entry = (MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode);
+	void** a = taskDesc->m_argData;
+	entry(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15], guid);
+}
+
+// Launcher thunks indexed by argument count: entry N forwards N arguments to the kernel.
+// MiniCLKernel::updateLauncher() picks its entry from this table using m_numArgs.
+static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] =
+{
+ kernelLauncher0,
+ kernelLauncher1,
+ kernelLauncher2,
+ kernelLauncher3,
+ kernelLauncher4,
+ kernelLauncher5,
+ kernelLauncher6,
+ kernelLauncher7,
+ kernelLauncher8,
+ kernelLauncher9,
+ kernelLauncher10,
+ kernelLauncher11,
+ kernelLauncher12,
+ kernelLauncher13,
+ kernelLauncher14,
+ kernelLauncher15,
+ kernelLauncher16
+};
+
+/// Selects the launcher thunk that matches the kernel's current argument count.
+/// An argument count above MINI_CL_MAX_ARG asserts and leaves m_launcher unchanged.
+void MiniCLKernel::updateLauncher()
+{
+	// spLauncherList has MINI_CL_MAX_ARG+1 entries (0..MINI_CL_MAX_ARG arguments);
+	// a larger count would read past the end of the table.
+	const unsigned int maxArgs = (unsigned int)MINI_CL_MAX_ARG;
+	btAssert(m_numArgs <= maxArgs);
+	if (m_numArgs > maxArgs)
+	{
+		return;
+	}
+	m_launcher = spLauncherList[m_numArgs];
+}
+
+// One registered kernel: its entry point and the name it is looked up by.
+struct MiniCLKernelDescEntry
+{
+ void* pCode;
+ char* pName;
+};
+// File-level table of registered kernels; sNumKernelDesc counts the entries in use.
+// It is filled by the MiniCLKernelDesc constructor and searched by MiniCLKernel::registerSelf().
+static MiniCLKernelDescEntry spKernelDesc[256];
+static int sNumKernelDesc = 0;
+
+/// Registers a kernel entry point under pName in the file-level kernel table.
+/// A name that is already present keeps its existing entry. pName is stored by pointer, not copied.
+/// When the table is full the registration asserts and is dropped.
+MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, char* pName)
+{
+	for(int i = 0; i < sNumKernelDesc; i++)
+	{
+		if(!strcmp(pName, spKernelDesc[i].pName))
+		{	// already registered
+			btAssert(spKernelDesc[i].pCode == pCode);
+			return;
+		}
+	}
+
+	// The table has a fixed capacity; never write past its end.
+	const int capacity = int(sizeof(spKernelDesc) / sizeof(spKernelDesc[0]));
+	btAssert(sNumKernelDesc < capacity);
+	if (sNumKernelDesc >= capacity)
+	{
+		return;
+	}
+
+	spKernelDesc[sNumKernelDesc].pCode = pCode;
+	spKernelDesc[sNumKernelDesc].pName = pName;
+	sNumKernelDesc++;
+}
+
+
+/// Adds this kernel to its scheduler, then resolves m_pCode by looking up m_name in the
+/// registered kernel table. Returns this on success and NULL when no entry has that name.
+MiniCLKernel* MiniCLKernel::registerSelf()
+{
+	m_scheduler->registerKernel(this);
+
+	const MiniCLKernelDescEntry* entry = spKernelDesc;
+	const MiniCLKernelDescEntry* const end = spKernelDesc + sNumKernelDesc;
+	for (; entry != end; ++entry)
+	{
+		if (strcmp(m_name, entry->pName) == 0)
+		{
+			m_pCode = entry->pCode;
+			return this;
+		}
+	}
+	return NULL;
+}
+
+#endif
+
+
+#endif //USE_SAMPLE_PROCESS
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h
new file mode 100644
index 00000000000..3061a713436
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h
@@ -0,0 +1,194 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef MINICL_TASK_SCHEDULER_H
+#define MINICL_TASK_SCHEDULER_H
+
+#include <assert.h>
+
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+
+#include <stdlib.h>
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+#include "MiniCLTask/MiniCLTask.h"
+
+//just add your commands here, try to keep them globally unique for debugging purposes
+#define CMD_SAMPLE_TASK_COMMAND 10
+
+struct MiniCLKernel;
+
+/// MiniCLTaskScheduler hands ranges of MiniCL kernel work items to a btThreadSupportInterface.
+/// It owns a fixed number of task slots (maxNumOutstandingTasks); issueTask() fills the current slot
+/// and waits for a completion once every slot is busy, flush() waits for all busy slots.
+class MiniCLTaskScheduler
+{
+ // track task buffers that are being used, and total busy tasks
+ btAlignedObjectArray<bool> m_taskBusy;
+ // one task description per slot, indexed like m_taskBusy
+ btAlignedObjectArray<MiniCLTaskDesc> m_spuSampleTaskDesc;
+
+
+ // kernels added through registerKernel()
+ btAlignedObjectArray<const MiniCLKernel*> m_kernels;
+
+
+ int m_numBusyTasks;
+
+ // the current task and the current entry to insert a new work unit
+ int m_currentTask;
+
+ // set to false by the constructor and to true by initialize()
+ bool m_initialized;
+
+ void postProcess(int taskId, int outputSize);
+
+ // not owned: the destructor only calls stopSPU() on it
+ class btThreadSupportInterface* m_threadInterface;
+
+ int m_maxNumOutstandingTasks;
+
+
+
+public:
+ MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks);
+
+ ~MiniCLTaskScheduler();
+
+ ///resets all task slots to idle; intended to be called before issuing tasks
+ void initialize();
+
+ ///queue the work items [firstWorkUnit, lastWorkUnit) of a kernel as one task
+ void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel);
+
+ ///wait until every busy task slot has reported completion
+ void flush();
+
+ class btThreadSupportInterface* getThreadSupportInterface()
+ {
+ return m_threadInterface;
+ }
+
+ // NOTE(review): declared here, but no definition is visible in the accompanying .cpp - confirm it exists before calling.
+ int findProgramCommandIdByName(const char* programName) const;
+
+ int getMaxNumOutstandingTasks() const
+ {
+ return m_maxNumOutstandingTasks;
+ }
+
+ void registerKernel(MiniCLKernel* kernel)
+ {
+ m_kernels.push_back(kernel);
+ }
+};
+
+typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid);
+
+/// A MiniCL kernel: its name, entry point, argument table and the scheduler it runs on.
+struct MiniCLKernel
+{
+ // scheduler this kernel registers with (see registerSelf)
+ MiniCLTaskScheduler* m_scheduler;
+
+// int m_kernelProgramCommandId;
+
+ // name used to look the entry point up in the registered kernel table
+ char m_name[MINI_CL_MAX_KERNEL_NAME];
+ // number of arguments; also selects the launcher in updateLauncher()
+ unsigned int m_numArgs;
+ // thunk that forwards m_numArgs arguments plus the work item id to m_pCode
+ kernelLauncherCB m_launcher;
+ // kernel entry point, resolved by registerSelf()
+ void* m_pCode;
+ void updateLauncher();
+ MiniCLKernel* registerSelf();
+
+ // argument pointers and sizes; copied into each MiniCLTaskDesc when a task is issued
+ void* m_argData[MINI_CL_MAX_ARG];
+ int m_argSizes[MINI_CL_MAX_ARG];
+};
+
+
+#if defined(USE_LIBSPE2) && defined(__SPU__)
+////////////////////MAIN/////////////////////////////
+#include "../SpuLibspe2Support.h"
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <SpuFakeDma.h>
+
+void * SamplelsMemoryFunc();
+void SampleThreadFunc(void* userPtr,void* lsMemory);
+
+//#define DEBUG_LIBSPE2_MAINLOOP
+
+// SPU-side main loop (only compiled when USE_LIBSPE2 and __SPU__ are both defined).
+// It publishes a free status block at argp, then serves mailbox events until a shutdown event arrives:
+// for a task event it fetches the status and the task description by DMA and runs SampleThreadFunc.
+// NOTE(review): SpuSampleTaskDesc and m_mainMemoryPtr are not declared in this header as seen here;
+// presumably they come from another header - confirm this block still builds.
+int main(unsigned long long speid, addr64 argp, addr64 envp)
+{
+ printf("SPU is up \n");
+
+ ATTRIBUTE_ALIGNED128(btSpuStatus status);
+ ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
+ unsigned int received_message = Spu_Mailbox_Event_Nothing;
+ bool shutdown = false;
+
+ // fetch the status block, mark it free, attach the local store and write it back
+ cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+ status.m_status = Spu_Status_Free;
+ status.m_lsMemory.p = SamplelsMemoryFunc();
+
+ cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+
+ while (!shutdown)
+ {
+ received_message = spu_read_in_mbox();
+
+
+
+ switch(received_message)
+ {
+ case Spu_Mailbox_Event_Shutdown:
+ shutdown = true;
+ break;
+ case Spu_Mailbox_Event_Task:
+ // refresh the status
+#ifdef DEBUG_LIBSPE2_MAINLOOP
+ printf("SPU recieved Task \n");
+#endif //DEBUG_LIBSPE2_MAINLOOP
+ cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+ btAssert(status.m_status==Spu_Status_Occupied);
+
+ // pull the task description referenced by the status block, then run it
+ cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+ SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
+ break;
+ case Spu_Mailbox_Event_Nothing:
+ default:
+ break;
+ }
+
+ // set to status free and wait for next task
+ // (this write-back runs after every event, including the shutdown event)
+ status.m_status = Spu_Status_Free;
+ cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+
+ }
+ return 0;
+}
+//////////////////////////////////////////////////////
+#endif
+
+
+
+#endif // MINICL_TASK_SCHEDULER_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h b/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h
new file mode 100644
index 00000000000..16362f4bce3
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h
@@ -0,0 +1,84 @@
+#ifndef TYPE_DEFINITIONS_H
+#define TYPE_DEFINITIONS_H
+
+///This file provides some platform/compiler checks for common definitions
+
+#ifdef _WIN32
+
+// Win32 variant of addr64: an unsigned int overlaid with a pointer.
+// NOTE(review): despite the name, neither member is declared as a 64-bit integer here - confirm intended use.
+typedef union
+{
+ unsigned int u;
+ void *p;
+} addr64;
+
+#define USE_WIN32_THREADING 1
+
+ #if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
+ #else
+ #endif //__MINGW32__
+
+ // Fixed-width integer names for Win32 builds, which do not include <stdint.h> in this branch.
+ typedef unsigned char uint8_t;
+#ifndef __PHYSICS_COMMON_H__
+#ifndef __BT_SKIP_UINT64_H
+ // NOTE(review): unsigned long is presumably 32 bits wide on Windows targets, which would make
+ // this uint64_t narrower than its name says - confirm before relying on it.
+ typedef unsigned long int uint64_t;
+#endif //__BT_SKIP_UINT64_H
+ typedef unsigned int uint32_t;
+#endif //__PHYSICS_COMMON_H__
+ typedef unsigned short uint16_t;
+
+ #include <malloc.h>
+ // Win32 stand-in for memalign(): the alignment request is ignored and plain malloc() is used.
+ // No trailing semicolon, so the macro can also be used inside a larger expression.
+ #define memalign(alignment, size) malloc(size)
+
+#include <string.h> //memcpy
+
+
+
+ #include <stdio.h>
+ #define spu_printf printf
+
+#else
+ #include <stdint.h>
+ #include <stdlib.h>
+ #include <string.h> //for memcpy
+
+#if defined (__CELLOS_LV2__)
+ // Playstation 3 Cell SDK
+#include <spu_printf.h>
+
+#else
+ // posix system
+
+#define USE_PTHREADS (1)
+
+#ifdef USE_LIBSPE2
+#include <stdio.h>
+#define spu_printf printf
+#define DWORD unsigned int
+
+ // libspe2 variant of addr64: one value viewed as a 64-bit integer (ull),
+ // as two unsigned ints (ui) or as a pointer (p).
+ typedef union
+ {
+ unsigned long long ull;
+ unsigned int ui[2];
+ void *p;
+ } addr64;
+
+
+#else
+
+#include <stdio.h>
+#define spu_printf printf
+
+#endif // USE_LIBSPE2
+
+#endif //__CELLOS_LV2__
+
+#endif
+
+
+/* Included here because we need uint*_t typedefs */
+#include "PpuAddressSpace.h"
+
+#endif //TYPE_DEFINITIONS_H
+
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp
new file mode 100644
index 00000000000..540f0dcf106
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp
@@ -0,0 +1,249 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <stdio.h>
+#include "PosixThreadSupport.h"
+#ifdef USE_PTHREADS
+#include <errno.h>
+#include <unistd.h>
+
+#include "SpuCollisionTaskProcess.h"
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+
+// Reports a failing pthread/semaphore call (non-zero result) together with errno.
+// The argument is evaluated exactly once, so a failing call is not executed a second time while
+// printing; the do/while wrapper makes the macro a single statement that is safe in if/else.
+#define checkPThreadFunction(returnValue) \
+	do { \
+		const int checkedResult_ = (returnValue); \
+		if(0 != checkedResult_) { \
+			printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, checkedResult_, errno); \
+		} \
+	} while(0)
+
+// The number of threads should be equal to the number of available cores
+// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
+
+// PosixThreadSupport runs user tasks on pthreads.
+// The constructor delegates all setup to startThreads().
+PosixThreadSupport::PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo)
+{
+	ThreadConstructionInfo& info = threadConstructionInfo;
+	startThreads(info);
+}
+
+// Destructor: shuts the worker threads down through stopSPU().
+PosixThreadSupport::~PosixThreadSupport()
+{
+	this->stopSPU();
+}
+
+#if (defined (__APPLE__))
+#define NAMED_SEMAPHORES
+#endif
+
+// this semaphore will signal, if and how many threads are finished with their work
+static sem_t* mainSemaphore;
+
+/// Creates a semaphore with an initial count of zero.
+/// With NAMED_SEMAPHORES a semaphore named "/<baseName>-<pid>-<counter>" is opened (the process
+/// exits if that fails); otherwise an unnamed semaphore is allocated on the heap.
+/// Release the result with destroySem().
+static sem_t* createSem(const char* baseName)
+{
+	static int semCount = 0;
+#ifdef NAMED_SEMAPHORES
+	/// Named semaphore begin
+	char name[32];
+	snprintf(name, sizeof(name), "/%s-%d-%4.4d", baseName, getpid(), semCount++);
+	sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
+	if (tempSem != reinterpret_cast<sem_t *>(SEM_FAILED))
+	{
+		//printf("Created \"%s\" Semaphore %x\n", name, tempSem);
+
+		// destroySem() only closes the handle, so remove the name right away: the open handle
+		// stays usable, and the semaphore no longer lingers in the system namespace afterwards.
+		checkPThreadFunction(sem_unlink(name));
+	}
+	else
+	{
+		//printf("Error creating Semaphore %d\n", errno);
+		exit(-1);
+	}
+	/// Named semaphore end
+#else
+	sem_t* tempSem = new sem_t;
+	checkPThreadFunction(sem_init(tempSem, 0, 0));
+#endif
+	return tempSem;
+}
+
+/// Releases a semaphore obtained from createSem(): a named one is closed,
+/// an unnamed one is destroyed and its heap storage freed.
+static void destroySem(sem_t* semaphore)
+{
+#ifndef NAMED_SEMAPHORES
+	checkPThreadFunction(sem_destroy(semaphore));
+	delete semaphore;
+#else
+	checkPThreadFunction(sem_close(semaphore));
+#endif
+}
+
+/// Worker thread entry point; 'argument' is the btSpuStatus slot of this worker.
+/// Each signal on the slot's start semaphore either runs the user task (non-null m_userPtr)
+/// or ends the thread (null m_userPtr). mainSemaphore is posted in both cases.
+static void *threadFunction(void *argument)
+{
+	PosixThreadSupport::btSpuStatus* slot = (PosixThreadSupport::btSpuStatus*)argument;
+
+	for (;;)
+	{
+		checkPThreadFunction(sem_wait(slot->startSemaphore));
+
+		void* request = slot->m_userPtr;
+		if (!request)
+		{
+			//exit Thread
+			slot->m_status = 3;
+			checkPThreadFunction(sem_post(mainSemaphore));
+			printf("Thread with taskId %i exiting\n",slot->m_taskId);
+			break;
+		}
+
+		btAssert(slot->m_status);
+		slot->m_userThreadFunc(request,slot->m_lsMemory);
+		// status 2 is what waitForResponse() scans for to find a finished task
+		slot->m_status = 2;
+		checkPThreadFunction(sem_post(mainSemaphore));
+		slot->threadUsed++;
+	}
+
+	printf("Thread TERMINATED\n");
+	return 0;
+}
+
+///Hands a task to the worker thread with index taskId.
+///Only CMD_GATHER_AND_PROCESS_PAIRLIST is implemented: uiArgument0 is stored as the worker's
+///user pointer and the worker's start semaphore is posted. Any other command asserts.
+void PosixThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
+{
+	switch (uiCommand)
+	{
+	case 	CMD_GATHER_AND_PROCESS_PAIRLIST:
+		{
+			// Validate the index before it is used. taskId is unsigned, so only the upper bound can fail.
+			btAssert(taskId < uint32_t(m_activeSpuStatus.size()));
+			btSpuStatus&	spuStatus = m_activeSpuStatus[taskId];
+
+			spuStatus.m_commandId = uiCommand;
+			spuStatus.m_status = 1;
+			spuStatus.m_userPtr = (void*)uiArgument0;
+
+			// fire event to start new task
+			checkPThreadFunction(sem_post(spuStatus.startSemaphore));
+			break;
+		}
+	default:
+		{
+			///not implemented
+			btAssert(0);
+		}
+
+	};
+
+
+}
+
+
+///Blocks until a worker posts mainSemaphore, then reports the first worker found with status 2 (finished).
+///*puiArgument0 receives that worker's task id. *puiArgument1 receives its status, which has just
+///been reset and is therefore always 0. If no finished worker is found, both outputs are left untouched.
+void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+	btAssert(m_activeSpuStatus.size());
+
+	// wait for any of the threads to finish
+	checkPThreadFunction(sem_wait(mainSemaphore));
+
+	// get at least one thread which has finished
+	// (signed index, so "not found" stays distinguishable from a valid slot)
+	const int numThreads = int(m_activeSpuStatus.size());
+	int last = -1;
+
+	for(int t=0; t < numThreads; ++t) {
+		if(2 == m_activeSpuStatus[t].m_status) {
+			last = t;
+			break;
+		}
+	}
+
+	// need to find an active spu; never index the array with an invalid slot
+	btAssert(last >= 0);
+	if (last < 0)
+	{
+		return;
+	}
+
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+
+	btAssert(spuStatus.m_status > 1);
+	spuStatus.m_status = 0;
+
+	*puiArgument0 = spuStatus.m_taskId;
+	*puiArgument1 = spuStatus.m_status;
+}
+
+
+
+/// Creates mainSemaphore and m_numThreads worker threads, each with its own status slot,
+/// start semaphore and local store memory.
+void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstructionInfo)
+{
+	printf("%s creating %i threads.\n", __FUNCTION__, threadConstructionInfo.m_numThreads);
+	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
+
+	mainSemaphore = createSem("main");
+
+	for (int i=0;i < threadConstructionInfo.m_numThreads;i++)
+	{
+		printf("starting thread %d\n",i);
+
+		btSpuStatus&	spuStatus = m_activeSpuStatus[i];
+
+		// Fill in the whole slot before the thread exists, so the worker is never
+		// handed a pointer to partially initialized state.
+		spuStatus.m_userPtr=0;
+		spuStatus.m_taskId = i;
+		spuStatus.m_commandId = 0;
+		spuStatus.m_status = 0;
+		spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
+		spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+		spuStatus.threadUsed = 0;
+		spuStatus.startSemaphore = createSem("threadLocal");
+
+		checkPThreadFunction(pthread_create(&spuStatus.thread, NULL, &threadFunction, (void*)&spuStatus));
+
+		printf("started thread %d \n",i);
+
+	}
+
+}
+
+/// Nothing to do here: the worker threads are already created by startThreads() in the constructor.
+void PosixThreadSupport::startSPU()
+{
+	// intentionally empty
+}
+
+
+///tell the task scheduler we are done with the SPU tasks
+///Asks every worker thread to exit, waits for it, then releases its semaphore.
+///Safe to call more than once: the second call finds no threads and no main semaphore.
+void PosixThreadSupport::stopSPU()
+{
+	for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) {
+		btSpuStatus&	spuStatus = m_activeSpuStatus[t];
+		printf("%s: Thread %i used: %ld\n", __FUNCTION__, int(t), spuStatus.threadUsed);
+
+		// A start signal with a null user pointer is the worker's exit request (see threadFunction).
+		spuStatus.m_userPtr = 0;
+		checkPThreadFunction(sem_post(spuStatus.startSemaphore));
+
+		// Wait for the worker to leave its loop; only then is it safe to destroy
+		// the semaphore it was blocked on.
+		checkPThreadFunction(pthread_join(spuStatus.thread, 0));
+		destroySem(spuStatus.startSemaphore);
+	}
+
+	// The scheduler destructor and ~PosixThreadSupport both call stopSPU(); clearing the pointer
+	// keeps the second call from destroying the same semaphore again.
+	if (mainSemaphore)
+	{
+		destroySem(mainSemaphore);
+		mainSemaphore = 0;
+	}
+
+	m_activeSpuStatus.clear();
+}
+
+#endif // USE_PTHREADS
+
diff --git a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h
new file mode 100644
index 00000000000..7cc49115b4b
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h
@@ -0,0 +1,124 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+#ifdef USE_PTHREADS //platform specific defines are defined in PlatformDefinitions.h
+#include <pthread.h>
+#include <semaphore.h>
+
+#ifndef POSIX_THREAD_SUPPORT_H
+#define POSIX_THREAD_SUPPORT_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btThreadSupportInterface.h"
+
+
+typedef void (*PosixThreadFunc)(void* userPtr,void* lsMemory);
+typedef void* (*PosixlsMemorySetupFunc)();
+
+// PosixThreadSupport implements btThreadSupportInterface with POSIX threads (pthread_t) and semaphores (sem_t)
+class PosixThreadSupport : public btThreadSupportInterface
+{
+public:
+ typedef enum sStatus {
+ STATUS_BUSY,
+ STATUS_READY,
+ STATUS_FINISHED
+ } Status;
+
+ // bookkeeping for one worker thread; m_activeSpuStatus holds one entry per thread
+ struct btSpuStatus
+ {
+ uint32_t m_taskId;
+ uint32_t m_commandId;
+ uint32_t m_status;
+
+ PosixThreadFunc m_userThreadFunc;
+ void* m_userPtr; //for taskDesc etc
+ void* m_lsMemory; //initialized using PosixlsMemorySetupFunc
+
+ pthread_t thread;
+ sem_t* startSemaphore;
+
+ unsigned long threadUsed;
+ };
+private:
+
+ btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
+public:
+ ///Construction parameters: the function each task runs, the function that
+ ///creates a thread's local-store memory, and the number of threads to start.
+ ///NOTE(review): startThreads() creates its threads with NULL attributes, so
+ ///m_threadStackSize is not applied there; confirm whether anything else reads it.
+ struct ThreadConstructionInfo
+ {
+ ThreadConstructionInfo(char* uniqueName,
+ PosixThreadFunc userThreadFunc,
+ PosixlsMemorySetupFunc lsMemoryFunc,
+ int numThreads=1,
+ int threadStackSize=65535
+ )
+ :m_uniqueName(uniqueName),
+ m_userThreadFunc(userThreadFunc),
+ m_lsMemoryFunc(lsMemoryFunc),
+ m_numThreads(numThreads),
+ m_threadStackSize(threadStackSize)
+ {
+
+ }
+
+ char* m_uniqueName;
+ PosixThreadFunc m_userThreadFunc;
+ PosixlsMemorySetupFunc m_lsMemoryFunc;
+ int m_numThreads;
+ int m_threadStackSize;
+
+ };
+
+ PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo);
+
+///destructor; NOTE(review): presumably shuts the worker threads down - the implementation is not shown in this header
+ virtual ~PosixThreadSupport();
+///creates the semaphores and the worker threads described by threadInfo
+ void startThreads(ThreadConstructionInfo& threadInfo);
+
+
+///send messages to SPUs
+ virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
+
+///check for messages from SPUs
+ virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+
+///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+ virtual void startSPU();
+
+///tell the task scheduler we are done with the SPU tasks
+ virtual void stopSPU();
+///does nothing in this implementation
+ virtual void setNumTasks(int numTasks) {}
+///number of per-thread status entries currently held in m_activeSpuStatus
+ virtual int getNumTasks() const
+ {
+ return m_activeSpuStatus.size();
+ }
+};
+
+#endif // POSIX_THREAD_SUPPORT_H
+
+#endif // USE_PTHREADS
diff --git a/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h b/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h
new file mode 100644
index 00000000000..f36fdfb3cd7
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h
@@ -0,0 +1,20 @@
+#ifndef __PPU_ADDRESS_SPACE_H
+#define __PPU_ADDRESS_SPACE_H
+
+
+#ifdef _WIN32
+//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
+#pragma warning (disable: 4311)
+#pragma warning (disable: 4312)
+#endif //_WIN32
+
+#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64)
+typedef uint64_t ppu_address_t;
+#else
+
+typedef uint32_t ppu_address_t;
+
+#endif
+
+#endif
+
diff --git a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp
new file mode 100644
index 00000000000..4e9c822bbc0
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp
@@ -0,0 +1,93 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SequentialThreadSupport.h"
+
+
+#include "SpuCollisionTaskProcess.h"
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+
+SequentialThreadSupport::SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo)
+{
+ startThreads(threadConstructionInfo);
+}
+
+///cleanup/shutdown Libspe2
+SequentialThreadSupport::~SequentialThreadSupport()
+{
+ stopSPU();
+}
+
+#include <stdio.h>
+
+///send messages to SPUs
+///
+///There are no worker threads in this implementation, so the request is
+///executed synchronously on the calling thread before this function returns.
+///uiCommand:   only CMD_GATHER_AND_PROCESS_PAIRLIST is handled
+///uiArgument0: value handed to the user function as its userPtr
+///taskId:      not used; the single status slot 0 is always used
+void SequentialThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
+{
+    if (uiCommand != CMD_GATHER_AND_PROCESS_PAIRLIST)
+    {
+        ///not implemented
+        btAssert(0 && "Not implemented");
+        return;
+    }
+
+    // Remember the argument in the status slot, then run the task right here.
+    btSpuStatus& slot = m_activeSpuStatus[0];
+    void* const taskArgument = (void*)uiArgument0;
+    slot.m_userPtr = taskArgument;
+    slot.m_userThreadFunc(taskArgument, slot.m_lsMemory);
+}
+
+///check for messages from SPUs
+///Only reports the task id and status stored in slot 0; sendRequest() runs the task itself, so nothing is waited for here.
+void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+ btAssert(m_activeSpuStatus.size());
+ btSpuStatus& spuStatus = m_activeSpuStatus[0];
+ *puiArgument0 = spuStatus.m_taskId;
+ *puiArgument1 = spuStatus.m_status;
+}
+///Sets up the single status slot used by this implementation; no thread is created.
+void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& threadConstructionInfo)
+{
+    m_activeSpuStatus.resize(1);
+    printf("STS: Not starting any threads\n");
+    btSpuStatus& slot = m_activeSpuStatus[0];
+    slot.m_taskId = 0;
+    slot.m_commandId = 0;
+    slot.m_status = 0;
+    slot.m_userPtr = 0;
+    slot.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+    slot.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
+    printf("STS: Created local store at %p for task %s\n", slot.m_lsMemory, threadConstructionInfo.m_uniqueName);
+}
+void SequentialThreadSupport::startSPU()
+{
+}
+
+void SequentialThreadSupport::stopSPU()
+{
+ m_activeSpuStatus.clear();
+}
+
+void SequentialThreadSupport::setNumTasks(int numTasks)
+{
+ printf("SequentialThreadSupport::setNumTasks(%d) is not implemented and has no effect\n",numTasks);
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h
new file mode 100644
index 00000000000..4256ebd2aa9
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h
@@ -0,0 +1,92 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+
+#ifndef SEQUENTIAL_THREAD_SUPPORT_H
+#define SEQUENTIAL_THREAD_SUPPORT_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btThreadSupportInterface.h"
+
+typedef void (*SequentialThreadFunc)(void* userPtr,void* lsMemory);
+typedef void* (*SequentiallsMemorySetupFunc)();
+
+
+
+///The SequentialThreadSupport is a portable non-parallel implementation of the btThreadSupportInterface
+///This is useful for debugging and porting SPU Tasks to other platforms.
+class SequentialThreadSupport : public btThreadSupportInterface
+{
+public:
+ struct btSpuStatus
+ {
+ uint32_t m_taskId;
+ uint32_t m_commandId;
+ uint32_t m_status;
+
+ SequentialThreadFunc m_userThreadFunc;
+
+ void* m_userPtr; //for taskDesc etc
+ void* m_lsMemory; //initialized using SequentiallsMemorySetupFunc
+ };
+private:
+ btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
+ btAlignedObjectArray<void*> m_completeHandles;
+public:
+ struct SequentialThreadConstructionInfo
+ {
+ SequentialThreadConstructionInfo (char* uniqueName,
+ SequentialThreadFunc userThreadFunc,
+ SequentiallsMemorySetupFunc lsMemoryFunc
+ )
+ :m_uniqueName(uniqueName),
+ m_userThreadFunc(userThreadFunc),
+ m_lsMemoryFunc(lsMemoryFunc)
+ {
+
+ }
+
+ char* m_uniqueName;
+ SequentialThreadFunc m_userThreadFunc;
+ SequentiallsMemorySetupFunc m_lsMemoryFunc;
+ };
+
+ SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo);
+ virtual ~SequentialThreadSupport();
+ void startThreads(SequentialThreadConstructionInfo& threadInfo);
+///send messages to SPUs
+ virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
+///check for messages from SPUs
+ virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+ virtual void startSPU();
+///tell the task scheduler we are done with the SPU tasks
+ virtual void stopSPU();
+
+ virtual void setNumTasks(int numTasks);
+
+ virtual int getNumTasks() const
+ {
+ return 1;
+ }
+
+};
+
+#endif //SEQUENTIAL_THREAD_SUPPORT_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp
new file mode 100644
index 00000000000..182aa269478
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp
@@ -0,0 +1,48 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuCollisionObjectWrapper.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+
+SpuCollisionObjectWrapper::SpuCollisionObjectWrapper ()
+{
+}
+
+#ifndef __SPU__
+SpuCollisionObjectWrapper::SpuCollisionObjectWrapper (const btCollisionObject* collisionObject)
+{
+    const btCollisionShape* shape = collisionObject->getCollisionShape(); // queried once for both type and margin
+    m_shapeType = shape->getShapeType ();
+    m_collisionObjectPtr = (ppu_address_t)collisionObject;
+    m_margin = shape->getMargin ();
+}
+#endif
+int
+SpuCollisionObjectWrapper::getShapeType () const
+{
+ return m_shapeType;
+}
+
+float
+SpuCollisionObjectWrapper::getCollisionMargin () const
+{
+ return m_margin;
+}
+
+ppu_address_t
+SpuCollisionObjectWrapper::getCollisionObjectPtr () const
+{
+ return m_collisionObjectPtr;
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h
new file mode 100644
index 00000000000..36ea49209e2
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h
@@ -0,0 +1,40 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_COLLISION_OBJECT_WRAPPER_H
+#define SPU_COLLISION_OBJECT_WRAPPER_H
+
+#include "PlatformDefinitions.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+ATTRIBUTE_ALIGNED16(class) SpuCollisionObjectWrapper
+{
+protected:
+ int m_shapeType;
+ float m_margin;
+ ppu_address_t m_collisionObjectPtr;
+
+public:
+ SpuCollisionObjectWrapper ();
+
+ SpuCollisionObjectWrapper (const btCollisionObject* collisionObject);
+
+ int getShapeType () const;
+ float getCollisionMargin () const;
+ ppu_address_t getCollisionObjectPtr () const;
+};
+
+
+#endif //SPU_COLLISION_OBJECT_WRAPPER_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
new file mode 100644
index 00000000000..86eda8697d0
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
@@ -0,0 +1,318 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+//#define DEBUG_SPU_TASK_SCHEDULING 1
+
+
+//class OptimizedBvhNode;
+
+#include "SpuCollisionTaskProcess.h"
+
+
+
+
+///Resizes the per-task bookkeeping and the work-unit buffer for maxNumTasks outstanding tasks.
+///Does nothing when the count is unchanged; otherwise every task slot is marked idle again.
+void SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
+{
+    if (int(m_maxNumOutstandingTasks) != maxNumTasks)
+    {
+        m_maxNumOutstandingTasks = maxNumTasks;
+        m_taskBusy.resize(m_maxNumOutstandingTasks);
+        m_spuGatherTaskDesc.resize(m_maxNumOutstandingTasks);
+
+        for (int i = 0; i < m_taskBusy.size(); i++)
+        {
+            m_taskBusy[i] = false;
+        }
+
+        ///re-allocate task memory buffers
+        if (m_workUnitTaskBuffers != 0)
+        {
+            btAlignedFree(m_workUnitTaskBuffers);
+        }
+        // Allocate exactly once, with one MIDPHASE_WORKUNIT_TASK_SIZE slice per task slot (the layout MIDPHASE_TASK_PTR indexes).
+        m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128);
+    }
+}
+
+
+///Stores the thread interface, sizes the task bookkeeping via setNumTasks(), zeroes the write position and calls startSPU().
+SpuCollisionTaskProcess::SpuCollisionTaskProcess(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks)
+:m_threadInterface(threadInterface),
+m_maxNumOutstandingTasks(0)
+{
+ m_workUnitTaskBuffers = (unsigned char *)0;
+ setNumTasks(maxNumOutstandingTasks); // count starts at 0 and the buffer at null, so a non-zero request allocates and there is nothing to free
+ m_numBusyTasks = 0;
+ m_currentTask = 0;
+ m_currentPage = 0;
+ m_currentPageEntry = 0;
+
+#ifdef DEBUG_SpuCollisionTaskProcess
+ m_initialized = false;
+#endif
+
+ m_threadInterface->startSPU();
+
+ //printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
+ printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", int(sizeof(SpuGatherAndProcessWorkUnitInput)));
+
+}
+///Releases the work-unit buffer, then calls stopSPU() on the thread interface.
+SpuCollisionTaskProcess::~SpuCollisionTaskProcess()
+{
+
+ if (m_workUnitTaskBuffers != 0)
+ {
+ btAlignedFree(m_workUnitTaskBuffers);
+ m_workUnitTaskBuffers = 0;
+ }
+
+
+
+ m_threadInterface->stopSPU();
+
+}
+
+
+///Marks every task slot idle, rewinds the write position to task 0 / page 0 / entry 0 and records useEpa for the tasks issued afterwards.
+void SpuCollisionTaskProcess::initialize2(bool useEpa)
+{
+
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+ printf("SpuCollisionTaskProcess::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+ for (int i = 0; i < int (m_maxNumOutstandingTasks); i++)
+ {
+ m_taskBusy[i] = false;
+ }
+ m_numBusyTasks = 0;
+ m_currentTask = 0;
+ m_currentPage = 0;
+ m_currentPageEntry = 0;
+ m_useEpa = useEpa;
+
+#ifdef DEBUG_SpuCollisionTaskProcess
+ m_initialized = true;
+ btAssert(MIDPHASE_NUM_WORKUNITS_PER_TASK*sizeof(SpuGatherAndProcessWorkUnitInput) <= MIDPHASE_WORKUNIT_TASK_SIZE);
+#endif
+}
+
+
+///Hands the current task buffer to the thread interface; when that leaves no
+///free task slot, waits for one task to complete and marks its slot idle again.
+void SpuCollisionTaskProcess::issueTask2()
+{
+
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+    printf("SpuCollisionTaskProcess::issueTask (m_currentTask= %d\n)", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+    m_taskBusy[m_currentTask] = true;
+    m_numBusyTasks++;
+
+
+    SpuGatherAndProcessPairsTaskDesc& taskDesc = m_spuGatherTaskDesc[m_currentTask];
+    taskDesc.m_useEpa = m_useEpa;
+
+    {
+        // send task description in event message
+        // no error checking here...
+        // but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
+
+        taskDesc.m_inPairPtr = reinterpret_cast<uint64_t>(MIDPHASE_TASK_PTR(m_currentTask));
+
+        taskDesc.taskId = m_currentTask;
+        taskDesc.numPages = m_currentPage+1;
+        taskDesc.numOnLastPage = m_currentPageEntry;
+    }
+
+    m_threadInterface->sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc,m_currentTask);
+
+
+    // if all tasks busy, wait for spu event to clear the task.
+    if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+    {
+        // Start from an out-of-range sentinel so that a failed search trips the assert below.
+        unsigned int taskId = m_maxNumOutstandingTasks;
+        unsigned int outputSize;
+
+        for (int i=0;i<int (m_maxNumOutstandingTasks);i++)
+        {
+            if (m_taskBusy[i])
+            {
+                taskId = i;
+                break;
+            }
+        }
+
+        // taskId is unsigned: compare against the sentinel instead of testing >= 0.
+        btAssert(taskId < m_maxNumOutstandingTasks);
+
+        // waitForResponse reports the finished task through taskId
+        m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+//      printf("issueTask taskId %d completed, numBusy=%d\n",taskId,m_numBusyTasks);
+
+        //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+        //postProcess(taskId, outputSize);
+
+        m_taskBusy[taskId] = false;
+
+        m_numBusyTasks--;
+    }
+
+}
+
+///Appends one work unit (pairArrayPtr plus startIndex/endIndex) to the current task buffer.
+///When the last page of that buffer is already full, the task is issued first and the first idle task slot becomes current.
+void SpuCollisionTaskProcess::addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex)
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+ printf("#");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+#ifdef DEBUG_SpuCollisionTaskProcess
+ btAssert(m_initialized);
+ btAssert(m_workUnitTaskBuffers);
+
+#endif
+ // batch is always true as written, so the "!batch" branch near the end of this function is not taken
+ bool batch = true;
+
+ if (batch)
+ {
+ if (m_currentPageEntry == MIDPHASE_NUM_WORKUNITS_PER_PAGE)
+ {
+ if (m_currentPage == MIDPHASE_NUM_WORKUNIT_PAGES-1)
+ {
+ // task buffer is full, issue current task.
+ // if all task buffers busy, this waits until SPU is done.
+ issueTask2();
+
+ // find new task buffer
+ for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
+ {
+ if (!m_taskBusy[i])
+ {
+ m_currentTask = i;
+ //init the task data
+
+ break;
+ }
+ }
+
+ m_currentPage = 0;
+ }
+ else
+ {
+ m_currentPage++;
+ }
+
+ m_currentPageEntry = 0;
+ }
+ }
+
+ {
+ // write the work unit into entry m_currentPageEntry of the current task/page, then advance the entry index
+ SpuGatherAndProcessWorkUnitInput &wuInput =
+ *(reinterpret_cast<SpuGatherAndProcessWorkUnitInput*>
+ (MIDPHASE_ENTRY_PTR(m_currentTask, m_currentPage, m_currentPageEntry)));
+
+ wuInput.m_pairArrayPtr = reinterpret_cast<uint64_t>(pairArrayPtr);
+ wuInput.m_startIndex = startIndex;
+ wuInput.m_endIndex = endIndex;
+
+
+
+ m_currentPageEntry++;
+
+ if (!batch)
+ {
+ issueTask2();
+
+ // find new task buffer
+ for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
+ {
+ if (!m_taskBusy[i])
+ {
+ m_currentTask = i;
+ //init the task data
+
+ break;
+ }
+ }
+
+ m_currentPage = 0;
+ m_currentPageEntry =0;
+ }
+ }
+}
+
+
+///Submits a partially filled task buffer, if any, then blocks until every
+///outstanding task has been reported complete by the thread interface.
+void
+SpuCollisionTaskProcess::flush2()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+    printf("\nSpuCollisionTaskProcess::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+    // if there's a partially filled task buffer, submit that task
+    if (m_currentPage > 0 || m_currentPageEntry > 0)
+    {
+        issueTask2();
+    }
+
+    // all tasks are issued, wait for all tasks to be complete
+    while(m_numBusyTasks > 0)
+    {
+        // Consolidating SPU code
+        // Start from an out-of-range sentinel so that a failed search trips the assert below.
+        unsigned int taskId = m_maxNumOutstandingTasks;
+        unsigned int outputSize;
+
+        for (int i=0;i<int (m_maxNumOutstandingTasks);i++)
+        {
+            if (m_taskBusy[i])
+            {
+                taskId = i;
+                break;
+            }
+        }
+
+        // taskId is unsigned: compare against the sentinel instead of testing >= 0.
+        btAssert(taskId < m_maxNumOutstandingTasks);
+
+        {
+            // SPURS support.
+            m_threadInterface->waitForResponse(&taskId, &outputSize);
+        }
+//      printf("flush2 taskId %d completed, numBusy =%d \n",taskId,m_numBusyTasks);
+        //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+        //postProcess(taskId, outputSize);
+
+        m_taskBusy[taskId] = false;
+
+        m_numBusyTasks--;
+    }
+
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h
new file mode 100644
index 00000000000..2614be6c479
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h
@@ -0,0 +1,163 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_COLLISION_TASK_PROCESS_H
+#define SPU_COLLISION_TASK_PROCESS_H
+
+#include <assert.h>
+
+#include "LinearMath/btScalar.h"
+
+#include "PlatformDefinitions.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" // for definitions processCollisionTask and createCollisionLocalStoreMemory
+
+#include "btThreadSupportInterface.h"
+
+
+//#include "SPUAssert.h"
+#include <string.h>
+
+
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+#include <stdio.h>
+
+
+#define DEBUG_SpuCollisionTaskProcess 1
+
+
+#define CMD_GATHER_AND_PROCESS_PAIRLIST 1
+
+class btCollisionObject;
+class btPersistentManifold;
+class btDispatcher;
+
+
+/////Task Description for SPU collision detection
+//struct SpuGatherAndProcessPairsTaskDesc
+//{
+// uint64_t inPtr;//m_pairArrayPtr;
+// //mutex variable
+// uint32_t m_someMutexVariableInMainMemory;
+//
+// uint64_t m_dispatcher;
+//
+// uint32_t numOnLastPage;
+//
+// uint16_t numPages;
+// uint16_t taskId;
+//
+// struct CollisionTask_LocalStoreMemory* m_lsMemory;
+//}
+//
+//#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
+//__attribute__ ((aligned (16)))
+//#endif
+//;
+
+
+///MidphaseWorkUnitInput stores individual primitive versus mesh collision detection input, to be processed by the SPU.
+ATTRIBUTE_ALIGNED16(struct) SpuGatherAndProcessWorkUnitInput
+{
+ uint64_t m_pairArrayPtr;
+ int m_startIndex;
+ int m_endIndex;
+};
+
+
+
+
+/// SpuCollisionTaskProcess handles SPU processing of collision pairs.
+/// Maintains a set of task buffers.
+/// When the task is full, the task is issued for SPUs to process. Contact output goes into btPersistentManifold
+/// associated with each task.
+/// When PPU issues a task, it will look for completed task buffers
+/// PPU will do postprocessing, dependent on workunit output (not likely)
+class SpuCollisionTaskProcess
+{
+
+ unsigned char *m_workUnitTaskBuffers;
+
+
+ // track task buffers that are being used, and total busy tasks
+ btAlignedObjectArray<bool> m_taskBusy;
+ btAlignedObjectArray<SpuGatherAndProcessPairsTaskDesc> m_spuGatherTaskDesc;
+
+ class btThreadSupportInterface* m_threadInterface;
+
+ unsigned int m_maxNumOutstandingTasks;
+
+ unsigned int m_numBusyTasks;
+
+ // the current task and the current entry to insert a new work unit
+ unsigned int m_currentTask;
+ unsigned int m_currentPage;
+ unsigned int m_currentPageEntry;
+
+ bool m_useEpa;
+
+#ifdef DEBUG_SpuCollisionTaskProcess
+ bool m_initialized;
+#endif
+ void issueTask2();
+ //void postProcess(unsigned int taskId, int outputSize);
+
+public:
+ SpuCollisionTaskProcess(btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks);
+
+ ~SpuCollisionTaskProcess();
+
+ ///call initialize in the beginning of the frame, before addCollisionPairToTask
+ void initialize2(bool useEpa = false);
+
+ ///batch up additional work to a current task for SPU processing. When batch is full, it issues the task.
+ void addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex);
+
+ ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
+ void flush2();
+
+ /// set the maximum number of SPU tasks allocated
+ void setNumTasks(int maxNumTasks);
+
+ int getNumTasks() const
+ {
+ return m_maxNumOutstandingTasks;
+ }
+};
+
+
+// Address helpers into the per-task buffers; arguments are parenthesised so that expressions such as "i+1" expand correctly.
+#define MIDPHASE_TASK_PTR(task) (&m_workUnitTaskBuffers[0] + MIDPHASE_WORKUNIT_TASK_SIZE*(task))
+#define MIDPHASE_ENTRY_PTR(task,page,entry) (MIDPHASE_TASK_PTR(task) + MIDPHASE_WORKUNIT_PAGE_SIZE*(page) + sizeof(SpuGatherAndProcessWorkUnitInput)*(entry))
+#define MIDPHASE_OUTPUT_PTR(task) (&m_contactOutputBuffers[0] + MIDPHASE_MAX_CONTACT_BUFFER_SIZE*(task))
+#define MIDPHASE_TREENODES_PTR(task) (&m_complexShapeBuffers[0] + MIDPHASE_COMPLEX_SHAPE_BUFFER_SIZE*(task))
+
+
+#define MIDPHASE_WORKUNIT_PAGE_SIZE (16)
+//#define MIDPHASE_WORKUNIT_PAGE_SIZE (128)
+
+#define MIDPHASE_NUM_WORKUNIT_PAGES 1
+#define MIDPHASE_WORKUNIT_TASK_SIZE (MIDPHASE_WORKUNIT_PAGE_SIZE*MIDPHASE_NUM_WORKUNIT_PAGES)
+#define MIDPHASE_NUM_WORKUNITS_PER_PAGE (MIDPHASE_WORKUNIT_PAGE_SIZE / sizeof(SpuGatherAndProcessWorkUnitInput))
+#define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES)
+
+
+#endif // SPU_COLLISION_TASK_PROCESS_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
new file mode 100644
index 00000000000..286b63191ee
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuContactManifoldCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+
+
+
+
+void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+ btAssert(0);
+}
+
+btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+ btAssert(0);
+ return 1.f;
+}
+
+#ifndef __SPU__
+///Takes a new manifold from the dispatcher and caches, for both bodies, the shape type, the collision margin and (polyhedral shapes only) the implicit shape dimensions.
+SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
+:btCollisionAlgorithm(ci)
+#ifdef USE_SEPDISTANCE_UTIL
+,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc())
+#endif //USE_SEPDISTANCE_UTIL
+{
+ m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+ m_shapeType0 = body0->getCollisionShape()->getShapeType();
+ m_shapeType1 = body1->getCollisionShape()->getShapeType();
+ m_collisionMargin0 = body0->getCollisionShape()->getMargin();
+ m_collisionMargin1 = body1->getCollisionShape()->getMargin();
+ m_collisionObject0 = body0;
+ m_collisionObject1 = body1;
+ // NOTE(review): m_shapeDimensions0/1 are assigned only for polyhedral shapes; for any other shape nothing in this constructor sets them - confirm that readers check the shape type first.
+ if (body0->getCollisionShape()->isPolyhedral())
+ {
+ btPolyhedralConvexShape* convex0 = (btPolyhedralConvexShape*)body0->getCollisionShape();
+ m_shapeDimensions0 = convex0->getImplicitShapeDimensions();
+ }
+ if (body1->getCollisionShape()->isPolyhedral())
+ {
+ btPolyhedralConvexShape* convex1 = (btPolyhedralConvexShape*)body1->getCollisionShape();
+ m_shapeDimensions1 = convex1->getImplicitShapeDimensions();
+ }
+}
+#endif //__SPU__
+
+SpuContactManifoldCollisionAlgorithm::~SpuContactManifoldCollisionAlgorithm()
+{
+ if (m_manifoldPtr)
+ m_dispatcher->releaseManifold(m_manifoldPtr);
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
new file mode 100644
index 00000000000..151cb2c7966
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
@@ -0,0 +1,120 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+#define SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "LinearMath/btTransformUtil.h"
+
+class btPersistentManifold;
+
+//#define USE_SEPDISTANCE_UTIL 1
+
+/// SpuContactManifoldCollisionAlgorithm stores, per pair of collision objects, the contact manifold plus shape type, margin and dimensions of both shapes; it should be processed on SPU (the processCollision/calculateTimeOfImpact entry points here only assert).
+ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm
+{
+ btVector3 m_shapeDimensions0; // implicit shape dimensions of body0's shape, taken from the shape when it is polyhedral
+ btVector3 m_shapeDimensions1; // same for body1
+ btPersistentManifold* m_manifoldPtr; // manifold obtained from the dispatcher in the constructor, released in the destructor
+ int m_shapeType0; // getShapeType() of body0's collision shape
+ int m_shapeType1; // getShapeType() of body1's collision shape
+ float m_collisionMargin0; // getMargin() of body0's collision shape
+ float m_collisionMargin1; // getMargin() of body1's collision shape
+
+ btCollisionObject* m_collisionObject0; // body0 as passed to the constructor (not owned)
+ btCollisionObject* m_collisionObject1; // body1 as passed to the constructor (not owned)
+
+
+
+
+public:
+
+ virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); // stub: asserts
+
+ virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); // stub: asserts, returns 1
+
+
+ SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1); // defined only when __SPU__ is not set
+#ifdef USE_SEPDISTANCE_UTIL
+ btConvexSeparatingDistanceUtil m_sepDistance; // only present when USE_SEPDISTANCE_UTIL is enabled (commented out above)
+#endif //USE_SEPDISTANCE_UTIL
+
+ virtual ~SpuContactManifoldCollisionAlgorithm();
+
+ virtual void getAllContactManifolds(btManifoldArray& manifoldArray) // appends the manifold, if there is one
+ {
+ if (m_manifoldPtr)
+ manifoldArray.push_back(m_manifoldPtr);
+ }
+
+ btPersistentManifold* getContactManifoldPtr() // may be null
+ {
+ return m_manifoldPtr;
+ }
+
+ btCollisionObject* getCollisionObject0()
+ {
+ return m_collisionObject0;
+ }
+
+ btCollisionObject* getCollisionObject1()
+ {
+ return m_collisionObject1;
+ }
+
+ int getShapeType0() const
+ {
+ return m_shapeType0;
+ }
+
+ int getShapeType1() const
+ {
+ return m_shapeType1;
+ }
+ float getCollisionMargin0() const
+ {
+ return m_collisionMargin0;
+ }
+ float getCollisionMargin1() const
+ {
+ return m_collisionMargin1;
+ }
+
+ const btVector3& getShapeDimensions0() const
+ {
+ return m_shapeDimensions0;
+ }
+
+ const btVector3& getShapeDimensions1() const
+ {
+ return m_shapeDimensions1;
+ }
+
+ struct CreateFunc :public btCollisionAlgorithmCreateFunc // factory: placement-constructs the algorithm in dispatcher-provided memory
+ {
+ virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+ {
+ void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm)); // storage comes from the dispatcher, not from operator new
+ return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1);
+ }
+ };
+
+};
+
+#endif //SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h b/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h
new file mode 100644
index 00000000000..a0695744bd5
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h
@@ -0,0 +1,110 @@
+#ifndef DOUBLE_BUFFER_H
+#define DOUBLE_BUFFER_H
+
+#include "SpuFakeDma.h"
+#include "LinearMath/btScalar.h"
+
+
+///DoubleBuffer holds two arrays of 'size' elements of T: a front buffer handed to the caller and a back buffer that is the target/source of DMA transfers; swapBuffers() exchanges the two.
+template<class T, int size>
+class DoubleBuffer
+{
+#if defined(__SPU__) || defined(USE_LIBSPE2)
+ ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ; // 128-byte aligned storage on SPU/libspe2 builds
+ ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ;
+#else
+ T m_buffer0[size]; // plain storage on all other builds
+ T m_buffer1[size];
+#endif
+
+ T *m_frontBuffer; // initially m_buffer0, see init()
+ T *m_backBuffer; // initially m_buffer1; backBufferDmaGet() may replace it with the pointer returned by cellDmaLargeGetReadOnly
+
+ unsigned int m_dmaTag; // tag of the last transfer started; only read while m_dmaPending is true
+ bool m_dmaPending; // set by the backBufferDma* calls, cleared by swapBuffers()
+public:
+ bool isPending() const { return m_dmaPending;}
+ DoubleBuffer(); // calls init()
+
+ void init (); // resets the front/back pointers and clears the pending flag
+
+ // dma get and put commands: both operate on the back buffer and mark a transfer as pending under 'tag'
+ void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag);
+ void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag);
+
+ // gets pointer to a buffer
+ T *getFront();
+ T *getBack();
+
+ // if back buffer dma was started, wait for it to complete
+ // then move back to front and vice versa; returns the new front buffer
+ T *swapBuffers();
+};
+
+/// Constructs the buffer pair in its reset state (see init()).
+template<class T, int size>
+DoubleBuffer<T,size>::DoubleBuffer()
+{
+    this->init();
+}
+
+/// Resets the pair: buffer 0 becomes the front buffer, buffer 1 the back buffer, no DMA pending.
+template<class T, int size>
+void DoubleBuffer<T,size>::init()
+{
+    m_frontBuffer = m_buffer0;
+    m_backBuffer = m_buffer1;
+    m_dmaPending = false;
+}
+
+/// Starts fetching numBytes from effective address ea into the back buffer and records the
+/// transfer as pending under 'tag'. With numBytes == 0 nothing is fetched, but the pending
+/// flag and tag are still updated.
+template<class T, int size>
+void
+DoubleBuffer<T,size>::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag)
+{
+    m_dmaTag = tag;
+    m_dmaPending = true;
+
+    if (!numBytes)
+        return;
+
+    // keep whatever pointer the read-only get hands back: it is not necessarily the buffer passed in
+    void* fetched = cellDmaLargeGetReadOnly(m_backBuffer, ea, numBytes, tag, 0, 0);
+    m_backBuffer = (T*)fetched;
+}
+
+/// Starts writing numBytes from the back buffer to effective address ea and records the
+/// transfer as pending under 'tag'.
+template<class T, int size>
+void
+DoubleBuffer<T,size>::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag)
+{
+    m_dmaTag = tag;
+    m_dmaPending = true;
+
+    cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0);
+}
+
+/// Current front buffer.
+template<class T, int size>
+T *
+DoubleBuffer<T,size>::getFront()
+{
+    T* const front = m_frontBuffer;
+    return front;
+}
+
+/// Current back buffer (the one the DMA calls operate on).
+template<class T, int size>
+T *
+DoubleBuffer<T,size>::getBack()
+{
+    T* const back = m_backBuffer;
+    return back;
+}
+
+/// Waits for a pending back-buffer transfer (if any), then exchanges front and back buffer.
+/// @return the new front buffer, i.e. the former back buffer
+template<class T, int size>
+T *
+DoubleBuffer<T,size>::swapBuffers()
+{
+    if (m_dmaPending)
+    {
+        cellDmaWaitTagStatusAll(1<<m_dmaTag);
+        m_dmaPending = false;
+    }
+
+    T* const newFront = m_backBuffer;
+    m_backBuffer = m_frontBuffer;
+    m_frontBuffer = newFront;
+
+    return newFront;
+}
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp b/extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp
new file mode 100644
index 00000000000..62cef39612d
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp
@@ -0,0 +1,211 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuFakeDma.h"
+#include <LinearMath/btScalar.h> //for btAssert
+//Disabling memcpy sometimes helps debugging DMA
+
+#define USE_MEMCPY 1
+#ifdef USE_MEMCPY
+
+#endif
+
+
+/// Read-only "large get": makes 'size' bytes at effective address 'ea' readable and returns where to read them.
+/// On SPU/libspe2 builds a DMA into 'ls' is issued (it is not waited for here) and 'ls' is returned.
+/// On all other builds nothing is copied: the effective address itself is returned as a pointer.
+/// @param ls    local-store destination (only used on SPU/libspe2 builds)
+/// @param ea    effective (main memory) address of the data
+/// @param size  number of bytes
+/// @param tag,tid,rid  forwarded to the DMA call on SPU/libspe2 builds, otherwise unused
+void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+    cellDmaLargeGet(ls,ea,size,tag,tid,rid);
+    return ls;
+#else
+    // Go through size_t rather than uint32_t: the 32-bit cast threw away the upper half of the
+    // address on 64-bit hosts and produced a bogus pointer.
+    return (void*)(size_t)ea;
+#endif
+}
+
+/// Read-only "small get": makes 'size' bytes at effective address 'ea' readable and returns where to read them.
+/// On SPU/libspe2 builds an mfc_get into 'ls' is issued (tid/rid are passed as 0, the transfer is not
+/// waited for here) and 'ls' is returned. On all other builds nothing is copied: the effective
+/// address itself is returned as a pointer.
+void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+    mfc_get(ls,ea,size,tag,0,0);
+    return ls;
+#else
+    // Go through size_t rather than uint32_t: the 32-bit cast threw away the upper half of the
+    // address on 64-bit hosts and produced a bogus pointer.
+    return (void*)(size_t)ea;
+#endif
+}
+
+
+
+
+/// Read-only get: makes 'size' bytes at effective address 'ea' readable and returns where to read them.
+/// On SPU/libspe2 builds a DMA into 'ls' is issued (it is not waited for here) and 'ls' is returned.
+/// On all other builds nothing is copied: the effective address itself is returned as a pointer.
+void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+    cellDmaGet(ls,ea,size,tag,tid,rid);
+    return ls;
+#else
+    // Go through size_t rather than uint32_t: the 32-bit cast threw away the upper half of the
+    // address on 64-bit hosts and produced a bogus pointer.
+    return (void*)(size_t)ea;
+#endif
+}
+
+
+///stallingUnalignedDmaSmallGet copies a small block (size < 32 bytes, asserted) from effective address ea to ls and
+///waits for the transfer before returning, so it should not be used frequently. The source may be unaligned:
+///the data is staged in an aligned scratch buffer at the same offset within a 16-byte line as ea. Always returns 0.
+int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
+{
+
+    btAssert(size<32);
+
+    //The staged data starts up to 15 bytes into the scratch buffer and is up to 31 bytes long, so as many as
+    //46 bytes are touched. A 32-byte buffer is too small for that; 48 keeps the 16-byte granularity.
+    ATTRIBUTE_ALIGNED16(char tmpBuffer[48]);
+
+
+    char* localStore = (char*)ls;
+    uint32_t i;
+
+
+    ///make sure last 4 bits are the same, for cellDmaSmallGet
+    uint32_t last4BitsOffset = ea & 0x0f;
+    char* tmpTarget = tmpBuffer + last4BitsOffset;
+
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+
+    int remainingSize = size;
+
+//#define FORCE_cellDmaUnalignedGet 1
+#ifdef FORCE_cellDmaUnalignedGet
+    cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
+#else
+    char* remainingTmpTarget = tmpTarget;
+    uint64_t remainingEa = ea;
+
+    //split the request into transfers of 16, 8, 4, 2 or 1 bytes, largest first
+    while (remainingSize)
+    {
+        switch (remainingSize)
+        {
+        case 1:
+        case 2:
+        case 4:
+        case 8:
+        case 16:
+            {
+                //the remainder is a legal transfer size by itself: one last transfer
+                mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0);
+                remainingSize=0;
+                break;
+            }
+        default:
+            {
+                //spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize);
+                //remainingSize is at least 3 here, so one of the branches below always picks a size
+                int actualSize = 0;
+
+                if (remainingSize > 16)
+                    actualSize = 16;
+                else
+                    if (remainingSize >8)
+                        actualSize=8;
+                    else
+                        if (remainingSize >4)
+                            actualSize=4;
+                        else
+                            if (remainingSize >2)
+                                actualSize=2;
+                mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0);
+                remainingSize-=actualSize;
+                remainingTmpTarget+=actualSize;
+                remainingEa += actualSize;
+            }
+        }
+    }
+#endif//FORCE_cellDmaUnalignedGet
+
+#else
+    char* mainMem = (char*)ea;
+    //copy into final destination
+#ifdef USE_MEMCPY
+
+    memcpy(tmpTarget,mainMem,size);
+#else
+    for ( i=0;i<size;i++)
+    {
+        tmpTarget[i] = mainMem[i];
+    }
+#endif //USE_MEMCPY
+
+#endif
+
+    cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+    //this is slowish, perhaps memcpy on SPU is smarter?
+    for (i=0; btLikely( i<size );i++)
+    {
+        localStore[i] = tmpTarget[i];
+    }
+
+    return 0;
+}
+
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+#else
+
+/// Host replacement for the Cell "large get": copies 'size' bytes from effective address 'ea'
+/// into 'ls' right away. tag, tid and rid are ignored. Always returns 0.
+int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+    const char* const source = (char*)ea;
+    char* const destination = (char*)ls;
+
+#ifdef USE_MEMCPY
+    memcpy(destination,source,size);
+#else
+    for (uint32_t offset=0; offset!=size; ++offset)
+    {
+        destination[offset] = source[offset];
+    }
+#endif
+    return 0;
+}
+
+/// Host replacement for the Cell DMA get: copies 'size' bytes from effective address 'ea'
+/// into 'ls' right away. tag, tid and rid are ignored. Always returns 0.
+int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+    const char* const source = (char*)ea;
+    char* const destination = (char*)ls;
+
+#ifdef USE_MEMCPY
+    memcpy(destination,source,size);
+#else
+    for (uint32_t offset=0; offset!=size; ++offset)
+    {
+        destination[offset] = source[offset];
+    }
+#endif //#ifdef USE_MEMCPY
+    return 0;
+}
+
+/// Host replacement for the Cell "large put": copies 'size' bytes from 'ls' to effective
+/// address 'ea' right away. tag, tid and rid are ignored. Always returns 0.
+int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+    char* const destination = (char*)ea;
+    const char* const source = (const char*)ls;
+
+#ifdef USE_MEMCPY
+    memcpy(destination,source,size);
+#else
+    for (uint32_t offset=0; offset!=size; ++offset)
+    {
+        destination[offset] = source[offset];
+    }
+#endif //#ifdef USE_MEMCPY
+
+    return 0;
+}
+
+
+
+/// Host replacement for the DMA wait: the host copy routines in this file finish before they
+/// return, so there is nothing to wait for and the tag mask is ignored.
+void cellDmaWaitTagStatusAll(int ignore)
+{
+    (void)ignore;
+}
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuFakeDma.h b/extern/bullet2/BulletMultiThreaded/SpuFakeDma.h
new file mode 100644
index 00000000000..f5e49b7be14
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuFakeDma.h
@@ -0,0 +1,135 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef FAKE_DMA_H
+#define FAKE_DMA_H
+
+
+#include "PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+
+
+#ifdef __SPU__
+
+#ifndef USE_LIBSPE2
+
+#include <cell/dma.h> // the cellDma* calls come from this header in this configuration
+#include <stdint.h>
+
+#define DMA_TAG(xfer) (xfer + 1) // transfer index -> tag number (index 0 maps to tag 1)
+#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) // bit mask selecting that tag
+
+#else // USE_LIBSPE2 is defined
+
+#define DMA_TAG(xfer) (xfer + 1) // transfer index -> tag number (index 0 maps to tag 1)
+#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) // bit mask selecting that tag
+
+#include <spu_mfcio.h> // mfc_get / mfc_put / tag status calls used by the macros below
+
+#define DEBUG_DMA // enables the checking variants of the cellDma* macros; defined unconditionally here
+#ifdef DEBUG_DMA
+#define dUASSERT(a,b) if (!(a)) { printf(b);} // prints message b when condition a is false; b is passed to printf as the format string
+#define uintsize ppu_address_t // integer type the macros cast addresses to for the alignment checks
+
+#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+ dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+ dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+ dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
+ dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+ dUASSERT(size < 16384, "size too big: "); \
+ dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+ dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+ printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+ } \
+ mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+ dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+ dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+ dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
+ dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+ dUASSERT(size < 16384, "size too big: "); \
+ dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+ dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+ printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+ } \
+ mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+ dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+ dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+ dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
+ dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+ dUASSERT(size < 16384, "size too big: "); \
+ dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+ dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+ printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
+ } \
+ mfc_put(ls, ea, size, tag, tid, rid)
+#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+ dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+ dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+ dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
+ dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+ dUASSERT(size < 16384, "size too big: "); \
+ dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+ dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+ printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+ } \
+ mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() // NOTE: expands to two statements - only use as a full statement inside braces
+
+#else // !DEBUG_DMA: thin aliases for the MFC calls, no checking
+#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
+#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() // NOTE: expands to two statements - only use as a full statement inside braces
+#endif // DEBUG_DMA
+
+
+
+
+
+
+
+
+#endif // USE_LIBSPE2
+#else // !__SPU__
+//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc)
+//Potential to add networked simulation using this interface
+
+#define DMA_TAG(a) (a) // identity mapping in this configuration
+#define DMA_MASK(a) (a) // identity mapping in this configuration
+
+ /// cellDmaLargeGet / cellDmaGet: replacements for the Cell DMA gets that copy 'size' bytes from address 'ea' into 'ls'; tag, tid and rid are unused. Both return 0.
+ int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+ int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+ /// cellDmaLargePut: replacement for the Cell DMA put that copies 'size' bytes from 'ls' to address 'ea'; tag, tid and rid are unused. Returns 0.
+ int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+ /// cellDmaWaitTagStatusAll: replacement for the Cell DMA wait; does nothing because the replacement copies above are already finished when they return.
+ void cellDmaWaitTagStatusAll(int ignore);
+
+
+#endif // __SPU__
+
+///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1); it copies 'size' (< 32) bytes from 'ea' to 'ls', waits for completion and returns 0
+int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
+
+
+void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); // returns the pointer to read from: 'ls' on SPU/libspe2 builds, the address 'ea' itself otherwise
+void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); // same contract as cellDmaLargeGetReadOnly
+void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); // same contract as cellDmaLargeGetReadOnly
+
+
+#endif //FAKE_DMA_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
new file mode 100644
index 00000000000..ee0832f12e2
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
@@ -0,0 +1,251 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuGatheringCollisionDispatcher.h"
+#include "SpuCollisionTaskProcess.h"
+
+
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h"
+#include "SpuContactManifoldCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "LinearMath/btQuickprof.h"
+
+
+
+
+/// Stores the thread interface and the task limit; the SPU task process starts out null.
+SpuGatheringCollisionDispatcher::SpuGatheringCollisionDispatcher(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration)
+    : btCollisionDispatcher(collisionConfiguration)
+    , m_spuCollisionTaskProcess(0)
+    , m_threadInterface(threadInterface)
+    , m_maxNumOutstandingTasks(maxNumOutstandingTasks)
+{
+}
+
+
+/// True when a shape of the given type is on the list of shapes the SPU path accepts.
+static bool isShapeTypeAcceptedOnSpu(int proxyType)
+{
+    switch (proxyType)
+    {
+    case BOX_SHAPE_PROXYTYPE:
+    case TRIANGLE_SHAPE_PROXYTYPE:
+    case SPHERE_SHAPE_PROXYTYPE:
+    case CAPSULE_SHAPE_PROXYTYPE:
+    case CYLINDER_SHAPE_PROXYTYPE:
+//  case CONE_SHAPE_PROXYTYPE:
+    case TRIANGLE_MESH_SHAPE_PROXYTYPE:
+    case CONVEX_HULL_SHAPE_PROXYTYPE:
+    case STATIC_PLANE_PROXYTYPE:
+    case COMPOUND_SHAPE_PROXYTYPE:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// A pair can be dispatched on the SPU only when both shape types are accepted there.
+bool SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,int proxyType1)
+{
+    return isShapeTypeAcceptedOnSpu(proxyType0) && isShapeTypeAcceptedOnSpu(proxyType1);
+}
+
+
+
+/// Destroys the SPU task process if one was created.
+SpuGatheringCollisionDispatcher::~SpuGatheringCollisionDispatcher()
+{
+    // deleting a null pointer is a no-op, so no explicit check is needed
+    delete m_spuCollisionTaskProcess;
+}
+
+#include "stdio.h"
+
+
+
+///btSpuCollisionPairCallback is called for every overlapping broadphase pair and makes sure the pair has a collision algorithm.
+///The decision is recorded in the pair's m_internalTmpValue: 1 = visited, 2 = a SpuContactManifoldCollisionAlgorithm was created
+///here, 3 = the algorithm came from findAlgorithm (the dispatcher processes those pairs itself as "PPU fallback").
+class btSpuCollisionPairCallback : public btOverlapCallback
+{
+    const btDispatcherInfo& m_dispatchInfo;
+    SpuGatheringCollisionDispatcher* m_dispatcher;
+
+    ///CF_DISABLE_SPU_COLLISION_PROCESSING is one bit of the collision flag mask, so it has to be tested with '&'.
+    ///Comparing the whole mask with '!=' only recognised objects that had no other flag set.
+    static bool allowsSpuProcessing(const btCollisionObject* colObj)
+    {
+        return (colObj->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0;
+    }
+
+public:
+
+    btSpuCollisionPairCallback(const btDispatcherInfo& dispatchInfo, SpuGatheringCollisionDispatcher* dispatcher)
+    :m_dispatchInfo(dispatchInfo),
+    m_dispatcher(dispatcher)
+    {
+    }
+
+    ///Assigns an algorithm to collisionPair if it has none yet. Always returns false.
+    ///NOTE(review): false presumably tells the pair cache to keep the pair - confirm against btOverlapCallback.
+    virtual bool processOverlap(btBroadphasePair& collisionPair)
+    {
+        //PPU version
+        //(*m_dispatcher->getNearCallback())(collisionPair,*m_dispatcher,m_dispatchInfo);
+
+        //only support discrete collision detection for now, we could fallback on PPU/unoptimized version for TOI/CCD
+        btAssert(m_dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE);
+
+        //by default, Bullet will use this near callback
+        {
+            ///m_internalTmpValue is used to determine if the SPU has to handle this case or not (skip PPU tasks)
+            if (!collisionPair.m_internalTmpValue)
+            {
+                collisionPair.m_internalTmpValue = 1;
+            }
+            if (!collisionPair.m_algorithm)
+            {
+                btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
+                btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
+
+                btCollisionAlgorithmConstructionInfo ci;
+                ci.m_dispatcher1 = m_dispatcher;
+                ci.m_manifold = 0;
+
+                if (m_dispatcher->needsCollision(colObj0,colObj1))
+                {
+                    int proxyType0 = colObj0->getCollisionShape()->getShapeType();
+                    int proxyType1 = colObj1->getCollisionShape()->getShapeType();
+                    if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1)
+                        && allowsSpuProcessing(colObj0)
+                        && allowsSpuProcessing(colObj1)
+                        )
+                    {
+                        //SPU path: build the algorithm in place in memory owned by the dispatcher
+                        int so = sizeof(SpuContactManifoldCollisionAlgorithm);
+#ifdef ALLOCATE_SEPARATELY
+                        void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so);
+#else
+                        void* mem = m_dispatcher->allocateCollisionAlgorithm(so);
+#endif
+                        collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1);
+                        collisionPair.m_internalTmpValue = 2;
+                    } else
+                    {
+                        //PPU path: let the dispatcher pick its regular algorithm for this pair
+                        collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1);
+                        collisionPair.m_internalTmpValue = 3;
+                    }
+                }
+            }
+        }
+        return false;
+    }
+};
+
+/// Dispatches all overlapping pairs. With dispatchInfo.m_enableSPU set, every pair first gets an
+/// algorithm (btSpuCollisionPairCallback), then all pairs are handed to the SPU task process in
+/// batches, pairs tagged 3 are processed right here, and finally the SPU work is flushed.
+/// Without m_enableSPU the call is forwarded to btCollisionDispatcher.
+void SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo, btDispatcher* dispatcher)
+{
+    if (!dispatchInfo.m_enableSPU)
+    {
+        ///PPU fallback
+        ///!Need to make sure to clear all 'algorithms' when switching between SPU and PPU
+        btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher);
+        return;
+    }
+
+    m_maxNumOutstandingTasks = m_threadInterface->getNumTasks();
+
+    {
+        BT_PROFILE("processAllOverlappingPairs");
+
+        //the task process is created on first use
+        if (!m_spuCollisionTaskProcess)
+            m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
+
+        m_spuCollisionTaskProcess->setNumTasks(m_maxNumOutstandingTasks);
+        // printf("m_maxNumOutstandingTasks =%d\n",m_maxNumOutstandingTasks);
+
+        m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
+
+        ///modified version of btCollisionDispatcher::dispatchAllCollisionPairs:
+        btSpuCollisionPairCallback collisionCallback(dispatchInfo,this);
+        pairCache->processAllOverlappingPairs(&collisionCallback,dispatcher);
+    }
+
+    //send one big batch
+    const int numTotalPairs = pairCache->getNumOverlappingPairs();
+    btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
+
+    {
+        //with few pairs, shrink the batch so the pairs are spread over the tasks
+        int pairRange = SPU_BATCHSIZE_BROADPHASE_PAIRS;
+        if (numTotalPairs < (m_spuCollisionTaskProcess->getNumTasks()*SPU_BATCHSIZE_BROADPHASE_PAIRS))
+        {
+            pairRange = (numTotalPairs/m_spuCollisionTaskProcess->getNumTasks())+1;
+        }
+
+        BT_PROFILE("addWorkToTask");
+        int startIndex = 0;
+        while (startIndex < numTotalPairs)
+        {
+            //Performance Hint: tweak this number during benchmarking
+
+            int endIndex = startIndex + pairRange;
+            if (endIndex >= numTotalPairs)
+                endIndex = numTotalPairs;
+
+            m_spuCollisionTaskProcess->addWorkToTask(pairPtr,startIndex,endIndex);
+            startIndex = endIndex;
+        }
+    }
+
+    {
+        BT_PROFILE("PPU fallback");
+        //handle PPU fallback pairs
+        for (int pairIndex=0; pairIndex<numTotalPairs; ++pairIndex)
+        {
+            btBroadphasePair& collisionPair = pairPtr[pairIndex];
+
+            //only pairs tagged 3 that actually own an algorithm are processed here
+            if (collisionPair.m_internalTmpValue != 3 || !collisionPair.m_algorithm)
+                continue;
+
+            btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
+            btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
+
+            if (!dispatcher->needsCollision(colObj0,colObj1))
+                continue;
+
+            btManifoldResult contactPointResult(colObj0,colObj1);
+
+            if (dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE)
+            {
+                //discrete collision detection query
+                collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
+            }
+            else
+            {
+                //continuous collision detection query, time of impact (toi)
+                btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
+                if (dispatchInfo.m_timeOfImpact > toi)
+                    dispatchInfo.m_timeOfImpact = toi;
+            }
+        }
+    }
+
+    {
+        BT_PROFILE("flush2");
+        //make sure all SPU work is done
+        m_spuCollisionTaskProcess->flush2();
+    }
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
new file mode 100644
index 00000000000..7d5be88d71d
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
@@ -0,0 +1,72 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef SPU_GATHERING_COLLISION__DISPATCHER_H
+#define SPU_GATHERING_COLLISION__DISPATCHER_H
+
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+
+
+///Tuning value to optimize SPU utilization
+///Too small a value means task overhead is large compared to computation (too fine granularity)
+///Too big a value might leave some SPUs idle, while a few other SPUs are doing all the work.
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
+#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 512
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024
+
+
+
+class SpuCollisionTaskProcess;
+
+///SpuGatheringCollisionDispatcher can use SPU to gather and calculate collision detection
+///Time of Impact, Closest Points and Penetration Depth. It falls back to btCollisionDispatcher when dispatchInfo.m_enableSPU is not set.
+class SpuGatheringCollisionDispatcher : public btCollisionDispatcher
+{
+
+ SpuCollisionTaskProcess* m_spuCollisionTaskProcess; // created on first SPU dispatch, deleted in the destructor; null until then
+
+protected:
+
+ class btThreadSupportInterface* m_threadInterface; // passed in by the caller; not deleted by this class
+
+ unsigned int m_maxNumOutstandingTasks; // refreshed from m_threadInterface->getNumTasks() on every SPU dispatch
+
+
+public:
+
+ //can be used by SPU collision algorithms; returns null before the first SPU dispatch
+ SpuCollisionTaskProcess* getSpuCollisionTaskProcess()
+ {
+ return m_spuCollisionTaskProcess;
+ }
+
+ SpuGatheringCollisionDispatcher (class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration);
+
+ virtual ~SpuGatheringCollisionDispatcher();
+
+ bool supportsDispatchPairOnSpu(int proxyType0,int proxyType1); // true when both shape types are on the SPU-supported list
+
+ virtual void dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ;
+
+};
+
+
+
+#endif //SPU_GATHERING_COLLISION__DISPATCHER_H
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp
new file mode 100644
index 00000000000..a312450ed72
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp
@@ -0,0 +1,257 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifdef USE_LIBSPE2
+
+#include "SpuLibspe2Support.h"
+
+//SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+///Setup and initialize SPU/CELL/Libspe2; numThreads is clamped to the physical SPE count and to MAX_SPUS (the size of data[]).
+SpuLibspe2Support::SpuLibspe2Support(spe_program_handle_t *speprog, int numThreads)
+{
+ this->program = speprog;
+ int maxThreads = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1);
+ if (maxThreads > MAX_SPUS) maxThreads = MAX_SPUS; //data[] only has MAX_SPUS slots; more threads would index past its end
+ this->numThreads = (numThreads <= maxThreads) ? numThreads : maxThreads;
+ for (int i=0; i < MAX_SPUS; i++) data[i].context = NULL; //internal_startSPU()/stopSPU() test context against NULL
+}
+
+///Cleanup/shutdown Libspe2: the destructor only delegates to stopSPU().
+SpuLibspe2Support::~SpuLibspe2Support()
+{
+ //NOTE(review): stopSPU() runs unconditionally, also when startSPU() was never called - confirm callers always start first
+ stopSPU();
+}
+
+
+
+///send messages to SPUs: record the task in the target SPU's status slot, then post Spu_Mailbox_Event_Task to its inbound mailbox
+void SpuLibspe2Support::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1)
+{
+ spe_context_ptr_t context = NULL;
+ //NOTE(review): uiArgument0 is a task-description pointer passed as uint32_t - only valid while PPU pointers fit in 32 bits; uiArgument1 is not read here
+ switch (uiCommand)
+ {
+ case CMD_SAMPLE_TASK_COMMAND:
+ {
+ //get taskdescription
+ SpuSampleTaskDesc* taskDesc = (SpuSampleTaskDesc*) uiArgument0;
+
+ btAssert(taskDesc->m_taskId<m_activeSpuStatus.size());
+
+ //get status of SPU on which task should run
+ btSpuStatus& spuStatus = m_activeSpuStatus[taskDesc->m_taskId];
+
+ //set data for spuStatus
+ spuStatus.m_commandId = uiCommand;
+ spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
+ spuStatus.m_taskDesc.p = taskDesc;
+
+ //get context
+ context = data[taskDesc->m_taskId].context;
+
+ //pass the address stored in this slot's m_lsMemory on to the task description
+ taskDesc->m_mainMemoryPtr = reinterpret_cast<uint64_t> (spuStatus.m_lsMemory.p);
+
+
+ break;
+ }
+ case CMD_GATHER_AND_PROCESS_PAIRLIST:
+ {
+ //get taskdescription
+ SpuGatherAndProcessPairsTaskDesc* taskDesc = (SpuGatherAndProcessPairsTaskDesc*) uiArgument0;
+
+ btAssert(taskDesc->taskId<m_activeSpuStatus.size());
+
+ //get status of SPU on which task should run
+ btSpuStatus& spuStatus = m_activeSpuStatus[taskDesc->taskId];
+
+ //set data for spuStatus
+ spuStatus.m_commandId = uiCommand;
+ spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
+ spuStatus.m_taskDesc.p = taskDesc;
+
+ //get context
+ context = data[taskDesc->taskId].context;
+
+ //pass the address stored in this slot's m_lsMemory on to the task description
+ taskDesc->m_lsMemory = (CollisionTask_LocalStoreMemory*)spuStatus.m_lsMemory.p;
+
+ break;
+ }
+ default:
+ {
+ ///not implemented
+ btAssert(0);
+ return; //btAssert may be compiled out: never reach the mailbox write below with an unset context
+ }
+ };
+
+
+ //notify the SPU through its inbound mailbox; the task description itself was stored in the status slot above
+ unsigned int event = Spu_Mailbox_Event_Task;
+ spe_in_mbox_write(context, &event, 1, SPE_MBOX_ANY_NONBLOCKING);
+ //NOTE(review): the result of the non-blocking write is ignored - confirm the mailbox cannot be full at this point
+}
+
+///check for messages from SPUs
+void SpuLibspe2Support::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+ ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
+
+ ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
+ //Outputs: *puiArgument0 receives the chosen slot's m_taskId, *puiArgument1 its m_status.
+ btAssert(m_activeSpuStatus.size());
+
+
+ int last = -1;
+
+ //scan for a slot whose status is Spu_Status_Free (lowest index wins), yielding the CPU between scans
+ while(last < 0)
+ {
+ for (int i=0;i<m_activeSpuStatus.size();i++)
+ {
+ if ( m_activeSpuStatus[i].m_status == Spu_Status_Free)
+ {
+ last = i;
+ break;
+ }
+ }
+ if(last < 0)
+ sched_yield();
+ }
+
+ //NOTE(review): a slot that is Free because it never received a task also ends the scan - presumably callers only wait while tasks are outstanding; confirm
+
+ btSpuStatus& spuStatus = m_activeSpuStatus[last];
+
+ ///need to find an active spu
+ btAssert(last>=0);
+
+
+
+ *puiArgument0 = spuStatus.m_taskId;
+ *puiArgument1 = spuStatus.m_status;
+
+
+}
+
+///Public entry point for starting the SPUs; simply forwards to internal_startSPU().
+void SpuLibspe2Support::startSPU()
+{
+ this->internal_startSPU();
+}
+
+
+
+///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+void SpuLibspe2Support::internal_startSPU()
+{
+ m_activeSpuStatus.resize(numThreads);
+
+ //first pass: for every slot without a context, create the context, load the program and spawn the PPU thread that runs it
+ for (int i=0; i < numThreads; i++)
+ {
+ //relies on data[i].context being NULL for slots that have not been started
+ if(data[i].context == NULL)
+ {
+
+ /* Create context */
+ if ((data[i].context = spe_context_create(0, NULL)) == NULL)
+ {
+ perror ("Failed creating context");
+ exit(1);
+ }
+
+ /* Load program into context */
+ if(spe_program_load(data[i].context, this->program))
+ {
+ perror ("Failed loading program");
+ exit(1);
+ }
+ //mark the slot as starting up; the second loop below waits for this status to change
+ m_activeSpuStatus[i].m_status = Spu_Status_Startup;
+ m_activeSpuStatus[i].m_taskId = i;
+ m_activeSpuStatus[i].m_commandId = 0;
+ m_activeSpuStatus[i].m_lsMemory.p = NULL;
+
+ //arguments later handed to spe_context_run() by ppu_pthread_function; argp points at this slot's status record
+ data[i].entry = SPE_DEFAULT_ENTRY;
+ data[i].flags = 0;
+ data[i].argp.p = &m_activeSpuStatus[i];
+ data[i].envp.p = NULL;
+
+ /* Create thread for each SPE context */
+ if (pthread_create(&data[i].pthread, NULL, &ppu_pthread_function, &(data[i]) ))
+ {
+ perror ("Failed creating thread");
+ exit(1);
+ }
+ /*
+ else
+ {
+ printf("started thread %d\n",i);
+ }*/
+ }
+ }
+
+ //second pass: block (yielding) until no started slot reports Spu_Status_Startup any more
+ for (int i=0; i < numThreads; i++)
+ {
+ if(data[i].context != NULL)
+ {
+ while( m_activeSpuStatus[i].m_status == Spu_Status_Startup)
+ {
+ // wait for spu to set up
+ sched_yield();
+ }
+ printf("Spu %d is ready\n", i);
+ }
+ }
+}
+
+///tell the task scheduler we are done with the SPU tasks
+void SpuLibspe2Support::stopSPU()
+{
+ // ask every started SPU to shut down and wait for its PPU thread to finish
+ int i;
+ for ( i = 0; i < this->numThreads; i++ )
+ {
+ if(data[i].context == NULL) continue; // slot not started (or already stopped): nothing to signal or join
+ unsigned int event = Spu_Mailbox_Event_Shutdown;
+ spe_context_ptr_t context = data[i].context;
+ spe_in_mbox_write(context, &event, 1, SPE_MBOX_ALL_BLOCKING);
+ pthread_join (data[i].pthread, NULL);
+
+ }
+ // close SPE program (NOTE(review): still executed on every call, including a repeated stopSPU())
+ spe_image_close(program);
+ // destroy SPE contexts
+ for ( i = 0; i < this->numThreads; i++ )
+ {
+ if(data[i].context != NULL)
+ {
+ spe_context_destroy (data[i].context);
+ data[i].context = NULL; // a repeated stopSPU() (the destructor calls it too) must not join or destroy this slot twice
+ }
+ }
+ m_activeSpuStatus.clear();
+
+}
+
+
+
+#endif //USE_LIBSPE2
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h
new file mode 100644
index 00000000000..a6d6baca47b
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h
@@ -0,0 +1,180 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef SPU_LIBSPE2_SUPPORT_H
+#define SPU_LIBSPE2_SUPPORT_H
+
+#include <LinearMath/btScalar.h> //for uint32_t etc.
+
+#ifdef USE_LIBSPE2
+
+#include <stdlib.h>
+#include <stdio.h>
+//#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+#include "PlatformDefinitions.h"
+
+
+//extern struct SpuGatherAndProcessPairsTaskDesc;
+
+enum
+{
+ Spu_Mailbox_Event_Nothing = 0,
+ Spu_Mailbox_Event_Task = 1, //written to the inbound mailbox by sendRequest()
+ Spu_Mailbox_Event_Shutdown = 2, //written to the inbound mailbox by stopSPU()
+ //presumably forces the enumeration to span a full 32-bit word (TODO confirm)
+ Spu_Mailbox_Event_ForceDword = 0xFFFFFFFF
+
+};
+
+enum
+{
+ Spu_Status_Free = 0, //the state waitForResponse() scans for
+ Spu_Status_Occupied = 1, //set by sendRequest() when a task is handed to the slot
+ Spu_Status_Startup = 2, //set by internal_startSPU() before the SPE thread is created
+ //presumably forces the enumeration to span a full 32-bit word (TODO confirm)
+ Spu_Status_ForceDword = 0xFFFFFFFF
+
+};
+
+///Per-SPU status record; its address is handed to the SPE program as argp. Declared with 128-byte alignment.
+struct btSpuStatus
+{
+ uint32_t m_taskId; //slot index, assigned in internal_startSPU()
+ uint32_t m_commandId; //last command passed to sendRequest() for this slot
+ uint32_t m_status; //one of the Spu_Status_* values
+
+ addr64 m_taskDesc; //pointer to the current task description (set in sendRequest())
+ addr64 m_lsMemory; //initialised to NULL on the PPU side; NOTE(review): presumably filled in by the SPU program - confirm
+
+}
+__attribute__ ((aligned (128)))
+;
+
+
+
+#ifndef __SPU__
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "SpuCollisionTaskProcess.h"
+#include "SpuSampleTaskProcess.h"
+#include "btThreadSupportInterface.h"
+#include <libspe2.h>
+#include <pthread.h>
+#include <sched.h>
+
+#define MAX_SPUS 4
+///Per-SPE launch record: the arguments ppu_pthread_function hands to spe_context_run(), plus the thread that runs it.
+typedef struct ppu_pthread_data
+{
+ spe_context_ptr_t context; //SPE context; tested against NULL by internal_startSPU()/stopSPU()
+ pthread_t pthread; //PPU thread created in internal_startSPU() and joined in stopSPU()
+ unsigned int entry; //entry point, passed by address to spe_context_run()
+ unsigned int flags; //run flags for spe_context_run()
+ addr64 argp; //argument pointer passed to spe_context_run()
+ addr64 envp; //environment pointer passed to spe_context_run()
+ spe_stop_info_t stopinfo; //stop information written by spe_context_run()
+} ppu_pthread_data_t;
+
+///PPU thread body: runs one SPE context to completion and terminates the whole process if the SPE did not exit cleanly with code 0.
+static void *ppu_pthread_function(void *arg)
+{
+ ppu_pthread_data_t * datap = (ppu_pthread_data_t *)arg; //arg is the ppu_pthread_data_t passed to pthread_create()
+ /*
+ int rc;
+ do
+ {*/
+ spe_context_run(datap->context, &datap->entry, datap->flags, datap->argp.p, datap->envp.p, &datap->stopinfo); //NOTE(review): return value is not checked; only stopinfo is inspected
+ if (datap->stopinfo.stop_reason == SPE_EXIT)
+ {
+ if (datap->stopinfo.result.spe_exit_code != 0)
+ {
+ perror("FAILED: SPE returned a non-zero exit status: \n");
+ exit(1);
+ }
+ }
+ else
+ {
+ perror("FAILED: SPE abnormally terminated\n");
+ exit(1);
+ }
+
+
+ //} while (rc > 0); // loop until exit or error, and while any stop & signal
+ pthread_exit(NULL);
+}
+
+
+
+
+
+
+///SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+class SpuLibspe2Support : public btThreadSupportInterface
+{
+ //one status record per SPU slot; resized to numThreads in internal_startSPU()
+ btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
+
+public:
+ //Setup and initialize SPU/CELL/Libspe2
+ SpuLibspe2Support(spe_program_handle_t *speprog,int numThreads);
+
+ // SPE program handle ptr.
+ spe_program_handle_t *program;
+
+ // SPE program data: one launch record per SPU thread, indexed by thread number, so numThreads must not exceed MAX_SPUS
+ ppu_pthread_data_t data[MAX_SPUS];
+
+ //cleanup/shutdown Libspe2
+ ~SpuLibspe2Support();
+
+ ///send messages to SPUs
+ void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1=0);
+
+ //check for messages from SPUs
+ void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+
+ //start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+ virtual void startSPU();
+
+ //tell the task scheduler we are done with the SPU tasks
+ virtual void stopSPU();
+ //deliberately a no-op: numTasks is ignored
+ virtual void setNumTasks(int numTasks)
+ {
+ //changing the number of tasks after initialization is not implemented (yet)
+ }
+
+private:
+
+ ///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+ void internal_startSPU();
+
+
+
+ //number of SPU threads/slots driven by this object; set once in the constructor
+ int numThreads;
+
+};
+
+#endif // NOT __SPU__
+
+#endif //USE_LIBSPE2
+
+#endif //SPU_LIBSPE2_SUPPORT_H
+
+
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
new file mode 100644
index 00000000000..9bc2ebf51ec
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
@@ -0,0 +1,172 @@
+/*
+ Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef __BOX_H__
+#define __BOX_H__
+
+
+#ifndef PE_REF
+#define PE_REF(a) a&
+#endif
+
+#include <math.h>
+
+///only use a system-wide vectormath_aos.h on CELLOS_LV2 or if USE_SYSTEM_VECTORMATH
+#if defined(__CELLOS_LV2__) || defined (USE_SYSTEM_VECTORMATH)
+#include <vectormath_aos.h>
+#else
+#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
+#endif
+
+
+
+using namespace Vectormath::Aos;
+///Kind of box feature stored in a BoxPoint: F is set by setFaceFeature, E by setEdgeFeature, V by setVertexFeature.
+enum FeatureType { F, E, V };
+
+//----------------------------------------------------------------------------
+// Box
+//----------------------------------------------------------------------------
+///The Box is an internal class used by the boxBoxDistance calculation.
+class Box
+{
+public:
+ Vector3 half; // per-axis half extents, as stored by Set()
+
+ inline Box() // leaves 'half' default-constructed
+ {}
+ inline Box(PE_REF(Vector3) half_);
+ inline Box(float hx, float hy, float hz);
+
+ inline void Set(PE_REF(Vector3) half_);
+ inline void Set(float hx, float hy, float hz);
+ // half extents of the box after applying 'rotation' (see definition below)
+ inline Vector3 GetAABB(const Matrix3& rotation) const;
+};
+///Construct from a half-extent vector; delegates to Set().
+inline
+Box::Box(PE_REF(Vector3) half_)
+{
+ Set(half_);
+}
+///Construct from three scalar half extents; delegates to Set().
+inline
+Box::Box(float hx, float hy, float hz)
+{
+ Set(hx, hy, hz);
+}
+///Store the given half-extent vector in 'half'.
+inline
+void
+Box::Set(PE_REF(Vector3) half_)
+{
+ half = half_;
+}
+///Store the three scalar half extents in 'half'.
+inline
+void
+Box::Set(float hx, float hy, float hz)
+{
+ half = Vector3(hx, hy, hz);
+}
+///Return the element-wise absolute value of 'rotation' multiplied by 'half'.
+inline
+Vector3
+Box::GetAABB(const Matrix3& rotation) const
+{
+ return absPerElem(rotation) * half;
+}
+
+//-------------------------------------------------------------------------------------------------
+// BoxPoint
+//-------------------------------------------------------------------------------------------------
+// The feature is stored as a FeatureType plus an integer index whose bit layout depends on the type (see the setters).
+///The BoxPoint class is an internally used class to contain feature information for boxBoxDistance calculation.
+class BoxPoint
+{
+public:
+ BoxPoint() : localPoint(0.0f) {} // only localPoint is initialised; featureType/featureIdx stay unset until a setter runs
+
+ Point3 localPoint;
+ FeatureType featureType;
+ int featureIdx;
+ // setters: tag the feature type and pack the arguments into featureIdx
+ inline void setVertexFeature(int plusX, int plusY, int plusZ);
+ inline void setEdgeFeature(int dim0, int plus0, int dim1, int plus1);
+ inline void setFaceFeature(int dim, int plus);
+ // getters: unpack featureIdx; they do not check featureType
+ inline void getVertexFeature(int & plusX, int & plusY, int & plusZ) const;
+ inline void getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const;
+ inline void getFaceFeature(int & dim, int & plus) const;
+};
+///Mark as a vertex feature; featureIdx packs plusX into bit 2, plusY into bit 1 and plusZ into bit 0.
+inline
+void
+BoxPoint::setVertexFeature(int plusX, int plusY, int plusZ)
+{
+ featureType = V;
+ featureIdx = plusX << 2 | plusY << 1 | plusZ;
+}
+///Mark as an edge feature; the two (dim, plus) pairs are packed so that the pair with the smaller dim lands in the upper bits.
+inline
+void
+BoxPoint::setEdgeFeature(int dim0, int plus0, int dim1, int plus1)
+{
+ featureType = E;
+ // layout: bit 5 = plus of first pair, bits 3-4 = dim of first pair, bit 2 = plus of second pair, bits 0-1 = dim of second pair
+ if (dim0 > dim1) {
+ featureIdx = plus1 << 5 | dim1 << 3 | plus0 << 2 | dim0;
+ } else {
+ featureIdx = plus0 << 5 | dim0 << 3 | plus1 << 2 | dim1;
+ }
+}
+///Mark as a face feature; featureIdx packs plus into bit 2 and dim into bits 0-1.
+inline
+void
+BoxPoint::setFaceFeature(int dim, int plus)
+{
+ featureType = F;
+ featureIdx = plus << 2 | dim;
+}
+///Unpack the index written by setVertexFeature (bit 2 -> plusX, bit 1 -> plusY, bit 0 -> plusZ).
+inline
+void
+BoxPoint::getVertexFeature(int & plusX, int & plusY, int & plusZ) const
+{
+ plusX = featureIdx >> 2;
+ plusY = featureIdx >> 1 & 1;
+ plusZ = featureIdx & 1;
+}
+///Unpack the index written by setEdgeFeature; pair 0 is the one stored in the upper bits.
+inline
+void
+BoxPoint::getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const
+{
+ plus0 = featureIdx >> 5;
+ dim0 = featureIdx >> 3 & 3;
+ plus1 = featureIdx >> 2 & 1;
+ dim1 = featureIdx & 3;
+}
+///Unpack the index written by setFaceFeature (bit 2 -> plus, bits 0-1 -> dim).
+inline
+void
+BoxPoint::getFaceFeature(int & dim, int & plus) const
+{
+ plus = featureIdx >> 2;
+ dim = featureIdx & 3;
+}
+
+#endif /* __BOX_H__ */
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
new file mode 100644
index 00000000000..dfcd8426695
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
@@ -0,0 +1,302 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "SpuCollisionShapes.h"
+
+///not supported on IBM SDK, until we fix the alignment of btVector3
+#if defined (__CELLOS_LV2__) && defined (__SPU__)
+#include <spu_intrinsics.h>
+static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 ) // 3-component dot product; the sum ends up in element 0 of the result
+{
+ vec_float4 result;
+ result = spu_mul( vec0, vec1 ); // element-wise products
+ result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result ); // add the products of both inputs rotated by 4 bytes (one float)
+ return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result ); // add the products of both inputs rotated by 8 bytes (two floats)
+}
+#endif //__SPU__
+
+///Compute the AABB (aabbMin/aabbMax) of a convex shape placed by xform; shape types not listed in the switch leave both outputs untouched.
+void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform)
+{
+ //calculate the aabb, given the types...
+ switch (shapeType)
+ {
+ case CYLINDER_SHAPE_PROXYTYPE:
+ /* fall through */
+ case BOX_SHAPE_PROXYTYPE:
+ {
+ btScalar margin=convexShape->getMarginNV();
+ btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
+ halfExtents += btVector3(margin,margin,margin);
+ const btTransform& t = xform;
+ btMatrix3x3 abs_b = t.getBasis().absolute();
+ btVector3 center = t.getOrigin();
+ btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); //rows of |basis| dotted with the half extents
+
+ aabbMin = center - extent;
+ aabbMax = center + extent;
+ break;
+ }
+ case CAPSULE_SHAPE_PROXYTYPE:
+ {
+ btScalar margin=convexShape->getMarginNV();
+ btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
+ //add the radius to y-axis to get full height
+ btScalar radius = halfExtents[0];
+ halfExtents[1] += radius; //NOTE(review): hard-codes index 1 as the capsule axis; the disabled block below used getUpAxis() instead
+ halfExtents += btVector3(margin,margin,margin);
+#if 0
+ int capsuleUpAxis = convexShape->getUpAxis();
+ btScalar halfHeight = convexShape->getHalfHeight();
+ btScalar radius = convexShape->getRadius();
+ halfExtents[capsuleUpAxis] = radius + halfHeight;
+#endif
+ const btTransform& t = xform;
+ btMatrix3x3 abs_b = t.getBasis().absolute();
+ btVector3 center = t.getOrigin();
+ btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
+
+ aabbMin = center - extent;
+ aabbMax = center + extent;
+ break;
+ }
+ case SPHERE_SHAPE_PROXYTYPE:
+ {
+ btScalar radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX();
+ btScalar margin = radius + convexShape->getMarginNV(); //'margin' here is radius plus collision margin
+ const btTransform& t = xform;
+ const btVector3& center = t.getOrigin();
+ btVector3 extent(margin,margin,margin);
+ aabbMin = center - extent;
+ aabbMax = center + extent;
+ break;
+ }
+ case CONVEX_HULL_SHAPE_PROXYTYPE:
+ {
+ ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); //local buffer that receives a copy of the hull shape
+ cellDmaGet(&convexHullShape0, convexShapePtr , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1)); //wait for the copy before using it
+ btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0;
+ const btTransform& t = xform;
+ btScalar margin = convexShape->getMarginNV();
+ localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin);
+ //spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ());
+ //spu_printf("SPU convex aabbMax=%f,%f,%f=\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ());
+ break;
+ }
+ default:
+ {
+ // spu_printf("SPU: unsupported shapetype %d in AABB calculation\n");
+ }
+ };
+}
+///Fetch the mesh interface (DMA tag 1) and the optimized BVH (DMA tag 2) of triMeshShape into bvhMeshShape, then wait for both tags.
+void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape)
+{
+ register int dmaSize;
+ register ppu_address_t dmaPpuAddress2;
+
+ dmaSize = sizeof(btTriangleIndexVertexArray);
+ dmaPpuAddress2 = reinterpret_cast<ppu_address_t>(triMeshShape->getMeshInterface());
+ // spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2);
+#ifdef __SPU__
+ cellDmaGet(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ bvhMeshShape->gTriangleMeshInterfacePtr = &bvhMeshShape->gTriangleMeshInterfaceStorage; //on SPU the pointer refers to the local copy
+#else
+ bvhMeshShape->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); //elsewhere the pointer is whatever cellDmaGetReadOnly returns
+#endif
+
+ //cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ ///now DMA over the BVH
+
+ dmaSize = sizeof(btOptimizedBvh);
+ dmaPpuAddress2 = reinterpret_cast<ppu_address_t>(triMeshShape->getOptimizedBvh());
+ //spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",dmaPpuAddress2);
+ cellDmaGet(&bvhMeshShape->gOptimizedBvh, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
+ //cellDmaWaitTagStatusAll(DMA_MASK(2));
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+}
+///Start a DMA of indexArray[index] into IndexMesh on the given tag; does not wait for completion.
+void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag)
+{
+ cellDmaGet(IndexMesh, (ppu_address_t)&indexArray[index] , sizeof(btIndexedMesh), DMA_TAG(dmaTag), 0, 0);
+
+}
+///Start a DMA of batchSize btBvhSubtreeInfo records from subTreePtr into subTreeHeaders on the given tag; does not wait for completion.
+void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag)
+{
+ cellDmaGet(subTreeHeaders, subTreePtr, batchSize * sizeof(btBvhSubtreeInfo), DMA_TAG(dmaTag), 0, 0);
+}
+///Start a DMA of one subtree's quantized nodes (subtree.m_subtreeSize nodes from nodeArray[subtree.m_rootNodeIndex]) into 'nodes' on the caller's dmaTag; does not wait.
+void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray& nodeArray, int dmaTag)
+{
+ cellDmaGet(nodes, reinterpret_cast<ppu_address_t>(&nodeArray[subtree.m_rootNodeIndex]) , subtree.m_subtreeSize* sizeof(btQuantizedBvhNode), DMA_TAG(dmaTag), 0, 0);
+}
+
+///getShapeTypeSize could easily be optimized, but it is not likely a bottleneck
+int getShapeTypeSize(int shapeType)
+{
+ //Returns sizeof() of the concrete shape class for the given proxy type, or 0 (after asserting) for unlisted types.
+ //Every listed case asserts that the size is strictly below MAX_SHAPE_SIZE.
+ switch (shapeType)
+ {
+ case CYLINDER_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btCylinderShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+ case BOX_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btBoxShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+ case SPHERE_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btSphereShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+ case TRIANGLE_MESH_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btBvhTriangleMeshShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+ case CAPSULE_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btCapsuleShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+
+ case CONVEX_HULL_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btConvexHullShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+
+ case COMPOUND_SHAPE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btCompoundShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+ case STATIC_PLANE_PROXYTYPE:
+ {
+ int shapeSize = sizeof(btStaticPlaneShape);
+ btAssert(shapeSize < MAX_SHAPE_SIZE);
+ return shapeSize;
+ }
+
+ default:
+ btAssert(0);
+ //unsupported shapetype, please add here
+ return 0;
+ }
+}
+///Record the hull's point count and start a DMA (tag 2) of its unscaled points into g_convexPointBuffer; bails out without DMA if the count exceeds MAX_NUM_SPU_CONVEX_POINTS.
+void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU)
+{
+ convexVertexData->gNumConvexPoints = convexShapeSPU->getNumPoints();
+ if (convexVertexData->gNumConvexPoints>MAX_NUM_SPU_CONVEX_POINTS)
+ {
+ btAssert(0);
+ // spu_printf("SPU: Error: MAX_NUM_SPU_CONVEX_POINTS(%d) exceeded: %d\n",MAX_NUM_SPU_CONVEX_POINTS,convexVertexData->gNumConvexPoints);
+ return; //gNumConvexPoints keeps the oversized count; the point buffer is not filled
+ }
+
+ register int dmaSize = convexVertexData->gNumConvexPoints*sizeof(btVector3);
+ ppu_address_t pointsPPU = (ppu_address_t) convexShapeSPU->getUnscaledPoints();
+ cellDmaGet(&convexVertexData->g_convexPointBuffer[0], pointsPPU , dmaSize, DMA_TAG(2), 0, 0); //not waited on here
+}
+///Start a DMA of a collision shape into collisionShapeLocation; the byte count comes from getShapeTypeSize(shapeType). Does not wait for completion.
+void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType)
+{
+ register int dmaSize = getShapeTypeSize(shapeType);
+ cellDmaGet(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0);
+ //cellDmaGetReadOnly(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0);
+ //cellDmaWaitTagStatusAll(DMA_MASK(dmaTag));
+}
+///Start a DMA of the compound shape's child list into gSubshapes. NOTE(review): the child count is not checked against MAX_SPU_COMPOUND_SUBSHAPES here - confirm callers guarantee it.
+void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
+{
+ register int dmaSize;
+ register ppu_address_t dmaPpuAddress2;
+ int childShapeCount = spuCompoundShape->getNumChildShapes();
+ dmaSize = childShapeCount * sizeof(btCompoundShapeChild);
+ dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList();
+ cellDmaGet(&compoundShapeLocation->gSubshapes[0], dmaPpuAddress2, dmaSize, DMA_TAG(dmaTag), 0, 0); //not waited on here
+}
+///Start one DMA per child shape into gSubshapeShape[i], reading each child's pointer and type from gSubshapes[i] (which must already be populated).
+void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
+{
+ int childShapeCount = spuCompoundShape->getNumChildShapes(); //NOTE(review): not checked against MAX_SPU_COMPOUND_SUBSHAPES - confirm callers guarantee it
+ int i;
+ // DMA all the subshapes
+ for ( i = 0; i < childShapeCount; ++i)
+ {
+ btCompoundShapeChild& childShape = compoundShapeLocation->gSubshapes[i];
+ dmaCollisionShape (&compoundShapeLocation->gSubshapeShape[i],(ppu_address_t)childShape.m_childShape, dmaTag, childShape.m_childShapeType);
+ }
+}
+
+///Walk the node array from startNodeIndex to endNodeIndex without a stack, calling nodeCallback->processNode for every leaf whose quantized AABB overlaps the query box.
+void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex)
+{
+
+ int curIndex = startNodeIndex;
+ int walkIterations = 0;
+#ifdef BT_DEBUG
+ int subTreeSize = endNodeIndex - startNodeIndex; //only referenced by the btAssert below
+#endif
+
+ int escapeIndex;
+
+ unsigned int aabbOverlap, isLeafNode;
+ //rootNode is advanced in step with curIndex and always points at the node being visited
+ while (curIndex < endNodeIndex)
+ {
+ //catch bugs in tree data
+ btAssert (walkIterations < subTreeSize);
+
+ walkIterations++;
+ aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
+ isLeafNode = rootNode->isLeafNode();
+
+ if (isLeafNode && aabbOverlap)
+ {
+ //printf("overlap with node %d\n",rootNode->getTriangleIndex());
+ nodeCallback->processNode(0,rootNode->getTriangleIndex()); //the first argument is always passed as 0 here
+ // spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex());
+ }
+ //leaf, or overlapping internal node: step to the next node; non-overlapping internal node: jump ahead by its escape index
+ if (aabbOverlap || isLeafNode)
+ {
+ rootNode++;
+ curIndex++;
+ } else
+ {
+ escapeIndex = rootNode->getEscapeIndex();
+ rootNode += escapeIndex;
+ curIndex += escapeIndex;
+ }
+ }
+
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
new file mode 100644
index 00000000000..d369395e160
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
@@ -0,0 +1,126 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef __SPU_COLLISION_SHAPES_H
+#define __SPU_COLLISION_SHAPES_H
+
+#include "../SpuDoubleBuffer.h"
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
+#include "BulletCollision/CollisionShapes/btCylinderShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+
+#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+
+#define MAX_NUM_SPU_CONVEX_POINTS 128
+///Local copy of a convex hull's vertex data: the point count plus a fixed buffer of MAX_NUM_SPU_CONVEX_POINTS points.
+ATTRIBUTE_ALIGNED16(struct) SpuConvexPolyhedronVertexData
+{
+ void* gSpuConvexShapePtr;
+ btVector3* gConvexPoints;
+ int gNumConvexPoints; //written by dmaConvexVertexData()
+ int unused;
+ ATTRIBUTE_ALIGNED16(btVector3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]); //DMA target of dmaConvexVertexData()
+};
+
+#define MAX_SHAPE_SIZE 256
+///Raw, 16-byte aligned storage of MAX_SHAPE_SIZE bytes for one collision shape.
+ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
+{
+ ATTRIBUTE_ALIGNED16(char collisionShape[MAX_SHAPE_SIZE]);
+};
+///Storage for a compound shape's child list and raw child shapes, limited to MAX_SPU_COMPOUND_SUBSHAPES children.
+ATTRIBUTE_ALIGNED16(struct) CompoundShape_LocalStoreMemory
+{
+ // Compound data
+#define MAX_SPU_COMPOUND_SUBSHAPES 16
+ ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]); //filled by dmaCompoundShapeInfo()
+ ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]); //filled by dmaCompoundSubShapes(), one slot per child
+};
+///Storage for the pieces of a BVH triangle mesh shape: the optimized BVH, the mesh interface, one indexed mesh part, subtree headers and subtree nodes.
+ATTRIBUTE_ALIGNED16(struct) bvhMeshShape_LocalStoreMemory
+{
+ //ATTRIBUTE_ALIGNED16(btOptimizedBvh gOptimizedBvh);
+ ATTRIBUTE_ALIGNED16(char gOptimizedBvh[sizeof(btOptimizedBvh)+16]); //raw bytes, 16 more than sizeof(btOptimizedBvh)
+ btOptimizedBvh* getOptimizedBvh() //reinterprets the raw buffer as a btOptimizedBvh
+ {
+ return (btOptimizedBvh*) gOptimizedBvh;
+ }
+
+ ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray gTriangleMeshInterfaceStorage);
+ btTriangleIndexVertexArray* gTriangleMeshInterfacePtr; //set by dmaBvhShapeData()
+ ///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment
+ ATTRIBUTE_ALIGNED16(btIndexedMesh gIndexMesh);
+ #define MAX_SPU_SUBTREE_HEADERS 32
+ //1024
+ ATTRIBUTE_ALIGNED16(btBvhSubtreeInfo gSubtreeHeaders[MAX_SPU_SUBTREE_HEADERS]);
+ ATTRIBUTE_ALIGNED16(btQuantizedBvhNode gSubtreeNodes[MAX_SUBTREE_SIZE_IN_BYTES/sizeof(btQuantizedBvhNode)]);
+};
+
+
+void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform);
+void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape);
+void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag);
+void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag);
+void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray& nodeArray, int dmaTag);
+
+int getShapeTypeSize(int shapeType);
+void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU);
+void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType);
+void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
+void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
+
+///Overlap test between two quantized AABBs (3 unsigned shorts each): returns 1 when they overlap on all three axes, 0 otherwise. No input is modified.
+#define USE_BRANCHFREE_TEST 1
+#ifdef USE_BRANCHFREE_TEST
+SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
+{
+#if defined(__CELLOS_LV2__) && defined (__SPU__)
+ vec_ushort8 vecMin = {aabbMin1[0],aabbMin2[0],aabbMin1[2],aabbMin2[2],aabbMin1[1],aabbMin2[1],0,0};
+ vec_ushort8 vecMax = {aabbMax2[0],aabbMax1[0],aabbMax2[2],aabbMax1[2],aabbMax2[1],aabbMax1[1],0,0};
+ vec_ushort8 isGt = spu_cmpgt(vecMin,vecMax); //any lane with min > max means the boxes are separated on that axis
+ return spu_extract(spu_gather(isGt),0)==0;
+
+#else
+ return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
+ & (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
+ & (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
+ 1, 0);
+#endif
+}
+#else
+
+SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
+{
+ unsigned int overlap = 1;
+ overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap;
+ overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? 0 : overlap;
+ overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? 0 : overlap;
+ return overlap;
+}
+#endif
+
+void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex);
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
new file mode 100644
index 00000000000..8e540d9297b
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
@@ -0,0 +1,242 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuContactResult.h"
+
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+
+//When debugging outside the SPU toolchain, map spu_printf onto the C runtime printf
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+#ifndef __SPU__
+#include <stdio.h>
+#define spu_printf printf
+#endif //__SPU__
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+///Creates an idle contact result: no manifold attached, no write-back pending.
+///Friction, restitution and the swap flag get defined defaults here so they never hold
+///indeterminate values; setContactInfo() overwrites them (and assigns the root transforms).
+SpuContactResult::SpuContactResult()
+	: m_manifoldAddress(0),
+	m_spuManifold(NULL),
+	m_RequiresWriteBack(false),
+	m_combinedFriction(btScalar(0.)),
+	m_combinedRestitution(btScalar(0.)),
+	m_isSwapped(false)
+{
+}
+
+ ///Swaps the export double buffer one final time before the object goes away.
+ ///NOTE(review): the comment in writeDoubleBufferedManifold states that swapBuffers() performs the DMA wait,
+ ///so this presumably drains a still pending manifold transfer - confirm against DoubleBuffer.
+ SpuContactResult::~SpuContactResult()
+{
+ g_manifoldDmaExport.swapBuffers();
+}
+
+///Default material combiner: multiplies both friction values and clamps the product to the range [-10,10].
+///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback;
+inline btScalar calculateCombinedFriction(btScalar friction0,btScalar friction1)
+{
+	const btScalar frictionLimit = btScalar(10.);
+	const btScalar product = friction0*friction1;
+
+	if (product > frictionLimit)
+		return frictionLimit;
+	if (product < -frictionLimit)
+		return -frictionLimit;
+	return product;
+}
+
+///Default restitution combiner: the plain product of both restitution values (no clamping).
+inline btScalar calculateCombinedRestitution(btScalar restitution0,btScalar restitution1)
+{
+	const btScalar combined = restitution0*restitution1;
+	return combined;
+}
+
+
+
+ ///Attaches a manifold to this result and caches the per-pair data used by addContactPoint() and flush().
+ ///spuManifold is the manifold contacts are added to; manifoldAddress is the address flush() writes it back to.
+ ///The friction and restitution pairs are combined once here and stored for every later contact point.
+ void SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped)
+ {
+	//spu_printf("SpuContactResult::setContactInfo ManifoldAddress: %lu\n", manifoldAddress);
+
+	//which manifold to fill, and where it lives
+	m_spuManifold = spuManifold;
+	m_manifoldAddress = manifoldAddress;
+	m_isSwapped = isSwapped;
+
+	//world transforms of the two root objects
+	m_rootWorldTransform0 = worldTrans0;
+	m_rootWorldTransform1 = worldTrans1;
+
+	//material properties shared by all points of this pair
+	m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1);
+	m_combinedFriction = calculateCombinedFriction(friction0,friction1);
+ }
+
+ ///Implementation of the virtual declared in SpuContactResult; both arguments are ignored and nothing is stored.
+ void SpuContactResult::setShapeIdentifiersA(int partId0,int index0)
+ {
+
+ }
+
+ ///Implementation of the virtual declared in SpuContactResult; both arguments are ignored and nothing is stored.
+ ///(spuNodeCallback::processNode calls this with the sub part and triangle index of the current triangle.)
+ void SpuContactResult::setShapeIdentifiersB(int partId1,int index1)
+ {
+
+ }
+
+
+
+///Adds one contact point to manifoldPtr, or replaces the cached point it matches.
+///normalOnBInWorld, pointInWorld and depth describe the contact in world space; transA/transB are used to
+///express the two contact positions in local coordinates; combinedFriction/combinedRestitution are stored on the point.
+///A point whose depth exceeds the manifold's contact breaking threshold is rejected.
+///return true if it requires a dma transfer back
+bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
+	const btVector3& pointInWorld,
+	float depth,
+	btPersistentManifold* manifoldPtr,
+	btTransform& transA,
+	btTransform& transB,
+	btScalar combinedFriction,
+	btScalar combinedRestitution,
+	bool isSwapped)
+{
+	//fetched once so that the debug output below and the rejection test use the same value
+	const btScalar contactThreshold = manifoldPtr->getContactBreakingThreshold();
+
+	//spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactThreshold,manifoldPtr);
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	spu_printf("SPU: contactTreshold %f\n",contactThreshold);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+	if (depth > contactThreshold)
+		return false;
+
+	//only the direction of the normal depends on isSwapped, everything derived from it is shared
+	btVector3 normal;
+	if (isSwapped)
+	{
+		normal = normalOnBInWorld * -1;
+	}
+	else
+	{
+		normal = normalOnBInWorld;
+	}
+
+	btVector3 pointA = pointInWorld + normal * depth;
+	btVector3 localA = transA.invXform(pointA );
+	btVector3 localB = transB.invXform(pointInWorld);
+
+	btManifoldPoint newPt(localA,localB,normal,depth);
+	newPt.m_positionWorldOnA = pointA;
+	newPt.m_positionWorldOnB = pointInWorld;
+
+	newPt.m_combinedFriction = combinedFriction;
+	newPt.m_combinedRestitution = combinedRestitution;
+
+	const int insertIndex = manifoldPtr->getCacheEntry(newPt);
+	if (insertIndex >= 0)
+	{
+		// we need to replace the current contact point, otherwise small errors will accumulate (spheres start rolling etc)
+		manifoldPtr->replaceContactPoint(newPt,insertIndex);
+	}
+	else
+	{
+		/*
+		///@todo: SPU callbacks, either immediate (local on the SPU), or deferred
+		//User can override friction and/or restitution
+		if (gContactAddedCallback &&
+			//and if either of the two bodies requires custom material
+			((m_body0->m_collisionFlags & btCollisionObject::customMaterialCallback) ||
+			(m_body1->m_collisionFlags & btCollisionObject::customMaterialCallback)))
+		{
+			//experimental feature info, for per-triangle material etc.
+			(*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1);
+		}
+		*/
+		manifoldPtr->addManifoldPoint(newPt);
+	}
+
+	//the manifold was modified either way, so it has to be transferred back
+	return true;
+}
+
+
+///Copies lsManifold into the export double buffer and starts a DMA put of it to the address mmManifold.
+///The body is only compiled when __SPU__ or USE_LIBSPE2 is defined; on other builds this function does nothing.
+void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold)
+{
+ ///only write back the contact information on SPU. Other platforms avoid copying, and use the data in-place
+ ///see SpuFakeDma.cpp 'cellDmaLargeGetReadOnly'
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+ //stage a copy of the manifold in the front buffer
+ memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold));
+
+ //presumably the staged copy becomes the back buffer here, which is what the put below transfers - TODO confirm in SpuDoubleBuffer.h
+ g_manifoldDmaExport.swapBuffers();
+ ppu_address_t mmAddr = (ppu_address_t)mmManifold;
+ g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9));
+ // Should there be any kind of wait here? What if somebody tries to use this tag again? What if we call this function again really soon?
+ //no, the swapBuffers does the wait
+#endif
+}
+
+///Records one contact (world space normal on B, world space point, penetration depth) on the manifold that was
+///attached through setContactInfo(), and remembers whether the manifold now needs to be written back.
+void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+{
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
+	spu_printf("*** normal = %f,%f,%f\n",normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
+	spu_printf("*** position = %f,%f,%f\n",pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
+	// int sman = sizeof(rage::phManifold);
+	// spu_printf("sizeof_manifold = %i\n",sman);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+	//rebuild both vectors from their x/y/z components before passing them on
+	btVector3 worldNormalOnB(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
+	btVector3 worldPoint(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
+
+	//process the contact point
+	const bool manifoldChanged = ManifoldResultAddContactPoint(
+		worldNormalOnB,
+		worldPoint,
+		depth,
+		m_spuManifold,
+		m_rootWorldTransform0,
+		m_rootWorldTransform1,
+		m_combinedFriction,
+		m_combinedRestitution,
+		m_isSwapped);
+
+	//the flag is sticky: once set it stays set until flush() clears it
+	if (manifoldChanged)
+	{
+		m_RequiresWriteBack = true;
+	}
+}
+
+///Finishes work on the currently attached manifold.
+///If the manifold holds any contacts they are refreshed against the stored root transforms and a write-back is forced;
+///a pending write-back is then issued through writeDoubleBufferedManifold().
+///Afterwards the manifold is detached (m_spuManifold = NULL) and the write-back flag is cleared.
+void SpuContactResult::flush()
+{
+
+ if (m_spuManifold && m_spuManifold->getNumContacts())
+ {
+ m_spuManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1);
+ m_RequiresWriteBack = true;
+ }
+
+
+ if (m_RequiresWriteBack)
+ {
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n");
+ spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts());
+ spu_printf("Manifold address: %llu\n", m_manifoldAddress);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+ // spu_printf("writeDoubleBufferedManifold\n");
+ //m_manifoldAddress is the address that was handed to setContactInfo()
+ writeDoubleBufferedManifold(m_spuManifold, (btPersistentManifold*)m_manifoldAddress);
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ spu_printf("SPU: Finished (Put) DMA\n");
+#endif //DEBUG_SPU_COLLISION_DETECTION
+ }
+ //reset, so a later flush() without a new setContactInfo() is a no-op
+ m_spuManifold = NULL;
+ m_RequiresWriteBack = false;
+}
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
new file mode 100644
index 00000000000..394f56dcbd1
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
@@ -0,0 +1,106 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_CONTACT_RESULT2_H
+#define SPU_CONTACT_RESULT2_H
+
+
+#ifndef _WIN32
+#include <stdint.h>
+#endif
+
+
+
+#include "../SpuDoubleBuffer.h"
+
+
+#include "LinearMath/btTransform.h"
+
+
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+
+class btCollisionShape;
+
+
+///Per-pair input of the SPU narrow phase: the two shapes, their transforms and the manifold to fill.
+///Index 0 / suffix 0 refers to the first object of the pair, index 1 / suffix 1 to the second.
+struct SpuCollisionPairInput
+{
+ //addresses of the two collision shapes; used as DMA source addresses by the collision task
+ ppu_address_t m_collisionShapes[2];
+ //directly dereferenceable pointers to the two shapes
+ btCollisionShape* m_spuCollisionShapes[2];
+
+ //address of the btPersistentManifold of this pair (cast to btPersistentManifold* by the collision task)
+ ppu_address_t m_persistentManifoldPtr;
+ btVector3 m_primitiveDimensions0;
+ btVector3 m_primitiveDimensions1;
+ //shape types, compared against the *_SHAPE_PROXYTYPE constants
+ int m_shapeType0;
+ int m_shapeType1;
+ float m_collisionMargin0;
+ float m_collisionMargin1;
+
+ btTransform m_worldTransform0;
+ btTransform m_worldTransform1;
+
+ //forwarded to SpuContactResult::setContactInfo; there it flips the contact normal
+ bool m_isSwapped;
+ bool m_useEpa;
+};
+
+
+///ClosestPointInput extended with one convex vertex data pointer per shape (index 0 and 1).
+struct SpuClosestPointInput : public btDiscreteCollisionDetectorInterface::ClosestPointInput
+{
+ struct SpuConvexPolyhedronVertexData* m_convexVertexData[2];
+};
+
+///SpuContactResult exports the contact points using double-buffered DMA transfers, only when needed
+///So when an existing contact point is duplicated, no transfer/refresh is performed.
+///Typical use: setContactInfo() once per pair, addContactPoint() for every contact found, then flush().
+class SpuContactResult : public btDiscreteCollisionDetectorInterface::Result
+{
+	//world transforms of the two root objects, set by setContactInfo()
+	btTransform m_rootWorldTransform0;
+	btTransform m_rootWorldTransform1;
+	//address the manifold is written back to by flush()
+	ppu_address_t m_manifoldAddress;
+
+	//manifold that contact points are added to; NULL while no pair is being processed
+	btPersistentManifold* m_spuManifold;
+	//true once a contact was added/replaced, cleared by flush()
+	bool m_RequiresWriteBack;
+	btScalar m_combinedFriction;
+	btScalar m_combinedRestitution;
+
+	bool m_isSwapped;
+
+	//staging buffers for the manifold write-back (a member, despite the g_ prefix)
+	DoubleBuffer<btPersistentManifold, 1> g_manifoldDmaExport;
+
+	public:
+	SpuContactResult();
+	virtual ~SpuContactResult();
+
+	///Returns the manifold currently attached through setContactInfo(), or NULL after flush().
+	btPersistentManifold* GetSpuManifold() const
+	{
+		return m_spuManifold;
+	}
+
+	virtual void setShapeIdentifiersA(int partId0,int index0);
+	virtual void setShapeIdentifiersB(int partId1,int index1);
+
+	///Attaches a manifold and stores transforms, combined friction/restitution and the swap flag for the pair.
+	void setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped);
+
+
+	///Transfers lsManifold to the address mmManifold (does nothing unless built for SPU / libspe2).
+	void writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold);
+
+	///Adds one contact to the attached manifold and marks it for write-back when the manifold changed.
+	virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth);
+
+	///Refreshes and writes back the attached manifold if required, then detaches it.
+	void flush();
+};
+
+
+
+#endif //SPU_CONTACT_RESULT2_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
new file mode 100644
index 00000000000..449f19288c4
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
@@ -0,0 +1,51 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef SPU_CONVEX_PENETRATION_DEPTH_H
+#define SPU_CONVEX_PENETRATION_DEPTH_H
+
+
+
+class btStackAlloc;
+class btIDebugDraw;
+//the directory is spelled NarrowPhaseCollision; the exact case matters on case sensitive file systems
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+
+#include "LinearMath/btTransform.h"
+
+
+///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation.
+///This SPU flavour adds a pure virtual calcPenDepth overload that takes the shapes as untyped pointers plus
+///shape type, margin and optional convex vertex data for each side.
+///NOTE(review): SpuVoronoiSimplexSolver is neither declared nor included in this header, so it relies on the
+///including file to provide it - confirm before including this header on its own.
+class SpuConvexPenetrationDepthSolver : public btConvexPenetrationDepthSolver
+{
+public:
+
+ virtual ~SpuConvexPenetrationDepthSolver() {};
+ ///Returns bool; v, pa and pb are non-const references and therefore presumably outputs - TODO confirm with an implementation.
+ virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
+ void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
+ btTransform& transA,const btTransform& transB,
+ btVector3& v, btVector3& pa, btVector3& pb,
+ class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
+ struct SpuConvexPolyhedronVertexData* convexVertexDataA,
+ struct SpuConvexPolyhedronVertexData* convexVertexDataB
+ ) const = 0;
+
+
+};
+
+
+
+#endif //SPU_CONVEX_PENETRATION_DEPTH_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
new file mode 100644
index 00000000000..c3dfaa793e3
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
@@ -0,0 +1,1381 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuGatheringCollisionTask.h"
+
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+#include "../SpuDoubleBuffer.h"
+
+#include "../SpuCollisionTaskProcess.h"
+#include "../SpuGatheringCollisionDispatcher.h" //for SPU_BATCHSIZE_BROADPHASE_PAIRS
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "../SpuContactManifoldCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "SpuContactResult.h"
+#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btConvexPointCloudShape.h"
+
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+
+#include "SpuMinkowskiPenetrationDepthSolver.h"
+//#include "SpuEpaPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+
+
+#include "boxBoxDistance.h"
+#include "BulletMultiThreaded/vectormath2bullet.h"
+#include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData
+#include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+
+#ifdef __SPU__
+///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases
+#ifndef USE_LIBSPE2
+#define USE_SOFTWARE_CACHE 1
+#endif
+#endif //__SPU__
+
+int gSkippedCol = 0;
+int gProcessedCol = 0;
+
+////////////////////////////////////////////////
+/// software caching
+#if USE_SOFTWARE_CACHE
+#include <spu_intrinsics.h>
+#include <sys/spu_thread.h>
+#include <sys/spu_event.h>
+#include <stdint.h>
+#define SPE_CACHE_NWAY 4
+//#define SPE_CACHE_NSETS 32, 16
+#define SPE_CACHE_NSETS 8
+//#define SPE_CACHELINE_SIZE 512
+#define SPE_CACHELINE_SIZE 128
+#define SPE_CACHE_SET_TAGID(set) 15
+///make sure that spe_cache.h is below those defines!
+#include "../Extras/software_cache/cache/include/spe_cache.h"
+
+
+int g_CacheMisses=0;
+int g_CacheHits=0;
+
+#if 0 // Added to allow cache misses and hits to be tracked, change this to 1 to restore unmodified version
+#define spe_cache_read(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1)
+#else
+//Instrumented replacement for spe_cache_read: looks up the cache line for address ea, counts the access in
+//g_CacheMisses or g_CacheHits, and on a miss calls _spe_cache_miss_ before continuing.
+//The macro is a statement expression ( ({ ... }) ), whose value is a void* to the byte for ea inside spe_cache_mem.
+//NOTE(review): the spu_writech/spu_mfcstat pair presumably waits for the line fill to complete - confirm against spe_cache.h.
+#define spe_cache_read(ea) \
+({ \
+ int set, idx, line, byte; \
+ _spe_cache_nway_lookup_(ea, set, idx); \
+ \
+ if (btUnlikely(idx < 0)) { \
+ ++g_CacheMisses; \
+ idx = _spe_cache_miss_(ea, set, -1); \
+ spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
+ spu_mfcstat(MFC_TAG_UPDATE_ALL); \
+ } \
+ else \
+ { \
+ ++g_CacheHits; \
+ } \
+ line = _spe_cacheline_num_(set, idx); \
+ byte = _spe_cacheline_byte_offset_(ea); \
+ (void *) &spe_cache_mem[line + byte]; \
+})
+
+#endif
+
+#endif // USE_SOFTWARE_CACHE
+
+bool gUseEpa = false;
+
+#ifdef USE_SN_TUNER
+#include <LibSN_SPU.h>
+#endif //USE_SN_TUNER
+
+//Select what spu_printf means for this build:
+// - SPU without libspe2: the spu_printf from <spu_printf.h>
+// - libspe2: spu_printf expands to nothing
+// - anything else: plain printf (this branch also defines IGNORE_ALIGNMENT)
+#if defined (__SPU__) && !defined (USE_LIBSPE2)
+#include <spu_printf.h>
+#elif defined (USE_LIBSPE2)
+//NOTE(review): this macro accepts exactly one argument, so an spu_printf call that passes format arguments
+//will not preprocess in a libspe2 build; the calls visible in this part of the file are commented out.
+#define spu_printf(a)
+#else
+#define IGNORE_ALIGNMENT 1
+#include <stdio.h>
+#include <stdlib.h>
+#define spu_printf printf
+
+#endif
+
+//int gNumConvexPoints0=0;
+
+///Make sure no destructors are called on this memory
+///Scratch memory of one collision task: raw aligned buffers to copy objects into, plus the pointers through
+///which the rest of the task accesses those objects.
+struct CollisionTask_LocalStoreMemory
+{
+ ///This CollisionTask_LocalStoreMemory is mainly used for the SPU version, using explicit DMA
+ ///Other platforms can use other memory programming models.
+
+ ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairsBuffer[SPU_BATCHSIZE_BROADPHASE_PAIRS]);
+ DoubleBuffer<unsigned char, MIDPHASE_WORKUNIT_PAGE_SIZE> g_workUnitTaskBuffers;
+ //raw byte buffers sized for one object each (plus 16 spare bytes)
+ ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgoBuffer [sizeof(SpuContactManifoldCollisionAlgorithm)+16]);
+ ATTRIBUTE_ALIGNED16(char gColObj0Buffer [sizeof(btCollisionObject)+16]);
+ ATTRIBUTE_ALIGNED16(char gColObj1Buffer [sizeof(btCollisionObject)+16]);
+ ///we reserve 32bit integer indices, even though they might be 16bit
+ ATTRIBUTE_ALIGNED16(int spuIndices[16]);
+ btPersistentManifold gPersistentManifoldBuffer;
+ CollisionShape_LocalStoreMemory gCollisionShapes[2];
+ bvhMeshShape_LocalStoreMemory bvhShapeData;
+ SpuConvexPolyhedronVertexData convexVertexData[2];
+ CompoundShape_LocalStoreMemory compoundShapeData[2];
+
+ ///The following pointers might either point into this local store memory, or to the original/other memory locations.
+ ///See SpuFakeDma for implementation of cellDmaSmallGetReadOnly.
+ btCollisionObject* m_lsColObj0Ptr;
+ btCollisionObject* m_lsColObj1Ptr;
+ btBroadphasePair* m_pairsPointer;
+ btPersistentManifold* m_lsManifoldPtr;
+ SpuContactManifoldCollisionAlgorithm* m_lsCollisionAlgorithmPtr;
+
+ bool needsDmaPutContactManifoldAlgo;
+
+ ///Accessors: each one simply returns the corresponding pointer member above.
+ btCollisionObject* getColObj0()
+ {
+ return m_lsColObj0Ptr;
+ }
+ btCollisionObject* getColObj1()
+ {
+ return m_lsColObj1Ptr;
+ }
+
+
+ btBroadphasePair* getBroadphasePairPtr()
+ {
+ return m_pairsPointer;
+ }
+
+ SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm()
+ {
+ return m_lsCollisionAlgorithmPtr;
+ }
+
+ btPersistentManifold* getContactManifoldPtr()
+ {
+ return m_lsManifoldPtr;
+ }
+};
+
+
+///createCollisionLocalStoreMemory returns the CollisionTask_LocalStoreMemory a task works with, as a void*.
+///With __CELLOS_LV2__ or USE_LIBSPE2 every call returns the address of the single static, 16 byte aligned instance;
+///otherwise every call allocates a new instance with new.
+#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
+
+ATTRIBUTE_ALIGNED16(CollisionTask_LocalStoreMemory gLocalStoreMemory);
+
+void* createCollisionLocalStoreMemory()
+{
+ return &gLocalStoreMemory;
+}
+#else
+void* createCollisionLocalStoreMemory()
+{
+ //NOTE(review): no matching delete is visible in this part of the file - check who releases this
+ return new CollisionTask_LocalStoreMemory;
+}
+
+#endif
+
+void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts);
+
+
+///Reads size bytes from address ea into buffer.
+///With USE_SOFTWARE_CACHE the data comes from the software cache (the request must not cross a cache line);
+///otherwise it is fetched with stallingUnalignedDmaSmallGet.
+SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
+{
+#if USE_SOFTWARE_CACHE
+ // Check for alignment requirements. We need to make sure the entire request fits within one cache line,
+ // so the first and last bytes should fall on the same cache line
+ btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));
+
+ //ls points at the cached copy of the byte at ea
+ void* ls = spe_cache_read(ea);
+ memcpy(buffer, ls, size);
+#else
+ stallingUnalignedDmaSmallGet(buffer,ea,size);
+#endif
+}
+
+///Fetches three small items of size bytes each (size must be below 16) from ea0/ea1/ea2 into ls0/ls1/ls2.
+///All three transfers are started on DMA tag 1, waited for together, and then copied out of aligned temporaries.
+SIMD_FORCE_INLINE void small_cache_read_triple( void* ls0, ppu_address_t ea0,
+ void* ls1, ppu_address_t ea1,
+ void* ls2, ppu_address_t ea2,
+ size_t size)
+{
+ btAssert(size<16);
+ //32 bytes each: up to 15 bytes of offset plus up to 15 bytes of data
+ ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]);
+ ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]);
+ ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]);
+
+ uint32_t i;
+
+
+ ///make sure last 4 bits are the same, for cellDmaSmallGet
+ char* localStore0 = (char*)ls0;
+ uint32_t last4BitsOffset = ea0 & 0x0f;
+ char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
+#ifdef __SPU__
+ cellDmaSmallGet(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
+#else
+ //off the SPU the read-only variant returns the pointer to read from, which replaces the temporary
+ tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
+#endif
+
+
+ char* localStore1 = (char*)ls1;
+ last4BitsOffset = ea1 & 0x0f;
+ char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
+#ifdef __SPU__
+ cellDmaSmallGet(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
+#else
+ tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
+#endif
+
+ char* localStore2 = (char*)ls2;
+ last4BitsOffset = ea2 & 0x0f;
+ char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
+#ifdef __SPU__
+ cellDmaSmallGet(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
+#else
+ tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
+#endif
+
+
+ //one wait covers all three transfers, they share tag 1
+ cellDmaWaitTagStatusAll( DMA_MASK(1) );
+
+ //this is slowish, perhaps memcpy on SPU is smarter?
+ for (i=0; btLikely( i<size );i++)
+ {
+ localStore0[i] = tmpTarget0[i];
+ localStore1[i] = tmpTarget1[i];
+ localStore2[i] = tmpTarget2[i];
+ }
+
+
+}
+
+
+
+
+///Node overlap callback for a triangle mesh: for every reported triangle it fetches the three vertex indices and
+///the three vertices, builds a scaled triangle shape from them and runs ProcessSpuConvexConvexCollision against it.
+class spuNodeCallback : public btNodeOverlapCallback
+{
+ SpuCollisionPairInput* m_wuInput;
+ SpuContactResult& m_spuContacts;
+ CollisionTask_LocalStoreMemory* m_lsMemPtr;
+ //triangle shape that is refilled for every processed triangle
+ ATTRIBUTE_ALIGNED16(btTriangleShape) m_tmpTriangleShape;
+
+ //only referenced from commented-out code within this class
+ ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
+ //x,y,z of the vertex currently being fetched, before mesh scaling is applied
+ ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
+
+
+
+public:
+ ///Stores the pair input, the scratch memory and the contact result; nothing is copied or fetched here.
+ spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts)
+ : m_wuInput(wuInput),
+ m_spuContacts(spuContacts),
+ m_lsMemPtr(lsMemPtr)
+ {
+ }
+
+ ///Collides shape 0 of the pair against triangle triangleIndex of the mesh; subPart is only forwarded to setShapeIdentifiersB.
+ virtual void processNode(int subPart, int triangleIndex)
+ {
+ ///Create a triangle on the stack, call process collision, with GJK
+ ///DMA the vertices, can benefit from software caching
+
+ // spu_printf("processNode with triangleIndex %d\n",triangleIndex);
+
+ //step 1: fetch the three vertex indices of the triangle into spuIndices[0..2]
+ if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
+ {
+ unsigned short int* indexBasePtr = (unsigned short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
+ ATTRIBUTE_ALIGNED16(unsigned short int tmpIndices[3]);
+
+ small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
+ &tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
+ &tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
+ sizeof(unsigned short int));
+
+ //widen the 16 bit indices to int
+ m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
+ m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
+ m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
+ } else
+ {
+ //every other index type is read as 32 bit indices
+ unsigned int* indexBasePtr = (unsigned int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
+
+ small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
+ &m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
+ &m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
+ sizeof(int));
+ }
+
+ // spu_printf("SPU index0=%d ,",spuIndices[0]);
+ // spu_printf("SPU index1=%d ,",spuIndices[1]);
+ // spu_printf("SPU index2=%d ,",spuIndices[2]);
+ // spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
+
+ //step 2: fetch each vertex, apply the mesh scaling and store it in the temporary triangle shape
+ const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
+ for (int j=2;btLikely( j>=0 );j--)
+ {
+ int graphicsindex = m_lsMemPtr->spuIndices[j];
+
+ // spu_printf("SPU index=%d ,",graphicsindex);
+ btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
+ // spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
+
+
+ ///handle un-aligned vertices...
+
+ //another DMA for each vertex
+ small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
+ &spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
+ &spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
+ sizeof(btScalar));
+
+ m_tmpTriangleShape.getVertexPtr(j).setValue(spuUnscaledVertex[0]*meshScaling.getX(),
+ spuUnscaledVertex[1]*meshScaling.getY(),
+ spuUnscaledVertex[2]*meshScaling.getZ());
+
+ // spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
+ }
+
+
+ //step 3: collide against a copy of the pair input whose second shape is the temporary triangle
+ SpuCollisionPairInput triangleConcaveInput(*m_wuInput);
+// triangleConcaveInput.m_spuCollisionShapes[1] = &spuTriangleVertices[0];
+ triangleConcaveInput.m_spuCollisionShapes[1] = &m_tmpTriangleShape;
+ triangleConcaveInput.m_shapeType1 = TRIANGLE_SHAPE_PROXYTYPE;
+
+ m_spuContacts.setShapeIdentifiersB(subPart,triangleIndex);
+
+ // m_spuContacts.flush();
+
+ ProcessSpuConvexConvexCollision(&triangleConcaveInput, m_lsMemPtr,m_spuContacts);
+ ///this flush should be automatic
+ // m_spuContacts.flush();
+ }
+
+};
+
+
+
+///Convex versus static plane: finds the convex vertex that lies deepest along the negated plane normal and, when its
+///signed distance to the plane is below the manifold's contact breaking threshold, reports it as a single contact.
+///Shape 0 of wuInput is treated as the convex shape, shape 1 as the plane.
+void btConvexPlaneCollideSingleContact (SpuCollisionPairInput* wuInput,CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts)
+{
+	btConvexShape* convex = (btConvexShape*) wuInput->m_spuCollisionShapes[0];
+	btStaticPlaneShape* plane = (btStaticPlaneShape*) wuInput->m_spuCollisionShapes[1];
+
+	const btVector3& planeNormal = plane->getPlaneNormal();
+	const btScalar& planeConstant = plane->getPlaneConstant();
+
+	//relative transforms between the two local frames
+	btTransform convexWorldTransform = wuInput->m_worldTransform0;
+	btTransform convexInPlaneTrans = wuInput->m_worldTransform1.inverse() * convexWorldTransform;
+	btTransform planeInConvex = convexWorldTransform.inverse() * wuInput->m_worldTransform1;
+
+	//support vertex of the convex shape towards the plane, queried in the convex shape's local frame
+	//btVector3 vtx = convex->localGetSupportVertexWithoutMarginNonVirtual(planeInConvex.getBasis()*-planeNormal);
+	btVector3 vtx = convex->localGetSupportVertexNonVirtual(planeInConvex.getBasis()*-planeNormal);
+
+	//signed distance of that vertex to the plane, measured in the plane's local frame
+	btVector3 vtxInPlane = convexInPlaneTrans(vtx);
+	btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant);
+
+	//project the vertex onto the plane and take the projection to world space
+	btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal;
+	btVector3 vtxInPlaneWorld = wuInput->m_worldTransform1 * vtxInPlaneProjected;
+
+	//resultOut->setPersistentManifold(m_manifoldPtr);
+	if (!(distance < lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold()))
+	{
+		return;
+	}
+
+	/// report a contact. internally this will be kept persistent, and contact reduction is done
+	btVector3 normalOnSurfaceB =wuInput->m_worldTransform1.getBasis() * planeNormal;
+	btVector3 pOnB = vtxInPlaneWorld;
+	spuContacts.addContactPoint(normalOnSurfaceB,pOnB,distance);
+}
+
+///Convex versus plane collision for one pair.
+///For every side that is a convex hull, the hull shape and then its vertex data are fetched and the shape pointer in
+///wuInput is redirected to a local btConvexPointCloudShape; then the contact result is bound to the manifold and
+///btConvexPlaneCollideSingleContact is run.
+///Note that wuInput->m_spuCollisionShapes[] may be overwritten with pointers to locals (cpc0/cpc1) of this function.
+void ProcessConvexPlaneSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+
+ //NOTE(review): the register storage class is deprecated in C++11 and removed in C++17
+ register int dmaSize = 0;
+ register ppu_address_t dmaPpuAddress2;
+ btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
+
+ ///DMA in the vertices for convex shapes
+ ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
+ ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
+
+ //phase 1: start fetching the hull shape objects (tag 1); the waits happen in phase 2
+ if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ // spu_printf("SPU: DMA btConvexHullShape\n");
+
+ dmaSize = sizeof(btConvexHullShape);
+ dmaPpuAddress2 = wuInput->m_collisionShapes[0];
+
+ cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ //cellDmaWaitTagStatusAll(DMA_MASK(1));
+ }
+
+ if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ // spu_printf("SPU: DMA btConvexHullShape\n");
+ dmaSize = sizeof(btConvexHullShape);
+ dmaPpuAddress2 = wuInput->m_collisionShapes[1];
+ cellDmaGet(&convexHullShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ //cellDmaWaitTagStatusAll(DMA_MASK(1));
+ }
+
+ //phase 2: once the hull objects have arrived, fetch their vertex data
+ if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
+ lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
+ }
+
+
+ if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
+ lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
+ }
+
+
+ btConvexPointCloudShape cpc0,cpc1;
+
+ //phase 3: wrap the fetched points in point cloud shapes and use those as the collision shapes
+ //presumably dmaConvexVertexData issues its transfer on tag 2, hence the wait on DMA_MASK(2) - TODO confirm
+ if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(2));
+ lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
+ btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
+ const btVector3& localScaling = ch->getLocalScalingNV();
+ cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
+ wuInput->m_spuCollisionShapes[0] = &cpc0;
+ }
+
+ if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(2));
+ lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
+ btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
+ const btVector3& localScaling = ch->getLocalScalingNV();
+ cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
+ wuInput->m_spuCollisionShapes[1] = &cpc1;
+
+ }
+
+
+// const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
+// const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
+// int shapeType0 = wuInput->m_shapeType0;
+// int shapeType1 = wuInput->m_shapeType1;
+ float marginA = wuInput->m_collisionMargin0;
+ float marginB = wuInput->m_collisionMargin1;
+
+ //NOTE(review): cpInput is filled in but never passed to anything within this function
+ SpuClosestPointInput cpInput;
+ cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
+ cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
+ cpInput.m_transformA = wuInput->m_worldTransform0;
+ cpInput.m_transformB = wuInput->m_worldTransform1;
+ float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
+ cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
+
+ ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+ //phase 4: bind the contact result to the manifold, using the collision objects' own transforms and materials
+ btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
+ //spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
+ spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
+ lsMemPtr->getColObj1()->getWorldTransform(),
+ lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
+ lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
+ wuInput->m_isSwapped);
+
+
+ btConvexPlaneCollideSingleContact(wuInput,lsMemPtr,spuContacts);
+
+
+
+
+}
+
+
+
+
+////////////////////////
+/// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc)
+///////////////////
+void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+ //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
+ // Approach: compute the convex shape's AABB in the mesh's local space, quantize it, test it against the BVH subtree headers (fetched in batches) and walk every overlapping subtree with nodeCallback.
+ btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1];
+ //need the mesh interface, for access to triangle vertices
+ dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape);
+ // Initial AABB values; presumably overwritten by computeAabb() below (its definition is outside this chunk) - TODO confirm.
+ btVector3 aabbMin(-1,-400,-1);
+ btVector3 aabbMax(1,400,1);
+
+
+ //recalc aabbs
+ btTransform convexInTriangleSpace;
+ convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0; // pose of shape 0 expressed relative to shape 1
+ btConvexInternalShape* convexShape = (btConvexInternalShape*)wuInput->m_spuCollisionShapes[0];
+
+ computeAabb (aabbMin, aabbMax, convexShape, wuInput->m_collisionShapes[0], wuInput->m_shapeType0, convexInTriangleSpace);
+
+
+ //CollisionShape* triangleShape = static_cast<btCollisionShape*>(triBody->m_collisionShape);
+ //convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax);
+
+ // btScalar extraMargin = collisionMarginTriangle;
+ // btVector3 extra(extraMargin,extraMargin,extraMargin);
+ // aabbMax += extra;
+ // aabbMin -= extra;
+
+ ///quantize query AABB
+ unsigned short int quantizedQueryAabbMin[3];
+ unsigned short int quantizedQueryAabbMax[3];
+ lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0);
+ lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1);
+
+ QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
+ //spu_printf("SPU: numNodes = %d\n",nodeArray.size());
+
+ BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();
+
+ // Callback object handed to the tree walker below; it is constructed with the pair input, the scratch memory and the contact result.
+ spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts);
+ IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();
+ //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
+
+ // spu_printf("SPU: numSubTrees = %d\n",subTrees.size());
+ //not likely to happen (original remark). NOTE(review): the traversal below only runs when there is at least one subtree AND exactly one indexed mesh; every other case falls through without visiting any node.
+ if (subTrees.size() && indexArray.size() == 1)
+ {
+ ///DMA in the index info
+ dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ //iterate over the subtree headers in batches of at most MAX_SPU_SUBTREE_HEADERS
+ int numBatch = subTrees.size();
+ for (int i=0;i<numBatch;)
+ {
+ //@todo- can reorder DMA transfers for less stall
+ int remaining = subTrees.size() - i;
+ int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;
+
+ dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+
+ // spu_printf("nextBatch = %d\n",nextBatch);
+
+ for (int j=0;j<nextBatch;j++)
+ {
+ const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j];
+ // Only subtrees whose quantized bounds overlap the query box have their nodes fetched and walked.
+ unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
+ if (overlap)
+ {
+ btAssert(subtree.m_subtreeSize);
+
+ //dma the actual nodes of this subtree
+ dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
+ cellDmaWaitTagStatusAll(DMA_MASK(2));
+
+ /* Walk this subtree */
+ spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,
+ &lsMemPtr->bvhShapeData.gSubtreeNodes[0],
+ 0,
+ subtree.m_subtreeSize);
+ }
+ // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize);
+ }
+
+ // unsigned short int m_quantizedAabbMin[3];
+ // unsigned short int m_quantizedAabbMax[3];
+ // int m_rootNodeIndex;
+ // int m_subtreeSize;
+ i+=nextBatch;
+ }
+
+ //pre-fetch first tree, then loop and double buffer
+ }
+
+}
+
+
+int stats[11]={0,0,0,0,0,0,0,0,0,0,0}; // counters indexed by btGjkPairDetector::m_lastUsedMethod; incremented in ProcessSpuConvexConvexCollision
+int degenerateStats[11]={0,0,0,0,0,0,0,0,0,0,0}; // counters indexed by btGjkPairDetector::m_degenerateSimplex; incremented in ProcessSpuConvexConvexCollision
+
+
+////////////////////////
+/// Convex versus Convex collision detection (handles collision between sphere, box, cylinder, triangle, cone, convex polyhedron etc)
+///////////////////
+void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+ register int dmaSize;
+ register ppu_address_t dmaPpuAddress2;
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ //spu_printf("SPU: ProcessSpuConvexConvexCollision\n");
+#endif //DEBUG_SPU_COLLISION_DETECTION
+ //CollisionShape* shape0 = (CollisionShape*)wuInput->m_collisionShapes[0];
+ //CollisionShape* shape1 = (CollisionShape*)wuInput->m_collisionShapes[1];
+ btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
+
+ bool genericGjk = true; // never changed, so the block below always executes
+
+ if (genericGjk)
+ {
+ //try generic GJK
+
+
+
+ //SpuConvexPenetrationDepthSolver* penetrationSolver=0;
+ btVoronoiSimplexSolver simplexSolver;
+ btGjkEpaPenetrationDepthSolver epaPenetrationSolver2;
+
+ btConvexPenetrationDepthSolver* penetrationSolver = &epaPenetrationSolver2;
+ // The EPA solver is already selected above; the ENABLE_EPA block re-assigns the same solver and its else-branch is empty, so penetrationSolver always points at epaPenetrationSolver2.
+ //SpuMinkowskiPenetrationDepthSolver minkowskiPenetrationSolver;
+#ifdef ENABLE_EPA
+ if (gUseEpa)
+ {
+ penetrationSolver = &epaPenetrationSolver2;
+ } else
+#endif
+ {
+ //penetrationSolver = &minkowskiPenetrationSolver;
+ }
+
+ // Stack buffers that receive the main-memory btConvexHullShape objects; they are only filled for CONVEX_HULL_SHAPE_PROXYTYPE shapes.
+ ///DMA in the vertices for convex shapes
+ ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
+ ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
+
+ if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ // spu_printf("SPU: DMA btConvexHullShape\n");
+
+ dmaSize = sizeof(btConvexHullShape);
+ dmaPpuAddress2 = wuInput->m_collisionShapes[0];
+
+ cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ //cellDmaWaitTagStatusAll(DMA_MASK(1));
+ }
+
+ if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ // spu_printf("SPU: DMA btConvexHullShape\n");
+ dmaSize = sizeof(btConvexHullShape);
+ dmaPpuAddress2 = wuInput->m_collisionShapes[1];
+ cellDmaGet(&convexHullShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ //cellDmaWaitTagStatusAll(DMA_MASK(1));
+ }
+ // Both hull fetches above were issued on DMA tag 1; each branch below waits on that tag before reading its buffer.
+ if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
+ lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
+ }
+
+
+ if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
+ lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
+ }
+
+ // Hull shapes are replaced by stack-local point-cloud shapes built over the fetched point buffers.
+ btConvexPointCloudShape cpc0,cpc1;
+ // NOTE(review): wuInput->m_spuCollisionShapes[] is redirected to cpc0/cpc1, which go out of scope when this block ends; those pointers must not be used after this function returns.
+ if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(2)); // presumably the tag dmaConvexVertexData uses for the point data - TODO confirm
+ lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
+ btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
+ const btVector3& localScaling = ch->getLocalScalingNV();
+ cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
+ wuInput->m_spuCollisionShapes[0] = &cpc0;
+ }
+
+ if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+ {
+ cellDmaWaitTagStatusAll(DMA_MASK(2));
+ lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
+ btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
+ const btVector3& localScaling = ch->getLocalScalingNV();
+ cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
+ wuInput->m_spuCollisionShapes[1] = &cpc1;
+
+ }
+
+ // Read the shape pointers only now, after the optional redirection to cpc0/cpc1 above.
+ const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
+ const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
+ int shapeType0 = wuInput->m_shapeType0;
+ int shapeType1 = wuInput->m_shapeType1;
+ float marginA = wuInput->m_collisionMargin0;
+ float marginB = wuInput->m_collisionMargin1;
+ // Closest-point query input: per-shape vertex data, world transforms, and a squared distance limit derived from both margins plus the manifold's contact breaking threshold.
+ SpuClosestPointInput cpInput;
+ cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
+ cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
+ cpInput.m_transformA = wuInput->m_worldTransform0;
+ cpInput.m_transformB = wuInput->m_worldTransform1;
+ float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
+ cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
+
+ ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+ btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
+ //spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
+ spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
+ lsMemPtr->getColObj1()->getWorldTransform(),
+ lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
+ lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
+ wuInput->m_isSwapped);
+
+ {
+ btGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&simplexSolver,penetrationSolver);//&vsSolver,penetrationSolver);
+ gjk.getClosestPoints(cpInput,spuContacts,0);//,debugDraw);
+ // Bookkeeping in the file-scope counter arrays. NOTE(review): no bounds check or synchronization is visible here.
+ stats[gjk.m_lastUsedMethod]++;
+ degenerateStats[gjk.m_degenerateSimplex]++;
+
+#ifdef USE_SEPDISTANCE_UTIL
+ btScalar sepDist = gjk.getCachedSeparatingDistance()+spuManifold->getContactBreakingThreshold();
+ lsMemPtr->getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(gjk.getCachedSeparatingAxis(),sepDist,wuInput->m_worldTransform0,wuInput->m_worldTransform1);
+ lsMemPtr->needsDmaPutContactManifoldAlgo = true;
+#endif //USE_SEPDISTANCE_UTIL
+
+ }
+
+ }
+
+
+}
+
+
+template<typename T> void DoSwap(T& a, T& b) // Exchanges a and b by copying their raw bytes through a temporary buffer; no constructor or assignment operator of T is invoked.
+{
+ char tmp[sizeof(T)]; // byte buffer large enough for one T
+ memcpy(tmp, &a, sizeof(T)); // save a
+ memcpy(&a, &b, sizeof(T)); // a receives b's bytes
+ memcpy(&b, tmp, sizeof(T)); // b receives the saved bytes of a
+}
+
+SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem)
+{ // Fetches both btCollisionObject instances of the current pair into lsMem and copies their world transforms into collisionPairInput.
+ register int dmaSize;
+ register ppu_address_t dmaPpuAddress2;
+ // Object 0: its main-memory address comes from the local copy of the collision algorithm; transfer issued on DMA tag 1.
+ dmaSize = sizeof(btCollisionObject);//btTransform);
+ dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0();
+ lsMem.m_lsColObj0Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj0Buffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ // Object 1: same procedure, issued on DMA tag 2.
+ dmaSize = sizeof(btCollisionObject);//btTransform);
+ dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1();
+ lsMem.m_lsColObj1Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj1Buffer, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
+ // Wait on both tags before the fetched objects are read.
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+ btCollisionObject* ob0 = lsMem.getColObj0();
+ btCollisionObject* ob1 = lsMem.getColObj1();
+ // Seed the pair input with the objects' world transforms.
+ collisionPairInput.m_worldTransform0 = ob0->getWorldTransform();
+ collisionPairInput.m_worldTransform1 = ob1->getWorldTransform();
+}
+
+
+
+void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem,
+ SpuContactResult &spuContacts, // receives the contact info/points; flushed before this function returns
+ ppu_address_t collisionShape0Ptr, void* collisionShape0Loc, // shape 0: main-memory address and local-store buffer
+ ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true) // shape 1 likewise; dmaShapes=false skips the shape DMA in the convex/convex and convex/concave paths only
+{
+ // Dispatches on the pair's shape types: convex/convex, compound/compound, compound on one side (children are handled through recursive calls), otherwise convex versus static plane or concave mesh.
+ if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)
+ && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
+ {
+ if (dmaShapes)
+ {
+ dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+ dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+ }
+
+ btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+ btConvexInternalShape* spuConvexShape1 = (btConvexInternalShape*)collisionShape1Loc;
+
+ btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+ btVector3 dim1 = spuConvexShape1->getImplicitShapeDimensions();
+ // Record dimensions, main-memory addresses and local-store pointers of both shapes in the pair input before running the convex/convex routine.
+ collisionPairInput.m_primitiveDimensions0 = dim0;
+ collisionPairInput.m_primitiveDimensions1 = dim1;
+ collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+ collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+ collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+ collisionPairInput.m_spuCollisionShapes[1] = spuConvexShape1;
+ ProcessSpuConvexConvexCollision(&collisionPairInput,&lsMem,spuContacts);
+ }
+ else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) &&
+ btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1))
+ {
+ //snPause();
+ // Note: the compound branches DMA the root shapes regardless of the dmaShapes argument.
+ dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+ dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+ // Both are compounds, do N^2 CD for now
+ ///@todo: add some AABB-based pruning (probably not -> slower)
+
+ btCompoundShape* spuCompoundShape0 = (btCompoundShape*)collisionShape0Loc;
+ btCompoundShape* spuCompoundShape1 = (btCompoundShape*)collisionShape1Loc;
+
+ dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape0, 1);
+ dmaCompoundShapeInfo (&lsMem.compoundShapeData[1], spuCompoundShape1, 2);
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+ // The sub-shapes of each compound are fetched one compound at a time, both on tag 1.
+ dmaCompoundSubShapes (&lsMem.compoundShapeData[0], spuCompoundShape0, 1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ dmaCompoundSubShapes (&lsMem.compoundShapeData[1], spuCompoundShape1, 1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ int childShapeCount0 = spuCompoundShape0->getNumChildShapes();
+ int childShapeCount1 = spuCompoundShape1->getNumChildShapes();
+
+ // Start the N^2
+ for (int i = 0; i < childShapeCount0; ++i)
+ {
+ btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i];
+ btAssert(!btBroadphaseProxy::isCompound(childShape0.m_childShapeType));
+
+ for (int j = 0; j < childShapeCount1; ++j)
+ {
+ btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j];
+ btAssert(!btBroadphaseProxy::isCompound(childShape1.m_childShapeType));
+
+
+ /* Create a new collision pair input struct using the two child shapes */
+ SpuCollisionPairInput cinput (collisionPairInput);
+
+ cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape0.m_transform;
+ cinput.m_shapeType0 = childShape0.m_childShapeType;
+ cinput.m_collisionMargin0 = childShape0.m_childMargin;
+
+ cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape1.m_transform;
+ cinput.m_shapeType1 = childShape1.m_childShapeType;
+ cinput.m_collisionMargin1 = childShape1.m_childMargin;
+ /* Recursively call handleCollisionPair () with new collision pair input */
+ // dmaShapes=false: the child shapes were already brought in by dmaCompoundSubShapes above.
+ handleCollisionPair(cinput, lsMem, spuContacts,
+ (ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i],
+ (ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false);
+ }
+ }
+ }
+ else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) )
+ {
+ //snPause();
+
+ dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+ dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+ // object 0 compound, object 1 non-compound
+ btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape0Loc;
+ dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ int childShapeCount = spuCompoundShape->getNumChildShapes();
+ // Each child is fetched individually and paired with the already-loaded shape 1 through a recursive call.
+ for (int i = 0; i < childShapeCount; ++i)
+ {
+ btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
+ btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
+ // Dma the child shape
+ dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ SpuCollisionPairInput cinput (collisionPairInput);
+ cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape.m_transform;
+ cinput.m_shapeType0 = childShape.m_childShapeType;
+ cinput.m_collisionMargin0 = childShape.m_childMargin;
+
+ handleCollisionPair(cinput, lsMem, spuContacts,
+ (ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i],
+ collisionShape1Ptr, collisionShape1Loc, false);
+ }
+ }
+ else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1) )
+ {
+ //snPause();
+
+ dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+ dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+ // object 0 non-compound, object 1 compound
+ btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape1Loc;
+ dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1); // slot 0 of compoundShapeData is used for the single compound even though it is shape 1
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ int childShapeCount = spuCompoundShape->getNumChildShapes();
+
+ for (int i = 0; i < childShapeCount; ++i)
+ {
+ btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
+ btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
+ // Dma the child shape
+ dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ SpuCollisionPairInput cinput (collisionPairInput);
+ cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape.m_transform;
+ cinput.m_shapeType1 = childShape.m_childShapeType;
+ cinput.m_collisionMargin1 = childShape.m_childMargin;
+ handleCollisionPair(cinput, lsMem, spuContacts,
+ collisionShape0Ptr, collisionShape0Loc,
+ (ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], false);
+ }
+
+ }
+ else
+ {
+ //a non-convex shape is involved
+ bool handleConvexConcave = false;
+
+ //snPause();
+
+ if (btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType0) &&
+ btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
+ {
+ // Swap stuff
+ DoSwap(collisionShape0Ptr, collisionShape1Ptr);
+ DoSwap(collisionShape0Loc, collisionShape1Loc);
+ DoSwap(collisionPairInput.m_shapeType0, collisionPairInput.m_shapeType1);
+ DoSwap(collisionPairInput.m_worldTransform0, collisionPairInput.m_worldTransform1);
+ DoSwap(collisionPairInput.m_collisionMargin0, collisionPairInput.m_collisionMargin1);
+ // Note: collisionPairInput is a reference, so the caller's struct is modified too; the pointer arguments are swapped only within this call.
+ collisionPairInput.m_isSwapped = true;
+ }
+ // After the optional swap only convex (shape 0) versus concave (shape 1) is handled; any other combination reaching this branch does nothing besides the final flush.
+ if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)&&
+ btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType1))
+ {
+ handleConvexConcave = true;
+ }
+ if (handleConvexConcave)
+ {
+ if (dmaShapes)
+ {
+ dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+ dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+ cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+ }
+ // Static planes get their own routine; every other shape type reaching this point is treated as a btBvhTriangleMeshShape.
+ if (collisionPairInput.m_shapeType1 == STATIC_PLANE_PROXYTYPE)
+ {
+ btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+ btStaticPlaneShape* planeShape= (btStaticPlaneShape*)collisionShape1Loc;
+
+ btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+ collisionPairInput.m_primitiveDimensions0 = dim0;
+ collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+ collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+ collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+ collisionPairInput.m_spuCollisionShapes[1] = planeShape;
+
+ ProcessConvexPlaneSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+ } else
+ {
+ btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+ btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
+
+ btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+ collisionPairInput.m_primitiveDimensions0 = dim0;
+ collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+ collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+ collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+ collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
+
+ ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+ }
+ }
+
+ }
+ // Runs on every call, including the recursive ones made for compound children.
+ spuContacts.flush();
+
+}
+
+
+void processCollisionTask(void* userPtr, void* lsMemPtr)
+{
+ // Task entry point: userPtr is the SpuGatherAndProcessPairsTaskDesc, lsMemPtr the CollisionTask_LocalStoreMemory scratch area; both arrive as void* and are cast below.
+ SpuGatherAndProcessPairsTaskDesc* taskDescPtr = (SpuGatherAndProcessPairsTaskDesc*)userPtr;
+ SpuGatherAndProcessPairsTaskDesc& taskDesc = *taskDescPtr;
+ CollisionTask_LocalStoreMemory* colMemPtr = (CollisionTask_LocalStoreMemory*)lsMemPtr;
+ CollisionTask_LocalStoreMemory& lsMem = *(colMemPtr);
+
+ gUseEpa = taskDesc.m_useEpa;
+
+ // spu_printf("taskDescPtr=%llx\n",taskDescPtr);
+
+ SpuContactResult spuContacts;
+
+ ////////////////////
+
+ ppu_address_t dmaInPtr = taskDesc.m_inPairPtr;
+ unsigned int numPages = taskDesc.numPages;
+ unsigned int numOnLastPage = taskDesc.numOnLastPage;
+
+ // prefetch first set of inputs and wait
+ lsMem.g_workUnitTaskBuffers.init();
+ // The first page holds MIDPHASE_NUM_WORKUNITS_PER_PAGE units unless it is also the last page.
+ unsigned int nextNumOnPage = (numPages > 1)? MIDPHASE_NUM_WORKUNITS_PER_PAGE : numOnLastPage;
+ lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3));
+ dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE;
+
+
+ register unsigned char *inputPtr;
+ register unsigned int numOnPage;
+ register unsigned int j;
+ SpuGatherAndProcessWorkUnitInput* wuInputs;
+ register int dmaSize;
+ register ppu_address_t dmaPpuAddress;
+ register ppu_address_t dmaPpuAddress2;
+
+ int numPairs;
+ register int p;
+ SpuCollisionPairInput collisionPairInput;
+ // Outer loop: one iteration per page of work units; the next page is requested while the current one is processed.
+ for (unsigned int i = 0; btLikely(i < numPages); i++)
+ {
+
+ // wait for back buffer dma and swap buffers
+ inputPtr = lsMem.g_workUnitTaskBuffers.swapBuffers();
+
+ // number on current page is number prefetched last iteration
+ numOnPage = nextNumOnPage;
+
+
+ // prefetch next set of inputs
+#if MIDPHASE_NUM_WORKUNIT_PAGES > 2
+ if ( btLikely( i < numPages-1 ) )
+#else
+ if ( btUnlikely( i < numPages-1 ) )
+#endif
+ {
+ nextNumOnPage = (i == numPages-2)? numOnLastPage : MIDPHASE_NUM_WORKUNITS_PER_PAGE;
+ lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3));
+ dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE;
+ }
+
+ wuInputs = reinterpret_cast<SpuGatherAndProcessWorkUnitInput *>(inputPtr);
+
+
+ for (j = 0; btLikely( j < numOnPage ); j++)
+ {
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ // printMidphaseInput(&wuInputs[j]);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+ // Each work unit covers the broadphase pairs with indices m_startIndex up to (not including) m_endIndex of the array at m_pairArrayPtr.
+ numPairs = wuInputs[j].m_endIndex - wuInputs[j].m_startIndex;
+
+ if ( btLikely( numPairs ) )
+ {
+ dmaSize = numPairs*sizeof(btBroadphasePair);
+ dmaPpuAddress = wuInputs[j].m_pairArrayPtr+wuInputs[j].m_startIndex * sizeof(btBroadphasePair);
+ lsMem.m_pairsPointer = (btBroadphasePair*)cellDmaGetReadOnly(&lsMem.gBroadphasePairsBuffer, dmaPpuAddress , dmaSize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+
+ for (p=0;p<numPairs;p++)
+ {
+
+ //for each broadphase pair, do something
+
+ btBroadphasePair& pair = lsMem.getBroadphasePairPtr()[p];
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ spu_printf("pair->m_userInfo = %d\n",pair.m_userInfo);
+ spu_printf("pair->m_algorithm = %d\n",pair.m_algorithm);
+ spu_printf("pair->m_pProxy0 = %d\n",pair.m_pProxy0);
+ spu_printf("pair->m_pProxy1 = %d\n",pair.m_pProxy1);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+ // Only pairs with m_internalTmpValue == 2 that carry an algorithm and both proxies are processed; all others are skipped.
+ if (pair.m_internalTmpValue == 2 && pair.m_algorithm && pair.m_pProxy0 && pair.m_pProxy1)
+ {
+ dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
+ dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
+ lsMem.m_lsCollisionAlgorithmPtr = (SpuContactManifoldCollisionAlgorithm*)cellDmaGetReadOnly(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ lsMem.needsDmaPutContactManifoldAlgo = false;
+
+ collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.getlocalCollisionAlgorithm()->getContactManifoldPtr();
+ collisionPairInput.m_isSwapped = false;
+
+ if (1)
+ {
+
+ ///can wait on the combined DMA_MASK, or dma on the same tag
+
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ // spu_printf("SPU collisionPairInput->m_shapeType0 = %d\n",collisionPairInput->m_shapeType0);
+ // spu_printf("SPU collisionPairInput->m_shapeType1 = %d\n",collisionPairInput->m_shapeType1);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+
+ dmaSize = sizeof(btPersistentManifold);
+
+ dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr;
+ lsMem.m_lsManifoldPtr = (btPersistentManifold*)cellDmaGetReadOnly(&lsMem.gPersistentManifoldBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ // This manifold fetch (tag 1) is not waited on here; dmaAndSetupCollisionObjects() below waits on tags 1 and 2, which should also cover it.
+ collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0();
+ collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1();
+ collisionPairInput.m_collisionMargin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
+ collisionPairInput.m_collisionMargin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
+
+
+
+ //??cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+
+ if (1)
+ {
+ //snPause();
+
+ // Get the collision objects
+ dmaAndSetupCollisionObjects(collisionPairInput, lsMem);
+ // Pairs in which neither object is active are skipped entirely.
+ if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive())
+ {
+
+ lsMem.needsDmaPutContactManifoldAlgo = true;
+#ifdef USE_SEPDISTANCE_UTIL
+ lsMem.getlocalCollisionAlgorithm()->m_sepDistance.updateSeparatingDistance(collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
+#endif //USE_SEPDISTANCE_UTIL
+
+#define USE_DEDICATED_BOX_BOX 1
+#ifdef USE_DEDICATED_BOX_BOX
+ bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&&
+ (lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE));
+ if (boxbox)
+ {
+ //spu_printf("boxbox dist = %f\n",distance);
+ btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr();
+ btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
+ ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+ spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
+ lsMem.getColObj1()->getWorldTransform(),
+ lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
+ lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
+ collisionPairInput.m_isSwapped);
+
+
+ //float distance=0.f; // NOTE(review): 'distance' is still referenced by the USE_PE_BOX_BOX and USE_SEPDISTANCE_UTIL code below, which cannot compile while this declaration stays commented out.
+ btVector3 normalInB;
+
+
+ if (//!gUseEpa &&
+#ifdef USE_SEPDISTANCE_UTIL
+ lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
+#else
+ 1
+#endif
+ )
+ {
+//#define USE_PE_BOX_BOX 1
+#ifdef USE_PE_BOX_BOX
+ {
+
+ //getCollisionMargin0
+ btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
+ btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
+ btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
+ btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
+
+ Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
+ Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
+ Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
+ Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
+ Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
+
+ Transform3 transformA(vmMatrix0,vmPos0);
+ Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
+ Transform3 transformB(vmMatrix1,vmPos1);
+ BoxPoint resultClosestBoxPointA;
+ BoxPoint resultClosestBoxPointB;
+ Vector3 resultNormal;
+#ifdef USE_SEPDISTANCE_UTIL
+ float distanceThreshold = FLT_MAX // NOTE(review): the terminating ';' is missing, so this configuration does not compile.
+#else
+ float distanceThreshold = 0.f;
+#endif
+
+
+ distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold);
+
+ normalInB = -getBtVector3(resultNormal);
+
+ if(distance < spuManifold->getContactBreakingThreshold())
+ {
+ btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint));
+
+ spuContacts.addContactPoint(
+ normalInB,
+ pointOnB,
+ distance);
+ }
+ }
+#else
+ {
+ // Default path: build temporary btBoxShape objects from the dimensions (plus margins) stored in the collision algorithm and run btBoxBoxDetector on them.
+ btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
+ btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
+ btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
+ btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
+
+
+ btBoxShape box0(shapeDim0);
+ btBoxShape box1(shapeDim1);
+ // Adapter that forwards the detector's Result callbacks to spuContacts.
+ struct SpuBridgeContactCollector : public btDiscreteCollisionDetectorInterface::Result
+ {
+ SpuContactResult& m_spuContacts;
+
+ virtual void setShapeIdentifiersA(int partId0,int index0)
+ {
+ m_spuContacts.setShapeIdentifiersA(partId0,index0);
+ }
+ virtual void setShapeIdentifiersB(int partId1,int index1)
+ {
+ m_spuContacts.setShapeIdentifiersB(partId1,index1);
+ }
+ virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+ {
+ m_spuContacts.addContactPoint(normalOnBInWorld,pointInWorld,depth);
+ }
+
+ SpuBridgeContactCollector(SpuContactResult& spuContacts)
+ :m_spuContacts(spuContacts)
+ {
+
+ }
+ };
+
+ SpuBridgeContactCollector bridgeOutput(spuContacts);
+
+ btDiscreteCollisionDetectorInterface::ClosestPointInput input;
+ input.m_maximumDistanceSquared = BT_LARGE_FLOAT;
+ input.m_transformA = collisionPairInput.m_worldTransform0;
+ input.m_transformB = collisionPairInput.m_worldTransform1;
+
+ btBoxBoxDetector detector(&box0,&box1);
+
+ detector.getClosestPoints(input,bridgeOutput,0);
+
+ }
+#endif //USE_PE_BOX_BOX
+
+ lsMem.needsDmaPutContactManifoldAlgo = true;
+#ifdef USE_SEPDISTANCE_UTIL
+ btScalar sepDist2 = distance+spuManifold->getContactBreakingThreshold();
+ lsMem.getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(normalInB,sepDist2,collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
+#endif //USE_SEPDISTANCE_UTIL
+ gProcessedCol++;
+ } else
+ {
+ gSkippedCol++;
+ }
+
+ spuContacts.flush();
+
+
+ } else
+#endif //USE_DEDICATED_BOX_BOX
+ {
+ if (
+#ifdef USE_SEPDISTANCE_UTIL
+ lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
+#else
+ 1
+#endif //USE_SEPDISTANCE_UTIL
+ )
+ {
+ handleCollisionPair(collisionPairInput, lsMem, spuContacts,
+ (ppu_address_t)lsMem.getColObj0()->getRootCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,
+ (ppu_address_t)lsMem.getColObj1()->getRootCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
+ } else
+ {
+ //spu_printf("boxbox dist = %f\n",distance);
+ btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr();
+ btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
+ ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+ spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
+ lsMem.getColObj1()->getWorldTransform(),
+ lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
+ lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
+ collisionPairInput.m_isSwapped);
+ // No detection runs on this path: setContactInfo() followed by flush() is all that happens for the pair.
+ spuContacts.flush();
+ }
+ }
+
+ }
+
+ }
+ }
+ // Write the (possibly updated) local collision algorithm copy back to main memory; only compiled for SPU builds with USE_SEPDISTANCE_UTIL.
+#ifdef USE_SEPDISTANCE_UTIL
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+ if (lsMem.needsDmaPutContactManifoldAlgo)
+ {
+ dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
+ dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
+ cellDmaLargePut(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ }
+#endif
+#endif //#ifdef USE_SEPDISTANCE_UTIL
+
+ }
+ }
+ }
+ } //end for (j = 0; j < numOnPage; j++)
+
+ }// for
+
+
+
+ return;
+}
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
new file mode 100644
index 00000000000..bbaa555ee1b
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
@@ -0,0 +1,140 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_GATHERING_COLLISION_TASK_H
+#define SPU_GATHERING_COLLISION_TASK_H
+
+#include "../PlatformDefinitions.h"
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+
+
+///Task Description for SPU collision detection
+struct SpuGatherAndProcessPairsTaskDesc // plain descriptor; the libspe2 SPU main() in this header DMA-copies sizeof(*this) bytes of it and passes it to processCollisionTask()
+{
+ ppu_address_t m_inPairPtr;//m_pairArrayPtr; presumably the main-memory address of the input pair array -- TODO confirm against the task implementation
+ //mutex variable
+ uint32_t m_someMutexVariableInMainMemory;
+
+ ppu_address_t m_dispatcher; // NOTE(review): looks like the main-memory address of the collision dispatcher -- confirm
+
+ uint32_t numOnLastPage; // NOTE(review): presumably the number of pairs on the final page -- confirm
+
+ uint16_t numPages; // NOTE(review): presumably the number of pair pages in this task -- confirm
+ uint16_t taskId;
+ bool m_useEpa; // NOTE(review): presumably selects EPA for penetration depth -- confirm
+
+ struct CollisionTask_LocalStoreMemory* m_lsMemory; // forwarded by the SPU main() as the second argument of processCollisionTask()
+}
+
+#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
+__attribute__ ((aligned (128))) // 128-byte alignment is requested only on Cell (LV2) and libspe2 builds
+#endif
+;
+
+
+void processCollisionTask(void* userPtr, void* lsMemory); // the SPU main() in this header calls it with a SpuGatherAndProcessPairsTaskDesc* as userPtr and that descriptor's m_lsMemory as lsMemory
+
+void* createCollisionLocalStoreMemory(); // the SPU main() in this header stores the returned pointer in btSpuStatus::m_lsMemory.p at start-up
+
+
+#if defined(USE_LIBSPE2) && defined(__SPU__)
+#include "../SpuLibspe2Support.h"
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <SpuFakeDma.h>
+
+//#define DEBUG_LIBSPE2_SPU_TASK
+
+
+
+/// SPU-side entry point; compiled only when both USE_LIBSPE2 and __SPU__ are defined.
+///
+/// Start-up: the btSpuStatus record at argp is fetched by DMA, marked Spu_Status_Free,
+/// given the pointer returned by createCollisionLocalStoreMemory(), and written back.
+/// The function then blocks on the inbound mailbox and, per message:
+///  - Spu_Mailbox_Event_Task: re-reads the status record, fetches the task descriptor
+///    it points to and runs processCollisionTask() on it;
+///  - Spu_Mailbox_Event_Shutdown: leaves the loop once this iteration completes;
+///  - any other value: ignored.
+/// After every message the status record is written back as Spu_Status_Free.
+///
+/// @param speid  unused
+/// @param argp   main-memory address of the btSpuStatus record for this SPU
+/// @param envp   unused
+/// @return always 0
+int main(unsigned long long speid, addr64 argp, addr64 envp)
+{
+    printf("SPU: hello \n");
+
+    ATTRIBUTE_ALIGNED128(btSpuStatus status);
+    ATTRIBUTE_ALIGNED16( SpuGatherAndProcessPairsTaskDesc taskDesc ) ;
+    unsigned int received_message = Spu_Mailbox_Event_Nothing;
+    bool shutdown = false;
+
+    // publish the local-store scratch pointer and report this SPU as free
+    cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+    cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+    status.m_status = Spu_Status_Free;
+    status.m_lsMemory.p = createCollisionLocalStoreMemory();
+
+    cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+    cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+    while ( btLikely( !shutdown ) )
+    {
+        received_message = spu_read_in_mbox();
+
+        if( btLikely( received_message == Spu_Mailbox_Event_Task ))
+        {
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+            printf("SPU: received Spu_Mailbox_Event_Task\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+
+            // refresh the status
+            cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+            cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+            btAssert(status.m_status==Spu_Status_Occupied);
+
+            cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuGatherAndProcessPairsTaskDesc), DMA_TAG(3), 0, 0);
+            cellDmaWaitTagStatusAll(DMA_MASK(3));
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+            printf("SPU:processCollisionTask\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+            processCollisionTask((void*)&taskDesc, taskDesc.m_lsMemory);
+
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+            printf("SPU:finished processCollisionTask\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+        }
+        else if( btLikely( received_message == Spu_Mailbox_Event_Shutdown ) )
+        {
+            // the shutdown trace is emitted only for a real shutdown request;
+            // it used to be printed for every non-task message
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+            printf("SPU: received ShutDown\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+            shutdown = true;
+        }
+        else
+        {
+            // unknown mailbox value: nothing to do beyond reporting free below
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+            printf("SPU: received unknown mailbox message %u\n", received_message);
+#endif //DEBUG_LIBSPE2_SPU_TASK
+        }
+
+        // set to status free and wait for next task
+        status.m_status = Spu_Status_Free;
+        cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+        cellDmaWaitTagStatusAll(DMA_MASK(3));
+    }
+
+    printf("SPU: shutdown\n");
+    return 0;
+}
+#endif // USE_LIBSPE2
+
+
+#endif //SPU_GATHERING_COLLISION_TASK_H
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h
new file mode 100644
index 00000000000..8b89de03f59
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h
@@ -0,0 +1,19 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
new file mode 100644
index 00000000000..9f7e64dd1b3
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
@@ -0,0 +1,348 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuMinkowskiPenetrationDepthSolver.h"
+#include "SpuContactResult.h"
+#include "SpuPreferredPenetrationDirections.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "SpuCollisionShapes.h"
+
+#define NUM_UNITSPHERE_POINTS 42 // number of fixed sample directions initialised in the table below
+static btVector3 sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = // only the first 42 entries are initialised here; the (currently disabled) calcPenDepth code appends per-shape preferred directions from index NUM_UNITSPHERE_POINTS onwards
+{
+btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
+btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
+btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
+btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
+btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
+btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
+btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
+btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
+btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
+btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
+btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
+btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
+btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
+btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
+btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
+btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
+btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
+btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
+btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
+btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
+btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
+btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
+btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
+btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
+btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
+btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
+btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
+btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
+btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
+btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
+btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
+btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
+btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
+btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
+btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
+btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
+btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
+btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
+};
+
+
+bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( btSimplexSolverInterface& simplexSolver,
+ const btConvexShape* convexA,const btConvexShape* convexB,
+ const btTransform& transA,const btTransform& transB,
+ btVector3& v, btVector3& pa, btVector3& pb,
+ class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc) // as built this override ignores every argument, leaves v/pa/pb untouched and returns false (see the #if 0 below)
+{
+#if 0 // disabled reference implementation: samples the directions in sPenetrationDirections, then runs one GJK query on the displaced shape. It uses shapeTypeA/B, marginA/B and convexVertexDataA/B, none of which are declared in this function.
+ (void)stackAlloc;
+ (void)v;
+
+
+ struct btIntermediateResult : public SpuContactResult
+ {
+
+ btIntermediateResult():m_hasResult(false)
+ {
+ }
+
+ btVector3 m_normalOnBInWorld;
+ btVector3 m_pointInWorld;
+ btScalar m_depth;
+ bool m_hasResult;
+
+ virtual void setShapeIdentifiersA(int partId0,int index0)
+ {
+ (void)partId0;
+ (void)index0;
+ }
+
+ virtual void setShapeIdentifiersB(int partId1,int index1)
+ {
+ (void)partId1;
+ (void)index1;
+ }
+ void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+ {
+ m_normalOnBInWorld = normalOnBInWorld;
+ m_pointInWorld = pointInWorld;
+ m_depth = depth;
+ m_hasResult = true;
+ }
+ };
+
+ //just take fixed number of orientation, and sample the penetration depth in that direction
+ btScalar minProj = btScalar(BT_LARGE_FLOAT);
+ btVector3 minNorm(0.f,0.f,0.f);
+ btVector3 minVertex;
+ btVector3 minA,minB;
+ btVector3 seperatingAxisInA,seperatingAxisInB;
+ btVector3 pInA,qInB,pWorld,qWorld,w;
+
+//#define USE_BATCHED_SUPPORT 1
+#ifdef USE_BATCHED_SUPPORT
+
+ btVector3 supportVerticesABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+ btVector3 supportVerticesBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+ btVector3 seperatingAxisInABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+ btVector3 seperatingAxisInBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+ int i;
+
+ int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+ for (i=0;i<numSampleDirections;i++)
+ {
+ const btVector3& norm = sPenetrationDirections[i];
+ seperatingAxisInABatch[i] = (-norm) * transA.getBasis() ;
+ seperatingAxisInBBatch[i] = norm * transB.getBasis() ;
+ }
+
+ {
+ int numPDA = convexA->getNumPreferredPenetrationDirections();
+ if (numPDA)
+ {
+ for (int i=0;i<numPDA;i++)
+ {
+ btVector3 norm;
+ convexA->getPreferredPenetrationDirection(i,norm);
+ norm = transA.getBasis() * norm;
+ sPenetrationDirections[numSampleDirections] = norm;
+ seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
+ seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
+ numSampleDirections++;
+ }
+ }
+ }
+
+ {
+ int numPDB = convexB->getNumPreferredPenetrationDirections();
+ if (numPDB)
+ {
+ for (int i=0;i<numPDB;i++)
+ {
+ btVector3 norm;
+ convexB->getPreferredPenetrationDirection(i,norm);
+ norm = transB.getBasis() * norm;
+ sPenetrationDirections[numSampleDirections] = norm;
+ seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
+ seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
+ numSampleDirections++;
+ }
+ }
+ }
+
+
+
+ convexA->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInABatch,supportVerticesABatch,numSampleDirections);
+ convexB->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInBBatch,supportVerticesBBatch,numSampleDirections);
+
+ for (i=0;i<numSampleDirections;i++)
+ {
+ const btVector3& norm = sPenetrationDirections[i];
+ seperatingAxisInA = seperatingAxisInABatch[i];
+ seperatingAxisInB = seperatingAxisInBBatch[i];
+
+ pInA = supportVerticesABatch[i];
+ qInB = supportVerticesBBatch[i];
+
+ pWorld = transA(pInA);
+ qWorld = transB(qInB);
+ w = qWorld - pWorld;
+ btScalar delta = norm.dot(w);
+ //find smallest delta
+ if (delta < minProj)
+ {
+ minProj = delta;
+ minNorm = norm;
+ minA = pWorld;
+ minB = qWorld;
+ }
+ }
+#else
+
+ int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+///this is necessary, otherwise the normal is not correct, and sphere will rotate forever on a sloped triangle mesh
+#define DO_PREFERRED_DIRECTIONS 1
+#ifdef DO_PREFERRED_DIRECTIONS
+ {
+ int numPDA = spuGetNumPreferredPenetrationDirections(shapeTypeA,convexA);
+ if (numPDA)
+ {
+ for (int i=0;i<numPDA;i++)
+ {
+ btVector3 norm;
+ spuGetPreferredPenetrationDirection(shapeTypeA,convexA,i,norm);
+ norm = transA.getBasis() * norm;
+ sPenetrationDirections[numSampleDirections] = norm;
+ numSampleDirections++;
+ }
+ }
+ }
+
+ {
+ int numPDB = spuGetNumPreferredPenetrationDirections(shapeTypeB,convexB);
+ if (numPDB)
+ {
+ for (int i=0;i<numPDB;i++)
+ {
+ btVector3 norm;
+ spuGetPreferredPenetrationDirection(shapeTypeB,convexB,i,norm);
+ norm = transB.getBasis() * norm;
+ sPenetrationDirections[numSampleDirections] = norm;
+ numSampleDirections++;
+ }
+ }
+ }
+#endif //DO_PREFERRED_DIRECTIONS
+
+ for (int i=0;i<numSampleDirections;i++)
+ {
+ const btVector3& norm = sPenetrationDirections[i];
+ seperatingAxisInA = (-norm)* transA.getBasis();
+ seperatingAxisInB = norm* transB.getBasis();
+
+ pInA = convexA->localGetSupportVertexWithoutMarginNonVirtual( seperatingAxisInA);//, NULL);
+ qInB = convexB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);//, NULL);
+
+ // pInA = convexA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
+ // qInB = convexB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
+
+ pWorld = transA(pInA);
+ qWorld = transB(qInB);
+ w = qWorld - pWorld;
+ btScalar delta = norm.dot(w);
+ //find smallest delta
+ if (delta < minProj)
+ {
+ minProj = delta;
+ minNorm = norm;
+ minA = pWorld;
+ minB = qWorld;
+ }
+ }
+#endif //USE_BATCHED_SUPPORT
+
+ //add the margins
+
+ minA += minNorm*marginA;
+ minB -= minNorm*marginB;
+ //no penetration
+ if (minProj < btScalar(0.))
+ return false;
+
+ minProj += (marginA + marginB) + btScalar(1.00);
+
+
+
+
+
+//#define DEBUG_DRAW 1
+#ifdef DEBUG_DRAW
+ if (debugDraw)
+ {
+ btVector3 color(0,1,0);
+ debugDraw->drawLine(minA,minB,color);
+ color = btVector3 (1,1,1);
+ btVector3 vec = minB-minA;
+ btScalar prj2 = minNorm.dot(vec);
+ debugDraw->drawLine(minA,minA+(minNorm*minProj),color);
+
+ }
+#endif //DEBUG_DRAW
+
+
+ btGjkPairDetector gjkdet(convexA,convexB,&simplexSolver,0);
+
+ btScalar offsetDist = minProj;
+ btVector3 offset = minNorm * offsetDist;
+
+
+ SpuClosestPointInput input;
+ input.m_convexVertexData[0] = convexVertexDataA;
+ input.m_convexVertexData[1] = convexVertexDataB;
+ btVector3 newOrg = transA.getOrigin() + offset;
+
+ btTransform displacedTrans = transA;
+ displacedTrans.setOrigin(newOrg);
+
+ input.m_transformA = displacedTrans;
+ input.m_transformB = transB;
+ input.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);//minProj;
+
+ btIntermediateResult res;
+ gjkdet.getClosestPoints(input,res,0);
+
+ btScalar correctedMinNorm = minProj - res.m_depth;
+
+
+ //the penetration depth is over-estimated, relax it
+ btScalar penetration_relaxation= btScalar(1.);
+ minNorm*=penetration_relaxation;
+
+ if (res.m_hasResult)
+ {
+
+ pa = res.m_pointInWorld - minNorm * correctedMinNorm;
+ pb = res.m_pointInWorld;
+
+#ifdef DEBUG_DRAW
+ if (debugDraw)
+ {
+ btVector3 color(1,0,0);
+ debugDraw->drawLine(pa,pb,color);
+ }
+#endif//DEBUG_DRAW
+
+
+ } else {
+ // could not seperate shapes
+ //btAssert (false);
+ }
+ return res.m_hasResult;
+#endif // end of the disabled reference implementation
+ return false; // the only statement that is actually compiled: no penetration depth is ever reported
+}
+
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
new file mode 100644
index 00000000000..18ad223ed36
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
@@ -0,0 +1,48 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+#define MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+
+
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+
+class btStackAlloc;
+class btIDebugDraw;
+class btVoronoiSimplexSolver;
+class btConvexShape;
+
+///SpuMinkowskiPenetrationDepthSolver is the SPU-task flavour of the brute-force
+///penetration depth solver: the intended algorithm samples the depth along a fixed
+///set of directions using support mapping, then uses a GJK step to obtain the
+///witness points.
+class SpuMinkowskiPenetrationDepthSolver : public btConvexPenetrationDepthSolver
+{
+public:
+    SpuMinkowskiPenetrationDepthSolver()
+    {
+    }
+
+    virtual ~SpuMinkowskiPenetrationDepthSolver()
+    {
+    }
+
+    ///Penetration depth query for the convex pair (convexA, convexB) placed at
+    ///(transA, transB). v, pa and pb are output parameters; the return value tells
+    ///the caller whether a result was produced.
+    virtual bool calcPenDepth(
+        btSimplexSolverInterface& simplexSolver,
+        const btConvexShape* convexA,
+        const btConvexShape* convexB,
+        const btTransform& transA,
+        const btTransform& transB,
+        btVector3& v,
+        btVector3& pa,
+        btVector3& pb,
+        btIDebugDraw* debugDraw,
+        btStackAlloc* stackAlloc );
+};
+
+
+#endif //MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
new file mode 100644
index 00000000000..774a0cb2eb1
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
@@ -0,0 +1,70 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
+#define _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
+
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+
+/// Number of preferred penetration directions for a shape of the given proxy type.
+///
+/// Declared inline because it is defined in a header: without it every translation
+/// unit that includes this file would emit its own external definition.
+///
+/// @param shapeType  broadphase proxy type of the shape
+/// @param shape      unused
+/// @return 2 for TRIANGLE_SHAPE_PROXYTYPE, 0 for every other type
+inline int spuGetNumPreferredPenetrationDirections(int shapeType, void* shape)
+{
+    (void)shape;
+
+    switch (shapeType)
+    {
+    case TRIANGLE_SHAPE_PROXYTYPE:
+        {
+            // a triangle contributes its face normal and the opposite direction
+            return 2;
+        }
+    default:
+        {
+#if __ASSERT
+            spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType);
+#endif // __ASSERT
+        }
+    }
+
+    return 0;
+}
+
+/// Writes the index-th preferred penetration direction of a shape into penetrationVector.
+///
+/// Declared inline because it is defined in a header: without it every translation
+/// unit that includes this file would emit its own external definition.
+///
+/// For TRIANGLE_SHAPE_PROXYTYPE, shape is read as an array of three btVector3
+/// vertices; index 0 yields the normalized (v1-v0) x (v2-v0), any non-zero index
+/// yields its negation. For every other type penetrationVector is left untouched.
+///
+/// @param shapeType          broadphase proxy type of the shape
+/// @param shape              shape data, interpreted according to shapeType
+/// @param index              which preferred direction to produce
+/// @param penetrationVector  receives the direction (triangle shapes only)
+inline void spuGetPreferredPenetrationDirection(int shapeType, void* shape, int index, btVector3& penetrationVector)
+{
+    switch (shapeType)
+    {
+    case TRIANGLE_SHAPE_PROXYTYPE:
+        {
+            btVector3* vertices = (btVector3*)shape;
+            ///calcNormal
+            penetrationVector = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]);
+            penetrationVector.normalize();
+            if (index)
+                penetrationVector *= btScalar(-1.);
+            break;
+        }
+    default:
+        {
+#if __ASSERT
+            // the diagnostic now names this function; it previously carried the
+            // name of spuGetNumPreferredPenetrationDirections
+            spu_printf("spuGetPreferredPenetrationDirection() - Unsupported bound type: %d.\n", shapeType);
+#endif // __ASSERT
+        }
+    }
+}
+
+#endif //_SPU_PREFERRED_PENETRATION_DIRECTIONS_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
new file mode 100644
index 00000000000..30642a39294
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
@@ -0,0 +1,1155 @@
+/*
+ Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "Box.h"
+
+// Square of a single-precision value.
+static inline float sqr( float a )
+{
+    const float squared = a * a;
+    return squared;
+}
+
+enum BoxSepAxisType // kind of separating axis recorded by the AaxisTest / BaxisTest / CrossAxisTest macros below
+{
+ A_AXIS, B_AXIS, CROSS_AXIS // set by AaxisTest, BaxisTest and CrossAxisTest respectively
+};
+
+//-------------------------------------------------------------------------------------------------
+// voronoiTol: bevels Voronoi planes slightly which helps when features are parallel.
+//-------------------------------------------------------------------------------------------------
+
+static const float voronoiTol = -1.0e-5f; // multiplies a neighbouring face-point coordinate in the inVoronoi comparisons of the Vertex*Face*Test functions
+
+//-------------------------------------------------------------------------------------------------
+// separating axis tests: gaps along each axis are computed, and the axis with the maximum
+// gap is stored. cross product axes are normalized.
+//-------------------------------------------------------------------------------------------------
+
+#define AaxisTest( dim, letter, first ) /* Face axis 'dim' of box A. Expands inside a function that provides gap, maxGap, distanceThreshold, gapsA, axisType, faceDimA, axisA and identity. Returns gap from that function as soon as it exceeds distanceThreshold; otherwise records this axis when 'first' is set or when gap beats maxGap. */ \
+{ \
+ if ( first ) \
+ { \
+ maxGap = gap = gapsA.get##letter(); \
+ if ( gap > distanceThreshold ) return gap; \
+ axisType = A_AXIS; \
+ faceDimA = dim; \
+ axisA = identity.getCol##dim(); \
+ } \
+ else \
+ { \
+ gap = gapsA.get##letter(); \
+ if ( gap > distanceThreshold ) return gap; \
+ else if ( gap > maxGap ) \
+ { \
+ maxGap = gap; \
+ axisType = A_AXIS; \
+ faceDimA = dim; \
+ axisA = identity.getCol##dim(); \
+ } \
+ } \
+}
+
+
+#define BaxisTest( dim, letter ) /* Face axis 'dim' of box B. Uses the enclosing function's gap, maxGap, distanceThreshold, gapsB, axisType, faceDimB, axisB and identity. Returns gap from that function when it exceeds distanceThreshold; otherwise records this axis when gap beats maxGap. */ \
+{ \
+ gap = gapsB.get##letter(); \
+ if ( gap > distanceThreshold ) return gap; \
+ else if ( gap > maxGap ) \
+ { \
+ maxGap = gap; \
+ axisType = B_AXIS; \
+ faceDimB = dim; \
+ axisB = identity.getCol##dim(); \
+ } \
+}
+
+#define CrossAxisTest( dima, dimb, letterb ) /* Cross-product axis of A's edge direction 'dima' and B's edge direction 'dimb'. Uses the enclosing function's lsqrs, gapsAxB, gap, maxGap, distanceThreshold, axisType, edgeDimA, edgeDimB, axisA, identity and matrixAB. The axis is skipped when its squared length is not above 1.0e-30f; otherwise the gap is scaled by 1/length, returned from the enclosing function when above distanceThreshold, or recorded when it beats maxGap. */ \
+{ \
+ const float lsqr_tolerance = 1.0e-30f; \
+ float lsqr; \
+ \
+ lsqr = lsqrs.getCol##dima().get##letterb(); \
+ \
+ if ( lsqr > lsqr_tolerance ) \
+ { \
+ float l_recip = 1.0f / sqrtf( lsqr ); \
+ gap = float(gapsAxB.getCol##dima().get##letterb()) * l_recip; \
+ \
+ if ( gap > distanceThreshold ) \
+ { \
+ return gap; \
+ } \
+ \
+ if ( gap > maxGap ) \
+ { \
+ maxGap = gap; \
+ axisType = CROSS_AXIS; \
+ edgeDimA = dima; \
+ edgeDimB = dimb; \
+ axisA = cross(identity.getCol##dima(),matrixAB.getCol##dimb()) * l_recip; \
+ } \
+ } \
+}
+
+//-------------------------------------------------------------------------------------------------
+// tests whether a vertex of box B and a face of box A are the closest features
+//-------------------------------------------------------------------------------------------------
+
+// Evaluates one corner of B's face against A's face.
+//   inVoronoi : out, result of the reverse Voronoi-region check
+//   t0, t1    : out, in-plane coordinates of the closest point on A's face
+//   returns   : squared distance between the corner and that closest point
+inline
+float
+VertexBFaceATest(
+    bool & inVoronoi,
+    float & t0,
+    float & t1,
+    const Vector3 & hA,
+    PE_REF(Vector3) faceOffsetAB,
+    PE_REF(Vector3) faceOffsetBA,
+    const Matrix3 & matrixAB,
+    const Matrix3 & matrixBA,
+    PE_REF(Vector3) signsB,
+    PE_REF(Vector3) scalesB )
+{
+    // the selected corner of box B, expressed in A's coordinate system
+    Vector3 cornerOfB =
+        Vector3( faceOffsetAB + matrixAB.getCol0() * scalesB.getX() + matrixAB.getCol1() * scalesB.getY() );
+
+    // closest point on A's face: clamp the corner's in-plane coordinates
+    // to the face half-extents
+    const float halfExtent0 = hA[0];
+    const float halfExtent1 = hA[1];
+
+    t0 = cornerOfB[0];
+    if ( t0 > halfExtent0 )
+    {
+        t0 = halfExtent0;
+    }
+    else if ( t0 < -halfExtent0 )
+    {
+        t0 = -halfExtent0;
+    }
+
+    t1 = cornerOfB[1];
+    if ( t1 > halfExtent1 )
+    {
+        t1 = halfExtent1;
+    }
+    else if ( t1 < -halfExtent1 )
+    {
+        t1 = -halfExtent1;
+    }
+
+    // Voronoi test: the point on B is already known to lie in the Voronoi region
+    // of the point on A, so only the reverse direction is checked here.
+    Vector3 pointOnAInB =
+        Vector3( mulPerElem( faceOffsetBA + matrixBA.getCol0() * t0 + matrixBA.getCol1() * t1 - scalesB, signsB ) );
+
+    const float p0 = pointOnAInB[0];
+    const float p1 = pointOnAInB[1];
+    const float p2 = pointOnAInB[2];
+
+    inVoronoi = ( p0 >= voronoiTol * p2 ) &&
+                ( p1 >= voronoiTol * p0 ) &&
+                ( p2 >= voronoiTol * p1 );
+
+    const float offset0 = cornerOfB[0] - t0;
+    const float offset1 = cornerOfB[1] - t1;
+    const float offset2 = cornerOfB[2];
+
+    return (sqr( offset0 ) + sqr( offset1 ) + sqr( offset2 ));
+}
+
+#define VertexBFaceA_SetNewMin() /* Records the current candidate as the new minimum: copies distSqr, the clamped face coordinates t0/t1 (point on A) and scalesB's x/y (vertex of B) into the enclosing function's outputs, and tags the feature pair as face-on-A / vertex-on-B. */ \
+{ \
+ minDistSqr = distSqr; \
+ localPointA.setX(t0); \
+ localPointA.setY(t1); \
+ localPointB.setX( scalesB.getX() ); \
+ localPointB.setY( scalesB.getY() ); \
+ featureA = F; \
+ featureB = V; \
+}
+
+// Runs VertexBFaceATest on up to four corners of B's face and keeps the closest
+// candidate in minDistSqr / localPointA / localPointB / featureA / featureB.
+// Corners are visited by flipping the sign of signsB/scalesB in the order
+// X, Y, X relative to the incoming values. The walk stops as soon as a test
+// reports 'done'. When 'first' is set the first corner is accepted
+// unconditionally; later corners must improve on minDistSqr.
+void
+VertexBFaceATests(
+    bool & done,
+    float & minDistSqr,
+    Point3 & localPointA,
+    Point3 & localPointB,
+    FeatureType & featureA,
+    FeatureType & featureB,
+    const Vector3 & hA,
+    PE_REF(Vector3) faceOffsetAB,
+    PE_REF(Vector3) faceOffsetBA,
+    const Matrix3 & matrixAB,
+    const Matrix3 & matrixBA,
+    PE_REF(Vector3) signsB,
+    PE_REF(Vector3) scalesB,
+    bool first )
+{
+    float t0, t1;
+    float distSqr;
+
+    for ( int cornerIndex = 0; cornerIndex < 4; ++cornerIndex )
+    {
+        // move to the next corner: corner 0 uses the incoming signs, corners 1
+        // and 3 mirror along X, corner 2 mirrors along Y
+        if ( cornerIndex == 2 )
+        {
+            signsB.setY( -signsB.getY() );
+            scalesB.setY( -scalesB.getY() );
+        }
+        else if ( cornerIndex != 0 )
+        {
+            signsB.setX( -signsB.getX() );
+            scalesB.setX( -scalesB.getX() );
+        }
+
+        distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA,
+                                    matrixAB, matrixBA, signsB, scalesB );
+
+        const bool acceptUnconditionally = first && ( cornerIndex == 0 );
+
+        if ( acceptUnconditionally || distSqr < minDistSqr ) {
+            VertexBFaceA_SetNewMin();
+        }
+
+        if ( done )
+            return;
+    }
+}
+
+//-------------------------------------------------------------------------------------------------
+// VertexAFaceBTest: tests whether a vertex of box A and a face of box B are the closest features
+//-------------------------------------------------------------------------------------------------
+
+// Evaluates one corner of A's face against B's face.
+//   inVoronoi : out, result of the reverse Voronoi-region check
+//   t0, t1    : out, in-plane coordinates of the closest point on B's face
+//   returns   : squared distance between the corner and that closest point
+inline
+float
+VertexAFaceBTest(
+    bool & inVoronoi,
+    float & t0,
+    float & t1,
+    const Vector3 & hB,
+    PE_REF(Vector3) faceOffsetAB,
+    PE_REF(Vector3) faceOffsetBA,
+    const Matrix3 & matrixAB,
+    const Matrix3 & matrixBA,
+    PE_REF(Vector3) signsA,
+    PE_REF(Vector3) scalesA )
+{
+    // the selected corner of box A, expressed in B's coordinate system
+    Vector3 cornerOfA =
+        Vector3( faceOffsetBA + matrixBA.getCol0() * scalesA.getX() + matrixBA.getCol1() * scalesA.getY() );
+
+    // closest point on B's face: clamp the corner's in-plane coordinates
+    // to the face half-extents
+    const float halfExtent0 = hB[0];
+    const float halfExtent1 = hB[1];
+
+    t0 = cornerOfA[0];
+    if ( t0 > halfExtent0 )
+    {
+        t0 = halfExtent0;
+    }
+    else if ( t0 < -halfExtent0 )
+    {
+        t0 = -halfExtent0;
+    }
+
+    t1 = cornerOfA[1];
+    if ( t1 > halfExtent1 )
+    {
+        t1 = halfExtent1;
+    }
+    else if ( t1 < -halfExtent1 )
+    {
+        t1 = -halfExtent1;
+    }
+
+    // reverse Voronoi check, carried out on the clamped point mapped into A's frame
+    Vector3 pointOnBInA =
+        Vector3( mulPerElem( faceOffsetAB + matrixAB.getCol0() * t0 + matrixAB.getCol1() * t1 - scalesA, signsA ) );
+
+    const float p0 = pointOnBInA[0];
+    const float p1 = pointOnBInA[1];
+    const float p2 = pointOnBInA[2];
+
+    inVoronoi = ( p0 >= voronoiTol * p2 ) &&
+                ( p1 >= voronoiTol * p0 ) &&
+                ( p2 >= voronoiTol * p1 );
+
+    const float offset0 = cornerOfA[0] - t0;
+    const float offset1 = cornerOfA[1] - t1;
+    const float offset2 = cornerOfA[2];
+
+    return (sqr( offset0 ) + sqr( offset1 ) + sqr( offset2 ));
+}
+
+#define VertexAFaceB_SetNewMin() /* Records the current candidate as the new minimum: copies distSqr, the clamped face coordinates t0/t1 (point on B) and scalesA's x/y (vertex of A) into the enclosing function's outputs, and tags the feature pair as vertex-on-A / face-on-B. */ \
+{ \
+ minDistSqr = distSqr; \
+ localPointB.setX(t0); \
+ localPointB.setY(t1); \
+ localPointA.setX( scalesA.getX() ); \
+ localPointA.setY( scalesA.getY() ); \
+ featureA = V; \
+ featureB = F; \
+}
+
+// Runs VertexAFaceBTest on up to four corners of A's face and keeps the closest
+// candidate in minDistSqr / localPointA / localPointB / featureA / featureB.
+// Corners are visited by flipping the sign of signsA/scalesA in the order
+// X, Y, X relative to the incoming values. The walk stops as soon as a test
+// reports 'done'. When 'first' is set the first corner is accepted
+// unconditionally; later corners must improve on minDistSqr.
+void
+VertexAFaceBTests(
+    bool & done,
+    float & minDistSqr,
+    Point3 & localPointA,
+    Point3 & localPointB,
+    FeatureType & featureA,
+    FeatureType & featureB,
+    const Vector3 & hB,
+    PE_REF(Vector3) faceOffsetAB,
+    PE_REF(Vector3) faceOffsetBA,
+    const Matrix3 & matrixAB,
+    const Matrix3 & matrixBA,
+    PE_REF(Vector3) signsA,
+    PE_REF(Vector3) scalesA,
+    bool first )
+{
+    float t0, t1;
+    float distSqr;
+
+    for ( int cornerIndex = 0; cornerIndex < 4; ++cornerIndex )
+    {
+        // move to the next corner: corner 0 uses the incoming signs, corners 1
+        // and 3 mirror along X, corner 2 mirrors along Y
+        if ( cornerIndex == 2 )
+        {
+            signsA.setY( -signsA.getY() );
+            scalesA.setY( -scalesA.getY() );
+        }
+        else if ( cornerIndex != 0 )
+        {
+            signsA.setX( -signsA.getX() );
+            scalesA.setX( -scalesA.getX() );
+        }
+
+        distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA,
+                                    matrixAB, matrixBA, signsA, scalesA );
+
+        const bool acceptUnconditionally = first && ( cornerIndex == 0 );
+
+        if ( acceptUnconditionally || distSqr < minDistSqr ) {
+            VertexAFaceB_SetNewMin();
+        }
+
+        if ( done )
+            return;
+    }
+}
+
+//-------------------------------------------------------------------------------------------------
+// EdgeEdgeTest:
+// expands to a braced block ending in 'return dot(edgeOffAB,edgeOffAB)', so it is meant to be the whole body of a float-returning function
+// tests whether a pair of edges are the closest features
+// expects inVoronoi, tA, tB, hA, hB, faceOffsetAB/BA, matrixAB/BA, signsA/B, scalesA/B and voronoiTol to be in scope at the expansion site
+// note on the shorthand:
+// 'a' & 'b' refer to the edges.
+// 'c' is the dimension of the axis that points from the face center to the edge Center
+// 'd' is the dimension of the edge Direction
+// the dimension of the face normal is 2
+//-------------------------------------------------------------------------------------------------
+// ac/ad/bc/bd are numeric dimensions (used for indexing and getCol##n); the *_letter arguments are the matching X/Y suffixes pasted onto get/set accessors
+#define EdgeEdgeTest( ac, ac_letter, ad, ad_letter, bc, bc_letter, bd, bd_letter ) \
+{ \
+ Vector3 edgeOffsetAB; \
+ Vector3 edgeOffsetBA; \
+ /* edge offsets: the face offsets shifted along each box's 'c' dimension by the signed half-extents in scalesA / scalesB */ \
+ edgeOffsetAB = faceOffsetAB + matrixAB.getCol##bc() * scalesB.get##bc_letter(); \
+ edgeOffsetAB.set##ac_letter( edgeOffsetAB.get##ac_letter() - scalesA.get##ac_letter() ); \
+ \
+ edgeOffsetBA = faceOffsetBA + matrixBA.getCol##ac() * scalesA.get##ac_letter(); \
+ edgeOffsetBA.set##bc_letter( edgeOffsetBA.get##bc_letter() - scalesB.get##bc_letter() ); \
+ /* dirDot: the 'ad' component of column 'bd' of matrixAB */ \
+ float dirDot = matrixAB.getCol##bd().get##ad_letter(); \
+ float denom = 1.0f - dirDot*dirDot; \
+ float edgeOffsetAB_ad = edgeOffsetAB.get##ad_letter(); \
+ float edgeOffsetBA_bd = edgeOffsetBA.get##bd_letter(); \
+ /* denom is exactly 0 only when dirDot is +1 or -1; tA then starts at 0 instead of dividing by zero */ \
+ if ( denom == 0.0f ) \
+ { \
+ tA = 0.0f; \
+ } \
+ else \
+ { \
+ tA = ( edgeOffsetAB_ad + edgeOffsetBA_bd * dirDot ) / denom; \
+ } \
+ /* clamp tA to [-hA[ad], hA[ad]] */ \
+ if ( tA < -hA[ad] ) tA = -hA[ad]; \
+ else if ( tA > hA[ad] ) tA = hA[ad]; \
+ /* derive tB from tA; if tB has to be clamped to [-hB[bd], hB[bd]], tA is recomputed from the clamped tB and clamped again */ \
+ tB = tA * dirDot + edgeOffsetBA_bd; \
+ \
+ if ( tB < -hB[bd] ) \
+ { \
+ tB = -hB[bd]; \
+ tA = tB * dirDot + edgeOffsetAB_ad; \
+ \
+ if ( tA < -hA[ad] ) tA = -hA[ad]; \
+ else if ( tA > hA[ad] ) tA = hA[ad]; \
+ } \
+ else if ( tB > hB[bd] ) \
+ { \
+ tB = hB[bd]; \
+ tA = tB * dirDot + edgeOffsetAB_ad; \
+ \
+ if ( tA < -hA[ad] ) tA = -hA[ad]; \
+ else if ( tA > hA[ad] ) tA = hA[ad]; \
+ } \
+ /* sign-adjusted offsets at the chosen parameters; their 'c' and normal (2) components feed the Voronoi checks */ \
+ Vector3 edgeOffAB = Vector3( mulPerElem( edgeOffsetAB + matrixAB.getCol##bd() * tB, signsA ) );\
+ Vector3 edgeOffBA = Vector3( mulPerElem( edgeOffsetBA + matrixBA.getCol##ad() * tA, signsB ) );\
+ \
+ inVoronoi = ( edgeOffAB[ac] >= voronoiTol * edgeOffAB[2] ) && \
+ ( edgeOffAB[2] >= voronoiTol * edgeOffAB[ac] ) && \
+ ( edgeOffBA[bc] >= voronoiTol * edgeOffBA[2] ) && \
+ ( edgeOffBA[2] >= voronoiTol * edgeOffBA[bc] ); \
+ /* edgeOffBA is adjusted too, but only edgeOffAB contributes to the returned squared length */ \
+ edgeOffAB[ad] -= tA; \
+ edgeOffBA[bd] -= tB; \
+ \
+ return dot(edgeOffAB,edgeOffAB); \
+}
+// EdgeEdgeTest_0101: edge/edge test with A's edge center on dimension 0 and direction 1, and B's edge center on dimension 0 and direction 1; returns the squared length computed by the EdgeEdgeTest macro.
+float
+EdgeEdgeTest_0101(
+ bool & inVoronoi, // out: result of the macro's Voronoi-region checks
+ float & tA, // out: clamped parameter along A's edge
+ float & tB, // out: clamped parameter along B's edge
+ const Vector3 & hA,
+ const Vector3 & hB,
+ PE_REF(Vector3) faceOffsetAB,
+ PE_REF(Vector3) faceOffsetBA,
+ const Matrix3 & matrixAB,
+ const Matrix3 & matrixBA,
+ PE_REF(Vector3) signsA,
+ PE_REF(Vector3) signsB,
+ PE_REF(Vector3) scalesA,
+ PE_REF(Vector3) scalesB )
+{
+ EdgeEdgeTest( 0, X, 1, Y, 0, X, 1, Y ); // ac=0, ad=1, bc=0, bd=1; the macro supplies the return statement
+}
+// EdgeEdgeTest_0110: edge/edge test with A's edge center on dimension 0 and direction 1, and B's edge center on dimension 1 and direction 0; returns the squared length computed by the EdgeEdgeTest macro.
+float
+EdgeEdgeTest_0110(
+ bool & inVoronoi, // out: result of the macro's Voronoi-region checks
+ float & tA, // out: clamped parameter along A's edge
+ float & tB, // out: clamped parameter along B's edge
+ const Vector3 & hA,
+ const Vector3 & hB,
+ PE_REF(Vector3) faceOffsetAB,
+ PE_REF(Vector3) faceOffsetBA,
+ const Matrix3 & matrixAB,
+ const Matrix3 & matrixBA,
+ PE_REF(Vector3) signsA,
+ PE_REF(Vector3) signsB,
+ PE_REF(Vector3) scalesA,
+ PE_REF(Vector3) scalesB )
+{
+ EdgeEdgeTest( 0, X, 1, Y, 1, Y, 0, X ); // ac=0, ad=1, bc=1, bd=0; the macro supplies the return statement
+}
+// EdgeEdgeTest_1001: edge/edge test with A's edge center on dimension 1 and direction 0, and B's edge center on dimension 0 and direction 1; returns the squared length computed by the EdgeEdgeTest macro.
+float
+EdgeEdgeTest_1001(
+ bool & inVoronoi, // out: result of the macro's Voronoi-region checks
+ float & tA, // out: clamped parameter along A's edge
+ float & tB, // out: clamped parameter along B's edge
+ const Vector3 & hA,
+ const Vector3 & hB,
+ PE_REF(Vector3) faceOffsetAB,
+ PE_REF(Vector3) faceOffsetBA,
+ const Matrix3 & matrixAB,
+ const Matrix3 & matrixBA,
+ PE_REF(Vector3) signsA,
+ PE_REF(Vector3) signsB,
+ PE_REF(Vector3) scalesA,
+ PE_REF(Vector3) scalesB )
+{
+ EdgeEdgeTest( 1, Y, 0, X, 0, X, 1, Y ); // ac=1, ad=0, bc=0, bd=1; the macro supplies the return statement
+}
+// EdgeEdgeTest_1010: edge/edge test with A's edge center on dimension 1 and direction 0, and B's edge center on dimension 1 and direction 0; returns the squared length computed by the EdgeEdgeTest macro.
+float
+EdgeEdgeTest_1010(
+ bool & inVoronoi, // out: result of the macro's Voronoi-region checks
+ float & tA, // out: clamped parameter along A's edge
+ float & tB, // out: clamped parameter along B's edge
+ const Vector3 & hA,
+ const Vector3 & hB,
+ PE_REF(Vector3) faceOffsetAB,
+ PE_REF(Vector3) faceOffsetBA,
+ const Matrix3 & matrixAB,
+ const Matrix3 & matrixBA,
+ PE_REF(Vector3) signsA,
+ PE_REF(Vector3) signsB,
+ PE_REF(Vector3) scalesA,
+ PE_REF(Vector3) scalesB )
+{
+ EdgeEdgeTest( 1, Y, 0, X, 1, Y, 0, X ); // ac=1, ad=0, bc=1, bd=0; the macro supplies the return statement
+}
+// EdgeEdge_SetNewMin: stores the current edge/edge candidate as the new minimum; the letter arguments select which X/Y accessor receives the edge-center coordinate (from scalesA/B) and which receives the edge parameter (tA/tB). Expects distSqr, tA, tB, scalesA/B, testOtherFaceDimA/B and the output variables to be in scope.
+#define EdgeEdge_SetNewMin( ac_letter, ad_letter, bc_letter, bd_letter ) \
+{ \
+ minDistSqr = distSqr; \
+ localPointA.set##ac_letter(scalesA.get##ac_letter()); \
+ localPointA.set##ad_letter(tA); \
+ localPointB.set##bc_letter(scalesB.get##bc_letter()); \
+ localPointB.set##bd_letter(tB); \
+ otherFaceDimA = testOtherFaceDimA; \
+ otherFaceDimB = testOtherFaceDimB; \
+ featureA = E; \
+ featureB = E; \
+}
+// EdgeEdgeTests: tries up to 16 edge/edge candidates (4 edge-orientation variants x 4 sign combinations), returning as soon as a test reports inVoronoi, and keeps the candidate with the smallest squared distance.
+void
+EdgeEdgeTests(
+ bool & done, // out: bound to the inVoronoi output of each EdgeEdgeTest_* call
+ float & minDistSqr, // in/out: smallest squared distance so far; ignored on input when 'first' is true
+ Point3 & localPointA, // out: X/Y overwritten when a new minimum is stored
+ Point3 & localPointB, // out: X/Y overwritten when a new minimum is stored
+ int & otherFaceDimA, // out: set to testOtherFaceDimA of the winning candidate
+ int & otherFaceDimB, // out: set to testOtherFaceDimB of the winning candidate
+ FeatureType & featureA, // out: set to E when a new minimum is stored
+ FeatureType & featureB, // out: set to E when a new minimum is stored
+ const Vector3 & hA,
+ const Vector3 & hB,
+ PE_REF(Vector3) faceOffsetAB,
+ PE_REF(Vector3) faceOffsetBA,
+ const Matrix3 & matrixAB,
+ const Matrix3 & matrixBA,
+ PE_REF(Vector3) signsA, // X/Y negated between tests; NOTE(review): visibility to the caller depends on the PE_REF definition (not visible here)
+ PE_REF(Vector3) signsB, // X/Y negated between tests; if all 16 tests run, Y ends up negated relative to entry
+ PE_REF(Vector3) scalesA, // negated in lock-step with signsA
+ PE_REF(Vector3) scalesB, // negated in lock-step with signsB
+ bool first ) // true when no earlier test has produced a minimum yet
+{
+
+ float distSqr;
+ float tA, tB;
+
+ int testOtherFaceDimA, testOtherFaceDimB;
+ // testOtherFaceDim*: matches the 'c' dimension (0 or 1) of the variant currently being tested
+ testOtherFaceDimA = 0;
+ testOtherFaceDimB = 0;
+ // variant 0101, candidate 1 of 4: signs exactly as passed in
+ distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( first ) {
+ EdgeEdge_SetNewMin( X, Y, X, Y );
+ } else {
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, X, Y );
+ }
+ }
+
+ if ( done )
+ return;
+ // candidate 2: flip A's X sign
+ signsA.setX( -signsA.getX() );
+ scalesA.setX( -scalesA.getX() );
+
+ distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, X, Y );
+ }
+
+ if ( done )
+ return;
+ // candidate 3: flip B's X sign
+ signsB.setX( -signsB.getX() );
+ scalesB.setX( -scalesB.getX() );
+
+ distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, X, Y );
+ }
+
+ if ( done )
+ return;
+ // candidate 4: flip A's X sign back
+ signsA.setX( -signsA.getX() );
+ scalesA.setX( -scalesA.getX() );
+
+ distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, X, Y );
+ }
+
+ if ( done )
+ return;
+ // variant 1001, candidate 1: flipping B's X sign again returns it to its value on entry
+ testOtherFaceDimA = 1;
+ testOtherFaceDimB = 0;
+ signsB.setX( -signsB.getX() );
+ scalesB.setX( -scalesB.getX() );
+
+ distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, X, Y );
+ }
+
+ if ( done )
+ return;
+ // candidate 2: flip A's Y sign
+ signsA.setY( -signsA.getY() );
+ scalesA.setY( -scalesA.getY() );
+
+ distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, X, Y );
+ }
+
+ if ( done )
+ return;
+ // candidate 3: flip B's X sign
+ signsB.setX( -signsB.getX() );
+ scalesB.setX( -scalesB.getX() );
+
+ distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, X, Y );
+ }
+
+ if ( done )
+ return;
+ // candidate 4: flip A's Y sign back
+ signsA.setY( -signsA.getY() );
+ scalesA.setY( -scalesA.getY() );
+
+ distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, X, Y );
+ }
+
+ if ( done )
+ return;
+ // variant 0110, candidate 1: B's X sign is first returned to its value on entry
+ testOtherFaceDimA = 0;
+ testOtherFaceDimB = 1;
+ signsB.setX( -signsB.getX() );
+ scalesB.setX( -scalesB.getX() );
+
+ distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, Y, X );
+ }
+
+ if ( done )
+ return;
+ // candidate 2: flip A's X sign
+ signsA.setX( -signsA.getX() );
+ scalesA.setX( -scalesA.getX() );
+
+ distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, Y, X );
+ }
+
+ if ( done )
+ return;
+ // candidate 3: flip B's Y sign
+ signsB.setY( -signsB.getY() );
+ scalesB.setY( -scalesB.getY() );
+
+ distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, Y, X );
+ }
+
+ if ( done )
+ return;
+ // candidate 4: flip A's X sign back
+ signsA.setX( -signsA.getX() );
+ scalesA.setX( -scalesA.getX() );
+
+ distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( X, Y, Y, X );
+ }
+
+ if ( done )
+ return;
+ // variant 1010, candidate 1: B's Y sign is first returned to its value on entry
+ testOtherFaceDimA = 1;
+ testOtherFaceDimB = 1;
+ signsB.setY( -signsB.getY() );
+ scalesB.setY( -scalesB.getY() );
+
+ distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, Y, X );
+ }
+
+ if ( done )
+ return;
+ // candidate 2: flip A's Y sign
+ signsA.setY( -signsA.getY() );
+ scalesA.setY( -scalesA.getY() );
+
+ distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, Y, X );
+ }
+
+ if ( done )
+ return;
+ // candidate 3: flip B's Y sign
+ signsB.setY( -signsB.getY() );
+ scalesB.setY( -scalesB.getY() );
+
+ distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, Y, X );
+ }
+
+ if ( done )
+ return;
+ // candidate 4: flip A's Y sign back; this is the last test, so 'done' keeps whatever it reports
+ signsA.setY( -signsA.getY() );
+ scalesA.setY( -scalesA.getY() );
+
+ distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+ matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+ if ( distSqr < minDistSqr ) {
+ EdgeEdge_SetNewMin( Y, X, Y, X );
+ }
+}
+// boxBoxDistance: selects a separating axis, re-expresses both boxes in the frames of their facing faces, then runs the vertex/face and edge/edge tests to fill boxPointA/boxPointB and normal; returns maxGap when it is negative, otherwise sqrtf(minDistSqr).
+float
+boxBoxDistance(
+ Vector3& normal, // out: axisA transformed by transformA
+ BoxPoint& boxPointA, // out: local point and feature on box A
+ BoxPoint& boxPointB, // out: local point and feature on box B
+ PE_REF(Box) boxA, const Transform3& transformA,
+ PE_REF(Box) boxB, const Transform3& transformB,
+ float distanceThreshold ) // NOTE(review): not referenced in the visible body; presumably consumed by the axis-test macros - TODO confirm
+{
+ Matrix3 identity;
+ identity = Matrix3::identity();
+ Vector3 ident[3]; // ident[i] is column i of the identity; used further down to build the permutation matrices
+ ident[0] = identity.getCol0();
+ ident[1] = identity.getCol1();
+ ident[2] = identity.getCol2();
+
+ // get relative transformations
+ // transformAB = inverse(transformA) * transformB; transformBA is its inverse
+ Transform3 transformAB, transformBA;
+ Matrix3 matrixAB, matrixBA;
+ Vector3 offsetAB, offsetBA;
+
+ transformAB = orthoInverse(transformA) * transformB;
+ transformBA = orthoInverse(transformAB);
+
+ matrixAB = transformAB.getUpper3x3();
+ offsetAB = transformAB.getTranslation();
+ matrixBA = transformBA.getUpper3x3();
+ offsetBA = transformBA.getTranslation();
+
+ Matrix3 absMatrixAB = absPerElem(matrixAB);
+ Matrix3 absMatrixBA = absPerElem(matrixBA);
+
+ // find separating axis with largest gap between projections
+ // NOTE(review): axisType, gap and maxGap are never assigned in the visible body, and axisA/axisB/faceDim*/edgeDim* only get their initial values here; presumably the AaxisTest / BaxisTest / CrossAxisTest macros (defined outside this excerpt) set them.
+ BoxSepAxisType axisType;
+ Vector3 axisA(0.0f), axisB(0.0f);
+ float gap, maxGap;
+ int faceDimA = 0, faceDimB = 0, edgeDimA = 0, edgeDimB = 0;
+
+ // face axes
+ // gapsA: |offsetAB| minus A's half-extents minus B's half-extents projected through absMatrixAB
+ Vector3 gapsA = absPerElem(offsetAB) - boxA.half - absMatrixAB * boxB.half;
+
+ AaxisTest(0,X,true);
+ AaxisTest(1,Y,false);
+ AaxisTest(2,Z,false);
+ // gapsB: the same quantity expressed in B's frame
+ Vector3 gapsB = absPerElem(offsetBA) - boxB.half - absMatrixBA * boxA.half;
+
+ BaxisTest(0,X);
+ BaxisTest(1,Y);
+ BaxisTest(2,Z);
+
+ // cross product axes
+
+ // workaround for the case where the cross product is 0
+ absMatrixAB += Matrix3(1.0e-5f);
+ absMatrixBA += Matrix3(1.0e-5f);
+ // NOTE(review): lsqrs is filled in below but not read in the visible body; presumably CrossAxisTest uses it.
+ Matrix3 lsqrs, projOffset, projAhalf, projBhalf;
+
+ lsqrs.setCol0( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
+ mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) );
+ lsqrs.setCol1( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
+ mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) );
+ lsqrs.setCol2( mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) +
+ mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) );
+
+ projOffset.setCol0(matrixBA.getCol1() * offsetAB.getZ() - matrixBA.getCol2() * offsetAB.getY());
+ projOffset.setCol1(matrixBA.getCol2() * offsetAB.getX() - matrixBA.getCol0() * offsetAB.getZ());
+ projOffset.setCol2(matrixBA.getCol0() * offsetAB.getY() - matrixBA.getCol1() * offsetAB.getX());
+
+ projAhalf.setCol0(absMatrixBA.getCol1() * boxA.half.getZ() + absMatrixBA.getCol2() * boxA.half.getY());
+ projAhalf.setCol1(absMatrixBA.getCol2() * boxA.half.getX() + absMatrixBA.getCol0() * boxA.half.getZ());
+ projAhalf.setCol2(absMatrixBA.getCol0() * boxA.half.getY() + absMatrixBA.getCol1() * boxA.half.getX());
+
+ projBhalf.setCol0(absMatrixAB.getCol1() * boxB.half.getZ() + absMatrixAB.getCol2() * boxB.half.getY());
+ projBhalf.setCol1(absMatrixAB.getCol2() * boxB.half.getX() + absMatrixAB.getCol0() * boxB.half.getZ());
+ projBhalf.setCol2(absMatrixAB.getCol0() * boxB.half.getY() + absMatrixAB.getCol1() * boxB.half.getX());
+ // one gap entry per pair of (A axis, B axis) cross products
+ Matrix3 gapsAxB = absPerElem(projOffset) - projAhalf - transpose(projBhalf);
+
+ CrossAxisTest(0,0,X);
+ CrossAxisTest(0,1,Y);
+ CrossAxisTest(0,2,Z);
+ CrossAxisTest(1,0,X);
+ CrossAxisTest(1,1,Y);
+ CrossAxisTest(1,2,Z);
+ CrossAxisTest(2,0,X);
+ CrossAxisTest(2,1,Y);
+ CrossAxisTest(2,2,Z);
+
+ // need to pick the face on each box whose normal best matches the separating axis.
+ // will transform vectors to be in the coordinate system of this face to simplify things later.
+ // for this, a permutation matrix can be used, which the next section computes.
+
+ int dimA[3], dimB[3];
+ // face-axis cases: make the axis point along the offset to the other box, then pick the other box's face dimension as the largest absolute component of the axis in that box's frame
+ if ( axisType == A_AXIS ) {
+ if ( dot(axisA,offsetAB) < 0.0f )
+ axisA = -axisA;
+ axisB = matrixBA * -axisA;
+
+ Vector3 absAxisB = Vector3(absPerElem(axisB));
+
+ if ( ( absAxisB[0] > absAxisB[1] ) && ( absAxisB[0] > absAxisB[2] ) )
+ faceDimB = 0;
+ else if ( absAxisB[1] > absAxisB[2] )
+ faceDimB = 1;
+ else
+ faceDimB = 2;
+ } else if ( axisType == B_AXIS ) {
+ if ( dot(axisB,offsetBA) < 0.0f )
+ axisB = -axisB;
+ axisA = matrixAB * -axisB;
+
+ Vector3 absAxisA = Vector3(absPerElem(axisA));
+
+ if ( ( absAxisA[0] > absAxisA[1] ) && ( absAxisA[0] > absAxisA[2] ) )
+ faceDimA = 0;
+ else if ( absAxisA[1] > absAxisA[2] )
+ faceDimA = 1;
+ else
+ faceDimA = 2;
+ }
+ // cross-axis case: slot 1 of dimA/dimB is the edge direction; slot 2 (the face normal) is whichever remaining dimension has the larger absolute axis component
+ if ( axisType == CROSS_AXIS ) {
+ if ( dot(axisA,offsetAB) < 0.0f )
+ axisA = -axisA;
+ axisB = matrixBA * -axisA;
+
+ Vector3 absAxisA = Vector3(absPerElem(axisA));
+ Vector3 absAxisB = Vector3(absPerElem(axisB));
+
+ dimA[1] = edgeDimA;
+ dimB[1] = edgeDimB;
+
+ if ( edgeDimA == 0 ) {
+ if ( absAxisA[1] > absAxisA[2] ) {
+ dimA[0] = 2;
+ dimA[2] = 1;
+ } else {
+ dimA[0] = 1;
+ dimA[2] = 2;
+ }
+ } else if ( edgeDimA == 1 ) {
+ if ( absAxisA[2] > absAxisA[0] ) {
+ dimA[0] = 0;
+ dimA[2] = 2;
+ } else {
+ dimA[0] = 2;
+ dimA[2] = 0;
+ }
+ } else {
+ if ( absAxisA[0] > absAxisA[1] ) {
+ dimA[0] = 1;
+ dimA[2] = 0;
+ } else {
+ dimA[0] = 0;
+ dimA[2] = 1;
+ }
+ }
+
+ if ( edgeDimB == 0 ) {
+ if ( absAxisB[1] > absAxisB[2] ) {
+ dimB[0] = 2;
+ dimB[2] = 1;
+ } else {
+ dimB[0] = 1;
+ dimB[2] = 2;
+ }
+ } else if ( edgeDimB == 1 ) {
+ if ( absAxisB[2] > absAxisB[0] ) {
+ dimB[0] = 0;
+ dimB[2] = 2;
+ } else {
+ dimB[0] = 2;
+ dimB[2] = 0;
+ }
+ } else {
+ if ( absAxisB[0] > absAxisB[1] ) {
+ dimB[0] = 1;
+ dimB[2] = 0;
+ } else {
+ dimB[0] = 0;
+ dimB[2] = 1;
+ }
+ }
+ } else {
+ dimA[2] = faceDimA;
+ dimA[0] = (faceDimA+1)%3;
+ dimA[1] = (faceDimA+2)%3;
+ dimB[2] = faceDimB;
+ dimB[0] = (faceDimB+1)%3;
+ dimB[1] = (faceDimB+2)%3;
+ }
+ // permutation matrices: column k of aperm_col / bperm_col is the unit axis selected by dimA[k] / dimB[k]
+ Matrix3 aperm_col, bperm_col;
+
+ aperm_col.setCol0(ident[dimA[0]]);
+ aperm_col.setCol1(ident[dimA[1]]);
+ aperm_col.setCol2(ident[dimA[2]]);
+
+ bperm_col.setCol0(ident[dimB[0]]);
+ bperm_col.setCol1(ident[dimB[1]]);
+ bperm_col.setCol2(ident[dimB[2]]);
+
+ Matrix3 aperm_row, bperm_row;
+
+ aperm_row = transpose(aperm_col);
+ bperm_row = transpose(bperm_col);
+
+ // permute all box parameters to be in the face coordinate systems
+
+ Matrix3 matrixAB_perm = aperm_row * matrixAB * bperm_col;
+ Matrix3 matrixBA_perm = transpose(matrixAB_perm);
+
+ Vector3 offsetAB_perm, offsetBA_perm;
+
+ offsetAB_perm = aperm_row * offsetAB;
+ offsetBA_perm = bperm_row * offsetBA;
+
+ Vector3 halfA_perm, halfB_perm;
+
+ halfA_perm = aperm_row * boxA.half;
+ halfB_perm = bperm_row * boxB.half;
+
+ // compute the vector between the centers of each face, in each face's coordinate frame
+
+ Vector3 signsA_perm, signsB_perm, scalesA_perm, scalesB_perm, faceOffsetAB_perm, faceOffsetBA_perm;
+ // signs: +1/-1 per component, copied from the permuted axis; scales: the half-extents with those signs applied
+ signsA_perm = copySignPerElem(Vector3(1.0f),aperm_row * axisA);
+ signsB_perm = copySignPerElem(Vector3(1.0f),bperm_row * axisB);
+ scalesA_perm = mulPerElem( signsA_perm, halfA_perm );
+ scalesB_perm = mulPerElem( signsB_perm, halfB_perm );
+
+ faceOffsetAB_perm = offsetAB_perm + matrixAB_perm.getCol2() * scalesB_perm.getZ();
+ faceOffsetAB_perm.setZ( faceOffsetAB_perm.getZ() - scalesA_perm.getZ() );
+
+ faceOffsetBA_perm = offsetBA_perm + matrixBA_perm.getCol2() * scalesA_perm.getZ();
+ faceOffsetBA_perm.setZ( faceOffsetBA_perm.getZ() - scalesB_perm.getZ() );
+
+ if ( maxGap < 0.0f ) {
+ // if boxes overlap, this will separate the faces for finding points of penetration.
+
+ faceOffsetAB_perm -= aperm_row * axisA * maxGap * 1.01f;
+ faceOffsetBA_perm -= bperm_row * axisB * maxGap * 1.01f;
+ }
+
+ // for each vertex/face or edge/edge pair of the two faces, find the closest points.
+ //
+ // these points each have an associated box feature (vertex, edge, or face). if each
+ // point is in the external Voronoi region of the other's feature, they are the
+ // closest points of the boxes, and the algorithm can exit.
+ //
+ // the feature pairs are arranged so that in the general case, the first test will
+ // succeed. degenerate cases (parallel faces) may require up to all tests in the
+ // worst case.
+ //
+ // if for some reason no case passes the Voronoi test, the features with the minimum
+ // distance are returned.
+
+ Point3 localPointA_perm, localPointB_perm;
+ float minDistSqr;
+ bool done;
+
+ Vector3 hA_perm( halfA_perm ), hB_perm( halfB_perm );
+ // the Z (face-normal) coordinate of each local point is fixed to the chosen face; Z of the scales is then zeroed so only X/Y vary in the tests below
+ localPointA_perm.setZ( scalesA_perm.getZ() );
+ localPointB_perm.setZ( scalesB_perm.getZ() );
+ scalesA_perm.setZ(0.0f);
+ scalesB_perm.setZ(0.0f);
+
+ int otherFaceDimA, otherFaceDimB; // only assigned by EdgeEdgeTests, and only read below when the feature is E
+ FeatureType featureA, featureB;
+ // the test family matching the axis type runs first (first = true, which initialises minDistSqr); the other two run only while 'done' is still false
+ if ( axisType == CROSS_AXIS ) {
+ EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ otherFaceDimA, otherFaceDimB, featureA, featureB,
+ hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
+ scalesA_perm, scalesB_perm, true );
+
+ if ( !done ) {
+ VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ featureA, featureB,
+ hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false );
+
+ if ( !done ) {
+ VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ featureA, featureB,
+ hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false );
+ }
+ }
+ } else if ( axisType == B_AXIS ) {
+ VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ featureA, featureB,
+ hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, true );
+
+ if ( !done ) {
+ VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ featureA, featureB,
+ hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false );
+
+ if ( !done ) {
+ EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ otherFaceDimA, otherFaceDimB, featureA, featureB,
+ hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
+ scalesA_perm, scalesB_perm, false );
+ }
+ }
+ } else {
+ VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ featureA, featureB,
+ hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, true );
+
+ if ( !done ) {
+ VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ featureA, featureB,
+ hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false );
+
+ if ( !done ) {
+ EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+ otherFaceDimA, otherFaceDimB, featureA, featureB,
+ hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+ matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
+ scalesA_perm, scalesB_perm, false );
+ }
+ }
+ }
+
+ // convert local points from face-local to box-local coordinate system
+
+ boxPointA.localPoint = Point3( aperm_col * Vector3( localPointA_perm ) );
+ boxPointB.localPoint = Point3( bperm_col * Vector3( localPointB_perm ) );
+
+ // find which features of the boxes are involved.
+ // the only feature pairs which occur in this function are VF, FV, and EE, even though the
+ // closest points might actually lie on sub-features, as in a VF contact might be used for
+ // what's actually a VV contact. this means some feature pairs could possibly seem distinct
+ // from others, although their contact positions are the same. don't know yet whether this
+ // matters.
+
+ int sA[3], sB[3];
+ // sA / sB: 1 where the box-local coordinate of the point is positive, 0 otherwise
+ sA[0] = boxPointA.localPoint.getX() > 0.0f;
+ sA[1] = boxPointA.localPoint.getY() > 0.0f;
+ sA[2] = boxPointA.localPoint.getZ() > 0.0f;
+
+ sB[0] = boxPointB.localPoint.getX() > 0.0f;
+ sB[1] = boxPointB.localPoint.getY() > 0.0f;
+ sB[2] = boxPointB.localPoint.getZ() > 0.0f;
+
+ if ( featureA == F ) {
+ boxPointA.setFaceFeature( dimA[2], sA[dimA[2]] );
+ } else if ( featureA == E ) {
+ boxPointA.setEdgeFeature( dimA[2], sA[dimA[2]], dimA[otherFaceDimA], sA[dimA[otherFaceDimA]] );
+ } else {
+ boxPointA.setVertexFeature( sA[0], sA[1], sA[2] );
+ }
+
+ if ( featureB == F ) {
+ boxPointB.setFaceFeature( dimB[2], sB[dimB[2]] );
+ } else if ( featureB == E ) {
+ boxPointB.setEdgeFeature( dimB[2], sB[dimB[2]], dimB[otherFaceDimB], sB[dimB[otherFaceDimB]] );
+ } else {
+ boxPointB.setVertexFeature( sB[0], sB[1], sB[2] );
+ }
+
+ normal = transformA * axisA;
+ // overlapping boxes report the (negative) largest gap; otherwise the distance found by the feature tests is reported
+ if ( maxGap < 0.0f ) {
+ return (maxGap);
+ } else {
+ return (sqrtf( minDistSqr ));
+ }
+}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
new file mode 100644
index 00000000000..c58e257c026
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
@@ -0,0 +1,66 @@
+/*
+ Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#ifndef __BOXBOXDISTANCE_H__
+#define __BOXBOXDISTANCE_H__
+
+
+#include "Box.h"
+
+using namespace Vectormath::Aos;
+
+//---------------------------------------------------------------------------
+// boxBoxDistance:
+//
+// description:
+// this computes info that can be used for the collision response of two boxes. when the boxes
+// do not overlap, the points are set to the closest points of the boxes, and a positive
+// distance between them is returned. if the boxes do overlap, a negative distance is returned
+// and the points are set to two points that would touch after the boxes are translated apart.
+// the contact normal gives the direction to repel or separate the boxes when they touch or
+// overlap (it's being approximated here as one of the 15 "separating axis" directions).
+//
+// returns:
+// positive or negative distance between two boxes.
+//
+// args:
+// Vector3& normal: set to a unit contact normal pointing from box A to box B.
+//
+// BoxPoint& boxPointA, BoxPoint& boxPointB:
+// set to a closest point or point of penetration on each box.
+//
+// Box boxA, Box boxB:
+// boxes, represented as 3 half-widths
+//
+// const Transform3& transformA, const Transform3& transformB:
+// box transformations, in world coordinates
+//
+// float distanceThreshold:
+// the algorithm will exit early if it finds that the boxes are more distant than this
+// threshold, and not compute a contact normal or points. if this distance returned
+// exceeds the threshold, all the other output data may not have been computed. by
+// default, this is set to FLT_MAX so it will have no effect.
+//
+//---------------------------------------------------------------------------
+
+float
+boxBoxDistance(Vector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB,
+ PE_REF(Box) boxA, const Transform3 & transformA, PE_REF(Box) boxB,
+ const Transform3 & transformB,
+ float distanceThreshold = FLT_MAX );
+
+#endif /* __BOXBOXDISTANCE_H__ */
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt
new file mode 100644
index 00000000000..5b4a907058f
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt
@@ -0,0 +1 @@
+Empty placeholder for future Libspe2 SPU task
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp
new file mode 100644
index 00000000000..fe61955572f
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp
@@ -0,0 +1,214 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "SpuSampleTask.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "../PlatformDefinitions.h"
+#include "../SpuFakeDma.h"
+#include "LinearMath/btMinMax.h"
+
+#ifdef __SPU__
+#include <spu_printf.h>
+#else
+#include <stdio.h>
+#define spu_printf printf
+#endif
+
+#define MAX_NUM_BODIES 8192
+// Scratch memory used by processSampleTask: one staging buffer for a single rigid body and the list of body addresses for the current batch.
+struct SampleTask_LocalStoreMemory
+{
+ ATTRIBUTE_ALIGNED16(char gLocalRigidBody [sizeof(btRigidBody)+16]); // raw bytes; processSampleTask DMAs one btRigidBody in here and uses it through a btRigidBody* cast
+ ATTRIBUTE_ALIGNED16(void* gPointerArray[MAX_NUM_BODIES]); // main-memory addresses of the bodies in the batch (at most MAX_NUM_BODIES)
+
+};
+
+
+
+
+//-- MAIN METHOD: runs the command in SpuSampleTaskDesc::m_sampleCommand over a batch of rigid bodies, copying each body into lsMemory, updating it there and copying it back
+void processSampleTask(void* userPtr, void* lsMemory)
+{
+ // BT_PROFILE("processSampleTask");
+ // lsMemory is used as a SampleTask_LocalStoreMemory and userPtr as a SpuSampleTaskDesc; neither pointer is checked
+ SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory;
+
+ SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr;
+ SpuSampleTaskDesc& taskDesc = *taskDescPtr;
+
+ switch (taskDesc.m_sampleCommand)
+ {
+ case CMD_SAMPLE_INTEGRATE_BODIES:
+ {
+ btTransform predictedTrans;
+ btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
+ // m_sampleValue carries the number of bodies; NOTE(review): only the upper bound is checked, a negative value is not rejected here
+ int batchSize = taskDesc.m_sampleValue;
+ if (batchSize>MAX_NUM_BODIES)
+ {
+ spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
+ break;
+ }
+ int dmaArraySize = batchSize*sizeof(void*);
+
+ uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
+
+ // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);
+ // fetch the array of body addresses into gPointerArray; transfers under 16 bytes go through the small unaligned helper instead
+ if (dmaArraySize>=16)
+ {
+ cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ } else
+ {
+ stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize);
+ }
+
+
+ for ( int i=0;i<batchSize;i++)
+ {
+ ///DMA rigid body
+ // every body is staged in the same local buffer, one at a time
+ void* localPtr = &localMemory->gLocalRigidBody[0];
+ void* shortAdd = localMemory->gPointerArray[i];
+ uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);
+
+ // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);
+
+ int dmaBodySize = sizeof(btRigidBody);
+
+ cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ // the time step is hard-coded to 1/60 s
+ float timeStep = 1.f/60.f;
+
+ btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
+ if (body) // body is the address of the local staging buffer, so this does not detect a failed transfer
+ {
+ if (body->isActive() && (!body->isStaticOrKinematicObject()))
+ {
+ body->predictIntegratedTransform(timeStep, predictedTrans);
+ body->proceedToTransform( predictedTrans);
+ void* ptr = (void*)localPtr;
+ // spu_printf("cellDmaLargePut from %llx to LS %llx\n",ptr,ppuRigidBodyAddress);
+ // write the whole updated body back to its main-memory address
+ cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+ }
+ }
+
+ }
+ break;
+ }
+
+
+ case CMD_SAMPLE_PREDICT_MOTION_BODIES:
+ {
+ btTransform predictedTrans;
+ btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
+ // same batch setup as the case above; dmaArraySize is computed before the range check but is not used until after it
+ int batchSize = taskDesc.m_sampleValue;
+ int dmaArraySize = batchSize*sizeof(void*);
+
+ if (batchSize>MAX_NUM_BODIES)
+ {
+ spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
+ break;
+ }
+
+ uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
+
+ // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);
+
+ if (dmaArraySize>=16)
+ {
+ cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ } else
+ {
+ stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize);
+ }
+
+
+ for ( int i=0;i<batchSize;i++)
+ {
+ ///DMA rigid body
+
+ void* localPtr = &localMemory->gLocalRigidBody[0];
+ void* shortAdd = localMemory->gPointerArray[i];
+ uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);
+
+ // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);
+
+ int dmaBodySize = sizeof(btRigidBody);
+
+ cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+
+ float timeStep = 1.f/60.f;
+
+ btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
+ if (body)
+ {
+ if (!body->isStaticOrKinematicObject())
+ {
+ if (body->isActive())
+ {
+ body->integrateVelocities( timeStep);
+ //damping
+ body->applyDamping(timeStep);
+ // the predicted transform is written straight into the body's interpolation world transform
+ body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform());
+
+ void* ptr = (void*)localPtr;
+ cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(1));
+ }
+ }
+ }
+
+ }
+ break;
+ }
+
+
+ // any other command value is silently ignored
+ default:
+ {
+
+ }
+ };
+}
+
+// createSampleLocalStoreMemory: returns the scratch memory block that processSampleTask expects as its lsMemory argument.
+#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
+// Cell / libspe2 builds: a single file-scope instance is shared by every call
+ATTRIBUTE_ALIGNED16(SampleTask_LocalStoreMemory gLocalStoreMemory);
+
+void* createSampleLocalStoreMemory()
+{
+ return &gLocalStoreMemory;
+}
+#else
+void* createSampleLocalStoreMemory()
+{
+ return new SampleTask_LocalStoreMemory; // a fresh heap instance per call; nothing in this file deletes it, so the caller presumably owns it
+};
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h
new file mode 100644
index 00000000000..c8ebdfd6232
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h
@@ -0,0 +1,54 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef SPU_SAMPLE_TASK_H
+#define SPU_SAMPLE_TASK_H
+
+#include "../PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMatrix3x3.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+
+///Sample task command identifiers; the numeric values are spelled out because they are exchanged between processors.
+enum
+{
+	CMD_SAMPLE_INTEGRATE_BODIES = 1,
+	CMD_SAMPLE_PREDICT_MOTION_BODIES = 2
+};
+
+
+
+ATTRIBUTE_ALIGNED16(struct) SpuSampleTaskDesc // task descriptor filled in by SpuSampleTaskProcess::issueTask
+{
+ BT_DECLARE_ALIGNED_ALLOCATOR();
+ // NOTE: the libspe2 SPU main loop fetches this struct with a raw DMA of sizeof(SpuSampleTaskDesc); keep field order and sizes stable.
+ uint32_t m_sampleCommand; // command id given to issueTask (presumably one of the CMD_SAMPLE_* values)
+ uint32_t m_taskId; // index of the task slot that issued the request, used to recognize finished tasks
+
+ uint64_t m_mainMemoryPtr; // main-memory pointer given to issueTask, stored as a 64-bit integer
+ int m_sampleValue; // integer given to issueTask, stored unchanged
+
+
+};
+
+
+void processSampleTask(void* userPtr, void* lsMemory); // runs one sample task; defined in SpuSampleTask.cpp
+void* createSampleLocalStoreMemory(); // returns a SampleTask_LocalStoreMemory: a static instance on Cell/LIBSPE2 builds, a new heap allocation otherwise
+
+
+#endif //SPU_SAMPLE_TASK_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt
new file mode 100644
index 00000000000..5b4a907058f
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt
@@ -0,0 +1 @@
+Empty placeholder for future Libspe2 SPU task
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp
new file mode 100644
index 00000000000..11cb9e7c3f5
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp
@@ -0,0 +1,222 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define __CELLOS_LV2__ 1
+
+#define USE_SAMPLE_PROCESS 1
+#ifdef USE_SAMPLE_PROCESS
+
+
+#include "SpuSampleTaskProcess.h"
+#include <stdio.h>
+
+#ifdef __SPU__
+
+
+
+///SPU-side sample task entry point: ignores both arguments and only prints a greeting.
+void SampleThreadFunc(void* userPtr,void* lsMemory)
+{
+	(void)userPtr;
+	(void)lsMemory;
+	printf("hello world\n");
+}
+
+
+///SPU-side local store setup hook: this sample allocates nothing and always returns a null pointer.
+void* SamplelsMemoryFunc()
+{
+	void* const noLocalStore = 0;
+	return noLocalStore;
+}
+
+
+#else
+
+
+#include "btThreadSupportInterface.h"
+
+//# include "SPUAssert.h"
+#include <string.h>
+
+
+
+extern "C" {
+ extern char SPU_SAMPLE_ELF_SYMBOL[];
+}
+
+
+
+
+
+///Allocates maxNumOutstandingTasks task slots (all idle) and starts the worker side through the thread interface.
+///@param threadInterface back end that receives the task requests; stored but never deleted by this class
+///@param maxNumOutstandingTasks number of task descriptors / busy flags to allocate
+SpuSampleTaskProcess::SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
+:m_numBusyTasks(0),
+m_currentTask(0),
+m_initialized(false),
+m_threadInterface(threadInterface),
+m_maxNumOutstandingTasks(maxNumOutstandingTasks)
+{
+	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
+	m_taskBusy.resize(m_maxNumOutstandingTasks);
+
+	//every slot starts out free
+	for (int slot = 0; slot < m_maxNumOutstandingTasks; ++slot)
+	{
+		m_taskBusy[slot] = false;
+	}
+
+	m_threadInterface->startSPU();
+}
+
+///Stops the worker side through the thread interface; the interface object itself is not destroyed here.
+SpuSampleTaskProcess::~SpuSampleTaskProcess()
+{
+	btThreadSupportInterface* const backend = m_threadInterface;
+	backend->stopSPU();
+}
+
+
+
+///Resets the scheduler state: marks every task slot free, clears the busy counter and makes slot 0 the current slot.
+void SpuSampleTaskProcess::initialize()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("SpuSampleTaskProcess::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_currentTask = 0;
+	m_numBusyTasks = 0;
+
+	int slot = 0;
+	while (slot < m_maxNumOutstandingTasks)
+	{
+		m_taskBusy[slot] = false;
+		++slot;
+	}
+
+	m_initialized = true;
+}
+
+
+///Fills the current task slot and sends it to the thread interface.
+///If that makes every slot busy, blocks until one task has completed, post-processes it and frees its slot.
+///Finally selects the first free slot as the slot for the next call.
+///@param sampleMainMemPtr main-memory pointer stored (as a 64-bit integer) in the task descriptor
+///@param sampleValue integer stored unchanged in the task descriptor
+///@param sampleCommand command id stored in the task descriptor
+void SpuSampleTaskProcess::issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand)
+{
+
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("SpuSampleTaskProcess::issueTask (m_currentTask= %d)\n", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_taskBusy[m_currentTask] = true;
+	m_numBusyTasks++;
+
+	SpuSampleTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
+	{
+		// send task description in event message
+		// no error checking here...
+		// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
+
+		taskDesc.m_mainMemoryPtr = reinterpret_cast<uint64_t>(sampleMainMemPtr);
+		taskDesc.m_sampleValue = sampleValue;
+		taskDesc.m_sampleCommand = sampleCommand;
+
+		//some bookkeeping to recognize finished tasks
+		taskDesc.m_taskId = m_currentTask;
+	}
+
+	//the request command id is the literal 1, not the sampleCommand stored in the descriptor
+	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
+
+	// if all tasks busy, wait for spu event to clear the task.
+
+	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+	{
+		//initialized so that no indeterminate value is ever handed to the back end or to postProcess
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+
+		//seed taskId with the first busy slot before asking the back end which task finished
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+	// find new task buffer
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		if (!m_taskBusy[i])
+		{
+			m_currentTask = i;
+			break;
+		}
+	}
+}
+
+
+///Optional PPU-side post processing hook, called once for each completed task; currently does nothing.
+void SpuSampleTaskProcess::postProcess(int taskId, int outputSize)
+{
+	(void)taskId;
+	(void)outputSize;
+}
+
+
+///Blocks until every issued task has completed; each completed task is post-processed and its slot is freed.
+void SpuSampleTaskProcess::flush()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("\nSpuSampleTaskProcess::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+
+	// all tasks are issued, wait for all tasks to be complete
+	while(m_numBusyTasks > 0)
+	{
+		//initialized so that no indeterminate value is ever handed to the back end or to postProcess
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+
+		//seed taskId with the first busy slot before asking the back end which task finished
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+}
+
+#endif
+
+
+#endif //USE_SAMPLE_PROCESS
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h
new file mode 100644
index 00000000000..d733a9a8528
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h
@@ -0,0 +1,153 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_SAMPLE_TASK_PROCESS_H
+#define SPU_SAMPLE_TASK_PROCESS_H
+
+#include <assert.h>
+
+
+#include "PlatformDefinitions.h"
+
+#include <stdlib.h>
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+#include "SpuSampleTask/SpuSampleTask.h"
+
+
+//just add your commands here, try to keep them globally unique for debugging purposes
+#define CMD_SAMPLE_TASK_COMMAND 10
+
+
+
+/// SpuSampleTaskProcess issues sample tasks to a btThreadSupportInterface back end.
+/// It owns a fixed pool of task descriptors; when every descriptor is busy, issueTask blocks until one task completes.
+/// postProcess is called for each completed task (currently an empty hook).
+class SpuSampleTaskProcess
+{
+ // track task buffers that are being used, and total busy tasks
+ btAlignedObjectArray<bool> m_taskBusy;
+ btAlignedObjectArray<SpuSampleTaskDesc>m_spuSampleTaskDesc;
+ // number of slots currently marked busy in m_taskBusy
+ int m_numBusyTasks;
+
+ // index of the slot that the next issueTask call will fill
+ int m_currentTask;
+
+ bool m_initialized; // false after construction, true after initialize(); only written, never read, in SpuSampleTaskProcess.cpp
+
+ void postProcess(int taskId, int outputSize); // per-task hook, called after the back end reports a completed task
+
+ class btThreadSupportInterface* m_threadInterface; // back end used to send requests and wait for responses; never deleted by this class
+
+ int m_maxNumOutstandingTasks; // size of m_taskBusy and m_spuSampleTaskDesc
+
+
+
+public:
+ SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks); // also calls startSPU() on the back end
+
+ ~SpuSampleTaskProcess(); // calls stopSPU() on the back end
+
+ ///call initialize in the beginning of the frame, before the first issueTask
+ void initialize();
+ ///fill the current slot and send it to the back end; waits for one completion when all slots are busy
+ void issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand);
+
+ ///call flush to wait until every issued task has completed
+ void flush();
+};
+
+
+#if defined(USE_LIBSPE2) && defined(__SPU__)
+////////////////////MAIN/////////////////////////////
+#include "../SpuLibspe2Support.h"
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <SpuFakeDma.h>
+
+void * SamplelsMemoryFunc();
+void SampleThreadFunc(void* userPtr,void* lsMemory);
+
+//#define DEBUG_LIBSPE2_MAINLOOP
+
+int main(unsigned long long speid, addr64 argp, addr64 envp) // SPU program entry point (libspe2 build); argp.ull is the main-memory address of this SPU's btSpuStatus
+{
+ printf("SPU is up \n");
+
+ ATTRIBUTE_ALIGNED128(btSpuStatus status); // local copy of the status block, refreshed by DMA
+ ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ; // local copy of the current task descriptor
+ unsigned int received_message = Spu_Mailbox_Event_Nothing;
+ bool shutdown = false;
+ // fetch the status block, mark this SPU free, publish the local store pointer, write the block back
+ cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+ status.m_status = Spu_Status_Free;
+ status.m_lsMemory.p = SamplelsMemoryFunc();
+
+ cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+ // message loop: one mailbox word per iteration
+ while (!shutdown)
+ {
+ received_message = spu_read_in_mbox();
+
+
+
+ switch(received_message)
+ {
+ case Spu_Mailbox_Event_Shutdown:
+ shutdown = true; // leave the loop after the status write-back below
+ break;
+ case Spu_Mailbox_Event_Task:
+ // refresh the status
+#ifdef DEBUG_LIBSPE2_MAINLOOP
+ printf("SPU recieved Task \n");
+#endif //DEBUG_LIBSPE2_MAINLOOP
+ cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+ btAssert(status.m_status==Spu_Status_Occupied);
+ // fetch the task descriptor that the status block points to
+ cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+ // NOTE(review): the second argument (lsMemory) is the task's main-memory pointer, not status.m_lsMemory - confirm this is intended
+ SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
+ break;
+ case Spu_Mailbox_Event_Nothing:
+ default:
+ break;
+ }
+
+ // set to status free and wait for next task
+ status.m_status = Spu_Status_Free; // executed after every message, including shutdown and unknown ones
+ cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+ cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+
+ }
+ return 0;
+}
+//////////////////////////////////////////////////////
+#endif
+
+
+
+#endif // SPU_SAMPLE_TASK_PROCESS_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSync.h b/extern/bullet2/BulletMultiThreaded/SpuSync.h
new file mode 100644
index 00000000000..b90d0fcbfd4
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/SpuSync.h
@@ -0,0 +1,148 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2007 Starbreeze Studios
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+Written by: Marten Svanfeldt
+*/
+
+#ifndef SPU_SYNC_H
+#define SPU_SYNC_H
+
+
+#include "PlatformDefinitions.h"
+
+
+#if defined(WIN32)
+
+#define WIN32_LEAN_AND_MEAN
+#ifdef _XBOX
+#include <Xtl.h>
+#else
+#include <Windows.h>
+#endif
+
+///The btSpinlock is a structure to allow multi-platform synchronization. This allows to port the SPU tasks to other platforms.
+///Win32 flavour: forwards to a caller-provided CRITICAL_SECTION. Init() initializes that section; this class never deletes it.
+class btSpinlock
+{
+public:
+	//typedef volatile LONG SpinVariable;
+	typedef CRITICAL_SECTION SpinVariable;
+
+private:
+	///the wrapped critical section, supplied by the caller
+	SpinVariable* m_section;
+
+public:
+	btSpinlock (SpinVariable* var)
+	: m_section (var)
+	{
+	}
+
+	///initialize the wrapped critical section; call once before the first Lock
+	void Init ()
+	{
+		//*spinVariable = 0;
+		InitializeCriticalSection(m_section);
+	}
+
+	///enter the critical section
+	void Lock ()
+	{
+		EnterCriticalSection(m_section);
+	}
+
+	///leave the critical section
+	void Unlock ()
+	{
+		LeaveCriticalSection(m_section);
+	}
+};
+
+
+#elif defined (__CELLOS_LV2__)
+
+//#include <cell/atomic.h>
+#include <cell/sync/mutex.h>
+
+///The btSpinlock is a structure to allow multi-platform synchronization. This allows to port the SPU tasks to other platforms.
+class btSpinlock
+{
+public:
+ typedef CellSyncMutex SpinVariable;
+
+ btSpinlock (SpinVariable* var)
+ : spinVariable (var)
+ {}
+
+ void Init ()
+ {
+#ifndef __SPU__
+ //the mutex is only initialized in non-SPU builds; in SPU builds Init() does nothing
+ cellSyncMutexInitialize(spinVariable);
+#endif
+ }
+
+
+
+ void Lock ()
+ {
+#ifdef __SPU__
+ // lock the mutex; the pointer is passed as a 64-bit address. In non-SPU builds Lock() does nothing
+ /*while (cellAtomicTestAndDecr32(atomic_buf, (uint64_t)spinVariable) == 0)
+ {
+
+ };*/
+ cellSyncMutexLock((uint64_t)spinVariable);
+#endif
+ }
+
+ void Unlock ()
+ {
+#ifdef __SPU__
+ //unlock the mutex; in non-SPU builds Unlock() does nothing
+ cellSyncMutexUnlock((uint64_t)spinVariable);
+#endif
+ }
+
+
+private:
+ SpinVariable* spinVariable;
+ ATTRIBUTE_ALIGNED128(uint32_t atomic_buf[32]); // NOTE(review): only referenced by the commented-out cellAtomic code above
+};
+
+#else
+//Dummy implementation (without any locking), useful for serial processing:
+//Init, Lock and Unlock are all no-ops and the wrapped variable is never touched.
+class btSpinlock
+{
+public:
+	typedef int SpinVariable;
+
+private:
+	///stored only to keep the same constructor signature as the locking variants
+	SpinVariable* m_unusedVariable;
+
+public:
+	btSpinlock (SpinVariable* var)
+	: m_unusedVariable (var)
+	{
+	}
+
+	void Init () {}
+
+	void Lock () {}
+
+	void Unlock () {}
+};
+
+
+#endif
+
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp
new file mode 100644
index 00000000000..42b60a460e0
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp
@@ -0,0 +1,262 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "Win32ThreadSupport.h"
+
+#ifdef USE_WIN32_THREADING
+
+#include <windows.h>
+
+#include "SpuCollisionTaskProcess.h"
+
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+
+
+
+///The number of threads should be equal to the number of available cores
+///@todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
+
+///Win32ThreadSupport runs the task functions on Win32 worker threads.
+///The constructor records the thread count and creates the workers right away via startThreads.
+Win32ThreadSupport::Win32ThreadSupport(const Win32ThreadConstructionInfo & threadConstructionInfo)
+:m_maxNumTasks(threadConstructionInfo.m_numThreads)
+{
+	startThreads(threadConstructionInfo);
+}
+
+///Shuts down all worker threads and closes their handles by calling this class's stopSPU.
+Win32ThreadSupport::~Win32ThreadSupport()
+{
+	Win32ThreadSupport::stopSPU();
+}
+
+
+
+
+#include <stdio.h>
+
+///Worker thread entry point; lpParam is the thread's Win32ThreadSupport::btSpuStatus slot.
+///Loop: wait for the start event, run the user function on m_userPtr, set m_status to 2 and signal completion.
+///A null m_userPtr is the request to exit: m_status is set to 3, completion is signalled and the thread returns 0.
+DWORD WINAPI Thread_no_1( LPVOID lpParam )
+{
+	Win32ThreadSupport::btSpuStatus* status = (Win32ThreadSupport::btSpuStatus*)lpParam;
+
+	for (;;)
+	{
+		WaitForSingleObject(status->m_eventStartHandle,INFINITE);
+
+		void* userPtr = status->m_userPtr;
+
+		if (!userPtr)
+		{
+			//exit Thread
+			//Log BEFORE signalling completion: stopSPU() closes the handles and clears the status
+			//array as soon as the completion event fires, so 'status' must not be read afterwards.
+			printf("Thread with taskId %i with handle %p exiting\n",status->m_taskId, status->m_threadHandle);
+			status->m_status = 3;
+			SetEvent(status->m_eventCompletetHandle);
+			break;
+		}
+
+		btAssert(status->m_status);
+		status->m_userThreadFunc(userPtr,status->m_lsMemory);
+		status->m_status = 2;
+		SetEvent(status->m_eventCompletetHandle);
+	}
+
+	printf("Thread TERMINATED\n");
+	return 0;
+}
+
+///Hands one task to the worker thread with index taskId and wakes it through its start event.
+///@param uiCommand must be CMD_GATHER_AND_PROCESS_PAIRLIST; any other value trips btAssert(0)
+///@param uiArgument0 address of the task description; passed to the user thread function as userPtr
+///@param taskId index of the worker slot to use (ignored when SINGLE_THREADED is defined)
+void Win32ThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
+{
+	switch (uiCommand)
+	{
+	case CMD_GATHER_AND_PROCESS_PAIRLIST:
+		{
+//#define SINGLE_THREADED 1
+#ifdef SINGLE_THREADED
+			//run the task synchronously on the calling thread, always using slot 0
+			btSpuStatus& spuStatus = m_activeSpuStatus[0];
+			spuStatus.m_userPtr=(void*)uiArgument0;
+			spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory);
+#else
+			//validate the slot index before it is used to index the array
+			btAssert(int(taskId)>=0 && int(taskId)<m_activeSpuStatus.size());
+			btSpuStatus& spuStatus = m_activeSpuStatus[taskId];
+
+			spuStatus.m_commandId = uiCommand;
+			spuStatus.m_status = 1;
+			spuStatus.m_userPtr = (void*)uiArgument0;
+
+			///fire event to start new task
+			SetEvent(spuStatus.m_eventStartHandle);
+#endif //SINGLE_THREADED
+
+			break;
+		}
+	default:
+		{
+			///not implemented
+			btAssert(0);
+			break;
+		}
+	}
+}
+
+
+///Blocks until any worker signals its completion event and reports which one it was.
+///@param puiArgument0 receives the m_taskId of the finished worker slot
+///@param puiArgument1 receives the slot's m_status; in the threaded build the status has already been reset to 0 by then, so 0 is what gets reported
+void Win32ThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+	btAssert(m_activeSpuStatus.size());
+
+	int last = -1;
+#ifndef SINGLE_THREADED
+	DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
+	btAssert(res != WAIT_FAILED);
+	last = res - WAIT_OBJECT_0;
+
+	//check the index before it is used to index the array
+	btAssert(last>=0);
+	btAssert(last<m_activeSpuStatus.size());
+
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+	btAssert(spuStatus.m_threadHandle);
+	btAssert(spuStatus.m_eventCompletetHandle);
+
+	//the worker sets the status to 2 (task done) or 3 (exiting) before signalling
+	btAssert(spuStatus.m_status > 1);
+	spuStatus.m_status = 0;
+#else
+	last=0;
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+#endif //SINGLE_THREADED
+
+	*puiArgument0 = spuStatus.m_taskId;
+	*puiArgument1 = spuStatus.m_status;
+}
+
+
+
+///Creates an auto-reset, initially non-signalled event named "<prefix><uniqueName><index>".
+///The name is written into nameBuffer. If it does not fit, the buffer is emptied and an unnamed
+///event is created instead, rather than writing past the end of the buffer.
+static HANDLE createWorkerEvent(char* nameBuffer, size_t bufferSize, const char* prefix, const char* uniqueName, int index)
+{
+	//_snprintf returns a negative value on truncation and bufferSize when there is no room for the terminator
+	int written = _snprintf(nameBuffer, bufferSize, "%s%s%d", prefix, uniqueName, index);
+	if (written < 0 || size_t(written) >= bufferSize)
+	{
+		nameBuffer[0] = 0;
+		return CreateEventA (0,false,false,0);
+	}
+	return CreateEventA (0,false,false,nameBuffer);
+}
+
+///Creates m_numThreads worker threads, each with its own start/complete event pair and status slot.
+///Every worker runs Thread_no_1 at THREAD_PRIORITY_HIGHEST and is pinned to logical processor i.
+///@param threadConstructionInfo thread count, stack size, unique name, task function and local store factory
+void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo)
+{
+	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
+	m_completeHandles.resize(threadConstructionInfo.m_numThreads);
+
+	m_maxNumTasks = threadConstructionInfo.m_numThreads;
+
+	for (int i=0;i<threadConstructionInfo.m_numThreads;i++)
+	{
+		printf("starting thread %d\n",i);
+
+		btSpuStatus& spuStatus = m_activeSpuStatus[i];
+
+		LPSECURITY_ATTRIBUTES lpThreadAttributes=NULL;
+		SIZE_T dwStackSize=threadConstructionInfo.m_threadStackSize;
+		LPTHREAD_START_ROUTINE lpStartAddress=&Thread_no_1;
+		LPVOID lpParameter=&spuStatus;
+		DWORD dwCreationFlags=0;
+		LPDWORD lpThreadId=0;
+
+		spuStatus.m_userPtr=0;
+
+		//bounded formatting: the name buffers are fixed-size and m_uniqueName has arbitrary length
+		spuStatus.m_eventStartHandle = createWorkerEvent(spuStatus.m_eventStartHandleName, sizeof(spuStatus.m_eventStartHandleName), "eventStart", threadConstructionInfo.m_uniqueName, i);
+		spuStatus.m_eventCompletetHandle = createWorkerEvent(spuStatus.m_eventCompletetHandleName, sizeof(spuStatus.m_eventCompletetHandleName), "eventComplete", threadConstructionInfo.m_uniqueName, i);
+		btAssert(spuStatus.m_eventStartHandle);
+		btAssert(spuStatus.m_eventCompletetHandle);
+
+		m_completeHandles[i] = spuStatus.m_eventCompletetHandle;
+
+		//the new thread blocks on the start event right away, so the fields set below are in place before it reads them
+		HANDLE handle = CreateThread(lpThreadAttributes,dwStackSize,lpStartAddress,lpParameter, dwCreationFlags,lpThreadId);
+		btAssert(handle);
+		SetThreadPriority(handle,THREAD_PRIORITY_HIGHEST);
+		//SetThreadPriority(handle,THREAD_PRIORITY_TIME_CRITICAL);
+
+		//the affinity mask is pointer-sized: shift in that type and skip indices that have no bit in it
+		if (i < int(sizeof(DWORD_PTR)*8))
+		{
+			SetThreadAffinityMask(handle, DWORD_PTR(1)<<i);
+		}
+
+		spuStatus.m_taskId = i;
+		spuStatus.m_commandId = 0;
+		spuStatus.m_status = 0;
+		spuStatus.m_threadHandle = handle;
+		spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
+		spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+
+		printf("started thread %d with threadHandle %p\n",i,handle);
+	}
+}
+
+void Win32ThreadSupport::startSPU() // intentionally empty: the worker threads are already created by the constructor via startThreads()
+{
+}
+
+
+///Shuts down all worker threads. For each worker: wait for a task that is still marked as outstanding,
+///request exit (null user pointer + start event), wait for the acknowledgement, then close its handles.
+///Both arrays are emptied afterwards, so a second call finds nothing to do.
+void Win32ThreadSupport::stopSPU()
+{
+	const int numWorkers = m_activeSpuStatus.size();
+	for (int worker=0; worker<numWorkers; ++worker)
+	{
+		btSpuStatus& current = m_activeSpuStatus[worker];
+
+		//a non-zero status means a task was issued and its completion has not been collected yet
+		const bool taskOutstanding = (current.m_status>0);
+		if (taskOutstanding)
+		{
+			WaitForSingleObject(current.m_eventCompletetHandle, INFINITE);
+		}
+
+		//a null user pointer is the exit request understood by the worker loop
+		current.m_userPtr = 0;
+		SetEvent(current.m_eventStartHandle);
+		WaitForSingleObject(current.m_eventCompletetHandle, INFINITE);
+
+		CloseHandle(current.m_eventCompletetHandle);
+		CloseHandle(current.m_eventStartHandle);
+		CloseHandle(current.m_threadHandle);
+	}
+
+	m_completeHandles.clear();
+	m_activeSpuStatus.clear();
+}
+
+#endif //USE_WIN32_THREADING
diff --git a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h
new file mode 100644
index 00000000000..c61ad901c07
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h
@@ -0,0 +1,132 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+#ifdef USE_WIN32_THREADING //platform specific defines are defined in PlatformDefinitions.h
+
+#ifndef WIN32_THREAD_SUPPORT_H
+#define WIN32_THREAD_SUPPORT_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btThreadSupportInterface.h"
+
+
+typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory);
+typedef void* (*Win32lsMemorySetupFunc)();
+
+
+
+
+
+
+///Win32ThreadSupport implements btThreadSupportInterface with Win32 worker threads and events
+class Win32ThreadSupport : public btThreadSupportInterface
+{
+public:
+ ///per-worker bookkeeping; one instance per thread, also passed to the thread as its start parameter
+ struct btSpuStatus
+ {
+ uint32_t m_taskId; // index of this worker, set in startThreads
+ uint32_t m_commandId; // command of the last sendRequest
+ uint32_t m_status; // 0 = idle, 1 = task issued, 2 = task finished, 3 = thread exiting
+
+ Win32ThreadFunc m_userThreadFunc; // task function called by the worker for every request
+ void* m_userPtr; //for taskDesc etc; null is the exit request
+ void* m_lsMemory; //result of Win32ThreadConstructionInfo::m_lsMemoryFunc(), passed to every task call
+
+ void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
+
+ void* m_eventStartHandle; // signalled by sendRequest/stopSPU to wake the worker
+ char m_eventStartHandleName[32]; // name used to create the start event
+
+ void* m_eventCompletetHandle; // signalled by the worker when a task (or the exit request) is done
+ char m_eventCompletetHandleName[32]; // name used to create the completion event
+
+
+ };
+private:
+
+ btAlignedObjectArray<btSpuStatus> m_activeSpuStatus; // one slot per worker thread
+ btAlignedObjectArray<void*> m_completeHandles; // completion event handles, same order as m_activeSpuStatus
+
+ int m_maxNumTasks; // set to the thread count on construction; can be overridden with setNumTasks
+public:
+ ///Parameters for creating the worker threads
+
+ struct Win32ThreadConstructionInfo
+ {
+ Win32ThreadConstructionInfo(char* uniqueName,
+ Win32ThreadFunc userThreadFunc,
+ Win32lsMemorySetupFunc lsMemoryFunc,
+ int numThreads=1,
+ int threadStackSize=65535
+ )
+ :m_uniqueName(uniqueName),
+ m_userThreadFunc(userThreadFunc),
+ m_lsMemoryFunc(lsMemoryFunc),
+ m_numThreads(numThreads),
+ m_threadStackSize(threadStackSize)
+ {
+
+ }
+
+ char* m_uniqueName; // becomes part of the event names; the pointer is stored, not copied
+ Win32ThreadFunc m_userThreadFunc; // task function run by every worker
+ Win32lsMemorySetupFunc m_lsMemoryFunc; // called once per worker to obtain its m_lsMemory
+ int m_numThreads; // number of worker threads to create
+ int m_threadStackSize; // passed to CreateThread as the stack size
+
+ };
+
+
+
+ Win32ThreadSupport(const Win32ThreadConstructionInfo& threadConstructionInfo); // creates the worker threads immediately
+
+///stops all worker threads (calls stopSPU)
+ virtual ~Win32ThreadSupport();
+
+ void startThreads(const Win32ThreadConstructionInfo& threadInfo); // creates the events and threads; called by the constructor
+
+
+///hand a task to the worker with index uiArgument1 and wake it
+ virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
+
+///block until one worker has finished; reports its task id and status
+ virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+
+///no-op in this implementation; the threads are started by the constructor
+ virtual void startSPU();
+
+///ask every worker thread to exit and close all thread and event handles
+ virtual void stopSPU();
+
+ virtual void setNumTasks(int numTasks) // only changes the value reported by getNumTasks; no threads are created or stopped
+ {
+ m_maxNumTasks = numTasks;
+ }
+
+ virtual int getNumTasks() const
+ {
+ return m_maxNumTasks;
+ }
+
+};
+
+#endif //WIN32_THREAD_SUPPORT_H
+
+#endif //USE_WIN32_THREADING
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
new file mode 100644
index 00000000000..84a5e59f0af
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
@@ -0,0 +1,590 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
+#include "BulletMultiThreaded/btGpuDefines.h"
+#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h"
+#include "BulletMultiThreaded/btGpuUtilsSharedCode.h"
+
+
+
+#include "LinearMath/btAlignedAllocator.h"
+#include "LinearMath/btQuickprof.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+
+
+
+#include "btGpuDefines.h"
+#include "btGpuUtilsSharedDefs.h"
+
+#include "btGpu3DGridBroadphaseSharedDefs.h"
+
+#include "btGpu3DGridBroadphase.h"
+#include <string.h> //for memset
+
+
+#include <stdio.h>
+
+
+
+static bt3DGridBroadphaseParams s3DGridBroadphaseParams; // file-local copy of the grid parameters; overwritten by btGpu3DGridBroadphase::setParameters()
+
+
+
+/// Constructs the broadphase with a btHashedOverlappingPairCache that is
+/// placement-constructed in memory obtained from btAlignedAlloc(..., 16).
+/// All sizing arguments are forwarded unchanged to _initialize().
+btGpu3DGridBroadphase::btGpu3DGridBroadphase(const btVector3& worldAabbMin, const btVector3& worldAabbMax,
+                                             int gridSizeX, int gridSizeY, int gridSizeZ,
+                                             int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+                                             int maxBodiesPerCell,
+                                             btScalar cellFactorAABB)
+    : btSimpleBroadphase(maxSmallProxies,
+                         new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache), 16)) btHashedOverlappingPairCache)
+    , m_bInitialized(false)
+    , m_numBodies(0)
+{
+    _initialize(worldAabbMin, worldAabbMax,
+                gridSizeX, gridSizeY, gridSizeZ,
+                maxSmallProxies, maxLargeProxies, maxPairsPerBody,
+                maxBodiesPerCell, cellFactorAABB);
+}
+
+
+
+/// Constructs the broadphase around a caller-supplied overlapping pair cache.
+/// The cache pointer goes to the btSimpleBroadphase base; the remaining
+/// arguments are forwarded unchanged to _initialize().
+btGpu3DGridBroadphase::btGpu3DGridBroadphase(btOverlappingPairCache* overlappingPairCache,
+                                             const btVector3& worldAabbMin, const btVector3& worldAabbMax,
+                                             int gridSizeX, int gridSizeY, int gridSizeZ,
+                                             int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+                                             int maxBodiesPerCell,
+                                             btScalar cellFactorAABB)
+    : btSimpleBroadphase(maxSmallProxies, overlappingPairCache)
+    , m_bInitialized(false)
+    , m_numBodies(0)
+{
+    _initialize(worldAabbMin, worldAabbMax,
+                gridSizeX, gridSizeY, gridSizeZ,
+                maxSmallProxies, maxLargeProxies, maxPairsPerBody,
+                maxBodiesPerCell, cellFactorAABB);
+}
+
+
+
+/// Releases the host buffers through _finalize().
+/// The pair cache is not touched here: per the original note it is freed by
+/// btSimpleBroadphase because m_ownsPairCache is set.
+btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
+{
+    assert(m_bInitialized); // _initialize() must have completed
+    _finalize();
+}
+
+
+
+/// Stores the grid configuration in m_params and allocates every host-side buffer.
+///
+/// worldAabbMin / worldAabbMax  bounds of the gridded volume; the minimum becomes the grid origin
+/// gridSizeX/Y/Z                number of cells along each axis
+/// maxSmallProxies              not read here; buffer sizes use the base-class m_maxHandles
+/// maxLargeProxies              capacity of the large-proxy pool (zero is accepted)
+/// maxPairsPerBody              number of pair slots reserved per small proxy in m_hPairBuff
+/// maxBodiesPerCell             copied to m_params.m_maxBodiesPerCell
+/// cellFactorAABB               user factor applied to the radius in isLargeProxy()
+///
+/// Must run exactly once per object; _finalize() frees what is allocated here.
+void btGpu3DGridBroadphase::_initialize( const btVector3& worldAabbMin,const btVector3& worldAabbMax,
+        int gridSizeX, int gridSizeY, int gridSizeZ,
+        int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+        int maxBodiesPerCell,
+        btScalar cellFactorAABB)
+{
+    // set various parameters
+    // NOTE(review): ownership is claimed unconditionally, including when the pair cache
+    // was supplied by the caller through the second constructor - confirm this is intended.
+    m_ownsPairCache = true;
+    m_params.m_gridSizeX = gridSizeX;
+    m_params.m_gridSizeY = gridSizeY;
+    m_params.m_gridSizeZ = gridSizeZ;
+    m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
+    btVector3 w_org = worldAabbMin;
+    m_params.m_worldOriginX = w_org.getX();
+    m_params.m_worldOriginY = w_org.getY();
+    m_params.m_worldOriginZ = w_org.getZ();
+    btVector3 w_size = worldAabbMax - worldAabbMin;
+    m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX;
+    m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY;
+    m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ;
+    // proxies whose scaled bounding radius exceeds half of the smallest cell edge count as "large"
+    m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ);
+    m_maxRadius *= btScalar(0.5f);
+    m_params.m_numBodies = m_numBodies;
+    m_params.m_maxBodiesPerCell = maxBodiesPerCell;
+
+    m_numLargeHandles = 0;
+    m_maxLargeHandles = maxLargeProxies;
+
+    m_maxPairsPerBody = maxPairsPerBody;
+
+    m_cellFactorAABB = cellFactorAABB;
+
+    m_LastLargeHandleIndex = -1;
+
+    assert(!m_bInitialized);
+    // allocate host storage
+    // two unsigned ints (hash, body index) per small proxy
+    m_hBodiesHash = new unsigned int[m_maxHandles * 2];
+    memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
+
+    m_hCellStart = new unsigned int[m_params.m_numCells];
+    memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
+
+    // (start, current count) per small proxy plus one terminating entry
+    m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
+    // --------------- for now, init with m_maxPairsPerBody for each body
+    m_hPairBuffStartCurr[0] = 0;
+    m_hPairBuffStartCurr[1] = 0;
+    for(int i = 1; i <= m_maxHandles; i++)
+    {
+        m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
+        m_hPairBuffStartCurr[i * 2 + 1] = 0;
+    }
+    //----------------
+    unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
+    m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
+
+    m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
+    memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
+
+    m_hPairScan = new unsigned int[m_maxHandles + 1];
+
+    m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody];
+
+    // large proxies
+
+    // allocate handles buffer and put all handles on free list
+    m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
+    m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
+    m_firstFreeLargeHandle = 0;
+    {
+        for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
+        {
+            m_pLargeHandles[i].SetNextFree(i + 1);
+            // large proxy ids start at m_maxHandles+2, which is what isLargeProxy(proxy) tests for
+            m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
+        }
+        // Terminate the free list. With an empty pool there is no last element,
+        // so the write must be skipped (index -1 would be out of bounds).
+        if (m_maxLargeHandles > 0)
+        {
+            m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
+        }
+    }
+
+    // debug data
+    m_numPairsAdded = 0;
+    m_numOverflows = 0;
+
+    m_bInitialized = true;
+}
+
+
+
+/// Frees every buffer allocated by _initialize() and clears m_bInitialized.
+void btGpu3DGridBroadphase::_finalize()
+{
+    assert(m_bInitialized);
+
+    // arrays obtained with new[]
+    delete [] m_hBodiesHash;
+    delete [] m_hCellStart;
+    delete [] m_hPairBuffStartCurr;
+    delete [] m_hAABB;
+    delete [] m_hPairBuff;
+    delete [] m_hPairScan;
+    delete [] m_hPairOut;
+
+    // large-proxy pool obtained with btAlignedAlloc
+    btAlignedFree(m_pLargeHandlesRawPtr);
+
+    m_bInitialized = false;
+}
+
+
+
+/// Runs one broadphase update.
+/// With at least one small proxy the whole grid pipeline is executed and the
+/// large/large pass follows; with none, only the large/large pass runs.
+void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
+{
+    if(m_numHandles > 0)
+    {
+        setParameters(&m_params);       // update constants
+        prepareAABB();                  // prepare AABB array
+        calcHashAABB();                 // calculate hash
+        sortHash();                     // sort bodies based on hash
+        findCellStart();                // find start of each cell
+        findOverlappingPairs();         // small/small
+        findPairsLarge();               // small/large
+        // add pairs to CPU cache
+        computePairCacheChanges();
+        scanOverlappingPairBuff();
+        squeezeOverlappingPairBuff();
+        addPairsToCache(dispatcher);
+        // find and add large/large pairs to CPU cache
+        addLarge2LargePairsToCache(dispatcher);
+    }
+    else
+    {
+        BT_PROFILE("addLarge2LargePairsToCache");
+        addLarge2LargePairsToCache(dispatcher);
+    }
+}
+
+
+
+/// Applies the compacted change list in m_hPairOut to the pair cache.
+/// For body i the entries m_hPairScan[i] .. m_hPairScan[i+1]-1 are consumed;
+/// an entry carrying BT_3DGRID_PAIR_NEW_FLG adds a pair, any other entry removes one.
+/// m_numPairsAdded and m_numPairsRemoved are reset and recounted.
+void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
+{
+    m_numPairsAdded = 0;
+    m_numPairsRemoved = 0;
+    for(int body = 0; body < m_numHandles; body++)
+    {
+        const unsigned int changeCount = m_hPairScan[body+1] - m_hPairScan[body];
+        if(changeCount == 0)
+        {
+            continue;
+        }
+        const unsigned int* changes = m_hPairOut + m_hPairScan[body];
+        // the .uw of the "min" AABB entry holds the handle index of this body
+        const unsigned int handle0 = m_hAABB[body * 2].uw;
+        btSimpleBroadphaseProxy* proxy0 = &m_pHandles[handle0];
+        for(unsigned int c = 0; c < changeCount; c++)
+        {
+            const unsigned int packed = changes[c];
+            unsigned int handle1 = packed & (~BT_3DGRID_PAIR_ANY_FLG);
+            btSimpleBroadphaseProxy* proxy1;
+            if(handle1 >= (unsigned int)m_maxHandles)
+            {
+                // indices at or above m_maxHandles address the large-proxy pool
+                handle1 -= m_maxHandles;
+                btAssert(handle1 < (unsigned int)m_maxLargeHandles);
+                proxy1 = &m_pLargeHandles[handle1];
+            }
+            else
+            {
+                proxy1 = &m_pHandles[handle1];
+            }
+            if(packed & BT_3DGRID_PAIR_NEW_FLG)
+            {
+                m_pairCache->addOverlappingPair(proxy0,proxy1);
+                m_numPairsAdded++;
+            }
+            else
+            {
+                m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
+                m_numPairsRemoved++;
+            }
+        }
+    }
+}
+
+
+
+/// Creates a proxy for the given AABB.
+/// Boxes classified as large by isLargeProxy() are placed in the large-proxy pool;
+/// everything else is delegated to btSimpleBroadphase::createProxy().
+/// Returns 0 when the large-proxy pool is exhausted.
+btBroadphaseProxy* btGpu3DGridBroadphase::createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
+{
+    if(!isLargeProxy(aabbMin, aabbMax))
+    {
+        return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
+    }
+    if (m_numLargeHandles >= m_maxLargeHandles)
+    {
+        ///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell)
+        btAssert(0);
+        return 0; //should never happen, but don't let the game crash ;-)
+    }
+    btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));
+    const int slot = allocLargeHandle();
+    // construct the proxy in place inside the pool slot
+    return new (&m_pLargeHandles[slot])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
+}
+
+
+
+/// Destroys a proxy.
+/// Small proxies are handed to btSimpleBroadphase::destroyProxy(); a large proxy
+/// is returned to the large pool first and then purged from the pair cache.
+void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
+{
+    if(!isLargeProxy(proxy))
+    {
+        btSimpleBroadphase::destroyProxy(proxy, dispatcher);
+        return;
+    }
+    freeLargeHandle(static_cast<btSimpleBroadphaseProxy*>(proxy));
+    m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher);
+}
+
+
+
+/// Re-initialises m_hPairBuffStartCurr: entry i gets start offset
+/// i * m_maxPairsPerBody and a current count of 0, for i = 0 .. m_maxHandles.
+/// The dispatcher argument is not used.
+void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
+{
+    unsigned int* entry = m_hPairBuffStartCurr;
+    unsigned int startOffset = 0;
+    entry[0] = startOffset;
+    entry[1] = 0;
+    for(int body = 1; body <= m_maxHandles; body++)
+    {
+        startOffset += m_maxPairsPerBody;
+        entry += 2;
+        entry[0] = startOffset;
+        entry[1] = 0;
+    }
+}
+
+
+
+/// Classifies an AABB by size: true when half of its diagonal length, scaled by
+/// m_cellFactorAABB, is greater than m_maxRadius.
+bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax)
+{
+    const btVector3 diagonal = aabbMax - aabbMin;
+    ///use the bounding sphere radius of this bounding box, to include rotation
+    const btScalar sphereRadius = diagonal.length() * btScalar(0.5f);
+    const btScalar scaledRadius = sphereRadius * m_cellFactorAABB; // user-defined factor
+    return (scaledRadius > m_maxRadius);
+}
+
+
+
+/// Classifies an existing proxy by id: ids of m_maxHandles+2 and above are the
+/// ones _initialize() assigns to the large-proxy pool.
+bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
+{
+    const int firstLargeUid = m_maxHandles+2;
+    return (proxy->getUid() >= firstLargeUid);
+}
+
+
+
+/// Tests every pair of live large proxies against each other and brings the
+/// pair cache in line: overlapping pairs missing from the cache are added,
+/// cached pairs that no longer overlap are removed.
+/// Also shrinks m_LastLargeHandleIndex to the highest slot still in use.
+void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
+{
+    if (m_numLargeHandles <= 0)
+    {
+        return;
+    }
+    int highestLive = -1;
+    for(int a = 0; a <= m_LastLargeHandleIndex; a++)
+    {
+        btSimpleBroadphaseProxy* proxyA = &m_pLargeHandles[a];
+        if(!proxyA->m_clientObject)
+        {
+            continue; // free slot
+        }
+        highestLive = a;
+        for(int b = a + 1; b <= m_LastLargeHandleIndex; b++)
+        {
+            btSimpleBroadphaseProxy* proxyB = &m_pLargeHandles[b];
+            if(!proxyB->m_clientObject)
+            {
+                continue; // free slot
+            }
+            btAssert(proxyA != proxyB);
+            btSimpleBroadphaseProxy* simpleA = getSimpleProxyFromProxy(proxyA);
+            btSimpleBroadphaseProxy* simpleB = getSimpleProxyFromProxy(proxyB);
+            const bool overlapping = aabbOverlap(simpleA,simpleB);
+            const bool cached = (m_pairCache->findPair(proxyA,proxyB) != 0);
+            if(overlapping && !cached)
+            {
+                m_pairCache->addOverlappingPair(proxyA,proxyB);
+            }
+            else if(!overlapping && cached)
+            {
+                m_pairCache->removeOverlappingPair(proxyA,proxyB,dispatcher);
+            }
+        }
+    }
+    m_LastLargeHandleIndex = highestLive;
+}
+
+
+
+/// Ray query: runs btSimpleBroadphase::rayTest() first, then passes every live
+/// large proxy to rayCallback.process() (no per-proxy ray/AABB test is done here).
+void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
+{
+    btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
+    for (int slot = 0; slot <= m_LastLargeHandleIndex; slot++)
+    {
+        btSimpleBroadphaseProxy* largeProxy = &m_pLargeHandles[slot];
+        if(largeProxy->m_clientObject)
+        {
+            rayCallback.process(largeProxy);
+        }
+    }
+}
+
+
+
+//
+// overrides for CPU version
+//
+
+
+
+/// Packs the AABBs of all live proxies into m_hAABB as (min, max) entry pairs,
+/// small proxies first, large proxies after them.
+/// min.uw carries the handle index (large ones offset by m_maxHandles); max.uw
+/// carries the running count of packed proxies in that group (same offset for large).
+/// m_LastHandleIndex and m_LastLargeHandleIndex are tightened to the highest live slot.
+void btGpu3DGridBroadphase::prepareAABB()
+{
+    BT_PROFILE("prepareAABB");
+    bt3DGrid3F1U* out = m_hAABB;
+
+    // ---- small proxies
+    int highestLive = -1;
+    unsigned int smallCount = 0;
+    for(int slot = 0; slot <= m_LastHandleIndex; slot++)
+    {
+        const btSimpleBroadphaseProxy* small = &m_pHandles[slot];
+        if(!small->m_clientObject)
+        {
+            continue;
+        }
+        highestLive = slot;
+        out[0].fx = small->m_aabbMin.getX();
+        out[0].fy = small->m_aabbMin.getY();
+        out[0].fz = small->m_aabbMin.getZ();
+        out[0].uw = slot;
+        out[1].fx = small->m_aabbMax.getX();
+        out[1].fy = small->m_aabbMax.getY();
+        out[1].fz = small->m_aabbMax.getZ();
+        out[1].uw = smallCount;
+        out += 2;
+        smallCount++;
+    }
+    m_LastHandleIndex = highestLive;
+
+    // ---- large proxies
+    highestLive = -1;
+    unsigned int largeCount = 0;
+    for(int slot = 0; slot <= m_LastLargeHandleIndex; slot++)
+    {
+        const btSimpleBroadphaseProxy* large = &m_pLargeHandles[slot];
+        if(!large->m_clientObject)
+        {
+            continue;
+        }
+        highestLive = slot;
+        out[0].fx = large->m_aabbMin.getX();
+        out[0].fy = large->m_aabbMin.getY();
+        out[0].fz = large->m_aabbMin.getZ();
+        out[0].uw = slot + m_maxHandles;
+        out[1].fx = large->m_aabbMax.getX();
+        out[1].fy = large->m_aabbMax.getY();
+        out[1].fz = large->m_aabbMax.getZ();
+        out[1].uw = largeCount + m_maxHandles;
+        out += 2;
+        largeCount++;
+    }
+    m_LastLargeHandleIndex = highestLive;
+
+    // paranoid checks
+    btAssert(smallCount == m_numHandles);
+    btAssert(largeCount == m_numLargeHandles);
+}
+
+
+
+/// Copies *hostParams into the file-local s3DGridBroadphaseParams.
+void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
+{
+    s3DGridBroadphaseParams = *hostParams;
+}
+
+
+
+/// CPU override: forwards m_hAABB, m_hBodiesHash and m_numHandles to btGpu_calcHashAABB().
+void btGpu3DGridBroadphase::calcHashAABB()
+{
+    BT_PROFILE("bt3DGrid_calcHashAABB");
+    btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
+}
+
+
+
+/// Sorts the (hash, index) records in m_hBodiesHash with an in-place quicksort.
+/// The comparison direction puts larger hash values first.
+void btGpu3DGridBroadphase::sortHash()
+{
+    // One record as laid out in m_hBodiesHash: two consecutive unsigned ints.
+    struct bt3DGridHashKey
+    {
+        unsigned int hash;
+        unsigned int index;
+
+        // Recursive quicksort of keys[first..last] (inclusive), middle element as pivot.
+        static void quickSort(bt3DGridHashKey* keys, int first, int last)
+        {
+            const bt3DGridHashKey pivot = keys[(first+last)/2];
+            int up = first;
+            int down = last;
+            do
+            {
+                while(keys[up].hash > pivot.hash)
+                {
+                    up++;
+                }
+                while(pivot.hash > keys[down].hash)
+                {
+                    down--;
+                }
+                if(up <= down)
+                {
+                    const bt3DGridHashKey held = keys[up];
+                    keys[up] = keys[down];
+                    keys[down] = held;
+                    up++;
+                    down--;
+                }
+            } while(up <= down);
+            if(first < down)
+            {
+                quickSort(keys, first, down);
+            }
+            if(up < last)
+            {
+                quickSort(keys, up, last);
+            }
+        }
+    };
+    BT_PROFILE("bt3DGrid_sortHash");
+    bt3DGridHashKey* records = (bt3DGridHashKey*)m_hBodiesHash;
+    bt3DGridHashKey::quickSort(records, 0, m_numHandles - 1);
+}
+
+
+
+/// CPU override: forwards the hash array, the cell-start array, m_numHandles
+/// and the cell count from m_params to btGpu_findCellStart().
+void btGpu3DGridBroadphase::findCellStart()
+{
+    BT_PROFILE("bt3DGrid_findCellStart");
+    btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
+}
+
+
+
+/// CPU override: forwards the host buffers and m_numHandles to btGpu_findOverlappingPairs().
+void btGpu3DGridBroadphase::findOverlappingPairs()
+{
+    BT_PROFILE("bt3DGrid_findOverlappingPairs");
+    btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
+}
+
+
+
+/// CPU override: forwards the host buffers plus the small and large proxy
+/// counts to btGpu_findPairsLarge().
+void btGpu3DGridBroadphase::findPairsLarge()
+{
+    BT_PROFILE("bt3DGrid_findPairsLarge");
+    btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles);
+}
+
+
+
+/// CPU override: forwards the pair buffers, m_hPairScan, m_hAABB and
+/// m_numHandles to btGpu_computePairCacheChanges().
+void btGpu3DGridBroadphase::computePairCacheChanges()
+{
+    BT_PROFILE("bt3DGrid_computePairCacheChanges");
+    btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hAABB, m_numHandles);
+}
+
+
+
+/// Turns the per-body counts held in m_hPairScan[1..m_numHandles] into running
+/// totals in place; m_hPairScan[0] is set to 0.
+void btGpu3DGridBroadphase::scanOverlappingPairBuff()
+{
+    BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
+    unsigned int runningTotal = 0;
+    m_hPairScan[0] = runningTotal;
+    for(int body = 1; body <= m_numHandles; body++)
+    {
+        runningTotal += m_hPairScan[body];
+        m_hPairScan[body] = runningTotal;
+    }
+}
+
+
+
+/// CPU override: forwards the pair buffers, m_hPairScan, m_hPairOut, m_hAABB
+/// and m_numHandles to btGpu_squeezeOverlappingPairBuff().
+void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
+{
+    BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
+    btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_hAABB, m_numHandles);
+}
+
+
+
+#include "btGpu3DGridBroadphaseSharedCode.h"
+
+
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h
new file mode 100644
index 00000000000..1d49a0557ae
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h
@@ -0,0 +1,138 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASE_H
+#define BTGPU3DGRIDBROADPHASE_H
+
+//----------------------------------------------------------------------------------------
+
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+
+#include "btGpu3DGridBroadphaseSharedTypes.h"
+
+//----------------------------------------------------------------------------------------
+
+///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
+
+/// Uniform-grid broadphase layered on btSimpleBroadphase.
+/// Proxies handled by the base class ("small" proxies) go through the grid
+/// pipeline; proxies classified as large are kept in a separate pool owned by
+/// this class and are tested against the small ones and against each other.
+class btGpu3DGridBroadphase : public btSimpleBroadphase
+{
+protected:
+    bool m_bInitialized;                 // set by _initialize(), cleared by _finalize()
+    unsigned int m_numBodies;            // initialised to 0 by the constructors
+    unsigned int m_numCells;             // NOTE(review): the cell count in use lives in m_params - confirm this member is still needed
+    unsigned int m_maxPairsPerBody;      // pair slots reserved per small proxy in m_hPairBuff
+    btScalar m_cellFactorAABB;           // user factor applied to the radius in isLargeProxy()
+    unsigned int m_maxBodiesPerCell;     // NOTE(review): the value in use lives in m_params - confirm this member is still needed
+    bt3DGridBroadphaseParams m_params;   // grid description handed to setParameters()
+    btScalar m_maxRadius;                // size threshold used by isLargeProxy()
+    // CPU data
+    unsigned int* m_hBodiesHash;         // two unsigned ints per small proxy
+    unsigned int* m_hCellStart;          // one entry per grid cell
+    unsigned int* m_hPairBuffStartCurr;  // (start, current count) per small proxy, plus one extra entry
+    bt3DGrid3F1U* m_hAABB;               // (min, max) entry pair per proxy, small and large
+    unsigned int* m_hPairBuff;           // m_maxPairsPerBody slots per small proxy
+    unsigned int* m_hPairScan;           // one entry per small proxy, plus one
+    unsigned int* m_hPairOut;            // same size as m_hPairBuff
+    // large proxies
+    int m_numLargeHandles;               // number of large proxies currently allocated
+    int m_maxLargeHandles;               // capacity of the large-proxy pool
+    int m_LastLargeHandleIndex;          // upper bound for scans over the pool; -1 when empty
+    btSimpleBroadphaseProxy* m_pLargeHandles;
+    void* m_pLargeHandlesRawPtr;         // raw allocation backing m_pLargeHandles
+    int m_firstFreeLargeHandle;          // head of the free list threaded through the pool
+
+    /// Pops the head of the free list and returns its pool index.
+    /// The caller must make sure the pool is not full (see the assert).
+    int allocLargeHandle()
+    {
+        btAssert(m_numLargeHandles < m_maxLargeHandles);
+        int freeLargeHandle = m_firstFreeLargeHandle;
+        m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
+        m_numLargeHandles++;
+        if(freeLargeHandle > m_LastLargeHandleIndex)
+        {
+            m_LastLargeHandleIndex = freeLargeHandle;
+        }
+        return freeLargeHandle;
+    }
+
+    /// Pushes a large proxy back onto the free list and clears its client object,
+    /// which is what marks a pool slot as unused.
+    void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
+    {
+        int handle = int(proxy - m_pLargeHandles);
+        // The index refers to the large-proxy pool, so it is bounded by
+        // m_maxLargeHandles (not by the small-proxy capacity m_maxHandles).
+        btAssert((handle >= 0) && (handle < m_maxLargeHandles));
+        if(handle == m_LastLargeHandleIndex)
+        {
+            m_LastLargeHandleIndex--;
+        }
+        proxy->SetNextFree(m_firstFreeLargeHandle);
+        m_firstFreeLargeHandle = handle;
+        proxy->m_clientObject = 0;
+        m_numLargeHandles--;
+    }
+    bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax);
+    bool isLargeProxy(btBroadphaseProxy* proxy);
+    // debug
+    unsigned int m_numPairsAdded;
+    unsigned int m_numPairsRemoved;
+    unsigned int m_numOverflows;
+    //
+public:
+    /// Creates the broadphase together with its own pair cache.
+    btGpu3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax,
+                          int gridSizeX, int gridSizeY, int gridSizeZ,
+                          int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+                          int maxBodiesPerCell = 8,
+                          btScalar cellFactorAABB = btScalar(1.0f));
+    /// Creates the broadphase around a caller-supplied pair cache.
+    btGpu3DGridBroadphase(btOverlappingPairCache* overlappingPairCache,
+                          const btVector3& worldAabbMin,const btVector3& worldAabbMax,
+                          int gridSizeX, int gridSizeY, int gridSizeZ,
+                          int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+                          int maxBodiesPerCell = 8,
+                          btScalar cellFactorAABB = btScalar(1.0f));
+    virtual ~btGpu3DGridBroadphase();
+    virtual void calculateOverlappingPairs(btDispatcher* dispatcher);
+
+    virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
+    virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+    virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
+    virtual void resetPool(btDispatcher* dispatcher);
+
+protected:
+    void _initialize(const btVector3& worldAabbMin,const btVector3& worldAabbMax,
+                     int gridSizeX, int gridSizeY, int gridSizeZ,
+                     int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+                     int maxBodiesPerCell = 8,
+                     btScalar cellFactorAABB = btScalar(1.0f));
+    void _finalize();
+    void addPairsToCache(btDispatcher* dispatcher);
+    void addLarge2LargePairsToCache(btDispatcher* dispatcher);
+
+    // overrides for CPU version
+    virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
+    virtual void prepareAABB();
+    virtual void calcHashAABB();
+    virtual void sortHash();
+    virtual void findCellStart();
+    virtual void findOverlappingPairs();
+    virtual void findPairsLarge();
+    virtual void computePairCacheChanges();
+    virtual void scanOverlappingPairBuff();
+    virtual void squeezeOverlappingPairBuff();
+};
+
+//----------------------------------------------------------------------------------------
+
+#endif //BTGPU3DGRIDBROADPHASE_H
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
new file mode 100644
index 00000000000..e0afb87bb82
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
@@ -0,0 +1,430 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+// K E R N E L F U N C T I O N S
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+// Maps a point to integer cell coordinates of the uniform grid described by
+// BT_GPU_params: per axis, floor((coordinate - world origin) / cell size).
+// The result is not clamped to the grid extents.
+BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
+{
+    int3 cell;
+    cell.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
+    cell.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
+    cell.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
+    return cell;
+} // bt3DGrid_calcGridPos()
+
+//----------------------------------------------------------------------------------------
+
+// Converts cell coordinates to a linear cell address.
+// Each coordinate is first clamped to [0, gridSize - 1]; the address is then
+// (z * gridSizeY) * gridSizeX + y * gridSizeX + x.
+BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
+{
+    const int lastX = (int)BT_GPU_params.m_gridSizeX - 1;
+    const int lastY = (int)BT_GPU_params.m_gridSizeY - 1;
+    const int lastZ = (int)BT_GPU_params.m_gridSizeZ - 1;
+    gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, lastX));
+    gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, lastY));
+    gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, lastZ));
+    return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX)
+         + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX)
+         + gridPos.x;
+} // bt3DGrid_calcGridHash()
+
+//----------------------------------------------------------------------------------------
+
+// For the body selected by the BT_GPU block/thread indices: hashes the centre
+// of its AABB to a grid cell and stores (cell hash, body index) in pHash.
+// Indices at or beyond numBodies do nothing.
+BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index < (int)numBodies)
+    {
+        bt3DGrid3F1U lower = pAABB[index*2];
+        bt3DGrid3F1U upper = pAABB[index*2 + 1];
+        // centre of the box
+        float4 centre;
+        centre.x = (lower.fx + upper.fx) * 0.5f;
+        centre.y = (lower.fy + upper.fy) * 0.5f;
+        centre.z = (lower.fz + upper.fz) * 0.5f;
+        // get address in grid
+        uint cellHash = bt3DGrid_calcGridHash(bt3DGrid_calcGridPos(centre));
+        // store grid hash and body index
+        pHash[index] = BT_GPU_make_uint2(cellHash, index);
+    }
+} // calcHashAABBD()
+
+//----------------------------------------------------------------------------------------
+
+// For the sorted record selected by the BT_GPU block/thread indices: when its
+// hash differs from the hash of the preceding record (or it is record 0),
+// writes its position into cellStart[hash].
+BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+    {
+        return;
+    }
+    uint2 record = pHash[index];
+    // Hash values are staged in a BT_GPU___shared__ array, shifted by one slot,
+    // so that the neighbouring record's hash can be read from slot [threadIdx.x]
+    // without loading two hash values per thread.
+    BT_GPU___shared__ uint sharedHash[257];
+    sharedHash[BT_GPU_threadIdx.x+1] = record.x;
+    if((index > 0) && (BT_GPU_threadIdx.x == 0))
+    {
+        // first thread in block must load neighbor body hash
+        volatile uint2 previous = pHash[index-1];
+        sharedHash[0] = previous.x;
+    }
+    BT_GPU___syncthreads();
+    const bool startsNewCell = (index == 0) || (record.x != sharedHash[BT_GPU_threadIdx.x]);
+    if(startsNewCell)
+    {
+        cellStart[record.x] = index;
+    }
+} // findCellStartD()
+
+//----------------------------------------------------------------------------------------
+
+// Returns non-zero when the boxes [min0, max0] and [min1, max1] overlap on all
+// three axes; touching boxes count as overlapping (comparisons are <=).
+BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
+{
+    const bool overlapX = (min0.fx <= max1.fx) && (min1.fx <= max0.fx);
+    const bool overlapY = (min0.fy <= max1.fy) && (min1.fy <= max0.fy);
+    const bool overlapZ = (min0.fz <= max1.fz) && (min1.fz <= max0.fz);
+    return overlapX && overlapY && overlapZ;
+} // cudaTestAABBOverlap()
+
+//----------------------------------------------------------------------------------------
+
+// Tests the body stored at sorted position `index` against the bodies hashed to
+// cell `gridPos` and records overlaps in that body's range of pPairBuff.
+// A pair already present in the range gets BT_3DGRID_PAIR_FOUND_FLG; a pair not
+// yet present is appended with BT_3DGRID_PAIR_NEW_FLG. Only partners with a
+// smaller unsorted index are considered, so each pair is handled from one side.
+// Cells outside the grid and cells whose start marker is 0xffffffff are skipped.
+BT_GPU___device__ void findPairsInCell( int3 gridPos,
+                                        uint index,
+                                        uint2* pHash,
+                                        uint* pCellStart,
+                                        bt3DGrid3F1U* pAABB,
+                                        uint* pPairBuff,
+                                        uint2* pPairBuffStartCurr,
+                                        uint numBodies)
+{
+    const bool outsideX = (gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1);
+    const bool outsideY = (gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1);
+    const bool outsideZ = (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1);
+    if(outsideX || outsideY || outsideZ)
+    {
+        return;
+    }
+    uint cellHash = bt3DGrid_calcGridHash(gridPos);
+    // get start of bucket for this cell
+    uint bucketBegin = pCellStart[cellHash];
+    if(bucketBegin == 0xffffffff)
+    {
+        return; // cell empty
+    }
+    // the body this call works for
+    uint2 ownRecord = pHash[index];
+    uint ownUnsorted = ownRecord.y;
+    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, ownUnsorted*2);
+    bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, ownUnsorted*2 + 1);
+    uint ownHandle = min0.uw;
+    // this body's range in the pair buffer: [start, start + curr) is in use
+    uint2 ownRange = pPairBuffStartCurr[ownHandle];
+    uint start = ownRange.x;
+    uint curr = ownRange.y;
+    uint2 nextRange = pPairBuffStartCurr[ownHandle+1];
+    uint curr_max = nextRange.x - start - 1;
+    // iterate over bodies in this cell, at most m_maxBodiesPerCell of them
+    uint bucketEnd = bucketBegin + BT_GPU_params.m_maxBodiesPerCell;
+    if(bucketEnd > numBodies)
+    {
+        bucketEnd = numBodies;
+    }
+    for(uint other = bucketBegin; other < bucketEnd; other++)
+    {
+        uint2 otherRecord = pHash[other];
+        if(otherRecord.x != cellHash)
+        {
+            break; // no longer in same bucket
+        }
+        uint otherUnsorted = otherRecord.y;
+        if(!(otherUnsorted < ownUnsorted)) // check not colliding with self
+        {
+            continue;
+        }
+        bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, otherUnsorted*2);
+        bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, otherUnsorted*2 + 1);
+        if(!cudaTestAABBOverlap(min0, max0, min1, max1))
+        {
+            continue;
+        }
+        uint otherHandle = min1.uw;
+        // look for the pair among the entries already stored
+        uint k = 0;
+        while(k < curr)
+        {
+            uint storedHandle = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
+            if(storedHandle == otherHandle)
+            {
+                pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
+                break;
+            }
+            k++;
+        }
+        if(k == curr)
+        {
+            // not stored yet: append as a new pair
+            if(curr >= curr_max)
+            { // not a good solution, but let's avoid crash
+                break;
+            }
+            pPairBuff[start+curr] = otherHandle | BT_3DGRID_PAIR_NEW_FLG;
+            curr++;
+        }
+    }
+    pPairBuffStartCurr[ownHandle] = BT_GPU_make_uint2(start, curr);
+} // findPairsInCell()
+
+//----------------------------------------------------------------------------------------
+
+// For the sorted record selected by the BT_GPU block/thread indices: locates
+// the cell of the body's AABB centre and runs findPairsInCell() on that cell
+// and its 26 neighbours (offsets -1..1 per axis, z outermost, x innermost).
+BT_GPU___global__ void findOverlappingPairsD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
+                                              uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+    {
+        return;
+    }
+    uint2 record = pHash[index];
+    uint unsorted = record.y;
+    bt3DGrid3F1U lower = BT_GPU_FETCH(pAABB, unsorted*2);
+    bt3DGrid3F1U upper = BT_GPU_FETCH(pAABB, unsorted*2 + 1);
+    float4 centre;
+    centre.x = (lower.fx + upper.fx) * 0.5f;
+    centre.y = (lower.fy + upper.fy) * 0.5f;
+    centre.z = (lower.fz + upper.fz) * 0.5f;
+    // get address in grid
+    int3 homeCell = bt3DGrid_calcGridPos(centre);
+    // examine only neighbouring cells
+    for(int dz = -1; dz <= 1; dz++)
+    {
+        for(int dy = -1; dy <= 1; dy++)
+        {
+            for(int dx = -1; dx <= 1; dx++)
+            {
+                findPairsInCell(homeCell + BT_GPU_make_int3(dx, dy, dz), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
+            }
+        }
+    }
+} // findOverlappingPairsD()
+
+//----------------------------------------------------------------------------------------
+
+// For the sorted record selected by the BT_GPU block/thread indices: tests the
+// body against every large proxy (stored after the numBodies small entries in
+// pAABB) and records overlaps in the body's range of pPairBuff.
+// A pair already present in the range gets BT_3DGRID_PAIR_FOUND_FLG; a pair not
+// yet present is appended with BT_3DGRID_PAIR_NEW_FLG.
+//
+// pAABB               (min, max) entry pairs; min.uw holds the handle index
+// pHash               sorted (hash, unsorted index) records
+// pCellStart          not used by this kernel
+// pPairBuff           pair storage
+// pPairBuffStartCurr  (start, current count) per handle
+// numBodies           number of small proxies
+// numLarge            number of large proxies
+BT_GPU___global__ void findPairsLargeD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
+                                        uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+    {
+        return;
+    }
+    uint2 sortedData = pHash[index];
+    uint unsorted_indx = sortedData.y;
+    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
+    bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+    uint handleIndex = min0.uw;
+    // this body's range in the pair buffer: [start, start + curr) is in use
+    uint2 start_curr = pPairBuffStartCurr[handleIndex];
+    uint start = start_curr.x;
+    uint curr = start_curr.y;
+    uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
+    uint curr_max = start_curr_next.x - start - 1;
+    for(uint i = 0; i < numLarge; i++)
+    {
+        uint indx2 = numBodies + i;
+        bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
+        bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
+        if(cudaTestAABBOverlap(min0, max0, min1, max1))
+        {
+            uint k;
+            uint handleIndex2 = min1.uw;
+            // look for the pair among the entries already stored
+            for(k = 0; k < curr; k++)
+            {
+                uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
+                if(old_pair == handleIndex2)
+                {
+                    pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
+                    break;
+                }
+            }
+            if(k == curr)
+            {
+                // Check capacity BEFORE storing, exactly as findPairsInCell() does.
+                // Storing first would write an entry that is never counted in curr.
+                if(curr >= curr_max)
+                { // not a good solution, but let's avoid crash
+                    break;
+                }
+                pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
+                curr++;
+            }
+        }
+    }
+    pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
+    return;
+} // findPairsLargeD()
+
+//----------------------------------------------------------------------------------------
+
+// Kernel: for slot blockIdx.x*blockDim.x + threadIdx.x, reads the handle from the uw member
+// of pAABB[slot*2], walks that handle's section of pPairBuff (start and entry count come from
+// pPairBuffStartCurr[handle]) and counts the entries that do not carry
+// BT_3DGRID_PAIR_FOUND_FLG. The count is written to pPairScan[slot+1].
+BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
+												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
+{
+    const int slot = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(slot >= (int)numBodies)
+    {
+        return;
+    }
+    const uint handle = pAABB[slot * 2].uw;
+    const uint2 range = pPairBuffStartCurr[handle];
+    const uint* section = pPairBuff + range.x;
+    uint changed = 0;
+    for(uint k = 0; k < range.y; k++)
+    {
+        changed += (section[k] & BT_3DGRID_PAIR_FOUND_FLG) ? 0u : 1u;
+    }
+    pPairScan[slot+1] = changed;
+} // computePairCacheChangesD()
+
+//----------------------------------------------------------------------------------------
+
+// Kernel: post-processes the pair-buffer section of the body at slot
+// blockIdx.x*blockDim.x + threadIdx.x (handle taken from the uw member of pAABB[slot*2]).
+// Walking the section front to back it
+//   - copies every entry without BT_3DGRID_PAIR_FOUND_FLG to pPairOut, starting at
+//     pPairOut[pPairScan[slot]];
+//   - keeps every entry that has any flag set, strips the flags and moves it towards the
+//     front of the section (the write position never passes the read position).
+// Finally pPairBuffStartCurr[handle] gets the unchanged start and the number of kept entries.
+BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
+												   uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
+{
+    const int slot = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(slot >= (int)numBodies)
+    {
+        return;
+    }
+    const uint handle = pAABB[slot * 2].uw;
+    const uint2 range = pPairBuffStartCurr[handle];
+    uint* const section = pPairBuff + range.x;
+    uint* changes = pPairOut + pPairScan[slot];
+    uint kept = 0;
+    for(uint k = 0; k < range.y; k++)
+    {
+        if(!(section[k] & BT_3DGRID_PAIR_FOUND_FLG))
+        {
+            *changes = section[k];
+            changes++;
+        }
+        if(section[k] & BT_3DGRID_PAIR_ANY_FLG)
+        {
+            section[kept] = section[k] & (~BT_3DGRID_PAIR_ANY_FLG);
+            kept++;
+        }
+    }
+    pPairBuffStartCurr[handle] = BT_GPU_make_uint2(range.x, kept);
+} // squeezeOverlappingPairBuffD()
+
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+// END OF KERNEL FUNCTIONS
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+
+// Host entry point: runs the calcHashAABBD kernel once per body, in blocks of at most
+// 256 threads. The hash buffer is handed to the kernel as an array of uint2.
+void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
+{
+    int numBlocks;
+    int numThreads;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    uint2* hashPairs = (uint2*)hash;
+    // execute the kernel
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, hashPairs, numBodies));
+    // check if kernel invocation generated an error
+    BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
+} // calcHashAABB()
+
+//----------------------------------------------------------------------------------------
+
+// Host entry point: fills the numCells entries of cellStart with 0xffffffff bytes and then
+// runs the findCellStartD kernel once per body (blocks of at most 256 threads) on the hash
+// array, which is handed to the kernel as uint2 entries.
+// The declarator uses the BT_GPU_PREF(name)(args) form, matching calcHashAABB and the
+// prototype in the shared-definitions header; the expanded function name is unchanged.
+void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    // every cell start is set to 0xffffffff before the kernel runs
+    BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
+} // findCellStart()
+
+//----------------------------------------------------------------------------------------
+
+// Host entry point: runs the findOverlappingPairsD kernel once per body, in blocks of at
+// most 64 threads. pHash and pPairBuffStartCurr are handed to the kernel as uint2 arrays.
+// When B_CUDA_USE_TEX is set, pAABB (two entries per body) is bound to the pAABBTex texture
+// for the duration of the launch.
+// The declarator uses the BT_GPU_PREF(name)(args) form, matching calcHashAABB and the
+// prototype in the shared-definitions header; the expanded function name is unchanged.
+void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies)
+{
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
+#endif
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
+#endif
+} // findOverlappingPairs()
+
+//----------------------------------------------------------------------------------------
+
+// Host entry point: runs the findPairsLargeD kernel once per body (blocks of at most
+// 64 threads); each thread tests its body against the numLarge AABBs stored after the
+// first numBodies ones in pAABB.
+// When B_CUDA_USE_TEX is set, all (numBodies+numLarge) AABBs are bound to the pAABBTex
+// texture for the duration of the launch.
+// The declarator uses the BT_GPU_PREF(name)(args) form, matching calcHashAABB and the
+// prototype in the shared-definitions header; the expanded function name is unchanged.
+void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)
+{
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
+#endif
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
+#endif
+} // findPairsLarge()
+
+//----------------------------------------------------------------------------------------
+
+// Host entry point: runs the computePairCacheChangesD kernel once per body, in blocks of
+// at most 256 threads. pPairBuffStartCurr is handed to the kernel as a uint2 array.
+// The declarator uses the BT_GPU_PREF(name)(args) form, matching calcHashAABB and the
+// prototype in the shared-definitions header; the expanded function name is unchanged.
+void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
+} // computePairCacheChanges()
+
+//----------------------------------------------------------------------------------------
+
+// Host entry point: runs the squeezeOverlappingPairBuffD kernel once per body, in blocks of
+// at most 256 threads. pPairBuffStartCurr is handed to the kernel as a uint2 array.
+// The declarator uses the BT_GPU_PREF(name)(args) form, matching calcHashAABB and the
+// prototype in the shared-definitions header; the expanded function name is unchanged.
+void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
+} // squeezeOverlappingPairBuff()
+
+//------------------------------------------------------------------------------------------------
+
+} // extern "C"
+
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
new file mode 100644
index 00000000000..607bda7edfd
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
@@ -0,0 +1,61 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared definitions for GPU-based 3D Grid collision detection broadphase
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// Keep this file free from Bullet headers
+// it is included into both CUDA and CPU code
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
+//----------------------------------------------------------------------------------------
+
+#include "btGpu3DGridBroadphaseSharedTypes.h"
+
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+// C-linkage entry points of the 3D grid broadphase; BT_GPU_PREF() adds the platform prefix to each name.
+//----------------------------------------------------------------------------------------
+// Runs the calcHashAABBD kernel for numBodies bodies; hash is handed to the kernel as a uint2 array.
+void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
+// Fills cellStart (numCells entries) with 0xffffffff bytes, then runs the findCellStartD kernel on hash.
+void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
+// Runs the findOverlappingPairsD kernel, one thread per body.
+void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies);
+// Runs the findPairsLargeD kernel: each body is tested against the numLarge AABBs stored after the first numBodies ones.
+void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
+// Runs the computePairCacheChangesD kernel: per body, the number of pair entries without the FOUND flag goes to pPairScan.
+void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
+// Runs the squeezeOverlappingPairBuffD kernel: entries without the FOUND flag go to pPairOut, pPairBuff is compacted in place.
+void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
+
+
+//----------------------------------------------------------------------------------------
+
+} // extern "C"
+
+//----------------------------------------------------------------------------------------
+
+#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
new file mode 100644
index 00000000000..616a40094ca
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
@@ -0,0 +1,67 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared definitions for GPU-based 3D Grid collision detection broadphase
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// Keep this file free from Bullet headers
+// it is included into both CUDA and CPU code
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
+//----------------------------------------------------------------------------------------
+
+#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000) // or-ed into a stored pair entry when that pair is found again
+#define BT_3DGRID_PAIR_NEW_FLG (0x20000000) // set on a pair entry at the moment it is appended to the pair buffer
+#define BT_3DGRID_PAIR_ANY_FLG (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG) // both flags; masked off to obtain the bare handle index
+
+//----------------------------------------------------------------------------------------
+
+struct bt3DGridBroadphaseParams // plain parameter record of the 3D grid broadphase; field meanings below are inferred from the names - TODO confirm against the code that fills it
+{
+ unsigned int m_gridSizeX; // presumably the number of grid cells along X
+ unsigned int m_gridSizeY; // presumably the number of grid cells along Y
+ unsigned int m_gridSizeZ; // presumably the number of grid cells along Z
+ unsigned int m_numCells; // presumably the total number of grid cells
+ float m_worldOriginX; // presumably the world-space origin of the grid (X)
+ float m_worldOriginY; // presumably the world-space origin of the grid (Y)
+ float m_worldOriginZ; // presumably the world-space origin of the grid (Z)
+ float m_cellSizeX; // presumably the extent of one cell along X
+ float m_cellSizeY; // presumably the extent of one cell along Y
+ float m_cellSizeZ; // presumably the extent of one cell along Z
+ unsigned int m_numBodies; // presumably the number of bodies handled by the grid
+ unsigned int m_maxBodiesPerCell; // presumably an upper bound on bodies stored per cell
+};
+
+//----------------------------------------------------------------------------------------
+
+struct bt3DGrid3F1U // three floats plus one unsigned int; the broadphase kernels store an AABB as two of these (min, then max)
+{
+ float fx; // x coordinate of the AABB corner
+ float fy; // y coordinate of the AABB corner
+ float fz; // z coordinate of the AABB corner
+ unsigned int uw; // in a min entry the kernels read this as the handle index of the body
+};
+
+//----------------------------------------------------------------------------------------
+
+#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/btGpuDefines.h b/extern/bullet2/BulletMultiThreaded/btGpuDefines.h
new file mode 100644
index 00000000000..f9315ab6496
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpuDefines.h
@@ -0,0 +1,211 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+// definitions for "GPU on CPU" code
+
+
+#ifndef BT_GPU_DEFINES_H
+#define BT_GPU_DEFINES_H
+
+typedef unsigned int uint; // short unsigned alias used by the shared kernel code
+// Plain structs below provide the small vector types the shared kernel code uses (int2 ... float3) for the CPU build.
+struct int2
+{
+ int x, y;
+};
+// Two unsigned components; also the type of the emulated block/thread index variables in this header.
+struct uint2
+{
+ unsigned int x, y;
+};
+// Three int components; used for grid cell coordinates and cell offsets.
+struct int3
+{
+ int x, y, z;
+};
+// Three unsigned components.
+struct uint3
+{
+ unsigned int x, y, z;
+};
+// Four float components.
+struct float4
+{
+ float x, y, z, w;
+};
+// Three float components.
+struct float3
+{
+ float x, y, z;
+};
+
+
+#define BT_GPU___device__ inline // device functions become ordinary inline functions on the CPU
+#define BT_GPU___devdata__ // expands to nothing on the CPU
+#define BT_GPU___constant__ // expands to nothing on the CPU
+#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b)) // macro: the selected argument is evaluated twice
+#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b)) // macro: the selected argument is evaluated twice
+#define BT_GPU_params s3DGridBroadphaseParams // name of the parameter object; s3DGridBroadphaseParams is defined outside this header
+#define BT_GPU___mul24(a, b) ((a)*(b)) // plain multiplication on the CPU
+#define BT_GPU___global__ inline // kernels become ordinary inline functions on the CPU
+#define BT_GPU___shared__ static // shared variables become static variables on the CPU
+#define BT_GPU___syncthreads() // no-op on the CPU
+#define CUDART_PI_F SIMD_PI // SIMD_PI is not defined in this header; it must be provided by the including code
+
+// Builds a uint2 whose members are x and y; BT_GPU_make_uint2 forwards to this function.
+static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
+{
+    uint2 result = { x, y };
+    return result;
+}
+#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
+
+// Builds an int3 whose members are x, y and z; BT_GPU_make_int3 forwards to this function.
+static inline int3 bt3dGrid_make_int3(int x, int y, int z)
+{
+    int3 result = { x, y, z };
+    return result;
+}
+#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
+
+// Builds a float3 whose members are x, y and z; BT_GPU_make_float3 forwards to this function.
+static inline float3 bt3dGrid_make_float3(float x, float y, float z)
+{
+    float3 result = { x, y, z };
+    return result;
+}
+#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
+
+// Builds a float3 from the x, y and z members of a float4 (w is dropped);
+// BT_GPU_make_float34 forwards to this function.
+static inline float3 bt3dGrid_make_float34(float4 f)
+{
+    float3 result = { f.x, f.y, f.z };
+    return result;
+}
+#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
+
+// Builds a float3 with all three members equal to f; BT_GPU_make_float31 forwards to this function.
+static inline float3 bt3dGrid_make_float31(float f)
+{
+    float3 result = { f, f, f };
+    return result;
+}
+#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
+
+// Builds a float4 from the three members of v followed by f as w;
+// BT_GPU_make_float42 forwards to this function.
+static inline float4 bt3dGrid_make_float42(float3 v, float f)
+{
+    float4 result = { v.x, v.y, v.z, f };
+    return result;
+}
+#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b)
+
+// Builds a float4 with members x=a, y=b, z=c, w=d; BT_GPU_make_float44 forwards to this function.
+static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
+{
+    float4 result = { a, b, c, d };
+    return result;
+}
+#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d)
+
+// Member-wise sum of two int3 values.
+inline int3 operator+(int3 a, int3 b)
+{
+    int3 sum = { a.x + b.x, a.y + b.y, a.z + b.z };
+    return sum;
+}
+
+// Member-wise sum of two float4 values.
+inline float4 operator+(const float4& a, const float4& b)
+{
+    float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r;
+}
+// Every member of a multiplied by fact.
+inline float4 operator*(const float4& a, float fact)
+{
+    float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r;
+}
+// Same as a * fact. The vector is taken by const reference (it is only read), so const
+// values and temporaries can be scaled from the left as well.
+inline float4 operator*(float fact, const float4& a)
+{
+    return (a * fact);
+}
+// Scales a in place and returns it.
+inline float4& operator*=(float4& a, float fact)
+{
+    a = fact * a;
+    return a;
+}
+// Adds b to a in place and returns a.
+inline float4& operator+=(float4& a, const float4& b)
+{
+    a = a + b;
+    return a;
+}
+
+// Member-wise sum of two float3 values.
+inline float3 operator+(const float3& a, const float3& b)
+{
+    float3 sum = { a.x+b.x, a.y+b.y, a.z+b.z };
+    return sum;
+}
+// Member-wise difference a - b of two float3 values.
+inline float3 operator-(const float3& a, const float3& b)
+{
+    float3 diff = { a.x-b.x, a.y-b.y, a.z-b.z };
+    return diff;
+}
+// Dot product of two float3 values. Both arguments are only read, so they are taken by
+// const reference; const values and temporaries are accepted as well.
+// BT_GPU_dot forwards to this function.
+static inline float bt3dGrid_dot(const float3& a, const float3& b)
+{
+    return a.x*b.x+a.y*b.y+a.z*b.z;
+}
+#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
+
+// Four-component dot product of two float4 values. Both arguments are only read, so they
+// are taken by const reference; const values and temporaries are accepted as well.
+// BT_GPU_dot4 forwards to this function.
+static inline float bt3dGrid_dot4(const float4& a, const float4& b)
+{
+    return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
+}
+#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
+
+// Cross product a x b of two float3 values; BT_GPU_cross forwards to this function.
+static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
+{
+    float3 result;
+    result.x = a.y*b.z-a.z*b.y;
+    result.y = -a.x*b.z+a.z*b.x;
+    result.z = a.x*b.y-a.y*b.x;
+    return result;
+}
+#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
+
+
+// Every member of a multiplied by fact.
+inline float3 operator*(const float3& a, float fact)
+{
+    float3 scaled = { a.x*fact, a.y*fact, a.z*fact };
+    return scaled;
+}
+
+
+// Adds b to a in place and returns a.
+inline float3& operator+=(float3& a, const float3& b)
+{
+    return a = a + b;
+}
+// Subtracts b from a in place and returns a.
+inline float3& operator-=(float3& a, const float3& b)
+{
+    return a = a - b;
+}
+// Scales a by fact in place and returns a.
+inline float3& operator*=(float3& a, float fact)
+{
+    return a = a * fact;
+}
+// Member-wise negation of v.
+inline float3 operator-(const float3& v)
+{
+    float3 negated = { -v.x, -v.y, -v.z };
+    return negated;
+}
+
+
+#define BT_GPU_FETCH(a, b) a[b] // plain array read on the CPU
+#define BT_GPU_FETCH4(a, b) a[b] // plain array read on the CPU
+#define BT_GPU_PREF(func) btGpu_##func // prepends btGpu_ to the function name
+#define BT_GPU_SAFE_CALL(func) func // the call is made as written, without any checking
+#define BT_GPU_Memset memset // needs the declaration of memset from the including code
+#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c) // copies c bytes from b into the object a
+#define BT_GPU_BindTexture(a, b, c, d) // expands to nothing on the CPU
+#define BT_GPU_UnbindTexture(a) // expands to nothing on the CPU
+// Emulated block/thread indices: BT_GPU_EXECKERNEL sets their x members before each call of the kernel function.
+static uint2 s_blockIdx, s_blockDim, s_threadIdx; // static in a header: every including source file gets its own copies
+#define BT_GPU_blockIdx s_blockIdx
+#define BT_GPU_blockDim s_blockDim
+#define BT_GPU_threadIdx s_threadIdx
+#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}} // calls kfunc numb*numt times in sequence, block by block
+// Error checking after a kernel launch is compiled out on the CPU.
+#define BT_GPU_CHECK_ERROR(s)
+
+
+#endif //BT_GPU_DEFINES_H
diff --git a/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h b/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h
new file mode 100644
index 00000000000..5761e7901ee
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h
@@ -0,0 +1,55 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared code for GPU-based utilities
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// Keep this file free from Bullet headers
+// will be compiled by both CPU and CUDA compilers
+// file with definitions of BT_GPU_xxx should be included first
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#include "btGpuUtilsSharedDefs.h"
+
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+
+// Returns a / b, increased by one whenever the division leaves a remainder.
+// For non-negative a and positive b this is a / b rounded up. b must not be zero.
+int BT_GPU_PREF(iDivUp)(int a, int b)
+{
+    if(a % b != 0)
+    {
+        return a / b + 1;
+    }
+    return a / b;
+} // iDivUp()
+
+//----------------------------------------------------------------------------------------
+
+// Computes the grid and thread block size for a given number of elements.
+//   n          - number of elements to process
+//   blockSize  - maximum number of threads per block
+//   numBlocks  - out: number of blocks needed to cover n elements
+//   numThreads - out: threads per block, the smaller of blockSize and n
+// When n or blockSize is zero or negative there is nothing to launch: both outputs are set
+// to 0 instead of dividing by zero. With the CPU loop macro such a launch runs no iteration.
+void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
+{
+    numThreads = BT_GPU_min(blockSize, n);
+    if(numThreads <= 0)
+    {
+        numThreads = 0;
+        numBlocks = 0;
+        return;
+    }
+    numBlocks = BT_GPU_PREF(iDivUp)(n, numThreads);
+} // computeGridSize()
+
+//----------------------------------------------------------------------------------------
+
+} // extern "C"
+
diff --git a/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h b/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h
new file mode 100644
index 00000000000..dccfda54cbc
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h
@@ -0,0 +1,52 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+// Shared definitions for GPU-based utilities
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// Keep this file free from Bullet headers
+// it is included into both CUDA and CPU code
+// file with definitions of BT_GPU_xxx should be included first
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+#ifndef BTGPUUTILSDHAREDDEFS_H
+#define BTGPUUTILSDHAREDDEFS_H
+
+
+extern "C"
+{
+
+
+//Round a / b to nearest higher integer value
+int BT_GPU_PREF(iDivUp)(int a, int b);
+
+// compute grid and thread block size for a given number of elements
+void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
+
+// Array and GL buffer helpers. They are only declared here; the notes below are inferred
+// from the names and signatures - confirm against the implementation.
+// All declarators use the BT_GPU_PREF(name)(args) form; the expanded names are unchanged.
+// presumably allocates size bytes and stores the pointer in *devPtr
+void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
+// presumably releases an array obtained from allocateArray
+void BT_GPU_PREF(freeArray)(void* devPtr);
+// presumably copies size bytes from device to host
+void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
+// presumably copies size bytes from host to device
+void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
+// presumably register / map / unmap the GL buffer object identified by vbo
+void BT_GPU_PREF(registerGLBufferObject)(unsigned int vbo);
+void* BT_GPU_PREF(mapGLBufferObject)(unsigned int vbo);
+void BT_GPU_PREF(unmapGLBufferObject)(unsigned int vbo);
+
+
+} // extern "C"
+
+
+#endif // BTGPUUTILSDHAREDDEFS_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp b/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp
new file mode 100644
index 00000000000..84774b22706
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp
@@ -0,0 +1,74 @@
+/*
+ Copyright (C) 2010 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#include "btParallelConstraintSolver.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+
+btParallelConstraintSolver::btParallelConstraintSolver()
+{
+// The constructor body is still empty.
+ //TODO: initialize MiniCL here
+
+}
+
+btParallelConstraintSolver::~btParallelConstraintSolver()
+{
+ //TODO: exit MiniCL here; the destructor body is still empty
+
+}
+
+
+// Setup step for one solver group.
+// Passes each of the numManifolds entries of manifoldPtr to convertContact() together with
+// infoGlobal, then sizes the two ordering tables to the contact and friction constraint
+// pools and fills them with the identity order 0, 1, 2, ...
+// bodies, numBodies, constraints, numConstraints, debugDrawer and stackAlloc are not used
+// by this implementation.
+// Always returns 0.
+btScalar btParallelConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+    for (int i=0;i<numManifolds;i++)
+    {
+        convertContact(manifoldPtr[i],infoGlobal);
+    }
+
+    const int numConstraintPool = m_tmpSolverContactConstraintPool.size();
+    const int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
+
+    ///@todo: use stack allocator for such temporary memory, same for solver bodies/constraints
+    m_orderTmpConstraintPool.resize(numConstraintPool);
+    m_orderFrictionConstraintPool.resize(numFrictionPool);
+    for (int i=0;i<numConstraintPool;i++)
+    {
+        m_orderTmpConstraintPool[i] = i;
+    }
+    for (int i=0;i<numFrictionPool;i++)
+    {
+        m_orderFrictionConstraintPool[i] = i;
+    }
+
+    return 0.f;
+}
+
diff --git a/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h b/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h
new file mode 100644
index 00000000000..c347f96f5a0
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (C) 2010 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H
+#define __BT_PARALLEL_CONSTRAINT_SOLVER_H
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+
+class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver // sequential impulse solver variant; only the setup step is redefined so far
+{
+protected:
+
+public:
+// Construction and destruction do no work yet (MiniCL start-up and shutdown are still to be added).
+ btParallelConstraintSolver();
+
+ virtual ~btParallelConstraintSolver();
+
+ //virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
+// Converts the given contact manifolds and resets the constraint ordering tables; always returns 0.
+ btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+
+
+
+};
+
+
+
+#endif //__BT_PARALLEL_CONSTRAINT_SOLVER_H
\ No newline at end of file
diff --git a/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp b/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp
new file mode 100644
index 00000000000..8192aa4684a
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp
@@ -0,0 +1,22 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btThreadSupportInterface.h"
+
+btThreadSupportInterface::~btThreadSupportInterface()
+{
+// Empty: this is the out-of-line definition of the virtual destructor declared in the interface.
+}
+
diff --git a/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h b/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h
new file mode 100644
index 00000000000..730ffa9ea0b
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h
@@ -0,0 +1,50 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef THREAD_SUPPORT_INTERFACE_H
+#define THREAD_SUPPORT_INTERFACE_H
+
+
+//#include <LinearMath/btScalar.h> //for uint32_t etc.
+#include "PlatformDefinitions.h"
+#include "PpuAddressSpace.h"
+
+class btThreadSupportInterface // abstract interface: every operation below is pure virtual
+{
+public:
+
+ virtual ~btThreadSupportInterface(); // virtual so implementations can be destroyed through this interface; defined in btThreadSupportInterface.cpp
+
+///Send a message to the SPUs: a command code (uiCommand) plus two arguments.
+ virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1) =0;
+
+///Check for messages from the SPUs. NOTE(review): puiArgument0/puiArgument1 look like out-parameters -- confirm against the implementations.
+ virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0;
+
+///Start the SPUs (can be called at the beginning of each frame, to make sure that the right SPU program is loaded).
+ virtual void startSPU() =0;
+
+///Tell the task scheduler we are done with the SPU tasks.
+ virtual void stopSPU()=0;
+
+ ///Tell the task scheduler to use no more than numTasks tasks.
+ virtual void setNumTasks(int numTasks)=0;
+
+ virtual int getNumTasks() const = 0; // NOTE(review): presumably reports the limit given to setNumTasks -- confirm
+
+};
+
+#endif //THREAD_SUPPORT_INTERFACE_H
+
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h
new file mode 100644
index 00000000000..c5eeeebd7a1
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h
@@ -0,0 +1,225 @@
+/*
+ Copyright (C) 2009 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+private:
+ unsigned int mData; // 0 for false; the bool constructor stores -(int)true (all bits set) for true
+
+public:
+ // Default constructor; leaves mData uninitialized
+ //
+ inline boolInVec( ) { };
+
+ // Implicit conversion from floatInVec: true when the value does not compare equal to 0.0f
+ //
+ inline boolInVec(floatInVec vec);
+
+ // Explicit construction from a scalar bool
+ //
+ explicit inline boolInVec(bool scalar);
+
+ // Explicit read-back as a scalar bool
+ //
+ inline bool getAsBool() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+ // Implicit cast to bool; compiled out when _VECTORMATH_NO_SCALAR_CAST is defined
+ //
+ inline operator bool() const;
+#endif
+
+ // Boolean negation operator; returns a new value and leaves this one unchanged
+ //
+ inline const boolInVec operator ! () const;
+
+ // Assignment operator (argument is taken by value)
+ //
+ inline boolInVec& operator = (boolInVec vec);
+
+ // Boolean and assignment operator; returns *this
+ //
+ inline boolInVec& operator &= (boolInVec vec);
+
+ // Boolean exclusive or assignment operator; returns *this
+ //
+ inline boolInVec& operator ^= (boolInVec vec);
+
+ // Boolean or assignment operator; returns *this
+ //
+ inline boolInVec& operator |= (boolInVec vec);
+
+};
+
+// Equal operator
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+
+// And operator
+//
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+
+// Exclusive or operator
+//
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+
+// Or operator
+//
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// Conditionally select between two values
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+inline
+boolInVec::boolInVec(floatInVec vec) // converting constructor: float -> boolean
+{
+ *this = (vec != floatInVec(0.0f)); // true exactly when vec does not compare equal to 0.0f
+}
+
+inline
+boolInVec::boolInVec(bool scalar) // explicit construction from a scalar bool
+{
+ mData = scalar ? ~0u : 0u; // true -> all bits set, false -> zero
+}
+
+inline
+bool
+boolInVec::getAsBool() const // scalar view of the stored mask
+{
+ return (mData != 0u); // mData is unsigned, so any non-zero pattern reads as true
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+inline
+boolInVec::operator bool() const // implicit scalar cast; same result as getAsBool()
+{
+ return (mData > 0);
+}
+#endif
+
+inline
+const boolInVec
+boolInVec::operator ! () const // logical negation; *this is left unchanged
+{
+ return boolInVec(mData == 0u); // true only when the stored mask is zero
+}
+
+inline
+boolInVec&
+boolInVec::operator = (boolInVec vec) // vec is received by value
+{
+ mData = vec.mData; // copy the raw mask
+ return *this; // returning *this allows chained assignment
+}
+
+inline
+boolInVec&
+boolInVec::operator &= (boolInVec vec) // in-place boolean AND
+{
+ const boolInVec combined = *this & vec; // evaluated with the free operator &
+ return (*this = combined);
+}
+
+inline
+boolInVec&
+boolInVec::operator ^= (boolInVec vec) // in-place boolean exclusive OR
+{
+ const boolInVec combined = *this ^ vec; // evaluated with the free operator ^
+ return (*this = combined);
+}
+
+inline
+boolInVec&
+boolInVec::operator |= (boolInVec vec) // in-place boolean OR
+{
+ const boolInVec combined = *this | vec; // evaluated with the free operator |
+ return (*this = combined);
+}
+
+inline
+const boolInVec
+operator == (boolInVec vec0, boolInVec vec1) // compares the logical values, not the raw mData bits
+{
+ return boolInVec(vec0.getAsBool() == vec1.getAsBool());
+}
+
+inline
+const boolInVec
+operator != (boolInVec vec0, boolInVec vec1) // logical inequality
+{
+ return !(vec0 == vec1); // boolInVec::operator ! applied to the == result
+}
+
+inline
+const boolInVec
+operator & (boolInVec vec0, boolInVec vec1) // boolean AND of the two logical values
+{
+ return boolInVec(vec0.getAsBool() && vec1.getAsBool()); // getAsBool() has no side effects, so short-circuiting changes nothing
+}
+
+inline
+const boolInVec
+operator | (boolInVec vec0, boolInVec vec1) // boolean OR of the two logical values
+{
+ return boolInVec(vec0.getAsBool() || vec1.getAsBool()); // getAsBool() has no side effects, so short-circuiting changes nothing
+}
+
+inline
+const boolInVec
+operator ^ (boolInVec vec0, boolInVec vec1) // boolean exclusive OR of the two logical values
+{
+ return boolInVec(vec0.getAsBool() != vec1.getAsBool()); // for two bools, "differs" is exclusive OR
+}
+
+inline
+const boolInVec
+select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1) // picks vec1 when select_vec1 is true, otherwise vec0
+{
+ return select_vec1.getAsBool() ? vec1 : vec0;
+}
+
+} // namespace Vectormath
+
+#endif // boolInVec_h
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h
new file mode 100644
index 00000000000..12d89e43d3e
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h
@@ -0,0 +1,343 @@
+/*
+ Copyright (C) 2009 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+// A class representing a scalar float value contained in a vector register
+// This class does not support fastmath
+class floatInVec
+{
+private:
+ float mData; // the wrapped scalar value
+
+public:
+ // Default constructor; leaves mData uninitialized
+ //
+ inline floatInVec( ) { };
+
+ // Implicit conversion from boolInVec (the bool value converted to float)
+ //
+ inline floatInVec(boolInVec vec);
+
+ // Explicit construction from a scalar float
+ //
+ explicit inline floatInVec(float scalar);
+
+ // Explicit read-back as a scalar float
+ //
+ inline float getAsFloat() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+ // Implicit cast to float; compiled out when _VECTORMATH_NO_SCALAR_CAST is defined
+ //
+ inline operator float() const;
+#endif
+
+ // Post increment (add 1.0f); returns the value held before the increment
+ //
+ inline const floatInVec operator ++ (int);
+
+ // Post decrement (subtract 1.0f); returns the value held before the decrement
+ //
+ inline const floatInVec operator -- (int);
+
+ // Pre increment (add 1.0f); returns *this
+ //
+ inline floatInVec& operator ++ ();
+
+ // Pre decrement (subtract 1.0f); returns *this
+ //
+ inline floatInVec& operator -- ();
+
+ // Negation operator; returns a new value and leaves this one unchanged
+ //
+ inline const floatInVec operator - () const;
+
+ // Assignment operator (argument is taken by value)
+ //
+ inline floatInVec& operator = (floatInVec vec);
+
+ // Multiplication assignment operator; returns *this
+ //
+ inline floatInVec& operator *= (floatInVec vec);
+
+ // Division assignment operator; returns *this
+ //
+ inline floatInVec& operator /= (floatInVec vec);
+
+ // Addition assignment operator; returns *this
+ //
+ inline floatInVec& operator += (floatInVec vec);
+
+ // Subtraction assignment operator; returns *this
+ //
+ inline floatInVec& operator -= (floatInVec vec);
+
+};
+
+// Multiplication operator
+//
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+
+// Division operator
+//
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+
+// Addition operator
+//
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+
+// Subtraction operator
+//
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+
+// Less than operator
+//
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+
+// Less than or equal operator
+//
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+
+// Greater than operator
+//
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+
+// Greater than or equal operator
+//
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+
+// Equal operator
+//
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// Conditionally select between two values
+//
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+inline
+floatInVec::floatInVec(boolInVec vec) // converting constructor: boolean -> float
+{
+ mData = vec.getAsBool() ? 1.0f : 0.0f; // true maps to 1.0f, false to 0.0f
+}
+
+inline
+floatInVec::floatInVec(float scalar) // declared explicit in the class
+{
+ mData = scalar; // store the scalar unchanged
+}
+
+inline
+float
+floatInVec::getAsFloat() const // explicit scalar read-back
+{
+ return mData; // the stored value, unchanged
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+inline
+floatInVec::operator float() const // implicit scalar cast; same result as getAsFloat()
+{
+ return mData;
+}
+#endif
+
+inline
+const floatInVec
+floatInVec::operator ++ (int) // post-increment
+{
+ const floatInVec previous(mData); // snapshot taken before the increment
+ ++(*this); // delegate to the pre-increment operator
+ return previous;
+}
+
+inline
+const floatInVec
+floatInVec::operator -- (int) // post-decrement
+{
+ const floatInVec previous(mData); // snapshot taken before the decrement
+ --(*this); // delegate to the pre-decrement operator
+ return previous;
+}
+
+inline
+floatInVec&
+floatInVec::operator ++ () // pre-increment: adds 1.0f in place
+{
+ const floatInVec one(1.0f);
+ return (*this += one);
+}
+
+inline
+floatInVec&
+floatInVec::operator -- () // pre-decrement: subtracts 1.0f in place
+{
+ const floatInVec one(1.0f);
+ return (*this -= one);
+}
+
+inline
+const floatInVec
+floatInVec::operator - () const // unary minus; *this is left unchanged
+{
+ return floatInVec(-mData);
+}
+
+inline
+floatInVec&
+floatInVec::operator = (floatInVec vec) // vec is received by value
+{
+ mData = vec.mData; // copy the wrapped scalar
+ return *this; // returning *this allows chained assignment
+}
+
+inline
+floatInVec&
+floatInVec::operator *= (floatInVec vec) // in-place multiplication
+{
+ const floatInVec product = *this * vec; // evaluated with the free operator *
+ return (*this = product);
+}
+
+inline
+floatInVec&
+floatInVec::operator /= (floatInVec vec) // in-place division
+{
+ const floatInVec quotient = *this / vec; // evaluated with the free operator /
+ return (*this = quotient);
+}
+
+inline
+floatInVec&
+floatInVec::operator += (floatInVec vec) // in-place addition
+{
+ const floatInVec sum = *this + vec; // evaluated with the free operator +
+ return (*this = sum);
+}
+
+inline
+floatInVec&
+floatInVec::operator -= (floatInVec vec) // in-place subtraction
+{
+ const floatInVec difference = *this - vec; // evaluated with the free operator -
+ return (*this = difference);
+}
+
+inline
+const floatInVec
+operator * (floatInVec vec0, floatInVec vec1) // product of the two wrapped floats
+{
+ return floatInVec(vec0.getAsFloat() * vec1.getAsFloat());
+}
+
+inline
+const floatInVec
+operator / (floatInVec num, floatInVec den) // quotient num / den; the declaration above names these parameters vec0 and vec1
+{
+ return floatInVec(num.getAsFloat() / den.getAsFloat()); // plain float division; den is not checked for zero
+}
+
+inline
+const floatInVec
+operator + (floatInVec vec0, floatInVec vec1) // sum of the two wrapped floats
+{
+ return floatInVec(vec0.getAsFloat() + vec1.getAsFloat());
+}
+
+inline
+const floatInVec
+operator - (floatInVec vec0, floatInVec vec1) // difference vec0 - vec1 of the wrapped floats
+{
+ return floatInVec(vec0.getAsFloat() - vec1.getAsFloat());
+}
+
+inline
+const boolInVec
+operator < (floatInVec vec0, floatInVec vec1) // result is wrapped in a boolInVec rather than returned as bool
+{
+ return boolInVec(vec0.getAsFloat() < vec1.getAsFloat()); // built-in float comparison
+}
+
+inline
+const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1) // true when vec0 is less than or equal to vec1
+{
+ return boolInVec(vec0.getAsFloat() <= vec1.getAsFloat()); // direct compare: false when either operand is NaN, where the former !(vec0 > vec1) returned true
+}
+
+inline
+const boolInVec
+operator > (floatInVec vec0, floatInVec vec1) // result is wrapped in a boolInVec rather than returned as bool
+{
+ return boolInVec(vec0.getAsFloat() > vec1.getAsFloat()); // built-in float comparison
+}
+
+inline
+const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1) // true when vec0 is greater than or equal to vec1
+{
+ return boolInVec(vec0.getAsFloat() >= vec1.getAsFloat()); // direct compare: false when either operand is NaN, where the former !(vec0 < vec1) returned true
+}
+
+inline
+const boolInVec
+operator == (floatInVec vec0, floatInVec vec1) // exact float equality, no tolerance
+{
+ return boolInVec(vec0.getAsFloat() == vec1.getAsFloat());
+}
+
+inline
+const boolInVec
+operator != (floatInVec vec0, floatInVec vec1) // exact float inequality
+{
+ return !(vec0 == vec1); // boolInVec::operator ! applied to the == result
+}
+
+inline
+const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1) // picks vec1 when select_vec1 is true, otherwise vec0
+{
+ return select_vec1.getAsBool() ? vec1 : vec0;
+}
+
+} // namespace Vectormath
+
+#endif // floatInVec_h
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h
new file mode 100644
index 00000000000..e103243d1e0
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h
@@ -0,0 +1,1630 @@
+/*
+ Copyright (C) 2009 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+{
+ mCol0 = mat.mCol0;
+ mCol1 = mat.mCol1;
+ mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar )
+{
+ mCol0 = Vector3( scalar );
+ mCol1 = Vector3( scalar );
+ mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat ) // builds the three columns from the quaternion components; the quaternion is not normalized here
+{
+ float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+ qx = unitQuat.getX();
+ qy = unitQuat.getY();
+ qz = unitQuat.getZ();
+ qw = unitQuat.getW();
+ qx2 = ( qx + qx ); // the "2" suffix marks a doubled term: qx2 == 2*qx
+ qy2 = ( qy + qy );
+ qz2 = ( qz + qz );
+ qxqx2 = ( qx * qx2 ); // 2*qx*qx
+ qxqy2 = ( qx * qy2 ); // 2*qx*qy
+ qxqz2 = ( qx * qz2 ); // 2*qx*qz
+ qxqw2 = ( qw * qx2 ); // 2*qx*qw
+ qyqy2 = ( qy * qy2 ); // 2*qy*qy
+ qyqz2 = ( qy * qz2 ); // 2*qy*qz
+ qyqw2 = ( qw * qy2 ); // 2*qy*qw
+ qzqz2 = ( qz * qz2 ); // 2*qz*qz
+ qzqw2 = ( qw * qz2 ); // 2*qz*qw
+ mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+ mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+ mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+{
+ mCol0 = _col0;
+ mCol1 = _col1;
+ mCol2 = _col2;
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+ mCol0 = _col0;
+ return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+ mCol1 = _col1;
+ return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+ mCol2 = _col2;
+ return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) // replaces column number col with vec
+{
+ *(&mCol0 + col) = vec; // column reached by pointer offset from mCol0; col is not range-checked. NOTE(review): assumes mCol0..mCol2 are adjacent members -- confirm in the class declaration
+ return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec ) // writes vec across row number row
+{
+ mCol0.setElem( row, vec.getElem( 0 ) ); // storage is column-wise, so a row touches one element of every column
+ mCol1.setElem( row, vec.getElem( 1 ) );
+ mCol2.setElem( row, vec.getElem( 2 ) );
+ return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val ) // read-modify-write of one whole column
+{
+ Vector3 column;
+ column = getCol( col ); // fetch the column that holds the element
+ column.setElem( row, val ); // patch the single element
+ setCol( col, column ); // store the column back
+ return *this;
+}
+
+inline float Matrix3::getElem( int col, int row ) const
+{
+ return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+ return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+ return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+ return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const // returns a copy of column number col
+{
+ return *(&mCol0 + col); // column reached by pointer offset from mCol0; col is not range-checked
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const // gathers element number row from each of the three columns
+{
+ return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col ) // mutable access: returns a reference to column number col
+{
+ return *(&mCol0 + col); // column reached by pointer offset from mCol0; col is not range-checked
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const // const access: returns column number col by value
+{
+ return *(&mCol0 + col); // column reached by pointer offset from mCol0; col is not range-checked
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+ mCol0 = mat.mCol0;
+ mCol1 = mat.mCol1;
+ mCol2 = mat.mCol2;
+ return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+ return Matrix3(
+ Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
+ Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
+ Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
+ );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat ) // general 3x3 inverse built from column cross products
+{
+ Vector3 tmp0, tmp1, tmp2;
+ float detinv;
+ tmp0 = cross( mat.getCol1(), mat.getCol2() );
+ tmp1 = cross( mat.getCol2(), mat.getCol0() );
+ tmp2 = cross( mat.getCol0(), mat.getCol1() );
+ detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) ); // the dot product is the same expression determinant( const Matrix3 & ) returns; it is not checked for zero
+ return Matrix3(
+ Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ), // tmp0, tmp1, tmp2 end up as the rows of the result
+ Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
+ Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
+ );
+}
+
+inline float determinant( const Matrix3 & mat ) // scalar triple product of the columns
+{
+ return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); // col2 . ( col0 x col1 )
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+ return Matrix3(
+ ( mCol0 + mat.mCol0 ),
+ ( mCol1 + mat.mCol1 ),
+ ( mCol2 + mat.mCol2 )
+ );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+ return Matrix3(
+ ( mCol0 - mat.mCol0 ),
+ ( mCol1 - mat.mCol1 ),
+ ( mCol2 - mat.mCol2 )
+ );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+ *this = *this + mat;
+ return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+ *this = *this - mat;
+ return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+ return Matrix3(
+ ( -mCol0 ),
+ ( -mCol1 ),
+ ( -mCol2 )
+ );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+ return Matrix3(
+ absPerElem( mat.getCol0() ),
+ absPerElem( mat.getCol1() ),
+ absPerElem( mat.getCol2() )
+ );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+ return Matrix3(
+ ( mCol0 * scalar ),
+ ( mCol1 * scalar ),
+ ( mCol2 * scalar )
+ );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+ *this = *this * scalar;
+ return *this;
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+ return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const // matrix times vector
+{
+ return Vector3(
+ ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), // each result element is one row of the matrix dotted with vec
+ ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
+ ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
+ );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const // matrix product (*this) * mat
+{
+ return Matrix3(
+ ( *this * mat.mCol0 ), // each result column is *this applied to the matching column of mat
+ ( *this * mat.mCol1 ),
+ ( *this * mat.mCol2 )
+ );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+ *this = *this * mat;
+ return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+ return Matrix3(
+ mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+ mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+ mulPerElem( mat0.getCol2(), mat1.getCol2() )
+ );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+ return Matrix3(
+ Vector3::xAxis( ),
+ Vector3::yAxis( ),
+ Vector3::zAxis( )
+ );
+}
+
+inline const Matrix3 Matrix3::rotationX( float radians ) // builds a matrix from the sine and cosine of radians, with column 0 fixed to the x axis
+{
+ float s, c;
+ s = sinf( radians );
+ c = cosf( radians );
+ return Matrix3(
+ Vector3::xAxis( ), // column 0
+ Vector3( 0.0f, c, s ), // column 1
+ Vector3( 0.0f, -s, c ) // column 2
+ );
+}
+
+inline const Matrix3 Matrix3::rotationY( float radians ) // builds a matrix from the sine and cosine of radians, with column 1 fixed to the y axis
+{
+ float s, c;
+ s = sinf( radians );
+ c = cosf( radians );
+ return Matrix3(
+ Vector3( c, 0.0f, -s ), // column 0
+ Vector3::yAxis( ), // column 1
+ Vector3( s, 0.0f, c ) // column 2
+ );
+}
+
+inline const Matrix3 Matrix3::rotationZ( float radians ) // builds a matrix from the sine and cosine of radians, with column 2 fixed to the z axis
+{
+ float s, c;
+ s = sinf( radians );
+ c = cosf( radians );
+ return Matrix3(
+ Vector3( c, s, 0.0f ), // column 0
+ Vector3( -s, c, 0.0f ), // column 1
+ Vector3::zAxis( ) // column 2
+ );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ ) // builds a matrix from three angles stored in the x, y and z elements of radiansXYZ
+{
+ float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+ sX = sinf( radiansXYZ.getX() );
+ cX = cosf( radiansXYZ.getX() );
+ sY = sinf( radiansXYZ.getY() );
+ cY = cosf( radiansXYZ.getY() );
+ sZ = sinf( radiansXYZ.getZ() );
+ cZ = cosf( radiansXYZ.getZ() );
+ tmp0 = ( cZ * sY ); // shared by the first elements of columns 1 and 2
+ tmp1 = ( sZ * sY ); // shared by the second elements of columns 1 and 2
+ return Matrix3(
+ Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
+ Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
+ Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) )
+ );
+}
+
+inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec ) // builds a matrix from an angle and an axis; unitVec is used as given and is not normalized here
+{
+ float x, y, z, s, c, oneMinusC, xy, yz, zx;
+ s = sinf( radians );
+ c = cosf( radians );
+ x = unitVec.getX();
+ y = unitVec.getY();
+ z = unitVec.getZ();
+ xy = ( x * y ); // pairwise axis products reused by the off-diagonal elements
+ yz = ( y * z );
+ zx = ( z * x );
+ oneMinusC = ( 1.0f - c );
+ return Matrix3(
+ Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ),
+ Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ),
+ Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) )
+ );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+ return Matrix3( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec ) // diagonal matrix with the elements of scaleVec on the diagonal
+{
+ return Matrix3(
+ Vector3( scaleVec.getX(), 0.0f, 0.0f ),
+ Vector3( 0.0f, scaleVec.getY(), 0.0f ),
+ Vector3( 0.0f, 0.0f, scaleVec.getZ() )
+ );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ) // scales whole columns: column i is multiplied by element i of scaleVec
+{
+ return Matrix3(
+ ( mat.getCol0() * scaleVec.getX( ) ),
+ ( mat.getCol1() * scaleVec.getY( ) ),
+ ( mat.getCol2() * scaleVec.getZ( ) )
+ );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ) // applies mulPerElem( column, scaleVec ) to every column of mat
+{
+ return Matrix3(
+ mulPerElem( mat.getCol0(), scaleVec ),
+ mulPerElem( mat.getCol1(), scaleVec ),
+ mulPerElem( mat.getCol2(), scaleVec )
+ );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) // column-wise select; the same select1 flag is passed for every column
+{
+ return Matrix3(
+ select( mat0.getCol0(), mat1.getCol0(), select1 ), // forwards to the Vector3 overload of select
+ select( mat0.getCol1(), mat1.getCol1(), select1 ),
+ select( mat0.getCol2(), mat1.getCol2(), select1 )
+ );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat ) // only compiled when _VECTORMATH_DEBUG is defined
+{
+ print( mat.getRow( 0 ) ); // one print call per row, so output follows row order although storage is by column
+ print( mat.getRow( 1 ) );
+ print( mat.getRow( 2 ) );
+}
+
+inline void print( const Matrix3 & mat, const char * name ) // only compiled when _VECTORMATH_DEBUG is defined
+{
+ printf("%s:\n", name); // label line first
+ print( mat ); // then the matrix via the unlabelled overload
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+ mCol0 = mat.mCol0;
+ mCol1 = mat.mCol1;
+ mCol2 = mat.mCol2;
+ mCol3 = mat.mCol3;
+}
+
+inline Matrix4::Matrix4( float scalar )
+{
+ mCol0 = Vector4( scalar );
+ mCol1 = Vector4( scalar );
+ mCol2 = Vector4( scalar );
+ mCol3 = Vector4( scalar );
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat ) // widens a Transform3 to 4x4 by appending a w element to each of its four columns
+{
+ mCol0 = Vector4( mat.getCol0(), 0.0f ); // w = 0 for the first three columns
+ mCol1 = Vector4( mat.getCol1(), 0.0f );
+ mCol2 = Vector4( mat.getCol2(), 0.0f );
+ mCol3 = Vector4( mat.getCol3(), 1.0f ); // w = 1 for the fourth column
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+{
+ mCol0 = _col0;
+ mCol1 = _col1;
+ mCol2 = _col2;
+ mCol3 = _col3;
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec ) // 3x3 part from mat, fourth column from translateVec
+{
+ mCol0 = Vector4( mat.getCol0(), 0.0f ); // w = 0 for the three matrix columns
+ mCol1 = Vector4( mat.getCol1(), 0.0f );
+ mCol2 = Vector4( mat.getCol2(), 0.0f );
+ mCol3 = Vector4( translateVec, 1.0f ); // w = 1 for the translation column
+}
+
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec ) // 3x3 part from the quaternion, fourth column from translateVec
+{
+ Matrix3 mat;
+ mat = Matrix3( unitQuat ); // quaternion converted to a 3x3 matrix first
+ mCol0 = Vector4( mat.getCol0(), 0.0f ); // w = 0 for the three matrix columns
+ mCol1 = Vector4( mat.getCol1(), 0.0f );
+ mCol2 = Vector4( mat.getCol2(), 0.0f );
+ mCol3 = Vector4( translateVec, 1.0f ); // w = 1 for the translation column
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+ mCol0 = _col0;
+ return *this;
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+ mCol1 = _col1;
+ return *this;
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+ mCol2 = _col2;
+ return *this;
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+ mCol3 = _col3;
+ return *this;
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) // replaces column number col with vec
+{
+ *(&mCol0 + col) = vec; // column reached by pointer offset from mCol0; col is not range-checked. NOTE(review): assumes mCol0..mCol3 are adjacent members -- confirm in the class declaration
+ return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
+{
+ mCol0.setElem( row, vec.getElem( 0 ) );
+ mCol1.setElem( row, vec.getElem( 1 ) );
+ mCol2.setElem( row, vec.getElem( 2 ) );
+ mCol3.setElem( row, vec.getElem( 3 ) );
+ return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, float val ) // read-modify-write of one whole column
+{
+ Vector4 column;
+ column = getCol( col ); // fetch the column that holds the element
+ column.setElem( row, val ); // patch the single element
+ setCol( col, column ); // store the column back
+ return *this;
+}
+
+inline float Matrix4::getElem( int col, int row ) const
+{
+ return this->getCol( col ).getElem( row );
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+ return mCol0;
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+ return mCol1;
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+ return mCol2;
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+ return mCol3;
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const // returns a copy of column number col
+{
+ return *(&mCol0 + col); // column reached by pointer offset from mCol0; col is not range-checked
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+ return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector4 & Matrix4::operator []( int col ) // mutable access: returns a reference to column number col
+{
+ return *(&mCol0 + col); // column reached by pointer offset from mCol0; col is not range-checked
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const // const access: returns column number col by value
+{
+ return *(&mCol0 + col); // column reached by pointer offset from mCol0; col is not range-checked
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+ mCol0 = mat.mCol0;
+ mCol1 = mat.mCol1;
+ mCol2 = mat.mCol2;
+ mCol3 = mat.mCol3;
+ return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+ return Matrix4(
+ Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
+ Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
+ Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
+ Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
+ );
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat ) // general 4x4 inverse computed element by element
+{
+ Vector4 res0, res1, res2, res3;
+ float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+ mA = mat.getCol0().getX(); // mA..mD: column 0
+ mB = mat.getCol0().getY();
+ mC = mat.getCol0().getZ();
+ mD = mat.getCol0().getW();
+ mE = mat.getCol1().getX(); // mE..mH: column 1
+ mF = mat.getCol1().getY();
+ mG = mat.getCol1().getZ();
+ mH = mat.getCol1().getW();
+ mI = mat.getCol2().getX(); // mI..mL: column 2
+ mJ = mat.getCol2().getY();
+ mK = mat.getCol2().getZ();
+ mL = mat.getCol2().getW();
+ mM = mat.getCol3().getX(); // mM..mP: column 3
+ mN = mat.getCol3().getY();
+ mO = mat.getCol3().getZ();
+ mP = mat.getCol3().getW();
+ tmp0 = ( ( mK * mD ) - ( mC * mL ) ); // first set of 2x2 sub-products, used for res0 and as seeds for res1..res3
+ tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+ tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+ tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+ tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+ tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+ res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
+ res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+ res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+ res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+ detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) ); // the denominator is the same expression determinant( const Matrix4 & ) returns; it is not checked for zero
+ res1.setX( ( mI * tmp1 ) ); // partial terms; completed below once tmp0..tmp5 have been recomputed
+ res1.setY( ( mM * tmp0 ) );
+ res1.setZ( ( mA * tmp1 ) );
+ res1.setW( ( mE * tmp0 ) );
+ res3.setX( ( mI * tmp3 ) );
+ res3.setY( ( mM * tmp2 ) );
+ res3.setZ( ( mA * tmp3 ) );
+ res3.setW( ( mE * tmp2 ) );
+ res2.setX( ( mI * tmp5 ) );
+ res2.setY( ( mM * tmp4 ) );
+ res2.setZ( ( mA * tmp5 ) );
+ res2.setW( ( mE * tmp4 ) );
+ tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); // tmp0..tmp5 are reused for a second set of 2x2 sub-products
+ tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+ tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+ tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+ tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+ tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+ res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) ); // each line folds in the partial term stored earlier
+ res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
+ res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
+ res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
+ res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
+ res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
+ res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
+ res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
+ res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
+ res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
+ res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
+ res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
+ return Matrix4(
+ ( res0 * detInv ), // res0..res3 scaled by 1/determinant become the columns of the inverse
+ ( res1 * detInv ),
+ ( res2 * detInv ),
+ ( res3 * detInv )
+ );
+}
+
+// Inverse of a 4x4 matrix handled as an affine transform. Only the x, y, z
+// components of the four columns are read; the inverted Transform3 is then
+// converted back with the Matrix4( Transform3 ) constructor.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    const Transform3 affinePart(
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( )
+    );
+    return Matrix4( inverse( affinePart ) );
+}
+
+// Inverse of a 4x4 matrix via the Transform3 orthoInverse overload. Only the
+// x, y, z components of the four columns are read.
+// NOTE(review): presumably valid only when the upper 3x3 is orthonormal - confirm with callers.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    const Transform3 rigidPart(
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( )
+    );
+    return Matrix4( orthoInverse( rigidPart ) );
+}
+
+// Determinant of a 4x4 matrix, expanded along the x components of the four
+// columns. Locals are named <component><column>, e.g. z2 is column 2's z.
+inline float determinant( const Matrix4 & mat )
+{
+    const Vector4 c0 = mat.getCol0();
+    const Vector4 c1 = mat.getCol1();
+    const Vector4 c2 = mat.getCol2();
+    const Vector4 c3 = mat.getCol3();
+    const float x0 = c0.getX(), y0 = c0.getY(), z0 = c0.getZ(), w0 = c0.getW();
+    const float x1 = c1.getX(), y1 = c1.getY(), z1 = c1.getZ(), w1 = c1.getW();
+    const float x2 = c2.getX(), y2 = c2.getY(), z2 = c2.getZ(), w2 = c2.getW();
+    const float x3 = c3.getX(), y3 = c3.getY(), z3 = c3.getZ(), w3 = c3.getW();
+
+    // 2x2 sub-determinants shared by the four cofactors below.
+    const float sub0 = ( ( z2 * w0 ) - ( z0 * w2 ) );
+    const float sub1 = ( ( z3 * w1 ) - ( z1 * w3 ) );
+    const float sub2 = ( ( y0 * z2 ) - ( y2 * z0 ) );
+    const float sub3 = ( ( y1 * z3 ) - ( y3 * z1 ) );
+    const float sub4 = ( ( y2 * w0 ) - ( y0 * w2 ) );
+    const float sub5 = ( ( y3 * w1 ) - ( y1 * w3 ) );
+
+    // Cofactors paired with x0, x1, x2 and x3 respectively.
+    const float cof0 = ( ( ( y2 * sub1 ) - ( w2 * sub3 ) ) - ( z2 * sub5 ) );
+    const float cof1 = ( ( ( y3 * sub0 ) - ( w3 * sub2 ) ) - ( z3 * sub4 ) );
+    const float cof2 = ( ( ( w0 * sub3 ) + ( z0 * sub5 ) ) - ( y0 * sub1 ) );
+    const float cof3 = ( ( ( w1 * sub2 ) + ( z1 * sub4 ) ) - ( y1 * sub0 ) );
+
+    return ( ( ( ( x0 * cof0 ) + ( x1 * cof1 ) ) + ( x2 * cof2 ) ) + ( x3 * cof3 ) );
+}
+
+// Matrix sum, formed one column at a time.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = ( mCol0 + mat.mCol0 );
+    const Vector4 sum1 = ( mCol1 + mat.mCol1 );
+    const Vector4 sum2 = ( mCol2 + mat.mCol2 );
+    const Vector4 sum3 = ( mCol3 + mat.mCol3 );
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Matrix difference (this minus mat), formed one column at a time.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = ( mCol0 - mat.mCol0 );
+    const Vector4 diff1 = ( mCol1 - mat.mCol1 );
+    const Vector4 diff2 = ( mCol2 - mat.mCol2 );
+    const Vector4 diff3 = ( mCol3 - mat.mCol3 );
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place matrix addition; returns this matrix.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    const Matrix4 sum = ( *this + mat );
+    *this = sum;
+    return *this;
+}
+
+// In-place matrix subtraction; returns this matrix.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    const Matrix4 difference = ( *this - mat );
+    *this = difference;
+    return *this;
+}
+
+// Negation: every column is negated.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 neg0 = ( -mCol0 );
+    const Vector4 neg1 = ( -mCol1 );
+    const Vector4 neg2 = ( -mCol2 );
+    const Vector4 neg3 = ( -mCol3 );
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+// Applies the Vector4 absPerElem overload to each of the four columns.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Multiplies every column by the same scalar.
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    const Vector4 scaled0 = ( mCol0 * scalar );
+    const Vector4 scaled1 = ( mCol1 * scalar );
+    const Vector4 scaled2 = ( mCol2 * scalar );
+    const Vector4 scaled3 = ( mCol3 * scalar );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place scalar multiplication; returns this matrix.
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    const Matrix4 scaled = ( *this * scalar );
+    *this = scaled;
+    return *this;
+}
+
+// Scalar-on-the-left form; forwards to Matrix4::operator *( float ).
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = ( mat * scalar );
+    return scaled;
+}
+
+// Matrix * 4-D column vector: each result component is the matching row of
+// the matrix dotted with vec.
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    const float vw = vec.getW();
+    return Vector4(
+        ( ( ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz ) ) + ( mCol3.getX() * vw ) ),
+        ( ( ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz ) ) + ( mCol3.getY() * vw ) ),
+        ( ( ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz ) ) + ( mCol3.getZ() * vw ) ),
+        ( ( ( ( mCol0.getW() * vx ) + ( mCol1.getW() * vy ) ) + ( mCol2.getW() * vz ) ) + ( mCol3.getW() * vw ) )
+    );
+}
+
+// Matrix * 3-D vector: only columns 0..2 contribute (column 3 is not read),
+// i.e. the vector is treated as having w = 0.
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    return Vector4(
+        ( ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz ) ),
+        ( ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz ) ),
+        ( ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz ) ),
+        ( ( ( mCol0.getW() * vx ) + ( mCol1.getW() * vy ) ) + ( mCol2.getW() * vz ) )
+    );
+}
+
+// Matrix * 3-D point: column 3 is added unscaled, i.e. the point is treated
+// as having w = 1.
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    const float px = pnt.getX();
+    const float py = pnt.getY();
+    const float pz = pnt.getZ();
+    return Vector4(
+        ( ( ( ( mCol0.getX() * px ) + ( mCol1.getX() * py ) ) + ( mCol2.getX() * pz ) ) + mCol3.getX() ),
+        ( ( ( ( mCol0.getY() * px ) + ( mCol1.getY() * py ) ) + ( mCol2.getY() * pz ) ) + mCol3.getY() ),
+        ( ( ( ( mCol0.getZ() * px ) + ( mCol1.getZ() * py ) ) + ( mCol2.getZ() * pz ) ) + mCol3.getZ() ),
+        ( ( ( ( mCol0.getW() * px ) + ( mCol1.getW() * py ) ) + ( mCol2.getW() * pz ) ) + mCol3.getW() )
+    );
+}
+
+// Matrix product: each result column is this matrix times the matching
+// column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Matrix4 & lhs = *this;
+    const Vector4 out0 = ( lhs * mat.mCol0 );
+    const Vector4 out1 = ( lhs * mat.mCol1 );
+    const Vector4 out2 = ( lhs * mat.mCol2 );
+    const Vector4 out3 = ( lhs * mat.mCol3 );
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place right-multiplication by mat; returns this matrix.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = ( *this * mat );
+    *this = product;
+    return *this;
+}
+
+// Product with a 3x4 transform. The first three transform columns go through
+// the Vector3 overload (w = 0); the fourth is wrapped in a Point3 so that it
+// goes through the Point3 overload (w = 1).
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Matrix4 & lhs = *this;
+    const Vector4 out0 = ( lhs * tfrm.getCol0() );
+    const Vector4 out1 = ( lhs * tfrm.getCol1() );
+    const Vector4 out2 = ( lhs * tfrm.getCol2() );
+    const Vector4 out3 = ( lhs * Point3( tfrm.getCol3() ) );
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place right-multiplication by a 3x4 transform; returns this matrix.
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = ( *this * tfrm );
+    *this = product;
+    return *this;
+}
+
+// Element-wise product of two matrices, delegated per column to the Vector4
+// mulPerElem overload.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Matrix whose columns are the Vector4 x, y, z and w axis vectors.
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 xColumn = Vector4::xAxis( );
+    const Vector4 yColumn = Vector4::yAxis( );
+    const Vector4 zColumn = Vector4::zAxis( );
+    const Vector4 wColumn = Vector4::wAxis( );
+    return Matrix4( xColumn, yColumn, zColumn, wColumn );
+}
+
+// Overwrites the x, y, z parts of columns 0..2 with the columns of mat3.
+// Column 3 and the w components are not written. Returns this matrix.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    Matrix4 & self = *this;
+    self.mCol0.setXYZ( mat3.getCol0() );
+    self.mCol1.setXYZ( mat3.getCol1() );
+    self.mCol2.setXYZ( mat3.getCol2() );
+    return self;
+}
+
+// Returns the 3x3 matrix built from the x, y, z parts of columns 0..2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+// Overwrites the x, y, z part of column 3; its w component is not written.
+// Returns this matrix.
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    Matrix4 & self = *this;
+    self.mCol3.setXYZ( translateVec );
+    return self;
+}
+
+// Returns the x, y, z part of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translationPart = mCol3.getXYZ( );
+    return translationPart;
+}
+
+// Rotation about the x axis by `radians`: columns 0 and 3 are the x and w
+// axis vectors, columns 1 and 2 carry the sine/cosine terms.
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const Vector4 yColumn( 0.0f, cosAngle, sinAngle, 0.0f );
+    const Vector4 zColumn( 0.0f, -sinAngle, cosAngle, 0.0f );
+    return Matrix4( Vector4::xAxis( ), yColumn, zColumn, Vector4::wAxis( ) );
+}
+
+// Rotation about the y axis by `radians`: columns 1 and 3 are the y and w
+// axis vectors, columns 0 and 2 carry the sine/cosine terms.
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const Vector4 xColumn( cosAngle, 0.0f, -sinAngle, 0.0f );
+    const Vector4 zColumn( sinAngle, 0.0f, cosAngle, 0.0f );
+    return Matrix4( xColumn, Vector4::yAxis( ), zColumn, Vector4::wAxis( ) );
+}
+
+// Rotation about the z axis by `radians`: columns 2 and 3 are the z and w
+// axis vectors, columns 0 and 1 carry the sine/cosine terms.
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const Vector4 xColumn( cosAngle, sinAngle, 0.0f, 0.0f );
+    const Vector4 yColumn( -sinAngle, cosAngle, 0.0f, 0.0f );
+    return Matrix4( xColumn, yColumn, Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+// Rotation built from three angles stored in radiansXYZ (x, y, z components
+// are the angles about the x, y, z axes). The column expressions match the
+// product rotationZ * rotationY * rotationX; column 3 is the w axis vector.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    // Products reused by columns 1 and 2.
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    const Vector4 column0( ( cosZ * cosY ), ( sinZ * cosY ), -sinY, 0.0f );
+    const Vector4 column1( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ), 0.0f );
+    const Vector4 column2( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ), 0.0f );
+    return Matrix4( column0, column1, column2, Vector4::wAxis( ) );
+}
+
+// Rotation by `radians` about the axis unitVec (axis-angle form). The axis
+// is used as given and is not normalized here. Column 3 is the w axis vector.
+inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const float ax = unitVec.getX();
+    const float ay = unitVec.getY();
+    const float az = unitVec.getZ();
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float versine = ( 1.0f - cosAngle );
+    const Vector4 column0( ( ( ( ax * ax ) * versine ) + cosAngle ), ( ( axay * versine ) + ( az * sinAngle ) ), ( ( azax * versine ) - ( ay * sinAngle ) ), 0.0f );
+    const Vector4 column1( ( ( axay * versine ) - ( az * sinAngle ) ), ( ( ( ay * ay ) * versine ) + cosAngle ), ( ( ayaz * versine ) + ( ax * sinAngle ) ), 0.0f );
+    const Vector4 column2( ( ( azax * versine ) + ( ay * sinAngle ) ), ( ( ayaz * versine ) - ( ax * sinAngle ) ), ( ( ( az * az ) * versine ) + cosAngle ), 0.0f );
+    return Matrix4( column0, column1, column2, Vector4::wAxis( ) );
+}
+
+// Rotation from a quaternion: builds the Transform3 rotation first, then
+// converts it with the Matrix4( Transform3 ) constructor.
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{
+    const Transform3 quatRotation = Transform3::rotation( unitQuat );
+    return Matrix4( quatRotation );
+}
+
+// Diagonal matrix carrying scaleVec's x, y, z on the first three diagonal
+// entries; column 3 is the w axis vector.
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const Vector4 xColumn( scaleVec.getX(), 0.0f, 0.0f, 0.0f );
+    const Vector4 yColumn( 0.0f, scaleVec.getY(), 0.0f, 0.0f );
+    const Vector4 zColumn( 0.0f, 0.0f, scaleVec.getZ(), 0.0f );
+    return Matrix4( xColumn, yColumn, zColumn, Vector4::wAxis( ) );
+}
+
+// Scales columns 0, 1, 2 of mat by scaleVec's x, y, z respectively and keeps
+// column 3 unchanged.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    const Vector4 scaled0 = ( mat.getCol0() * scaleVec.getX( ) );
+    const Vector4 scaled1 = ( mat.getCol1() * scaleVec.getY( ) );
+    const Vector4 scaled2 = ( mat.getCol2() * scaleVec.getZ( ) );
+    return Matrix4( scaled0, scaled1, scaled2, mat.getCol3() );
+}
+
+// Multiplies every column of mat element-wise by ( scaleVec, 1 ), i.e. rows
+// x, y, z are scaled and row w is left as it is.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale = Vector4( scaleVec, 1.0f );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Matrix with the x, y, z axis vectors as columns 0..2 and
+// ( translateVec, 1 ) as column 3.
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 offsetColumn = Vector4( translateVec, 1.0f );
+    return Matrix4( Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ), offsetColumn );
+}
+
+// Builds an eye frame positioned at eyePos whose z axis points from
+// lookAtPos towards eyePos, then returns the orthoInverse of that frame.
+// The y axis is recomputed from z and x so the three axes are mutually
+// perpendicular.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    const Vector3 upHint = normalize( upVec );
+    const Vector3 zAxisDir = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 xAxisDir = normalize( cross( upHint, zAxisDir ) );
+    const Vector3 yAxisDir = cross( zAxisDir, xAxisDir );
+    const Matrix4 eyeFrame = Matrix4( Vector4( xAxisDir ), Vector4( yAxisDir ), Vector4( zAxisDir ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Perspective projection from a vertical field of view, an aspect ratio and
+// the near/far distances. The focal term is tan( pi/2 - fovy/2 ).
+// zNear == zFar or aspect == 0 would divide by zero; neither is checked here.
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float focal = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
+    const float invDepth = ( 1.0f / ( zNear - zFar ) );
+    const Vector4 column0( ( focal / aspect ), 0.0f, 0.0f, 0.0f );
+    const Vector4 column1( 0.0f, focal, 0.0f, 0.0f );
+    const Vector4 column2( 0.0f, 0.0f, ( ( zNear + zFar ) * invDepth ), -1.0f );
+    const Vector4 column3( 0.0f, 0.0f, ( ( ( zNear * zFar ) * invDepth ) * 2.0f ), 0.0f );
+    return Matrix4( column0, column1, column2, column3 );
+}
+
+// Perspective projection described by the left/right/bottom/top extents and
+// the near/far distances. Equal opposite extents would divide by zero; this
+// is not checked here.
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float widthSum = ( right + left );
+    const float heightSum = ( top + bottom );
+    const float depthSum = ( zNear + zFar );
+    const float invWidth = ( 1.0f / ( right - left ) );
+    const float invHeight = ( 1.0f / ( top - bottom ) );
+    const float invDepth = ( 1.0f / ( zNear - zFar ) );
+    const float twiceNear = ( zNear + zNear );
+    const Vector4 column0( ( twiceNear * invWidth ), 0.0f, 0.0f, 0.0f );
+    const Vector4 column1( 0.0f, ( twiceNear * invHeight ), 0.0f, 0.0f );
+    const Vector4 column2( ( widthSum * invWidth ), ( heightSum * invHeight ), ( depthSum * invDepth ), -1.0f );
+    const Vector4 column3( 0.0f, 0.0f, ( ( twiceNear * invDepth ) * zFar ), 0.0f );
+    return Matrix4( column0, column1, column2, column3 );
+}
+
+// Orthographic projection described by the left/right/bottom/top extents and
+// the near/far distances. Equal opposite extents would divide by zero; this
+// is not checked here.
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float widthSum = ( right + left );
+    const float heightSum = ( top + bottom );
+    const float depthSum = ( zNear + zFar );
+    const float invWidth = ( 1.0f / ( right - left ) );
+    const float invHeight = ( 1.0f / ( top - bottom ) );
+    const float invDepth = ( 1.0f / ( zNear - zFar ) );
+    const Vector4 column0( ( invWidth + invWidth ), 0.0f, 0.0f, 0.0f );
+    const Vector4 column1( 0.0f, ( invHeight + invHeight ), 0.0f, 0.0f );
+    const Vector4 column2( 0.0f, 0.0f, ( invDepth + invDepth ), 0.0f );
+    const Vector4 column3( ( -widthSum * invWidth ), ( -heightSum * invHeight ), ( depthSum * invDepth ), 1.0f );
+    return Matrix4( column0, column1, column2, column3 );
+}
+
+// Column-wise selection: forwards each pair of columns and the same flag to
+// the Vector4 select overload.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the four rows of mat in order, one print call per row.
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: prints `name` followed by a colon and newline, then the rows
+// of mat.
+inline void print( const Matrix4 & mat, const char * name )
+{
+    const char * const label = name;
+    printf("%s:\n", label);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: duplicates all four columns of tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+{
+    const Transform3 & source = tfrm;
+    this->mCol0 = source.mCol0;
+    this->mCol1 = source.mCol1;
+    this->mCol2 = source.mCol2;
+    this->mCol3 = source.mCol3;
+}
+
+// Fills every column with Vector3( scalar ).
+inline Transform3::Transform3( float scalar )
+{
+    const Vector3 fill = Vector3( scalar );
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+// Builds the transform from its four columns.
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+{
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+// Builds the transform from a 3x3 part (columns 0..2) and a translation
+// (column 3).
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+// Builds the transform from a quaternion (converted to a Matrix3 for
+// columns 0..2) and a translation (column 3).
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 rotationPart = Matrix3( unitQuat );
+    setUpper3x3( rotationPart ).setTranslation( translateVec );
+}
+
+// Replaces column 0; returns this transform.
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{
+    Transform3 & self = *this;
+    self.mCol0 = _col0;
+    return self;
+}
+
+// Replaces column 1; returns this transform.
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{
+    Transform3 & self = *this;
+    self.mCol1 = _col1;
+    return self;
+}
+
+// Replaces column 2; returns this transform.
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{
+    Transform3 & self = *this;
+    self.mCol2 = _col2;
+    return self;
+}
+
+// Replaces column 3; returns this transform.
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{
+    Transform3 & self = *this;
+    self.mCol3 = _col3;
+    return self;
+}
+
+// Replaces the column at index `col`, addressed as an offset from mCol0.
+// The index is not range-checked.
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    Vector3 * const firstColumn = &mCol0;
+    firstColumn[col] = vec;
+    return *this;
+}
+
+// Writes one row: element i of vec goes into component `row` of column i.
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{
+    const float entry0 = vec.getElem( 0 );
+    mCol0.setElem( row, entry0 );
+    const float entry1 = vec.getElem( 1 );
+    mCol1.setElem( row, entry1 );
+    const float entry2 = vec.getElem( 2 );
+    mCol2.setElem( row, entry2 );
+    const float entry3 = vec.getElem( 3 );
+    mCol3.setElem( row, entry3 );
+    return *this;
+}
+
+// Sets a single element: reads the column, changes one component and writes
+// the column back. Returns this transform.
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+// Returns the element at the given column and row.
+inline float Transform3::getElem( int col, int row ) const
+{
+    const Vector3 column = getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column at index `col`, addressed as an offset from
+// mCol0. The index is not range-checked.
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    const Vector3 * const firstColumn = &mCol0;
+    return firstColumn[col];
+}
+
+// Returns one row as a Vector4: component `row` of columns 0, 1, 2 and 3.
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    const float entry0 = mCol0.getElem( row );
+    const float entry1 = mCol1.getElem( row );
+    const float entry2 = mCol2.getElem( row );
+    const float entry3 = mCol3.getElem( row );
+    return Vector4( entry0, entry1, entry2, entry3 );
+}
+
+// Mutable access to the column at index `col` (offset from mCol0, not
+// range-checked).
+inline Vector3 & Transform3::operator []( int col )
+{
+    Vector3 * const firstColumn = &mCol0;
+    return firstColumn[col];
+}
+
+// Read-only access: returns a copy of the column at index `col` (offset
+// from mCol0, not range-checked).
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    const Vector3 * const firstColumn = &mCol0;
+    return firstColumn[col];
+}
+
+// Assignment: copies all four columns and returns this transform.
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    Transform3 & self = *this;
+    self.mCol0 = tfrm.mCol0;
+    self.mCol1 = tfrm.mCol1;
+    self.mCol2 = tfrm.mCol2;
+    self.mCol3 = tfrm.mCol3;
+    return self;
+}
+
+// General inverse of a 3x4 transform. The 3x3 part is inverted from the
+// cross products of its columns divided by the determinant; the new
+// translation is the negated inverse 3x3 applied to the old translation.
+// A zero determinant divides by zero; this is not checked here.
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    const Vector3 cross12 = cross( tfrm.getCol1(), tfrm.getCol2() );
+    const Vector3 cross20 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    const Vector3 cross01 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    const float invDet = ( 1.0f / dot( tfrm.getCol2(), cross01 ) );
+    // The cross products are the rows of the inverse; regroup them by column.
+    const Vector3 invCol0 = Vector3( ( cross12.getX() * invDet ), ( cross20.getX() * invDet ), ( cross01.getX() * invDet ) );
+    const Vector3 invCol1 = Vector3( ( cross12.getY() * invDet ), ( cross20.getY() * invDet ), ( cross01.getY() * invDet ) );
+    const Vector3 invCol2 = Vector3( ( cross12.getZ() * invDet ), ( cross20.getZ() * invDet ), ( cross01.getZ() * invDet ) );
+    const Vector3 invTranslation = Vector3( ( -( ( invCol0 * tfrm.getCol3().getX() ) + ( ( invCol1 * tfrm.getCol3().getY() ) + ( invCol2 * tfrm.getCol3().getZ() ) ) ) ) );
+    return Transform3( invCol0, invCol1, invCol2, invTranslation );
+}
+
+// Inverse that uses the transpose of the 3x3 part as its inverse; the new
+// translation is the negated transpose applied to the old translation.
+// NOTE(review): presumably callers must pass an orthonormal 3x3 part - confirm.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    const Vector3 transposed0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
+    const Vector3 transposed1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
+    const Vector3 transposed2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
+    const Vector3 invTranslation = Vector3( ( -( ( transposed0 * tfrm.getCol3().getX() ) + ( ( transposed1 * tfrm.getCol3().getY() ) + ( transposed2 * tfrm.getCol3().getZ() ) ) ) ) );
+    return Transform3( transposed0, transposed1, transposed2, invTranslation );
+}
+
+// Applies the Vector3 absPerElem overload to each of the four columns.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+// Transforms a direction vector: only columns 0..2 are applied, the
+// translation column is not read.
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    return Vector3(
+        ( ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz ) ),
+        ( ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz ) ),
+        ( ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz ) )
+    );
+}
+
+// Transforms a point: columns 0..2 are applied and the translation column
+// is added.
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const float px = pnt.getX();
+    const float py = pnt.getY();
+    const float pz = pnt.getZ();
+    return Point3(
+        ( ( ( ( mCol0.getX() * px ) + ( mCol1.getX() * py ) ) + ( mCol2.getX() * pz ) ) + mCol3.getX() ),
+        ( ( ( ( mCol0.getY() * px ) + ( mCol1.getY() * py ) ) + ( mCol2.getY() * pz ) ) + mCol3.getY() ),
+        ( ( ( ( mCol0.getZ() * px ) + ( mCol1.getZ() * py ) ) + ( mCol2.getZ() * pz ) ) + mCol3.getZ() )
+    );
+}
+
+// Composition of two transforms. Columns 0..2 of tfrm are transformed as
+// vectors; column 3 is transformed as a point so that this transform's
+// translation is added to it.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Transform3 & lhs = *this;
+    const Vector3 out0 = ( lhs * tfrm.mCol0 );
+    const Vector3 out1 = ( lhs * tfrm.mCol1 );
+    const Vector3 out2 = ( lhs * tfrm.mCol2 );
+    const Vector3 out3 = Vector3( ( lhs * Point3( tfrm.mCol3 ) ) );
+    return Transform3( out0, out1, out2, out3 );
+}
+
+// In-place composition with tfrm on the right; returns this transform.
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    const Transform3 composed = ( *this * tfrm );
+    *this = composed;
+    return *this;
+}
+
+// Element-wise product of two transforms, delegated per column to the
+// Vector3 mulPerElem overload.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Transform with the x, y, z axis vectors as columns 0..2 and a zero
+// translation column.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 zeroTranslation = Vector3( 0.0f );
+    return Transform3( Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ), zeroTranslation );
+}
+
+// Replaces columns 0..2 with the columns of the given 3x3 matrix; column 3
+// is not written. Returns this transform.
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    Transform3 & self = *this;
+    self.mCol0 = tfrm.getCol0();
+    self.mCol1 = tfrm.getCol1();
+    self.mCol2 = tfrm.getCol2();
+    return self;
+}
+
+// Returns the 3x3 matrix formed by columns 0..2.
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    const Matrix3 upperPart = Matrix3( mCol0, mCol1, mCol2 );
+    return upperPart;
+}
+
+// Replaces column 3 (the translation); returns this transform.
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    Transform3 & self = *this;
+    self.mCol3 = translateVec;
+    return self;
+}
+
+// Returns a copy of column 3 (the translation).
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3;
+}
+
+// Rotation about the x axis by `radians`, with a zero translation column.
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const Vector3 yColumn( 0.0f, cosAngle, sinAngle );
+    const Vector3 zColumn( 0.0f, -sinAngle, cosAngle );
+    return Transform3( Vector3::xAxis( ), yColumn, zColumn, Vector3( 0.0f ) );
+}
+
+// Rotation about the y axis by `radians`, with a zero translation column.
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const Vector3 xColumn( cosAngle, 0.0f, -sinAngle );
+    const Vector3 zColumn( sinAngle, 0.0f, cosAngle );
+    return Transform3( xColumn, Vector3::yAxis( ), zColumn, Vector3( 0.0f ) );
+}
+
+// Rotation about the z axis by `radians`, with a zero translation column.
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const Vector3 xColumn( cosAngle, sinAngle, 0.0f );
+    const Vector3 yColumn( -sinAngle, cosAngle, 0.0f );
+    return Transform3( xColumn, yColumn, Vector3::zAxis( ), Vector3( 0.0f ) );
+}
+
+// Rotation built from three angles stored in radiansXYZ (x, y, z components
+// are the angles about the x, y, z axes). The column expressions match the
+// product rotationZ * rotationY * rotationX; the translation column is zero.
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    // Products reused by columns 1 and 2.
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    const Vector3 column0( ( cosZ * cosY ), ( sinZ * cosY ), -sinY );
+    const Vector3 column1( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) );
+    const Vector3 column2( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) );
+    return Transform3( column0, column1, column2, Vector3( 0.0f ) );
+}
+
+// Axis-angle rotation: wraps Matrix3::rotation with a zero translation.
+inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
+{
+    const Matrix3 axisRotation = Matrix3::rotation( radians, unitVec );
+    const Vector3 zeroTranslation = Vector3( 0.0f );
+    return Transform3( axisRotation, zeroTranslation );
+}
+
+// Rotation from a quaternion: converts it to a Matrix3 and pairs it with a
+// zero translation.
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{
+    const Matrix3 quatRotation = Matrix3( unitQuat );
+    const Vector3 zeroTranslation = Vector3( 0.0f );
+    return Transform3( quatRotation, zeroTranslation );
+}
+
+// Diagonal scale by scaleVec's x, y, z with a zero translation column.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const Vector3 xColumn( scaleVec.getX(), 0.0f, 0.0f );
+    const Vector3 yColumn( 0.0f, scaleVec.getY(), 0.0f );
+    const Vector3 zColumn( 0.0f, 0.0f, scaleVec.getZ() );
+    return Transform3( xColumn, yColumn, zColumn, Vector3( 0.0f ) );
+}
+
+// Scales columns 0, 1, 2 of tfrm by scaleVec's x, y, z respectively and
+// keeps the translation column unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 scaled0 = ( tfrm.getCol0() * scaleVec.getX( ) );
+    const Vector3 scaled1 = ( tfrm.getCol1() * scaleVec.getY( ) );
+    const Vector3 scaled2 = ( tfrm.getCol2() * scaleVec.getZ( ) );
+    return Transform3( scaled0, scaled1, scaled2, tfrm.getCol3() );
+}
+
+// Multiplies every column of tfrm, the translation included, element-wise
+// by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Transform with the x, y, z axis vectors as columns 0..2 and translateVec
+// as column 3.
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    const Vector3 xColumn = Vector3::xAxis( );
+    const Vector3 yColumn = Vector3::yAxis( );
+    const Vector3 zColumn = Vector3::zAxis( );
+    return Transform3( xColumn, yColumn, zColumn, translateVec );
+}
+
+// Column-wise selection: forwards each pair of columns and the same flag to
+// the Vector3 select overload.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 picked0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 picked1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 picked2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 picked3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the three rows of tfrm in order, one print call per
+// row.
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( tfrm.getRow( row ) );
+    }
+}
+
+// Debug helper: prints `name` followed by a colon and newline, then the rows
+// of tfrm.
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    const char * const label = name;
+    printf("%s:\n", label);
+    print( tfrm );
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm ) // Builds a quaternion from the nine elements of tfrm; presumably tfrm must be a pure rotation - not checked here, TODO confirm.
+{
+ float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+ int negTrace, ZgtX, ZgtY, YgtX;
+ int largestXorY, largestYorZ, largestZorX;
+
+ xx = tfrm.getCol0().getX(); // element names are <component><column>: e.g. yx is the y component of column 0
+ yx = tfrm.getCol0().getY();
+ zx = tfrm.getCol0().getZ();
+ xy = tfrm.getCol1().getX();
+ yy = tfrm.getCol1().getY();
+ zy = tfrm.getCol1().getZ();
+ xz = tfrm.getCol2().getX();
+ yz = tfrm.getCol2().getY();
+ zz = tfrm.getCol2().getZ();
+
+ trace = ( ( xx + yy ) + zz ); // sum of the diagonal
+
+ negTrace = ( trace < 0.0f ); // all three "largest" flags below stay 0 unless the trace is negative
+ ZgtX = zz > xx;
+ ZgtY = zz > yy;
+ YgtX = yy > xx;
+ largestXorY = ( !ZgtX || !ZgtY ) && negTrace; // zz is not the strict maximum of the diagonal
+ largestYorZ = ( YgtX || ZgtX ) && negTrace; // xx is not the maximum of the diagonal
+ largestZorX = ( ZgtY || !YgtX ) && negTrace; // yy is not the strict maximum of the diagonal
+
+ if ( largestXorY ) // sign flips applied before the radicand and the differences are formed
+ {
+ zz = -zz;
+ xy = -xy;
+ }
+ if ( largestYorZ )
+ {
+ xx = -xx;
+ yz = -yz;
+ }
+ if ( largestZorX )
+ {
+ yy = -yy;
+ zx = -zx;
+ }
+
+ radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); // uses the possibly sign-flipped diagonal
+ scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
+
+ tmpx = ( ( zy - yz ) * scale );
+ tmpy = ( ( xz - zx ) * scale );
+ tmpz = ( ( yx - xy ) * scale );
+ tmpw = ( radicand * scale ); // equals 0.5 * sqrt( radicand )
+ qx = tmpx; // default layout, used as-is when no flag is set
+ qy = tmpy;
+ qz = tmpz;
+ qw = tmpw;
+
+ if ( largestXorY ) // first permutation: reverses the component order (x<->w, y<->z)
+ {
+ qx = tmpw;
+ qy = tmpz;
+ qz = tmpy;
+ qw = tmpx;
+ }
+ if ( largestYorZ ) // second permutation, applied on top of the first: swaps x<->y and z<->w
+ {
+ tmpx = qx; // tmpx/tmpz are reused here as swap temporaries
+ tmpz = qz;
+ qx = qy;
+ qy = tmpx;
+ qz = qw;
+ qw = tmpz;
+ }
+
+ mX = qx;
+ mY = qy;
+ mZ = qz;
+ mW = qw;
+}
+
+// Outer product: column i of the result is tfrm0 scaled by component i of
+// tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 column0 = ( tfrm0 * tfrm1.getX( ) );
+    const Vector3 column1 = ( tfrm0 * tfrm1.getY( ) );
+    const Vector3 column2 = ( tfrm0 * tfrm1.getZ( ) );
+    return Matrix3( column0, column1, column2 );
+}
+
+// Outer product: column i of the result is tfrm0 scaled by component i of
+// tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 column0 = ( tfrm0 * tfrm1.getX( ) );
+    const Vector4 column1 = ( tfrm0 * tfrm1.getY( ) );
+    const Vector4 column2 = ( tfrm0 * tfrm1.getZ( ) );
+    const Vector4 column3 = ( tfrm0 * tfrm1.getW( ) );
+    return Matrix4( column0, column1, column2, column3 );
+}
+
+// Row vector times matrix: component i of the result is vec dotted with
+// column i of mat.
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    return Vector3(
+        ( ( ( vx * mat.getCol0().getX() ) + ( vy * mat.getCol0().getY() ) ) + ( vz * mat.getCol0().getZ() ) ),
+        ( ( ( vx * mat.getCol1().getX() ) + ( vy * mat.getCol1().getY() ) ) + ( vz * mat.getCol1().getZ() ) ),
+        ( ( ( vx * mat.getCol2().getX() ) + ( vy * mat.getCol2().getY() ) ) + ( vz * mat.getCol2().getZ() ) )
+    );
+}
+
+// Skew-symmetric matrix of vec: zero diagonal, with +/- the components of
+// vec off the diagonal, so that multiplying it by a vector v yields
+// cross( vec, v ).
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const Vector3 column0( 0.0f, vec.getZ(), -vec.getY() );
+    const Vector3 column1( -vec.getZ(), 0.0f, vec.getX() );
+    const Vector3 column2( vec.getY(), -vec.getX(), 0.0f );
+    return Matrix3( column0, column1, column2 );
+}
+
+// Crosses vec with each column of mat: column i of the result is
+// cross( vec, column i ).
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const Vector3 crossed0 = cross( vec, mat.getCol0() );
+    const Vector3 crossed1 = cross( vec, mat.getCol1() );
+    const Vector3 crossed2 = cross( vec, mat.getCol2() );
+    return Matrix3( crossed0, crossed1, crossed2 );
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h
new file mode 100644
index 00000000000..764e01708f9
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h
@@ -0,0 +1,433 @@
+/*
+ Copyright (C) 2009 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+// Copy constructor: duplicates the four components.
+inline Quat::Quat( const Quat & quat )
+{
+    const Quat & source = quat;
+    this->mX = source.mX;
+    this->mY = source.mY;
+    this->mZ = source.mZ;
+    this->mW = source.mW;
+}
+
+// Builds a quaternion from its four components.
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{
+    this->mX = _x;
+    this->mY = _y;
+    this->mZ = _z;
+    this->mW = _w;
+}
+
+// Builds a quaternion from an x, y, z vector part and a w component.
+inline Quat::Quat( const Vector3 & xyz, float _w )
+{
+    setXYZ( xyz ).setW( _w );
+}
+
+// Copies x, y, z, w from a Vector4.
+inline Quat::Quat( const Vector4 & vec )
+{
+    this->mX = vec.getX();
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+    this->mW = vec.getW();
+}
+
+// Sets all four components to the same scalar.
+inline Quat::Quat( float scalar )
+{
+    const float fill = scalar;
+    mX = fill;
+    mY = fill;
+    mZ = fill;
+    mW = fill;
+}
+
+// Returns the quaternion ( 0, 0, 0, 1 ).
+inline const Quat Quat::identity( )
+{
+    const Quat unit = Quat( 0.0f, 0.0f, 0.0f, 1.0f );
+    return unit;
+}
+
+// Linear interpolation: quat0 + ( quat1 - quat0 ) * t. The result is not
+// renormalized.
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
+{
+    const Quat delta = ( quat1 - quat0 );
+    return ( quat0 + ( delta * t ) );
+}
+
+// Spherical linear interpolation between two quaternions.
+// When the dot product is negative, unitQuat0 is negated first so that the
+// interpolation runs along the shorter arc. When the (non-negative) cosine
+// is not below _VECTORMATH_SLERP_TOL, plain linear weights are used instead
+// of the sine-based ones.
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{
+    float cosAngle = dot( unitQuat0, unitQuat1 );
+    const bool flipStart = ( cosAngle < 0.0f );
+    const Quat start = flipStart ? ( -unitQuat0 ) : unitQuat0;
+    if ( flipStart ) {
+        cosAngle = -cosAngle;
+    }
+
+    // Linear weights by default; replaced below when the angle is large enough.
+    float weight0 = ( 1.0f - t );
+    float weight1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float invSinAngle = ( 1.0f / sinf( angle ) );
+        weight0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * invSinAngle );
+        weight1 = ( sinf( ( t * angle ) ) * invSinAngle );
+    }
+    return ( ( start * weight0 ) + ( unitQuat1 * weight1 ) );
+}
+
+// Spherical quadrangle interpolation: slerps the outer pair (0, 3) and the
+// inner pair (1, 2) at t, then slerps between those results at 2t( 1 - t ).
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{
+    const Quat outerBlend = slerp( t, unitQuat0, unitQuat3 );
+    const Quat innerBlend = slerp( t, unitQuat1, unitQuat2 );
+    const float blendParam = ( ( 2.0f * t ) * ( 1.0f - t ) );
+    return slerp( blendParam, outerBlend, innerBlend );
+}
+
+// Loads x, y, z, w from the first four floats at fptr.
+inline void loadXYZW( Quat & quat, const float * fptr )
+{
+    const float x = fptr[0];
+    const float y = fptr[1];
+    const float z = fptr[2];
+    const float w = fptr[3];
+    quat = Quat( x, y, z, w );
+}
+
+// Stores x, y, z, w into the first four floats at fptr.
+inline void storeXYZW( const Quat & quat, float * fptr )
+{
+    float * out = fptr;
+    out[0] = quat.getX();
+    out[1] = quat.getY();
+    out[2] = quat.getZ();
+    out[3] = quat.getW();
+}
+
+// Assignment: copies the four components and returns this quaternion.
+inline Quat & Quat::operator =( const Quat & quat )
+{
+    Quat & self = *this;
+    self.mX = quat.mX;
+    self.mY = quat.mY;
+    self.mZ = quat.mZ;
+    self.mW = quat.mW;
+    return self;
+}
+
+// Overwrites x, y, z from vec; w is not written. Returns this quaternion.
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{
+    Quat & self = *this;
+    self.mX = vec.getX();
+    self.mY = vec.getY();
+    self.mZ = vec.getZ();
+    return self;
+}
+
+// Returns the x, y, z components as a Vector3.
+inline const Vector3 Quat::getXYZ( ) const
+{
+    const Vector3 vectorPart = Vector3( mX, mY, mZ );
+    return vectorPart;
+}
+
+// Sets the x component; returns this quaternion.
+inline Quat & Quat::setX( float _x )
+{
+    Quat & self = *this;
+    self.mX = _x;
+    return self;
+}
+
+// Returns the x component.
+inline float Quat::getX( ) const
+{
+    return this->mX;
+}
+
+// Sets the y component; returns this quaternion.
+inline Quat & Quat::setY( float _y )
+{
+    Quat & self = *this;
+    self.mY = _y;
+    return self;
+}
+
+// Returns the y component.
+inline float Quat::getY( ) const
+{
+    return this->mY;
+}
+
+// Sets the z component; returns this quaternion.
+inline Quat & Quat::setZ( float _z )
+{
+    Quat & self = *this;
+    self.mZ = _z;
+    return self;
+}
+
+// Returns the z component.
+inline float Quat::getZ( ) const
+{
+    return this->mZ;
+}
+
+// Sets the w component; returns this quaternion.
+inline Quat & Quat::setW( float _w )
+{
+    Quat & self = *this;
+    self.mW = _w;
+    return self;
+}
+
+// Returns the w component.
+inline float Quat::getW( ) const
+{
+    return this->mW;
+}
+
+// Sets the component at index `idx`, addressed as an offset from mX.
+// The index is not range-checked.
+inline Quat & Quat::setElem( int idx, float value )
+{
+    float * const components = &mX;
+    components[idx] = value;
+    return *this;
+}
+
+// Returns the component at index `idx`, addressed as an offset from mX.
+// The index is not range-checked.
+inline float Quat::getElem( int idx ) const
+{
+    const float * const components = &mX;
+    return components[idx];
+}
+
+// Mutable access to the component at index `idx` (offset from mX, not
+// range-checked).
+inline float & Quat::operator []( int idx )
+{
+    float * const components = &mX;
+    return components[idx];
+}
+
+// Read-only access to the component at index `idx` (offset from mX, not
+// range-checked).
+inline float Quat::operator []( int idx ) const
+{
+    const float * const components = &mX;
+    return components[idx];
+}
+
+// Component-wise sum of two quaternions.
+inline const Quat Quat::operator +( const Quat & quat ) const
+{
+    const float sumX = ( mX + quat.mX );
+    const float sumY = ( mY + quat.mY );
+    const float sumZ = ( mZ + quat.mZ );
+    const float sumW = ( mW + quat.mW );
+    return Quat( sumX, sumY, sumZ, sumW );
+}
+
+// Component-wise difference (this minus quat).
+inline const Quat Quat::operator -( const Quat & quat ) const
+{
+    const float diffX = ( mX - quat.mX );
+    const float diffY = ( mY - quat.mY );
+    const float diffZ = ( mZ - quat.mZ );
+    const float diffW = ( mW - quat.mW );
+    return Quat( diffX, diffY, diffZ, diffW );
+}
+
+// Multiplies every component by the scalar.
+inline const Quat Quat::operator *( float scalar ) const
+{
+    const float scaledX = ( mX * scalar );
+    const float scaledY = ( mY * scalar );
+    const float scaledZ = ( mZ * scalar );
+    const float scaledW = ( mW * scalar );
+    return Quat( scaledX, scaledY, scaledZ, scaledW );
+}
+
+// In-place component-wise addition; returns this quaternion.
+inline Quat & Quat::operator +=( const Quat & quat )
+{
+    const Quat sum = ( *this + quat );
+    *this = sum;
+    return *this;
+}
+
+// In-place component-wise subtraction; returns this quaternion.
+inline Quat & Quat::operator -=( const Quat & quat )
+{
+    const Quat difference = ( *this - quat );
+    *this = difference;
+    return *this;
+}
+
+// In-place scalar multiplication; returns this quaternion.
+inline Quat & Quat::operator *=( float scalar )
+{
+    const Quat scaled = ( *this * scalar );
+    *this = scaled;
+    return *this;
+}
+
+// Divides every component by the scalar. Each component is divided
+// directly; no reciprocal is precomputed. A zero scalar is not checked.
+inline const Quat Quat::operator /( float scalar ) const
+{
+    const float dividedX = ( mX / scalar );
+    const float dividedY = ( mY / scalar );
+    const float dividedZ = ( mZ / scalar );
+    const float dividedW = ( mW / scalar );
+    return Quat( dividedX, dividedY, dividedZ, dividedW );
+}
+
+// In-place scalar division; returns this quaternion.
+inline Quat & Quat::operator /=( float scalar )
+{
+    const Quat divided = ( *this / scalar );
+    *this = divided;
+    return *this;
+}
+
+// Negation: every component changes sign.
+inline const Quat Quat::operator -( ) const
+{
+    const float negX = -mX;
+    const float negY = -mY;
+    const float negZ = -mZ;
+    const float negW = -mW;
+    return Quat( negX, negY, negZ, negW );
+}
+
+// Scalar-on-the-left form; forwards to Quat::operator *( float ).
+inline const Quat operator *( float scalar, const Quat & quat )
+{
+    const Quat scaled = ( quat * scalar );
+    return scaled;
+}
+
+// Four-component dot product, accumulated in x, y, z, w order.
+inline float dot( const Quat & quat0, const Quat & quat1 )
+{
+    const float xTerm = ( quat0.getX() * quat1.getX() );
+    const float yTerm = ( quat0.getY() * quat1.getY() );
+    const float zTerm = ( quat0.getZ() * quat1.getZ() );
+    const float wTerm = ( quat0.getW() * quat1.getW() );
+    return ( ( ( xTerm + yTerm ) + zTerm ) + wTerm );
+}
+
+// Compute the sum of the squared components of a quaternion
+// NOTE: no square root is taken here; length() applies sqrtf to this value.
+//
+inline float norm( const Quat & quat )
+{
+    float sumSq = quat.getX() * quat.getX();
+    sumSq += quat.getY() * quat.getY();
+    sumSq += quat.getZ() * quat.getZ();
+    sumSq += quat.getW() * quat.getW();
+    return sumSq;
+}
+
+// Compute the length of a quaternion (square root of norm)
+//
+inline float length( const Quat & quat )
+{
+    const float sumSq = norm( quat );
+    return ::sqrtf( sumSq );
+}
+
+// Scale a quaternion by the reciprocal of its length
+// NOTE: there is no guard for a zero-length input; the reciprocal is taken unconditionally.
+//
+inline const Quat normalize( const Quat & quat )
+{
+    const float invLen = ( 1.0f / sqrtf( norm( quat ) ) );
+    const float nx = ( quat.getX() * invLen );
+    const float ny = ( quat.getY() * invLen );
+    const float nz = ( quat.getZ() * invLen );
+    const float nw = ( quat.getW() * invLen );
+    return Quat( nx, ny, nz, nw );
+}
+
+// Construct a quaternion that rotates unitVec0 onto unitVec1.
+// The general case builds the quaternion from cross( unitVec0, unitVec1 ) and
+// sqrtf( 2 * ( 1 + dot( unitVec0, unitVec1 ) ) ), which is twice the cosine of the half angle.
+// When the inputs are (nearly) opposite, that scalar is zero -- or the sqrtf argument is
+// slightly negative from rounding -- and the general formula would divide by zero or
+// produce NaN. In that case a half-turn about an axis perpendicular to unitVec0 is returned.
+// NOTE: as the parameter names indicate, both inputs are assumed to be unit length.
+//
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    const float cosHalfAngleX2Sqr = ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) );
+    if ( cosHalfAngleX2Sqr < 1.0e-6f ) {
+        // Pick a reference axis that cannot be parallel to unitVec0, so the cross
+        // product below is well away from zero before it is normalized.
+        const Vector3 reference = ( fabsf( unitVec0.getX() ) < 0.5f )? Vector3::xAxis() : Vector3::yAxis();
+        // w == cos( pi / 2 ) == 0 for a rotation by pi
+        return Quat( normalize( cross( unitVec0, reference ) ), 0.0f );
+    }
+    const float cosHalfAngleX2 = sqrtf( cosHalfAngleX2Sqr );
+    const float recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
+    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
+}
+
+// Construct a quaternion to rotate around a unit-length 3-D vector
+// The vector part is unitVec * sin( radians / 2 ) and the scalar part is cos( radians / 2 ).
+//
+inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
+{
+    const float halfAngle = ( radians * 0.5f );
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( ( unitVec * sinHalf ), cosHalf );
+}
+
+// Construct a quaternion to rotate around the x axis: ( sin( radians / 2 ), 0, 0, cos( radians / 2 ) )
+//
+inline const Quat Quat::rotationX( float radians )
+{
+    const float halfAngle = ( radians * 0.5f );
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( sinHalf, 0.0f, 0.0f, cosHalf );
+}
+
+// Construct a quaternion to rotate around the y axis: ( 0, sin( radians / 2 ), 0, cos( radians / 2 ) )
+//
+inline const Quat Quat::rotationY( float radians )
+{
+    const float halfAngle = ( radians * 0.5f );
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( 0.0f, sinHalf, 0.0f, cosHalf );
+}
+
+// Construct a quaternion to rotate around the z axis: ( 0, 0, sin( radians / 2 ), cos( radians / 2 ) )
+//
+inline const Quat Quat::rotationZ( float radians )
+{
+    const float halfAngle = ( radians * 0.5f );
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( 0.0f, 0.0f, sinHalf, cosHalf );
+}
+
+// Multiply two quaternions (this quaternion on the left, quat on the right)
+//
+inline const Quat Quat::operator *( const Quat & quat ) const
+{
+    // Components of the right-hand operand
+    const float rx = quat.mX;
+    const float ry = quat.mY;
+    const float rz = quat.mZ;
+    const float rw = quat.mW;
+    // Each product component is accumulated left to right in a fixed order
+    const float px = ( ( ( ( mW * rx ) + ( mX * rw ) ) + ( mY * rz ) ) - ( mZ * ry ) );
+    const float py = ( ( ( ( mW * ry ) + ( mY * rw ) ) + ( mZ * rx ) ) - ( mX * rz ) );
+    const float pz = ( ( ( ( mW * rz ) + ( mZ * rw ) ) + ( mX * ry ) ) - ( mY * rx ) );
+    const float pw = ( ( ( ( mW * rw ) - ( mX * rx ) ) - ( mY * ry ) ) - ( mZ * rz ) );
+    return Quat( px, py, pz, pw );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{
+ *this = *this * quat;
+ return *this;
+}
+
+// Use a quaternion to rotate a 3-D vector
+// Computed in two stages: an intermediate 4-component product of quat with vec,
+// then a second product of that intermediate with the negated vector part of quat.
+// NOTE(review): this matches q * v * conj(q), which is a pure rotation only for a unit-length quat -- confirm against callers.
+//
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{
+    const float qx = quat.getX();
+    const float qy = quat.getY();
+    const float qz = quat.getZ();
+    const float qw = quat.getW();
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    // Stage 1: intermediate product
+    const float ix = ( ( ( qw * vx ) + ( qy * vz ) ) - ( qz * vy ) );
+    const float iy = ( ( ( qw * vy ) + ( qz * vx ) ) - ( qx * vz ) );
+    const float iz = ( ( ( qw * vz ) + ( qx * vy ) ) - ( qy * vx ) );
+    const float iw = ( ( ( qx * vx ) + ( qy * vy ) ) + ( qz * vz ) );
+    // Stage 2: combine the intermediate with quat again
+    const float outX = ( ( ( ( iw * qx ) + ( ix * qw ) ) - ( iy * qz ) ) + ( iz * qy ) );
+    const float outY = ( ( ( ( iw * qy ) + ( iy * qw ) ) - ( iz * qx ) ) + ( ix * qz ) );
+    const float outZ = ( ( ( ( iw * qz ) + ( iz * qw ) ) - ( ix * qy ) ) + ( iy * qx ) );
+    return Vector3( outX, outY, outZ );
+}
+
+// Compute the conjugate of a quaternion: x, y and z are negated, w is kept
+//
+inline const Quat conj( const Quat & quat )
+{
+    const float negX = -quat.getX();
+    const float negY = -quat.getY();
+    const float negZ = -quat.getZ();
+    return Quat( negX, negY, negZ, quat.getW() );
+}
+
+// Conditionally select between two quaternions
+// Returns a copy of quat1 when select1 is true, otherwise a copy of quat0.
+//
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
+{
+    const Quat & chosen = ( select1 )? quat1 : quat0;
+    return Quat( chosen.getX(), chosen.getY(), chosen.getZ(), chosen.getW() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )
+{
+ printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
+}
+
+inline void print( const Quat & quat, const char * name )
+{
+ printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h
new file mode 100644
index 00000000000..46d4d6b3e5c
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h
@@ -0,0 +1,1426 @@
+/*
+ Copyright (C) 2009 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Vector3::Vector3( const Vector3 & vec )
+{
+ mX = vec.mX;
+ mY = vec.mY;
+ mZ = vec.mZ;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{
+ mX = _x;
+ mY = _y;
+ mZ = _z;
+}
+
+inline Vector3::Vector3( const Point3 & pnt )
+{
+ mX = pnt.getX();
+ mY = pnt.getY();
+ mZ = pnt.getZ();
+}
+
+inline Vector3::Vector3( float scalar )
+{
+ mX = scalar;
+ mY = scalar;
+ mZ = scalar;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{
+ return Vector3( 1.0f, 0.0f, 0.0f );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{
+ return Vector3( 0.0f, 1.0f, 0.0f );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{
+ return Vector3( 0.0f, 0.0f, 1.0f );
+}
+
+// Linear interpolation between two 3-D vectors: vec0 + ( vec1 - vec0 ) * t
+// NOTE: t is not clamped.
+//
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const Vector3 delta = ( vec1 - vec0 );
+    return ( vec0 + ( delta * t ) );
+}
+
+// Spherical linear interpolation between two 3-D vectors
+// When the dot product of the inputs is at or above _VECTORMATH_SLERP_TOL, the
+// weights fall back to plain linear ones ( 1 - t, t ) and sinf( angle ) is never divided by.
+// NOTE: t is not clamped; the parameter names indicate unit-length inputs are expected.
+//
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    const float cosAngle = dot( unitVec0, unitVec1 );
+    // Start from the linear weights; replaced below when the angle is large enough
+    float weight0 = ( 1.0f - t );
+    float weight1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = ( 1.0f / sinf( angle ) );
+        weight0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        weight1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    }
+    return ( ( unitVec0 * weight0 ) + ( unitVec1 * weight1 ) );
+}
+
+inline void loadXYZ( Vector3 & vec, const float * fptr )
+{
+ vec = Vector3( fptr[0], fptr[1], fptr[2] );
+}
+
+inline void storeXYZ( const Vector3 & vec, float * fptr )
+{
+ fptr[0] = vec.getX();
+ fptr[1] = vec.getY();
+ fptr[2] = vec.getZ();
+}
+
+// Load three-half-floats as a 3-D vector
+// Each 16-bit value is split into 1 sign bit, 5 exponent bits and 10 mantissa bits
+// and re-packed as a 32-bit float bit pattern (8 exponent bits, 23 mantissa bits).
+// NOTE: a zero exponent field (zero or denormal) loads as signed zero, and an
+// all-ones exponent field (infinity or NaN) loads as signed infinity.
+//
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 3; ++i) {
+        const unsigned int half = hfptr[i];
+        const unsigned int signBit = half >> 15;
+        unsigned int expBits = (half >> 10) & 0x1Fu;
+        unsigned int fracBits = half & 0x3FFu;
+
+        if (expBits == 0u) {
+            // zero or denorm -> zero
+            fracBits = 0u;
+        } else if (expBits == 31u) {
+            // infinity or nan -> infinity
+            expBits = 255u;
+            fracBits = 0u;
+        } else {
+            // normal number: re-bias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits)
+            expBits += 127 - 15;
+            fracBits <<= 13;
+        }
+
+        Data32 bits;
+        bits.u32 = (signBit << 31) | (expBits << 23) | fracBits;
+        vec[i] = bits.f32;
+    }
+}
+
+// Store a 3-D vector as half-floats
+// Each element's 32-bit pattern is split into sign, 8-bit exponent and 23-bit mantissa
+// and re-packed into 16 bits (1 sign, 5 exponent, 10 mantissa).
+// NOTE: the mantissa is shifted right (truncated, no rounding). Values whose exponent
+// field is at or below 127 - 15 (including zero and denormals) store as signed zero;
+// values whose exponent field is at or above 127 - 15 + 31 (including infinity and NaN)
+// store as signed infinity.
+//
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 3; ++i) {
+        Data32 bits;
+        bits.f32 = vec[i];
+
+        const unsigned int signBit = bits.u32 >> 31;
+        unsigned int expBits = (bits.u32 >> 23) & 0xFFu;
+        unsigned int fracBits = bits.u32 & 0x7FFFFFu;
+
+        if (expBits <= 127 - 15) {
+            // zero, denorm or underflow -> zero
+            expBits = 0u;
+            fracBits = 0u;
+        } else if (expBits >= 127 - 15 + 31) {
+            // overflow, infinity or nan -> infinity
+            expBits = 31u;
+            fracBits = 0u;
+        } else {
+            // representable: re-bias the exponent (127 -> 15) and narrow the mantissa (23 -> 10 bits)
+            expBits -= 127 - 15;
+            fracBits >>= 13;
+        }
+
+        hfptr[i] = (unsigned short)((signBit << 15) | (expBits << 10) | fracBits);
+    }
+}
+
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{
+ mX = vec.mX;
+ mY = vec.mY;
+ mZ = vec.mZ;
+ return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{
+ mX = _x;
+ return *this;
+}
+
+inline float Vector3::getX( ) const
+{
+ return mX;
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{
+ mY = _y;
+ return *this;
+}
+
+inline float Vector3::getY( ) const
+{
+ return mY;
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{
+ mZ = _z;
+ return *this;
+}
+
+inline float Vector3::getZ( ) const
+{
+ return mZ;
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+ *(&mX + idx) = value;
+ return *this;
+}
+
+inline float Vector3::getElem( int idx ) const
+{
+ return *(&mX + idx);
+}
+
+inline float & Vector3::operator []( int idx )
+{
+ return *(&mX + idx);
+}
+
+inline float Vector3::operator []( int idx ) const
+{
+ return *(&mX + idx);
+}
+
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{
+ return Vector3(
+ ( mX + vec.mX ),
+ ( mY + vec.mY ),
+ ( mZ + vec.mZ )
+ );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{
+ return Vector3(
+ ( mX - vec.mX ),
+ ( mY - vec.mY ),
+ ( mZ - vec.mZ )
+ );
+}
+
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{
+ return Point3(
+ ( mX + pnt.getX() ),
+ ( mY + pnt.getY() ),
+ ( mZ + pnt.getZ() )
+ );
+}
+
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+ return Vector3(
+ ( mX * scalar ),
+ ( mY * scalar ),
+ ( mZ * scalar )
+ );
+}
+
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{
+ *this = *this + vec;
+ return *this;
+}
+
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{
+ *this = *this - vec;
+ return *this;
+}
+
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+ *this = *this * scalar;
+ return *this;
+}
+
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+ return Vector3(
+ ( mX / scalar ),
+ ( mY / scalar ),
+ ( mZ / scalar )
+ );
+}
+
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+ *this = *this / scalar;
+ return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{
+ return Vector3(
+ -mX,
+ -mY,
+ -mZ
+ );
+}
+
+inline const Vector3 operator *( float scalar, const Vector3 & vec )
+{
+ return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+ return Vector3(
+ ( vec0.getX() * vec1.getX() ),
+ ( vec0.getY() * vec1.getY() ),
+ ( vec0.getZ() * vec1.getZ() )
+ );
+}
+
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+ return Vector3(
+ ( vec0.getX() / vec1.getX() ),
+ ( vec0.getY() / vec1.getY() ),
+ ( vec0.getZ() / vec1.getZ() )
+ );
+}
+
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{
+ return Vector3(
+ ( 1.0f / vec.getX() ),
+ ( 1.0f / vec.getY() ),
+ ( 1.0f / vec.getZ() )
+ );
+}
+
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{
+ return Vector3(
+ sqrtf( vec.getX() ),
+ sqrtf( vec.getY() ),
+ sqrtf( vec.getZ() )
+ );
+}
+
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{
+ return Vector3(
+ ( 1.0f / sqrtf( vec.getX() ) ),
+ ( 1.0f / sqrtf( vec.getY() ) ),
+ ( 1.0f / sqrtf( vec.getZ() ) )
+ );
+}
+
+inline const Vector3 absPerElem( const Vector3 & vec )
+{
+ return Vector3(
+ fabsf( vec.getX() ),
+ fabsf( vec.getY() ),
+ fabsf( vec.getZ() )
+ );
+}
+
+// Copy sign from one 3-D vector to another, per element
+// Each result element has the magnitude of vec0's element and is negative exactly
+// when vec1's element compares less than 0.0f.
+// NOTE: because the test is "< 0.0f", a negative zero or NaN in vec1 yields a non-negative result.
+//
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float magX = fabsf( vec0.getX() );
+    const float magY = fabsf( vec0.getY() );
+    const float magZ = fabsf( vec0.getZ() );
+    return Vector3(
+        ( vec1.getX() < 0.0f )? -magX : magX,
+        ( vec1.getY() < 0.0f )? -magY : magY,
+        ( vec1.getZ() < 0.0f )? -magZ : magZ
+    );
+}
+
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+ return Vector3(
+ (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
+ (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
+ (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ()
+ );
+}
+
+// Maximum element of a 3-D vector
+//
+inline float maxElem( const Vector3 & vec )
+{
+    float best = vec.getX();
+    // Written as a negated "greater than" so y is taken whenever x > y is false
+    if ( !( best > vec.getY() ) ) {
+        best = vec.getY();
+    }
+    if ( vec.getZ() > best ) {
+        best = vec.getZ();
+    }
+    return best;
+}
+
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+ return Vector3(
+ (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
+ (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
+ (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ()
+ );
+}
+
+// Minimum element of a 3-D vector
+//
+inline float minElem( const Vector3 & vec )
+{
+    float best = vec.getX();
+    // Written as a negated "less than" so y is taken whenever x < y is false
+    if ( !( best < vec.getY() ) ) {
+        best = vec.getY();
+    }
+    if ( vec.getZ() < best ) {
+        best = vec.getZ();
+    }
+    return best;
+}
+
+// Compute the sum of all elements of a 3-D vector, added in x, y, z order
+//
+inline float sum( const Vector3 & vec )
+{
+    float total = vec.getX();
+    total += vec.getY();
+    total += vec.getZ();
+    return total;
+}
+
+// Compute the dot product of two 3-D vectors
+//
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    float acc = vec0.getX() * vec1.getX();
+    acc += vec0.getY() * vec1.getY();
+    acc += vec0.getZ() * vec1.getZ();
+    return acc;
+}
+
+// Compute the square of the length of a 3-D vector
+//
+inline float lengthSqr( const Vector3 & vec )
+{
+    float sumSq = vec.getX() * vec.getX();
+    sumSq += vec.getY() * vec.getY();
+    sumSq += vec.getZ() * vec.getZ();
+    return sumSq;
+}
+
+inline float length( const Vector3 & vec )
+{
+ return ::sqrtf( lengthSqr( vec ) );
+}
+
+// Scale a 3-D vector by the reciprocal of its length
+// NOTE: there is no guard for a zero-length input; the reciprocal is taken unconditionally.
+//
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    const float invLen = ( 1.0f / sqrtf( lengthSqr( vec ) ) );
+    const float nx = ( vec.getX() * invLen );
+    const float ny = ( vec.getY() * invLen );
+    const float nz = ( vec.getZ() * invLen );
+    return Vector3( nx, ny, nz );
+}
+
+// Compute cross product of two 3-D vectors
+//
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float ax = vec0.getX(), ay = vec0.getY(), az = vec0.getZ();
+    const float bx = vec1.getX(), by = vec1.getY(), bz = vec1.getZ();
+    const float cx = ( ( ay * bz ) - ( az * by ) );
+    const float cy = ( ( az * bx ) - ( ax * bz ) );
+    const float cz = ( ( ax * by ) - ( ay * bx ) );
+    return Vector3( cx, cy, cz );
+}
+
+// Conditionally select between two 3-D vectors
+// Returns a copy of vec1 when select1 is true, otherwise a copy of vec0.
+//
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
+{
+    const Vector3 & chosen = ( select1 )? vec1 : vec0;
+    return Vector3( chosen.getX(), chosen.getY(), chosen.getZ() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector3 & vec )
+{
+ printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() );
+}
+
+inline void print( const Vector3 & vec, const char * name )
+{
+ printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() );
+}
+
+#endif
+
+inline Vector4::Vector4( const Vector4 & vec )
+{
+ mX = vec.mX;
+ mY = vec.mY;
+ mZ = vec.mZ;
+ mW = vec.mW;
+}
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+{
+ mX = _x;
+ mY = _y;
+ mZ = _z;
+ mW = _w;
+}
+
+// Construct a 4-D vector from a 3-D vector and a scalar
+// x, y and z are copied from xyz; w is set to _w.
+//
+inline Vector4::Vector4( const Vector3 & xyz, float _w )
+{
+    mX = xyz.getX();
+    mY = xyz.getY();
+    mZ = xyz.getZ();
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & vec )
+{
+ mX = vec.getX();
+ mY = vec.getY();
+ mZ = vec.getZ();
+ mW = 0.0f;
+}
+
+inline Vector4::Vector4( const Point3 & pnt )
+{
+ mX = pnt.getX();
+ mY = pnt.getY();
+ mZ = pnt.getZ();
+ mW = 1.0f;
+}
+
+inline Vector4::Vector4( const Quat & quat )
+{
+ mX = quat.getX();
+ mY = quat.getY();
+ mZ = quat.getZ();
+ mW = quat.getW();
+}
+
+inline Vector4::Vector4( float scalar )
+{
+ mX = scalar;
+ mY = scalar;
+ mZ = scalar;
+ mW = scalar;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+ return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+ return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+ return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+ return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
+{
+ return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+// Spherical linear interpolation between two 4-D vectors
+// When the dot product of the inputs is at or above _VECTORMATH_SLERP_TOL, the
+// weights fall back to plain linear ones ( 1 - t, t ) and sinf( angle ) is never divided by.
+// NOTE: t is not clamped; the parameter names indicate unit-length inputs are expected.
+//
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{
+    const float cosAngle = dot( unitVec0, unitVec1 );
+    // Start from the linear weights; replaced below when the angle is large enough
+    float weight0 = ( 1.0f - t );
+    float weight1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = ( 1.0f / sinf( angle ) );
+        weight0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        weight1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    }
+    return ( ( unitVec0 * weight0 ) + ( unitVec1 * weight1 ) );
+}
+
+inline void loadXYZW( Vector4 & vec, const float * fptr )
+{
+ vec = Vector4( fptr[0], fptr[1], fptr[2], fptr[3] );
+}
+
+inline void storeXYZW( const Vector4 & vec, float * fptr )
+{
+ fptr[0] = vec.getX();
+ fptr[1] = vec.getY();
+ fptr[2] = vec.getZ();
+ fptr[3] = vec.getW();
+}
+
+// Load four-half-floats as a 4-D vector
+// Each 16-bit value is split into 1 sign bit, 5 exponent bits and 10 mantissa bits
+// and re-packed as a 32-bit float bit pattern (8 exponent bits, 23 mantissa bits).
+// NOTE: a zero exponent field (zero or denormal) loads as signed zero, and an
+// all-ones exponent field (infinity or NaN) loads as signed infinity.
+//
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 4; ++i) {
+        const unsigned int half = hfptr[i];
+        const unsigned int signBit = half >> 15;
+        unsigned int expBits = (half >> 10) & 0x1Fu;
+        unsigned int fracBits = half & 0x3FFu;
+
+        if (expBits == 0u) {
+            // zero or denorm -> zero
+            fracBits = 0u;
+        } else if (expBits == 31u) {
+            // infinity or nan -> infinity
+            expBits = 255u;
+            fracBits = 0u;
+        } else {
+            // normal number: re-bias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits)
+            expBits += 127 - 15;
+            fracBits <<= 13;
+        }
+
+        Data32 bits;
+        bits.u32 = (signBit << 31) | (expBits << 23) | fracBits;
+        vec[i] = bits.f32;
+    }
+}
+
+// Store a 4-D vector as half-floats
+// Each element's 32-bit pattern is split into sign, 8-bit exponent and 23-bit mantissa
+// and re-packed into 16 bits (1 sign, 5 exponent, 10 mantissa).
+// NOTE: the mantissa is shifted right (truncated, no rounding). Values whose exponent
+// field is at or below 127 - 15 (including zero and denormals) store as signed zero;
+// values whose exponent field is at or above 127 - 15 + 31 (including infinity and NaN)
+// store as signed infinity.
+//
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 4; ++i) {
+        Data32 bits;
+        bits.f32 = vec[i];
+
+        const unsigned int signBit = bits.u32 >> 31;
+        unsigned int expBits = (bits.u32 >> 23) & 0xFFu;
+        unsigned int fracBits = bits.u32 & 0x7FFFFFu;
+
+        if (expBits <= 127 - 15) {
+            // zero, denorm or underflow -> zero
+            expBits = 0u;
+            fracBits = 0u;
+        } else if (expBits >= 127 - 15 + 31) {
+            // overflow, infinity or nan -> infinity
+            expBits = 31u;
+            fracBits = 0u;
+        } else {
+            // representable: re-bias the exponent (127 -> 15) and narrow the mantissa (23 -> 10 bits)
+            expBits -= 127 - 15;
+            fracBits >>= 13;
+        }
+
+        hfptr[i] = (unsigned short)((signBit << 15) | (expBits << 10) | fracBits);
+    }
+}
+
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{
+ mX = vec.mX;
+ mY = vec.mY;
+ mZ = vec.mZ;
+ mW = vec.mW;
+ return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{
+ mX = vec.getX();
+ mY = vec.getY();
+ mZ = vec.getZ();
+ return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+ return Vector3( mX, mY, mZ );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{
+ mX = _x;
+ return *this;
+}
+
+inline float Vector4::getX( ) const
+{
+ return mX;
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{
+ mY = _y;
+ return *this;
+}
+
+inline float Vector4::getY( ) const
+{
+ return mY;
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{
+ mZ = _z;
+ return *this;
+}
+
+inline float Vector4::getZ( ) const
+{
+ return mZ;
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{
+ mW = _w;
+ return *this;
+}
+
+inline float Vector4::getW( ) const
+{
+ return mW;
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+ *(&mX + idx) = value;
+ return *this;
+}
+
+inline float Vector4::getElem( int idx ) const
+{
+ return *(&mX + idx);
+}
+
+inline float & Vector4::operator []( int idx )
+{
+ return *(&mX + idx);
+}
+
+inline float Vector4::operator []( int idx ) const
+{
+ return *(&mX + idx);
+}
+
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{
+ return Vector4(
+ ( mX + vec.mX ),
+ ( mY + vec.mY ),
+ ( mZ + vec.mZ ),
+ ( mW + vec.mW )
+ );
+}
+
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{
+ return Vector4(
+ ( mX - vec.mX ),
+ ( mY - vec.mY ),
+ ( mZ - vec.mZ ),
+ ( mW - vec.mW )
+ );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+ return Vector4(
+ ( mX * scalar ),
+ ( mY * scalar ),
+ ( mZ * scalar ),
+ ( mW * scalar )
+ );
+}
+
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{
+ *this = *this + vec;
+ return *this;
+}
+
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{
+ *this = *this - vec;
+ return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+ *this = *this * scalar;
+ return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+ return Vector4(
+ ( mX / scalar ),
+ ( mY / scalar ),
+ ( mZ / scalar ),
+ ( mW / scalar )
+ );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+ *this = *this / scalar;
+ return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+ return Vector4(
+ -mX,
+ -mY,
+ -mZ,
+ -mW
+ );
+}
+
+inline const Vector4 operator *( float scalar, const Vector4 & vec )
+{
+ return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+ return Vector4(
+ ( vec0.getX() * vec1.getX() ),
+ ( vec0.getY() * vec1.getY() ),
+ ( vec0.getZ() * vec1.getZ() ),
+ ( vec0.getW() * vec1.getW() )
+ );
+}
+
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+ return Vector4(
+ ( vec0.getX() / vec1.getX() ),
+ ( vec0.getY() / vec1.getY() ),
+ ( vec0.getZ() / vec1.getZ() ),
+ ( vec0.getW() / vec1.getW() )
+ );
+}
+
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+ return Vector4(
+ ( 1.0f / vec.getX() ),
+ ( 1.0f / vec.getY() ),
+ ( 1.0f / vec.getZ() ),
+ ( 1.0f / vec.getW() )
+ );
+}
+
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+ return Vector4(
+ sqrtf( vec.getX() ),
+ sqrtf( vec.getY() ),
+ sqrtf( vec.getZ() ),
+ sqrtf( vec.getW() )
+ );
+}
+
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+ return Vector4(
+ ( 1.0f / sqrtf( vec.getX() ) ),
+ ( 1.0f / sqrtf( vec.getY() ) ),
+ ( 1.0f / sqrtf( vec.getZ() ) ),
+ ( 1.0f / sqrtf( vec.getW() ) )
+ );
+}
+
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+ return Vector4(
+ fabsf( vec.getX() ),
+ fabsf( vec.getY() ),
+ fabsf( vec.getZ() ),
+ fabsf( vec.getW() )
+ );
+}
+
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+ return Vector4(
+ ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
+ ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
+ ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ),
+ ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() )
+ );
+}
+
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+ return Vector4(
+ (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
+ (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
+ (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(),
+ (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW()
+ );
+}
+
+// Maximum element of a 4-D vector
+//
+inline float maxElem( const Vector4 & vec )
+{
+    float best = vec.getX();
+    // Written as a negated "greater than" so y is taken whenever x > y is false
+    if ( !( best > vec.getY() ) ) {
+        best = vec.getY();
+    }
+    if ( vec.getZ() > best ) {
+        best = vec.getZ();
+    }
+    if ( vec.getW() > best ) {
+        best = vec.getW();
+    }
+    return best;
+}
+
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+ return Vector4(
+ (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
+ (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
+ (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(),
+ (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW()
+ );
+}
+
+// Minimum element of a 4-D vector
+//
+inline float minElem( const Vector4 & vec )
+{
+    float best = vec.getX();
+    // Written as a negated "less than" so y is taken whenever x < y is false
+    if ( !( best < vec.getY() ) ) {
+        best = vec.getY();
+    }
+    if ( vec.getZ() < best ) {
+        best = vec.getZ();
+    }
+    if ( vec.getW() < best ) {
+        best = vec.getW();
+    }
+    return best;
+}
+
+// Compute the sum of all elements of a 4-D vector, added in x, y, z, w order
+//
+inline float sum( const Vector4 & vec )
+{
+    float total = vec.getX();
+    total += vec.getY();
+    total += vec.getZ();
+    total += vec.getW();
+    return total;
+}
+
+// Compute the dot product of two 4-D vectors
+//
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    float acc = vec0.getX() * vec1.getX();
+    acc += vec0.getY() * vec1.getY();
+    acc += vec0.getZ() * vec1.getZ();
+    acc += vec0.getW() * vec1.getW();
+    return acc;
+}
+
+// Compute the square of the length of a 4-D vector
+//
+inline float lengthSqr( const Vector4 & vec )
+{
+    float sumSq = vec.getX() * vec.getX();
+    sumSq += vec.getY() * vec.getY();
+    sumSq += vec.getZ() * vec.getZ();
+    sumSq += vec.getW() * vec.getW();
+    return sumSq;
+}
+
+inline float length( const Vector4 & vec )
+{
+ return ::sqrtf( lengthSqr( vec ) );
+}
+
+// Scale a 4-D vector by the reciprocal of its length
+// NOTE: there is no guard for a zero-length input; the reciprocal is taken unconditionally.
+//
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    const float invLen = ( 1.0f / sqrtf( lengthSqr( vec ) ) );
+    const float nx = ( vec.getX() * invLen );
+    const float ny = ( vec.getY() * invLen );
+    const float nz = ( vec.getZ() * invLen );
+    const float nw = ( vec.getW() * invLen );
+    return Vector4( nx, ny, nz, nw );
+}
+
+// Conditionally select between two 4-D vectors
+// Returns a copy of vec1 when select1 is true, otherwise a copy of vec0.
+//
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
+{
+    const Vector4 & chosen = ( select1 )? vec1 : vec0;
+    return Vector4( chosen.getX(), chosen.getY(), chosen.getZ(), chosen.getW() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector4 & vec )
+{
+ printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
+}
+
+inline void print( const Vector4 & vec, const char * name )
+{
+ printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
+}
+
+#endif
+
+inline Point3::Point3( const Point3 & pnt )
+{
+ mX = pnt.mX;
+ mY = pnt.mY;
+ mZ = pnt.mZ;
+}
+
+inline Point3::Point3( float _x, float _y, float _z )
+{
+ mX = _x;
+ mY = _y;
+ mZ = _z;
+}
+
+inline Point3::Point3( const Vector3 & vec )
+{
+ mX = vec.getX();
+ mY = vec.getY();
+ mZ = vec.getZ();
+}
+
+inline Point3::Point3( float scalar )
+{
+ mX = scalar;
+ mY = scalar;
+ mZ = scalar;
+}
+
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
+{
+ return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
+}
+
+inline void loadXYZ( Point3 & pnt, const float * fptr )
+{
+ pnt = Point3( fptr[0], fptr[1], fptr[2] );
+}
+
+inline void storeXYZ( const Point3 & pnt, float * fptr )
+{
+ fptr[0] = pnt.getX();
+ fptr[1] = pnt.getY();
+ fptr[2] = pnt.getZ();
+}
+
+// Load three-half-floats as a 3-D point
+// Each 16-bit value is split into 1 sign bit, 5 exponent bits and 10 mantissa bits
+// and re-packed as a 32-bit float bit pattern (8 exponent bits, 23 mantissa bits).
+// NOTE: a zero exponent field (zero or denormal) loads as signed zero, and an
+// all-ones exponent field (infinity or NaN) loads as signed infinity.
+//
+inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 3; ++i) {
+        const unsigned int half = hfptr[i];
+        const unsigned int signBit = half >> 15;
+        unsigned int expBits = (half >> 10) & 0x1Fu;
+        unsigned int fracBits = half & 0x3FFu;
+
+        if (expBits == 0u) {
+            // zero or denorm -> zero
+            fracBits = 0u;
+        } else if (expBits == 31u) {
+            // infinity or nan -> infinity
+            expBits = 255u;
+            fracBits = 0u;
+        } else {
+            // normal number: re-bias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits)
+            expBits += 127 - 15;
+            fracBits <<= 13;
+        }
+
+        Data32 bits;
+        bits.u32 = (signBit << 31) | (expBits << 23) | fracBits;
+        vec[i] = bits.f32;
+    }
+}
+
+// Store a 3-D point as half-floats
+// Each element's 32-bit pattern is split into sign, 8-bit exponent and 23-bit mantissa
+// and re-packed into 16 bits (1 sign, 5 exponent, 10 mantissa).
+// NOTE: the mantissa is shifted right (truncated, no rounding). Values whose exponent
+// field is at or below 127 - 15 (including zero and denormals) store as signed zero;
+// values whose exponent field is at or above 127 - 15 + 31 (including infinity and NaN)
+// store as signed infinity.
+//
+inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 3; ++i) {
+        Data32 bits;
+        bits.f32 = vec[i];
+
+        const unsigned int signBit = bits.u32 >> 31;
+        unsigned int expBits = (bits.u32 >> 23) & 0xFFu;
+        unsigned int fracBits = bits.u32 & 0x7FFFFFu;
+
+        if (expBits <= 127 - 15) {
+            // zero, denorm or underflow -> zero
+            expBits = 0u;
+            fracBits = 0u;
+        } else if (expBits >= 127 - 15 + 31) {
+            // overflow, infinity or nan -> infinity
+            expBits = 31u;
+            fracBits = 0u;
+        } else {
+            // representable: re-bias the exponent (127 -> 15) and narrow the mantissa (23 -> 10 bits)
+            expBits -= 127 - 15;
+            fracBits >>= 13;
+        }
+
+        hfptr[i] = (unsigned short)((signBit << 15) | (expBits << 10) | fracBits);
+    }
+}
+
+inline Point3 & Point3::operator =( const Point3 & pnt )
+{
+ mX = pnt.mX;
+ mY = pnt.mY;
+ mZ = pnt.mZ;
+ return *this;
+}
+
+inline Point3 & Point3::setX( float _x )
+{
+ mX = _x;
+ return *this;
+}
+
+inline float Point3::getX( ) const
+{
+ return mX;
+}
+
+inline Point3 & Point3::setY( float _y )
+{
+ mY = _y;
+ return *this;
+}
+
+inline float Point3::getY( ) const
+{
+ return mY;
+}
+
+inline Point3 & Point3::setZ( float _z )
+{
+ mZ = _z;
+ return *this;
+}
+
+inline float Point3::getZ( ) const
+{
+ return mZ;
+}
+
+inline Point3 & Point3::setElem( int idx, float value )
+{
+ *(&mX + idx) = value;
+ return *this;
+}
+
+inline float Point3::getElem( int idx ) const
+{
+ return *(&mX + idx);
+}
+
+inline float & Point3::operator []( int idx )
+{
+ return *(&mX + idx);
+}
+
+inline float Point3::operator []( int idx ) const
+{
+ return *(&mX + idx);
+}
+
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+ return Vector3(
+ ( mX - pnt.mX ),
+ ( mY - pnt.mY ),
+ ( mZ - pnt.mZ )
+ );
+}
+
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+ return Point3(
+ ( mX + vec.getX() ),
+ ( mY + vec.getY() ),
+ ( mZ + vec.getZ() )
+ );
+}
+
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+ return Point3(
+ ( mX - vec.getX() ),
+ ( mY - vec.getY() ),
+ ( mZ - vec.getZ() )
+ );
+}
+
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+ *this = *this + vec;
+ return *this;
+}
+
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+ *this = *this - vec;
+ return *this;
+}
+
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+ return Point3(
+ ( pnt0.getX() * pnt1.getX() ),
+ ( pnt0.getY() * pnt1.getY() ),
+ ( pnt0.getZ() * pnt1.getZ() )
+ );
+}
+
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+ return Point3(
+ ( pnt0.getX() / pnt1.getX() ),
+ ( pnt0.getY() / pnt1.getY() ),
+ ( pnt0.getZ() / pnt1.getZ() )
+ );
+}
+
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+ return Point3(
+ ( 1.0f / pnt.getX() ),
+ ( 1.0f / pnt.getY() ),
+ ( 1.0f / pnt.getZ() )
+ );
+}
+
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+ return Point3(
+ sqrtf( pnt.getX() ),
+ sqrtf( pnt.getY() ),
+ sqrtf( pnt.getZ() )
+ );
+}
+
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+ return Point3(
+ ( 1.0f / sqrtf( pnt.getX() ) ),
+ ( 1.0f / sqrtf( pnt.getY() ) ),
+ ( 1.0f / sqrtf( pnt.getZ() ) )
+ );
+}
+
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+ return Point3(
+ fabsf( pnt.getX() ),
+ fabsf( pnt.getY() ),
+ fabsf( pnt.getZ() )
+ );
+}
+
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+ return Point3(
+ ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ),
+ ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ),
+ ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() )
+ );
+}
+
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+ return Point3(
+ (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(),
+ (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(),
+ (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
+ );
+}
+
+// Maximum element of a 3-D point
+//
+inline float maxElem( const Point3 & pnt )
+{
+    float best = pnt.getX();
+    // Written as a negated "greater than" so y is taken whenever x > y is false
+    if ( !( best > pnt.getY() ) ) {
+        best = pnt.getY();
+    }
+    if ( pnt.getZ() > best ) {
+        best = pnt.getZ();
+    }
+    return best;
+}
+
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+ return Point3(
+ (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(),
+ (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(),
+ (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
+ );
+}
+
+// Minimum element of a 3-D point
+//
+inline float minElem( const Point3 & pnt )
+{
+    float best = pnt.getX();
+    // Written as a negated "less than" so y is taken whenever x < y is false
+    if ( !( best < pnt.getY() ) ) {
+        best = pnt.getY();
+    }
+    if ( pnt.getZ() < best ) {
+        best = pnt.getZ();
+    }
+    return best;
+}
+
+inline float sum( const Point3 & pnt )
+{
+ float result;
+ result = ( pnt.getX() + pnt.getY() );
+ result = ( result + pnt.getZ() );
+ return result;
+}
+
+// Apply uniform scale to a 3-D point
+// The scalar is broadcast into a Point3 and multiplied per element.
+//
+inline const Point3 scale( const Point3 & pnt, float scaleVal )
+{
+    const Point3 factors( scaleVal );
+    return mulPerElem( pnt, factors );
+}
+
+// Apply non-uniform scale to a 3-D point
+// The scale vector is converted to a Point3 and multiplied per element.
+//
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
+{
+    const Point3 factors( scaleVec );
+    return mulPerElem( pnt, factors );
+}
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// Computed as the dot product of the point's coordinates with unitVec.
+//
+inline float projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    float acc = pnt.getX() * unitVec.getX();
+    acc += pnt.getY() * unitVec.getY();
+    acc += pnt.getZ() * unitVec.getZ();
+    return acc;
+}
+
+inline float distSqrFromOrigin( const Point3 & pnt )
+{
+ return lengthSqr( Vector3( pnt ) );
+}
+
+inline float distFromOrigin( const Point3 & pnt )
+{
+ return length( Vector3( pnt ) );
+}
+
+// Compute the square of the distance between two 3-D points
+//
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const Vector3 delta = ( pnt1 - pnt0 );
+    return lengthSqr( delta );
+}
+
+// Compute the distance between two 3-D points
+//
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const Vector3 delta = ( pnt1 - pnt0 );
+    return length( delta );
+}
+
+// Conditionally select between two 3-D points
+// Returns a copy of pnt1 when select1 is true, otherwise a copy of pnt0.
+//
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
+{
+    const Point3 & chosen = ( select1 )? pnt1 : pnt0;
+    return Point3( chosen.getX(), chosen.getY(), chosen.getZ() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Point3 & pnt )
+{
+ printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() );
+}
+
+inline void print( const Point3 & pnt, const char * name )
+{
+ printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h
new file mode 100644
index 00000000000..d00456dfeb4
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h
@@ -0,0 +1,1872 @@
+/*
+ Copyright (C) 2009 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_H
+#define _VECTORMATH_AOS_CPP_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format: three float members (mX, mY, mZ).
+// Apart from the empty default constructor, member functions are only declared in this class.
+class Vector3
+{
+ float mX;
+ float mY;
+ float mZ;
+#ifndef __GNUC__
+ float d; // only present when __GNUC__ is not defined; presumably padding to four floats where aligned(16) is not applied - TODO confirm
+#endif
+
+public:
+ // Default constructor; does no initialization
+ // (empty body: mX, mY and mZ are left unset)
+ inline Vector3( ) { };
+
+ // Copy a 3-D vector
+ //
+ inline Vector3( const Vector3 & vec );
+
+ // Construct a 3-D vector from x, y, and z elements
+ //
+ inline Vector3( float x, float y, float z );
+
+ // Copy elements from a 3-D point into a 3-D vector
+ // (explicit: a Point3 is never converted implicitly)
+ explicit inline Vector3( const Point3 & pnt );
+
+ // Set all elements of a 3-D vector to the same scalar value
+ // (explicit: a float is never converted implicitly)
+ explicit inline Vector3( float scalar );
+
+ // Assign one 3-D vector to another
+ //
+ inline Vector3 & operator =( const Vector3 & vec );
+
+ // Set the x element of a 3-D vector
+ //
+ inline Vector3 & setX( float x );
+
+ // Set the y element of a 3-D vector
+ //
+ inline Vector3 & setY( float y );
+
+ // Set the z element of a 3-D vector
+ //
+ inline Vector3 & setZ( float z );
+
+ // Get the x element of a 3-D vector
+ //
+ inline float getX( ) const;
+
+ // Get the y element of a 3-D vector
+ //
+ inline float getY( ) const;
+
+ // Get the z element of a 3-D vector
+ //
+ inline float getZ( ) const;
+
+ // Set an x, y, or z element of a 3-D vector by index
+ //
+ inline Vector3 & setElem( int idx, float value );
+
+ // Get an x, y, or z element of a 3-D vector by index
+ //
+ inline float getElem( int idx ) const;
+
+ // Subscripting operator to set or get an element
+ // (non-const overload: returns a reference)
+ inline float & operator []( int idx );
+
+ // Subscripting operator to get an element
+ // (const overload: returns the value)
+ inline float operator []( int idx ) const;
+
+ // Add two 3-D vectors
+ //
+ inline const Vector3 operator +( const Vector3 & vec ) const;
+
+ // Subtract a 3-D vector from another 3-D vector
+ //
+ inline const Vector3 operator -( const Vector3 & vec ) const;
+
+ // Add a 3-D vector to a 3-D point
+ // (the result is a Point3)
+ inline const Point3 operator +( const Point3 & pnt ) const;
+
+ // Multiply a 3-D vector by a scalar
+ //
+ inline const Vector3 operator *( float scalar ) const;
+
+ // Divide a 3-D vector by a scalar
+ //
+ inline const Vector3 operator /( float scalar ) const;
+
+ // Perform compound assignment and addition with a 3-D vector
+ //
+ inline Vector3 & operator +=( const Vector3 & vec );
+
+ // Perform compound assignment and subtraction by a 3-D vector
+ //
+ inline Vector3 & operator -=( const Vector3 & vec );
+
+ // Perform compound assignment and multiplication by a scalar
+ //
+ inline Vector3 & operator *=( float scalar );
+
+ // Perform compound assignment and division by a scalar
+ //
+ inline Vector3 & operator /=( float scalar );
+
+ // Negate all elements of a 3-D vector
+ //
+ inline const Vector3 operator -( ) const;
+
+ // Construct x axis
+ //
+ static inline const Vector3 xAxis( );
+
+ // Construct y axis
+ //
+ static inline const Vector3 yAxis( );
+
+ // Construct z axis
+ //
+ static inline const Vector3 zAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // applied only when __GNUC__ is defined: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply a 3-D vector by a scalar
+//
+inline const Vector3 operator *( float scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+//
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE:
+// Floating-point behavior matches standard library function divf4.
+//
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function recipf4.
+//
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function sqrtf4.
+//
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function rsqrtf4.
+//
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+//
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+//
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+//
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+//
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+//
+inline float maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+//
+inline float minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+//
+inline float sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+//
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+//
+inline float lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+//
+inline float length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE:
+// The result is unpredictable when all elements of vec are at or near zero.
+//
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+//
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+//
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+//
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+//
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE:
+// Faster than separately creating a cross-product matrix and multiplying.
+//
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE:
+// Does not clamp t between 0 and 1.
+//
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE:
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+//
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+//
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
+
+// Load x, y, and z elements from the first three words of a float array.
+//
+//
+inline void loadXYZ( Vector3 & vec, const float * fptr );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+//
+inline void storeXYZ( const Vector3 & vec, float * fptr );
+
+// Load three-half-floats as a 3-D vector
+// NOTE:
+// This transformation does not support either denormalized numbers or NaNs.
+//
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr );
+
+// Store a 3-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// NOTE:
+// This transformation does not support either denormalized numbers or NaNs.
+//
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format: four float members (mX, mY, mZ, mW).
+// Apart from the empty default constructor, member functions are only declared in this class.
+class Vector4
+{
+ float mX;
+ float mY;
+ float mZ;
+ float mW;
+
+public:
+ // Default constructor; does no initialization
+ // (empty body: mX, mY, mZ and mW are left unset)
+ inline Vector4( ) { };
+
+ // Copy a 4-D vector
+ //
+ inline Vector4( const Vector4 & vec );
+
+ // Construct a 4-D vector from x, y, z, and w elements
+ //
+ inline Vector4( float x, float y, float z, float w );
+
+ // Construct a 4-D vector from a 3-D vector and a scalar
+ //
+ inline Vector4( const Vector3 & xyz, float w );
+
+ // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ //
+ explicit inline Vector4( const Vector3 & vec );
+
+ // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ //
+ explicit inline Vector4( const Point3 & pnt );
+
+ // Copy elements from a quaternion into a 4-D vector
+ //
+ explicit inline Vector4( const Quat & quat );
+
+ // Set all elements of a 4-D vector to the same scalar value
+ //
+ explicit inline Vector4( float scalar );
+
+ // Assign one 4-D vector to another
+ //
+ inline Vector4 & operator =( const Vector4 & vec );
+
+ // Set the x, y, and z elements of a 4-D vector
+ // NOTE:
+ // This function does not change the w element.
+ //
+ inline Vector4 & setXYZ( const Vector3 & vec );
+
+ // Get the x, y, and z elements of a 4-D vector
+ // (returned by value as a Vector3)
+ inline const Vector3 getXYZ( ) const;
+
+ // Set the x element of a 4-D vector
+ //
+ inline Vector4 & setX( float x );
+
+ // Set the y element of a 4-D vector
+ //
+ inline Vector4 & setY( float y );
+
+ // Set the z element of a 4-D vector
+ //
+ inline Vector4 & setZ( float z );
+
+ // Set the w element of a 4-D vector
+ //
+ inline Vector4 & setW( float w );
+
+ // Get the x element of a 4-D vector
+ //
+ inline float getX( ) const;
+
+ // Get the y element of a 4-D vector
+ //
+ inline float getY( ) const;
+
+ // Get the z element of a 4-D vector
+ //
+ inline float getZ( ) const;
+
+ // Get the w element of a 4-D vector
+ //
+ inline float getW( ) const;
+
+ // Set an x, y, z, or w element of a 4-D vector by index
+ //
+ inline Vector4 & setElem( int idx, float value );
+
+ // Get an x, y, z, or w element of a 4-D vector by index
+ //
+ inline float getElem( int idx ) const;
+
+ // Subscripting operator to set or get an element
+ // (non-const overload: returns a reference)
+ inline float & operator []( int idx );
+
+ // Subscripting operator to get an element
+ // (const overload: returns the value)
+ inline float operator []( int idx ) const;
+
+ // Add two 4-D vectors
+ //
+ inline const Vector4 operator +( const Vector4 & vec ) const;
+
+ // Subtract a 4-D vector from another 4-D vector
+ //
+ inline const Vector4 operator -( const Vector4 & vec ) const;
+
+ // Multiply a 4-D vector by a scalar
+ //
+ inline const Vector4 operator *( float scalar ) const;
+
+ // Divide a 4-D vector by a scalar
+ //
+ inline const Vector4 operator /( float scalar ) const;
+
+ // Perform compound assignment and addition with a 4-D vector
+ //
+ inline Vector4 & operator +=( const Vector4 & vec );
+
+ // Perform compound assignment and subtraction by a 4-D vector
+ //
+ inline Vector4 & operator -=( const Vector4 & vec );
+
+ // Perform compound assignment and multiplication by a scalar
+ //
+ inline Vector4 & operator *=( float scalar );
+
+ // Perform compound assignment and division by a scalar
+ //
+ inline Vector4 & operator /=( float scalar );
+
+ // Negate all elements of a 4-D vector
+ //
+ inline const Vector4 operator -( ) const;
+
+ // Construct x axis
+ //
+ static inline const Vector4 xAxis( );
+
+ // Construct y axis
+ //
+ static inline const Vector4 yAxis( );
+
+ // Construct z axis
+ //
+ static inline const Vector4 zAxis( );
+
+ // Construct w axis
+ //
+ static inline const Vector4 wAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // applied only when __GNUC__ is defined: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply a 4-D vector by a scalar
+//
+inline const Vector4 operator *( float scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+//
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE:
+// Floating-point behavior matches standard library function divf4.
+//
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function recipf4.
+//
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function sqrtf4.
+//
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function rsqrtf4.
+//
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+//
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+//
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+//
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+//
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+//
+inline float maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+//
+inline float minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+//
+inline float sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+//
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+//
+inline float lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+//
+inline float length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE:
+// The result is unpredictable when all elements of vec are at or near zero.
+//
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+//
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE:
+// Does not clamp t between 0 and 1.
+//
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE:
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+//
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+//
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
+
+// Load x, y, z, and w elements from the first four words of a float array.
+//
+//
+inline void loadXYZW( Vector4 & vec, const float * fptr );
+
+// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+//
+inline void storeXYZW( const Vector4 & vec, float * fptr );
+
+// Load four-half-floats as a 4-D vector
+// NOTE:
+// This transformation does not support either denormalized numbers or NaNs.
+//
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr );
+
+// Store a 4-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// NOTE:
+// This transformation does not support either denormalized numbers or NaNs.
+//
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format: three float members (mX, mY, mZ).
+// Apart from the empty default constructor, member functions are only declared in this class.
+class Point3
+{
+ float mX;
+ float mY;
+ float mZ;
+#ifndef __GNUC__
+ float d; // only present when __GNUC__ is not defined; presumably padding to four floats where aligned(16) is not applied - TODO confirm
+#endif
+
+public:
+ // Default constructor; does no initialization
+ // (empty body: mX, mY and mZ are left unset)
+ inline Point3( ) { };
+
+ // Copy a 3-D point
+ //
+ inline Point3( const Point3 & pnt );
+
+ // Construct a 3-D point from x, y, and z elements
+ //
+ inline Point3( float x, float y, float z );
+
+ // Copy elements from a 3-D vector into a 3-D point
+ // (explicit: a Vector3 is never converted implicitly)
+ explicit inline Point3( const Vector3 & vec );
+
+ // Set all elements of a 3-D point to the same scalar value
+ // (explicit: a float is never converted implicitly)
+ explicit inline Point3( float scalar );
+
+ // Assign one 3-D point to another
+ //
+ inline Point3 & operator =( const Point3 & pnt );
+
+ // Set the x element of a 3-D point
+ //
+ inline Point3 & setX( float x );
+
+ // Set the y element of a 3-D point
+ //
+ inline Point3 & setY( float y );
+
+ // Set the z element of a 3-D point
+ //
+ inline Point3 & setZ( float z );
+
+ // Get the x element of a 3-D point
+ //
+ inline float getX( ) const;
+
+ // Get the y element of a 3-D point
+ //
+ inline float getY( ) const;
+
+ // Get the z element of a 3-D point
+ //
+ inline float getZ( ) const;
+
+ // Set an x, y, or z element of a 3-D point by index
+ //
+ inline Point3 & setElem( int idx, float value );
+
+ // Get an x, y, or z element of a 3-D point by index
+ //
+ inline float getElem( int idx ) const;
+
+ // Subscripting operator to set or get an element
+ // (non-const overload: returns a reference)
+ inline float & operator []( int idx );
+
+ // Subscripting operator to get an element
+ // (const overload: returns the value)
+ inline float operator []( int idx ) const;
+
+ // Subtract a 3-D point from another 3-D point
+ // (the result is a Vector3)
+ inline const Vector3 operator -( const Point3 & pnt ) const;
+
+ // Add a 3-D point to a 3-D vector
+ // (the result is a Point3)
+ inline const Point3 operator +( const Vector3 & vec ) const;
+
+ // Subtract a 3-D vector from a 3-D point
+ // (the result is a Point3)
+ inline const Point3 operator -( const Vector3 & vec ) const;
+
+ // Perform compound assignment and addition with a 3-D vector
+ //
+ inline Point3 & operator +=( const Vector3 & vec );
+
+ // Perform compound assignment and subtraction by a 3-D vector
+ //
+ inline Point3 & operator -=( const Vector3 & vec );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // applied only when __GNUC__ is defined: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply two 3-D points per element
+//
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE:
+// Floating-point behavior matches standard library function divf4.
+//
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE:
+// Floating-point behavior matches standard library function recipf4.
+//
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE:
+// Floating-point behavior matches standard library function sqrtf4.
+//
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE:
+// Floating-point behavior matches standard library function rsqrtf4.
+//
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+//
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+//
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+//
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+//
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+//
+inline float maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+//
+inline float minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+//
+inline float sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+//
+inline const Point3 scale( const Point3 & pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+//
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+//
+inline float projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+//
+inline float distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+//
+inline float distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+//
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+//
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE:
+// Does not clamp t between 0 and 1.
+//
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+//
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
+
+// Load x, y, and z elements from the first three words of a float array.
+//
+//
+inline void loadXYZ( Point3 & pnt, const float * fptr );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+//
+inline void storeXYZ( const Point3 & pnt, float * fptr );
+
+// Load three-half-floats as a 3-D point
+// NOTE:
+// This transformation does not support either denormalized numbers or NaNs.
+//
+inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr );
+
+// Store a 3-D point as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// NOTE:
+// This transformation does not support either denormalized numbers or NaNs.
+//
+inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format: four float members (mX, mY, mZ, mW).
+// Apart from the empty default constructor, member functions are only declared in this class.
+class Quat
+{
+ float mX;
+ float mY;
+ float mZ;
+ float mW;
+
+public:
+ // Default constructor; does no initialization
+ // (empty body: mX, mY, mZ and mW are left unset)
+ inline Quat( ) { };
+
+ // Copy a quaternion
+ //
+ inline Quat( const Quat & quat );
+
+ // Construct a quaternion from x, y, z, and w elements
+ //
+ inline Quat( float x, float y, float z, float w );
+
+ // Construct a quaternion from a 3-D vector and a scalar
+ //
+ inline Quat( const Vector3 & xyz, float w );
+
+ // Copy elements from a 4-D vector into a quaternion
+ //
+ explicit inline Quat( const Vector4 & vec );
+
+ // Convert a rotation matrix to a unit-length quaternion
+ //
+ explicit inline Quat( const Matrix3 & rotMat );
+
+ // Set all elements of a quaternion to the same scalar value
+ //
+ explicit inline Quat( float scalar );
+
+ // Assign one quaternion to another
+ //
+ inline Quat & operator =( const Quat & quat );
+
+ // Set the x, y, and z elements of a quaternion
+ // NOTE:
+ // This function does not change the w element.
+ //
+ inline Quat & setXYZ( const Vector3 & vec );
+
+ // Get the x, y, and z elements of a quaternion
+ // (returned by value as a Vector3)
+ inline const Vector3 getXYZ( ) const;
+
+ // Set the x element of a quaternion
+ //
+ inline Quat & setX( float x );
+
+ // Set the y element of a quaternion
+ //
+ inline Quat & setY( float y );
+
+ // Set the z element of a quaternion
+ //
+ inline Quat & setZ( float z );
+
+ // Set the w element of a quaternion
+ //
+ inline Quat & setW( float w );
+
+ // Get the x element of a quaternion
+ //
+ inline float getX( ) const;
+
+ // Get the y element of a quaternion
+ //
+ inline float getY( ) const;
+
+ // Get the z element of a quaternion
+ //
+ inline float getZ( ) const;
+
+ // Get the w element of a quaternion
+ //
+ inline float getW( ) const;
+
+ // Set an x, y, z, or w element of a quaternion by index
+ //
+ inline Quat & setElem( int idx, float value );
+
+ // Get an x, y, z, or w element of a quaternion by index
+ //
+ inline float getElem( int idx ) const;
+
+ // Subscripting operator to set or get an element
+ // (non-const overload: returns a reference)
+ inline float & operator []( int idx );
+
+ // Subscripting operator to get an element
+ // (const overload: returns the value)
+ inline float operator []( int idx ) const;
+
+ // Add two quaternions
+ //
+ inline const Quat operator +( const Quat & quat ) const;
+
+ // Subtract a quaternion from another quaternion
+ //
+ inline const Quat operator -( const Quat & quat ) const;
+
+ // Multiply two quaternions
+ //
+ inline const Quat operator *( const Quat & quat ) const;
+
+ // Multiply a quaternion by a scalar
+ //
+ inline const Quat operator *( float scalar ) const;
+
+ // Divide a quaternion by a scalar
+ //
+ inline const Quat operator /( float scalar ) const;
+
+ // Perform compound assignment and addition with a quaternion
+ //
+ inline Quat & operator +=( const Quat & quat );
+
+ // Perform compound assignment and subtraction by a quaternion
+ //
+ inline Quat & operator -=( const Quat & quat );
+
+ // Perform compound assignment and multiplication by a quaternion
+ //
+ inline Quat & operator *=( const Quat & quat );
+
+ // Perform compound assignment and multiplication by a scalar
+ //
+ inline Quat & operator *=( float scalar );
+
+ // Perform compound assignment and division by a scalar
+ //
+ inline Quat & operator /=( float scalar );
+
+ // Negate all elements of a quaternion
+ //
+ inline const Quat operator -( ) const;
+
+ // Construct an identity quaternion
+ //
+ static inline const Quat identity( );
+
+ // Construct a quaternion to rotate between two unit-length 3-D vectors
+ // NOTE:
+ // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ //
+ static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+ // Construct a quaternion to rotate around a unit-length 3-D vector
+ // (angle given in radians)
+ static inline const Quat rotation( float radians, const Vector3 & unitVec );
+
+ // Construct a quaternion to rotate around the x axis
+ // (angle given in radians)
+ static inline const Quat rotationX( float radians );
+
+ // Construct a quaternion to rotate around the y axis
+ // (angle given in radians)
+ static inline const Quat rotationY( float radians );
+
+ // Construct a quaternion to rotate around the z axis
+ // (angle given in radians)
+ static inline const Quat rotationZ( float radians );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // applied only when __GNUC__ is defined: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply a quaternion by a scalar
+//
+inline const Quat operator *( float scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+//
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+//
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+//
+inline float dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+//
+inline float norm( const Quat & quat );
+
+// Compute the length of a quaternion
+//
+inline float length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE:
+// The result is unpredictable when all elements of quat are at or near zero.
+//
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE:
+// Does not clamp t between 0 and 1.
+//
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE:
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+//
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+//
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+//
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
+
+// Load x, y, z, and w elements from the first four words of a float array.
+//
+//
+inline void loadXYZW( Quat & quat, const float * fptr );
+
+// Store x, y, z, and w elements of a quaternion in the first four words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+//
+inline void storeXYZW( const Quat & quat, float * fptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// The data members are three Vector3 objects, one per column (mCol0, mCol1, mCol2).
+class Matrix3
+{
+ Vector3 mCol0;
+ Vector3 mCol1;
+ Vector3 mCol2;
+
+public:
+ // Default constructor; does no initialization
+ // (the constructor body is empty)
+ inline Matrix3( ) { };
+
+ // Copy a 3x3 matrix
+ //
+ inline Matrix3( const Matrix3 & mat );
+
+ // Construct a 3x3 matrix containing the specified columns
+ //
+ inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+ // Construct a 3x3 rotation matrix from a unit-length quaternion
+ // Declared explicit, so a Quat is never implicitly converted to a Matrix3.
+ explicit inline Matrix3( const Quat & unitQuat );
+
+ // Set all elements of a 3x3 matrix to the same scalar value
+ // Declared explicit, so a float is never implicitly converted to a Matrix3.
+ explicit inline Matrix3( float scalar );
+
+ // Assign one 3x3 matrix to another
+ //
+ inline Matrix3 & operator =( const Matrix3 & mat );
+
+ // Set column 0 of a 3x3 matrix
+ // Returns a Matrix3 reference (presumably *this, so calls can be chained -- confirm in the definition).
+ inline Matrix3 & setCol0( const Vector3 & col0 );
+
+ // Set column 1 of a 3x3 matrix
+ //
+ inline Matrix3 & setCol1( const Vector3 & col1 );
+
+ // Set column 2 of a 3x3 matrix
+ //
+ inline Matrix3 & setCol2( const Vector3 & col2 );
+
+ // Get column 0 of a 3x3 matrix
+ // The column is returned by value.
+ inline const Vector3 getCol0( ) const;
+
+ // Get column 1 of a 3x3 matrix
+ //
+ inline const Vector3 getCol1( ) const;
+
+ // Get column 2 of a 3x3 matrix
+ //
+ inline const Vector3 getCol2( ) const;
+
+ // Set the column of a 3x3 matrix referred to by the specified index
+ // NOTE(review): col is presumably expected to be 0, 1, or 2; no range check is visible here.
+ inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+ // Set the row of a 3x3 matrix referred to by the specified index
+ // NOTE(review): row is presumably expected to be 0, 1, or 2; no range check is visible here.
+ inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+ // Get the column of a 3x3 matrix referred to by the specified index
+ //
+ inline const Vector3 getCol( int col ) const;
+
+ // Get the row of a 3x3 matrix referred to by the specified index
+ //
+ inline const Vector3 getRow( int row ) const;
+
+ // Subscripting operator to set or get a column
+ // Returns a non-const reference, so the column can be assigned through it.
+ inline Vector3 & operator []( int col );
+
+ // Subscripting operator to get a column
+ // Const overload; the column is returned by value.
+ inline const Vector3 operator []( int col ) const;
+
+ // Set the element of a 3x3 matrix referred to by column and row indices
+ // Note the argument order: column index first, then row index.
+ inline Matrix3 & setElem( int col, int row, float val );
+
+ // Get the element of a 3x3 matrix referred to by column and row indices
+ // Note the argument order: column index first, then row index.
+ inline float getElem( int col, int row ) const;
+
+ // Add two 3x3 matrices
+ //
+ inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+ // Subtract a 3x3 matrix from another 3x3 matrix
+ //
+ inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+ // Negate all elements of a 3x3 matrix
+ //
+ inline const Matrix3 operator -( ) const;
+
+ // Multiply a 3x3 matrix by a scalar
+ // (the scalar-on-the-left form is the non-member operator * declared after this class)
+ inline const Matrix3 operator *( float scalar ) const;
+
+ // Multiply a 3x3 matrix by a 3-D vector
+ //
+ inline const Vector3 operator *( const Vector3 & vec ) const;
+
+ // Multiply two 3x3 matrices
+ // This matrix is the left operand and mat is the right operand.
+ inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+ // Perform compound assignment and addition with a 3x3 matrix
+ //
+ inline Matrix3 & operator +=( const Matrix3 & mat );
+
+ // Perform compound assignment and subtraction by a 3x3 matrix
+ //
+ inline Matrix3 & operator -=( const Matrix3 & mat );
+
+ // Perform compound assignment and multiplication by a scalar
+ //
+ inline Matrix3 & operator *=( float scalar );
+
+ // Perform compound assignment and multiplication by a 3x3 matrix
+ // NOTE(review): presumably equivalent to *this = *this * mat -- confirm in the definition.
+ inline Matrix3 & operator *=( const Matrix3 & mat );
+
+ // Construct an identity 3x3 matrix
+ // Static factory; the matrix is returned by value.
+ static inline const Matrix3 identity( );
+
+ // Construct a 3x3 matrix to rotate around the x axis
+ //
+ static inline const Matrix3 rotationX( float radians );
+
+ // Construct a 3x3 matrix to rotate around the y axis
+ //
+ static inline const Matrix3 rotationY( float radians );
+
+ // Construct a 3x3 matrix to rotate around the z axis
+ //
+ static inline const Matrix3 rotationZ( float radians );
+
+ // Construct a 3x3 matrix to rotate around the x, y, and z axes
+ // All three angles are passed together in one Vector3.
+ static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+ // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ //
+ static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
+
+ // Construct a rotation matrix from a unit-length quaternion
+ // Takes the same argument as the explicit constructor from a Quat declared above.
+ static inline const Matrix3 rotation( const Quat & unitQuat );
+
+ // Construct a 3x3 matrix to perform scaling
+ // The scale factors are passed together in one Vector3.
+ static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// Non-member overload that accepts the scalar as the left operand.
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; mat is taken by const reference.
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// Note the argument order: scale vector first, then the matrix (the reverse of appendScale).
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+//
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+//
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// The result is returned by value; mat is taken by const reference.
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE:
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+//
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise -- confirm in the definition.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// The data members are four Vector4 objects, one per column (mCol0 .. mCol3).
+class Matrix4
+{
+ Vector4 mCol0;
+ Vector4 mCol1;
+ Vector4 mCol2;
+ Vector4 mCol3;
+
+public:
+ // Default constructor; does no initialization
+ // (the constructor body is empty)
+ inline Matrix4( ) { };
+
+ // Copy a 4x4 matrix
+ //
+ inline Matrix4( const Matrix4 & mat );
+
+ // Construct a 4x4 matrix containing the specified columns
+ //
+ inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+ // Construct a 4x4 matrix from a 3x4 transformation matrix
+ // Declared explicit, so a Transform3 is never implicitly converted to a Matrix4.
+ explicit inline Matrix4( const Transform3 & mat );
+
+ // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ // NOTE(review): how the bottom row is filled is not visible here -- confirm in the definition.
+ inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+ // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ // NOTE(review): how the bottom row is filled is not visible here -- confirm in the definition.
+ inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+ // Set all elements of a 4x4 matrix to the same scalar value
+ // Declared explicit, so a float is never implicitly converted to a Matrix4.
+ explicit inline Matrix4( float scalar );
+
+ // Assign one 4x4 matrix to another
+ //
+ inline Matrix4 & operator =( const Matrix4 & mat );
+
+ // Set the upper-left 3x3 submatrix
+ // NOTE:
+ // This function does not change the bottom row elements.
+ //
+ inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+ // Get the upper-left 3x3 submatrix of a 4x4 matrix
+ // The submatrix is returned by value.
+ inline const Matrix3 getUpper3x3( ) const;
+
+ // Set translation component
+ // NOTE:
+ // This function does not change the bottom row elements.
+ //
+ inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+ // Get the translation component of a 4x4 matrix
+ // The translation is returned by value as a 3-D vector.
+ inline const Vector3 getTranslation( ) const;
+
+ // Set column 0 of a 4x4 matrix
+ // Returns a Matrix4 reference (presumably *this, so calls can be chained -- confirm in the definition).
+ inline Matrix4 & setCol0( const Vector4 & col0 );
+
+ // Set column 1 of a 4x4 matrix
+ //
+ inline Matrix4 & setCol1( const Vector4 & col1 );
+
+ // Set column 2 of a 4x4 matrix
+ //
+ inline Matrix4 & setCol2( const Vector4 & col2 );
+
+ // Set column 3 of a 4x4 matrix
+ //
+ inline Matrix4 & setCol3( const Vector4 & col3 );
+
+ // Get column 0 of a 4x4 matrix
+ // The column is returned by value.
+ inline const Vector4 getCol0( ) const;
+
+ // Get column 1 of a 4x4 matrix
+ //
+ inline const Vector4 getCol1( ) const;
+
+ // Get column 2 of a 4x4 matrix
+ //
+ inline const Vector4 getCol2( ) const;
+
+ // Get column 3 of a 4x4 matrix
+ //
+ inline const Vector4 getCol3( ) const;
+
+ // Set the column of a 4x4 matrix referred to by the specified index
+ // NOTE(review): col is presumably expected to be 0..3; no range check is visible here.
+ inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+ // Set the row of a 4x4 matrix referred to by the specified index
+ // NOTE(review): row is presumably expected to be 0..3; no range check is visible here.
+ inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+ // Get the column of a 4x4 matrix referred to by the specified index
+ //
+ inline const Vector4 getCol( int col ) const;
+
+ // Get the row of a 4x4 matrix referred to by the specified index
+ //
+ inline const Vector4 getRow( int row ) const;
+
+ // Subscripting operator to set or get a column
+ // Returns a non-const reference, so the column can be assigned through it.
+ inline Vector4 & operator []( int col );
+
+ // Subscripting operator to get a column
+ // Const overload; the column is returned by value.
+ inline const Vector4 operator []( int col ) const;
+
+ // Set the element of a 4x4 matrix referred to by column and row indices
+ // Note the argument order: column index first, then row index.
+ inline Matrix4 & setElem( int col, int row, float val );
+
+ // Get the element of a 4x4 matrix referred to by column and row indices
+ // Note the argument order: column index first, then row index.
+ inline float getElem( int col, int row ) const;
+
+ // Add two 4x4 matrices
+ //
+ inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+ // Subtract a 4x4 matrix from another 4x4 matrix
+ //
+ inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+ // Negate all elements of a 4x4 matrix
+ //
+ inline const Matrix4 operator -( ) const;
+
+ // Multiply a 4x4 matrix by a scalar
+ // (the scalar-on-the-left form is the non-member operator * declared after this class)
+ inline const Matrix4 operator *( float scalar ) const;
+
+ // Multiply a 4x4 matrix by a 4-D vector
+ //
+ inline const Vector4 operator *( const Vector4 & vec ) const;
+
+ // Multiply a 4x4 matrix by a 3-D vector
+ // Returns a 4-D vector. NOTE(review): the input is presumably treated as having w = 0 -- confirm.
+ inline const Vector4 operator *( const Vector3 & vec ) const;
+
+ // Multiply a 4x4 matrix by a 3-D point
+ // Returns a 4-D vector. NOTE(review): the input is presumably treated as having w = 1 -- confirm.
+ inline const Vector4 operator *( const Point3 & pnt ) const;
+
+ // Multiply two 4x4 matrices
+ // This matrix is the left operand and mat is the right operand.
+ inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+ // Multiply a 4x4 matrix by a 3x4 transformation matrix
+ // The result is a full 4x4 matrix.
+ inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+ // Perform compound assignment and addition with a 4x4 matrix
+ //
+ inline Matrix4 & operator +=( const Matrix4 & mat );
+
+ // Perform compound assignment and subtraction by a 4x4 matrix
+ //
+ inline Matrix4 & operator -=( const Matrix4 & mat );
+
+ // Perform compound assignment and multiplication by a scalar
+ //
+ inline Matrix4 & operator *=( float scalar );
+
+ // Perform compound assignment and multiplication by a 4x4 matrix
+ // NOTE(review): presumably equivalent to *this = *this * mat -- confirm in the definition.
+ inline Matrix4 & operator *=( const Matrix4 & mat );
+
+ // Perform compound assignment and multiplication by a 3x4 transformation matrix
+ // NOTE(review): presumably equivalent to *this = *this * tfrm -- confirm in the definition.
+ inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+ // Construct an identity 4x4 matrix
+ // Static factory; the matrix is returned by value.
+ static inline const Matrix4 identity( );
+
+ // Construct a 4x4 matrix to rotate around the x axis
+ //
+ static inline const Matrix4 rotationX( float radians );
+
+ // Construct a 4x4 matrix to rotate around the y axis
+ //
+ static inline const Matrix4 rotationY( float radians );
+
+ // Construct a 4x4 matrix to rotate around the z axis
+ //
+ static inline const Matrix4 rotationZ( float radians );
+
+ // Construct a 4x4 matrix to rotate around the x, y, and z axes
+ // All three angles are passed together in one Vector3.
+ static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+ // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ //
+ static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
+
+ // Construct a rotation matrix from a unit-length quaternion
+ //
+ static inline const Matrix4 rotation( const Quat & unitQuat );
+
+ // Construct a 4x4 matrix to perform scaling
+ // The scale factors are passed together in one Vector3.
+ static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+ // Construct a 4x4 matrix to perform translation
+ //
+ static inline const Matrix4 translation( const Vector3 & translateVec );
+
+ // Construct viewing matrix based on eye position, position looked at, and up direction
+ // The two positions are Point3 values; the up direction is a Vector3.
+ static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+ // Construct a perspective projection matrix
+ // NOTE(review): the clip-space convention used is not visible here -- confirm in the definition.
+ static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+ // Construct a perspective projection matrix based on frustum
+ //
+ static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+ // Construct an orthographic projection matrix
+ //
+ static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member overload that accepts the scalar as the left operand.
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; mat is taken by const reference.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// Note the argument order: scale vector first, then the matrix (the reverse of appendScale).
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+//
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+//
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// The result is returned by value; mat is taken by const reference.
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE:
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// See affineInverse and orthoInverse below for variants restricted to special matrices.
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+//
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+//
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise -- confirm in the definition.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// The data members are four Vector3 objects, one per column (mCol0 .. mCol3).
+class Transform3
+{
+ Vector3 mCol0;
+ Vector3 mCol1;
+ Vector3 mCol2;
+ Vector3 mCol3;
+
+public:
+ // Default constructor; does no initialization
+ // (the constructor body is empty)
+ inline Transform3( ) { };
+
+ // Copy a 3x4 transformation matrix
+ //
+ inline Transform3( const Transform3 & tfrm );
+
+ // Construct a 3x4 transformation matrix containing the specified columns
+ //
+ inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+ // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ //
+ inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+ // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ //
+ inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+ // Set all elements of a 3x4 transformation matrix to the same scalar value
+ // Declared explicit, so a float is never implicitly converted to a Transform3.
+ explicit inline Transform3( float scalar );
+
+ // Assign one 3x4 transformation matrix to another
+ //
+ inline Transform3 & operator =( const Transform3 & tfrm );
+
+ // Set the upper-left 3x3 submatrix
+ //
+ inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+ // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ // The submatrix is returned by value.
+ inline const Matrix3 getUpper3x3( ) const;
+
+ // Set translation component
+ //
+ inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+ // Get the translation component of a 3x4 transformation matrix
+ // The translation is returned by value as a 3-D vector.
+ inline const Vector3 getTranslation( ) const;
+
+ // Set column 0 of a 3x4 transformation matrix
+ // Returns a Transform3 reference (presumably *this, so calls can be chained -- confirm in the definition).
+ inline Transform3 & setCol0( const Vector3 & col0 );
+
+ // Set column 1 of a 3x4 transformation matrix
+ //
+ inline Transform3 & setCol1( const Vector3 & col1 );
+
+ // Set column 2 of a 3x4 transformation matrix
+ //
+ inline Transform3 & setCol2( const Vector3 & col2 );
+
+ // Set column 3 of a 3x4 transformation matrix
+ //
+ inline Transform3 & setCol3( const Vector3 & col3 );
+
+ // Get column 0 of a 3x4 transformation matrix
+ // The column is returned by value.
+ inline const Vector3 getCol0( ) const;
+
+ // Get column 1 of a 3x4 transformation matrix
+ //
+ inline const Vector3 getCol1( ) const;
+
+ // Get column 2 of a 3x4 transformation matrix
+ //
+ inline const Vector3 getCol2( ) const;
+
+ // Get column 3 of a 3x4 transformation matrix
+ //
+ inline const Vector3 getCol3( ) const;
+
+ // Set the column of a 3x4 transformation matrix referred to by the specified index
+ // NOTE(review): col is presumably expected to be 0..3; no range check is visible here.
+ inline Transform3 & setCol( int col, const Vector3 & vec );
+
+ // Set the row of a 3x4 transformation matrix referred to by the specified index
+ // A row spans four columns, so it is passed as a Vector4 (columns are Vector3).
+ inline Transform3 & setRow( int row, const Vector4 & vec );
+
+ // Get the column of a 3x4 transformation matrix referred to by the specified index
+ //
+ inline const Vector3 getCol( int col ) const;
+
+ // Get the row of a 3x4 transformation matrix referred to by the specified index
+ // A row spans four columns, so it is returned as a Vector4 (by value).
+ inline const Vector4 getRow( int row ) const;
+
+ // Subscripting operator to set or get a column
+ // Returns a non-const reference, so the column can be assigned through it.
+ inline Vector3 & operator []( int col );
+
+ // Subscripting operator to get a column
+ // Const overload; the column is returned by value.
+ inline const Vector3 operator []( int col ) const;
+
+ // Set the element of a 3x4 transformation matrix referred to by column and row indices
+ // Note the argument order: column index first, then row index.
+ inline Transform3 & setElem( int col, int row, float val );
+
+ // Get the element of a 3x4 transformation matrix referred to by column and row indices
+ // Note the argument order: column index first, then row index.
+ inline float getElem( int col, int row ) const;
+
+ // Multiply a 3x4 transformation matrix by a 3-D vector
+ // Returns a Vector3. NOTE(review): presumably the translation column is not applied to vectors -- confirm.
+ inline const Vector3 operator *( const Vector3 & vec ) const;
+
+ // Multiply a 3x4 transformation matrix by a 3-D point
+ // Returns a Point3. NOTE(review): presumably the translation column is applied to points -- confirm.
+ inline const Point3 operator *( const Point3 & pnt ) const;
+
+ // Multiply two 3x4 transformation matrices
+ // This transform is the left operand and tfrm is the right operand.
+ inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+ // Perform compound assignment and multiplication by a 3x4 transformation matrix
+ // NOTE(review): presumably equivalent to *this = *this * tfrm -- confirm in the definition.
+ inline Transform3 & operator *=( const Transform3 & tfrm );
+
+ // Construct an identity 3x4 transformation matrix
+ // Static factory; the transform is returned by value.
+ static inline const Transform3 identity( );
+
+ // Construct a 3x4 transformation matrix to rotate around the x axis
+ //
+ static inline const Transform3 rotationX( float radians );
+
+ // Construct a 3x4 transformation matrix to rotate around the y axis
+ //
+ static inline const Transform3 rotationY( float radians );
+
+ // Construct a 3x4 transformation matrix to rotate around the z axis
+ //
+ static inline const Transform3 rotationZ( float radians );
+
+ // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ // All three angles are passed together in one Vector3.
+ static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+ // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ //
+ static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
+
+ // Construct a rotation matrix from a unit-length quaternion
+ //
+ static inline const Transform3 rotation( const Quat & unitQuat );
+
+ // Construct a 3x4 transformation matrix to perform scaling
+ // The scale factors are passed together in one Vector3.
+ static inline const Transform3 scale( const Vector3 & scaleVec );
+
+ // Construct a 3x4 transformation matrix to perform translation
+ //
+ static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; tfrm is taken by const reference.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// Note the argument order: scale vector first, then the transform (the reverse of appendScale).
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+//
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+//
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE:
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// See orthoInverse below for a variant restricted to an orthogonal upper-left 3x3 submatrix.
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+//
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE(review): presumably yields tfrm1 when select1 is true and tfrm0 otherwise -- confirm in the definition.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h b/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h
new file mode 100644
index 00000000000..5a4944a5500
--- /dev/null
+++ b/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h
@@ -0,0 +1,80 @@
+/*
+ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms,
+ with or without modification, are permitted provided that the
+ following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Sony Computer Entertainment Inc nor the names
+ of its contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef AOS_VECTORMATH_BULLET_CONVERT_H
+#define AOS_VECTORMATH_BULLET_CONVERT_H
+
+
+///only use a system-wide vectormath_aos.h on CELLOS_LV2 or if USE_SYSTEM_VECTORMATH
+#if defined(__CELLOS_LV2__) || defined (USE_SYSTEM_VECTORMATH)
+#include <vectormath_aos.h>
+#else
+#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
+#endif
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btMatrix3x3.h"
+
+///Build a Vectormath 3-D vector from the x, y and z components of a Bullet vector
+inline Vectormath::Aos::Vector3 getVmVector3(const btVector3& bulletVec)
+{
+ const Vectormath::Aos::Vector3 vmVec(bulletVec.getX(), bulletVec.getY(), bulletVec.getZ());
+ return vmVec;
+}
+
+///Build a Bullet vector from the x, y and z components of a Vectormath 3-D vector
+inline btVector3 getBtVector3(const Vectormath::Aos::Vector3& vmVec)
+{
+ const btVector3 bulletVec(vmVec.getX(), vmVec.getY(), vmVec.getZ());
+ return bulletVec;
+}
+///Build a Bullet vector from the x, y and z components of a Vectormath 3-D point
+inline btVector3 getBtVector3(const Vectormath::Aos::Point3& vmVec)
+{
+ const btVector3 bulletVec(vmVec.getX(), vmVec.getY(), vmVec.getZ());
+ return bulletVec;
+}
+
+///Build a Vectormath quaternion from the x, y, z and w components of a Bullet quaternion
+inline Vectormath::Aos::Quat getVmQuat(const btQuaternion& bulletQuat)
+{
+ return Vectormath::Aos::Quat(
+ bulletQuat.getX(),
+ bulletQuat.getY(),
+ bulletQuat.getZ(),
+ bulletQuat.getW());
+}
+
+///Build a Bullet quaternion from the x, y, z and w components of a Vectormath quaternion
+inline btQuaternion getBtQuat(const Vectormath::Aos::Quat& vmQuat)
+{
+ const btQuaternion bulletQuat(vmQuat.getX(), vmQuat.getY(), vmQuat.getZ(), vmQuat.getW());
+ return bulletQuat;
+}
+
+///Build a Vectormath 3x3 matrix from a Bullet 3x3 matrix, one column at a time
+inline Vectormath::Aos::Matrix3 getVmMatrix3(const btMatrix3x3& btMat)
+{
+ // Each Bullet column is converted with getVmVector3 and passed as the matching column argument.
+ const Vectormath::Aos::Vector3 col0 = getVmVector3(btMat.getColumn(0));
+ const Vectormath::Aos::Vector3 col1 = getVmVector3(btMat.getColumn(1));
+ const Vectormath::Aos::Vector3 col2 = getVmVector3(btMat.getColumn(2));
+ return Vectormath::Aos::Matrix3(col0, col1, col2);
+}
+
+
+#endif //AOS_VECTORMATH_BULLET_CONVERT_H