From ed59822857de7e7b41b33b79c306f5e9b8755c62 Mon Sep 17 00:00:00 2001 From: Joshua Leung Date: Thu, 17 Jun 2010 02:42:43 +0000 Subject: == SoC Bullet - Bullet Upgrade to 2.76 == Updated Blender's Bullet to 2.76 in this branch only. This update was done by: 1) deleting the contents of the existing extern/bullet2/src directory (leaving the .svn folder in place), 2) copy/pasting the contents of the bullet/src directory (from unzipped Bullet archive) into this newly cleared folder. Hopefully there aren't any patches that are still needed from the Bullet we had in source. --- Note: I didn't use Moguri's patch, since that was giving me compile errors with headers not being able to be found. [[Split portion of a mixed commit.]] --- extern/bullet2/BulletMultiThreaded/CMakeLists.txt | 92 + .../bullet2/BulletMultiThreaded/Makefile.original | 187 ++ extern/bullet2/BulletMultiThreaded/MiniCL.cpp | 517 ++++++ .../BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp | 74 + .../BulletMultiThreaded/MiniCLTask/MiniCLTask.h | 62 + .../BulletMultiThreaded/MiniCLTaskScheduler.cpp | 519 ++++++ .../BulletMultiThreaded/MiniCLTaskScheduler.h | 194 ++ .../BulletMultiThreaded/PlatformDefinitions.h | 84 + .../BulletMultiThreaded/PosixThreadSupport.cpp | 249 +++ .../BulletMultiThreaded/PosixThreadSupport.h | 124 ++ .../bullet2/BulletMultiThreaded/PpuAddressSpace.h | 20 + .../SequentialThreadSupport.cpp | 93 + .../BulletMultiThreaded/SequentialThreadSupport.h | 92 + .../SpuCollisionObjectWrapper.cpp | 48 + .../SpuCollisionObjectWrapper.h | 40 + .../SpuCollisionTaskProcess.cpp | 318 ++++ .../BulletMultiThreaded/SpuCollisionTaskProcess.h | 163 ++ .../SpuContactManifoldCollisionAlgorithm.cpp | 69 + .../SpuContactManifoldCollisionAlgorithm.h | 120 ++ .../bullet2/BulletMultiThreaded/SpuDoubleBuffer.h | 110 ++ extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp | 211 +++ extern/bullet2/BulletMultiThreaded/SpuFakeDma.h | 135 ++ .../SpuGatheringCollisionDispatcher.cpp | 251 +++ .../SpuGatheringCollisionDispatcher.h | 72 + .../BulletMultiThreaded/SpuLibspe2Support.cpp | 257 +++ .../BulletMultiThreaded/SpuLibspe2Support.h | 180 ++ .../SpuNarrowPhaseCollisionTask/Box.h | 172 ++ .../SpuCollisionShapes.cpp | 302 ++++ .../SpuCollisionShapes.h | 126 ++ .../SpuContactResult.cpp | 242 +++ .../SpuNarrowPhaseCollisionTask/SpuContactResult.h | 106 ++ .../SpuConvexPenetrationDepthSolver.h | 51 + .../SpuGatheringCollisionTask.cpp | 1381 +++++++++++++++ .../SpuGatheringCollisionTask.h | 140 ++ .../SpuNarrowPhaseCollisionTask/SpuLocalSupport.h | 19 + .../SpuMinkowskiPenetrationDepthSolver.cpp | 348 ++++ .../SpuMinkowskiPenetrationDepthSolver.h | 48 + .../SpuPreferredPenetrationDirections.h | 70 + .../SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp | 1155 ++++++++++++ .../SpuNarrowPhaseCollisionTask/boxBoxDistance.h | 66 + .../SpuNarrowPhaseCollisionTask/readme.txt | 1 + .../SpuSampleTask/SpuSampleTask.cpp | 214 +++ .../SpuSampleTask/SpuSampleTask.h | 54 + .../BulletMultiThreaded/SpuSampleTask/readme.txt | 1 + .../BulletMultiThreaded/SpuSampleTaskProcess.cpp | 222 +++ .../BulletMultiThreaded/SpuSampleTaskProcess.h | 153 ++ extern/bullet2/BulletMultiThreaded/SpuSync.h | 148 ++ .../BulletMultiThreaded/Win32ThreadSupport.cpp | 262 +++ .../BulletMultiThreaded/Win32ThreadSupport.h | 132 ++ .../BulletMultiThreaded/btGpu3DGridBroadphase.cpp | 590 ++++++ .../BulletMultiThreaded/btGpu3DGridBroadphase.h | 138 ++ .../btGpu3DGridBroadphaseSharedCode.h | 430 +++++ .../btGpu3DGridBroadphaseSharedDefs.h | 61 + .../btGpu3DGridBroadphaseSharedTypes.h | 67 + extern/bullet2/BulletMultiThreaded/btGpuDefines.h | 211 +++ .../BulletMultiThreaded/btGpuUtilsSharedCode.h | 55 + .../BulletMultiThreaded/btGpuUtilsSharedDefs.h | 52 + .../btParallelConstraintSolver.cpp | 74 + .../btParallelConstraintSolver.h | 42 + .../btThreadSupportInterface.cpp | 22 + .../BulletMultiThreaded/btThreadSupportInterface.h | 50 + .../vectormath/scalar/cpp/boolInVec.h | 225 +++ .../vectormath/scalar/cpp/floatInVec.h | 343 ++++ .../vectormath/scalar/cpp/mat_aos.h | 1630 +++++++++++++++++ .../vectormath/scalar/cpp/quat_aos.h | 433 +++++ .../vectormath/scalar/cpp/vec_aos.h | 1426 +++++++++++++++ .../vectormath/scalar/cpp/vectormath_aos.h | 1872 ++++++++++++++++++++ .../BulletMultiThreaded/vectormath2bullet.h | 80 + 68 files changed, 17495 insertions(+) create mode 100644 extern/bullet2/BulletMultiThreaded/CMakeLists.txt create mode 100644 extern/bullet2/BulletMultiThreaded/Makefile.original create mode 100644 extern/bullet2/BulletMultiThreaded/MiniCL.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h create mode 100644 extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h create mode 100644 extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h create mode 100644 extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h create mode 100644 extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h create mode 100644 extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuFakeDma.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt create mode 100644 extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt create mode 100644 extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h create mode 100644 extern/bullet2/BulletMultiThreaded/SpuSync.h create mode 100644 extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpuDefines.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h create mode 100644 extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h create mode 100644 extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h create mode 100644 extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp create mode 100644 extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h create mode 100644 extern/bullet2/BulletMultiThreaded/vectormath2bullet.h (limited to 'extern/bullet2/BulletMultiThreaded') diff --git a/extern/bullet2/BulletMultiThreaded/CMakeLists.txt b/extern/bullet2/BulletMultiThreaded/CMakeLists.txt new file mode 100644 index 00000000000..90f970afbfd --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/CMakeLists.txt @@ -0,0 +1,92 @@ +INCLUDE_DIRECTORIES( + ${BULLET_PHYSICS_SOURCE_DIR}/src + ${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/vectormath/scalar/cpp +) + +ADD_LIBRARY(BulletMultiThreaded + PlatformDefinitions.h + SpuFakeDma.cpp + SpuFakeDma.h + SpuDoubleBuffer.h + SpuLibspe2Support.cpp + SpuLibspe2Support.h + btThreadSupportInterface.cpp + btThreadSupportInterface.h + + Win32ThreadSupport.cpp + Win32ThreadSupport.h + PosixThreadSupport.cpp + PosixThreadSupport.h + SequentialThreadSupport.cpp + SequentialThreadSupport.h + SpuSampleTaskProcess.h + SpuSampleTaskProcess.cpp + + SpuCollisionObjectWrapper.cpp + SpuCollisionObjectWrapper.h + SpuCollisionTaskProcess.h + SpuCollisionTaskProcess.cpp + SpuGatheringCollisionDispatcher.h + SpuGatheringCollisionDispatcher.cpp + SpuContactManifoldCollisionAlgorithm.cpp + SpuContactManifoldCollisionAlgorithm.h + + btParallelConstraintSolver.cpp + btParallelConstraintSolver.h + + SpuNarrowPhaseCollisionTask/Box.h + SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp + SpuNarrowPhaseCollisionTask/boxBoxDistance.h + SpuNarrowPhaseCollisionTask/SpuContactResult.cpp + SpuNarrowPhaseCollisionTask/SpuContactResult.h + SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp + SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h + SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h + SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h + SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp + SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h + SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp + SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h + + +#Some GPU related stuff, mainly CUDA and perhaps OpenCL + btGpu3DGridBroadphase.cpp + btGpu3DGridBroadphase.h + btGpu3DGridBroadphaseSharedCode.h + btGpu3DGridBroadphaseSharedDefs.h + btGpu3DGridBroadphaseSharedTypes.h + btGpuDefines.h + btGpuUtilsSharedCode.h + btGpuUtilsSharedDefs.h + +#MiniCL provides a small subset of OpenCL + MiniCL.cpp + MiniCLTaskScheduler.cpp + MiniCLTaskScheduler.h + MiniCLTask/MiniCLTask.cpp + MiniCLTask/MiniCLTask.h + ../MiniCL/cl.h + ../MiniCL/cl_gl.h + ../MiniCL/cl_platform.h + ../MiniCL/cl_MiniCL_Defs.h +) + +IF (BUILD_SHARED_LIBS) + TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletCollision) +ENDIF (BUILD_SHARED_LIBS) + +IF (INSTALL_LIBS) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + #INSTALL of other files requires CMake 2.6 + IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + IF(INSTALL_EXTRA_LIBS) + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletMultiThreaded DESTINATION .) + ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletMultiThreaded DESTINATION lib) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (INSTALL_EXTRA_LIBS) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) \ No newline at end of file diff --git a/extern/bullet2/BulletMultiThreaded/Makefile.original b/extern/bullet2/BulletMultiThreaded/Makefile.original new file mode 100644 index 00000000000..1edc9811f8d --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/Makefile.original @@ -0,0 +1,187 @@ +__ARCH_BITS__ := 32 + +# define macros +NARROWPHASEDIR=./SpuNarrowPhaseCollisionTask +SPU_TASKFILE=$(NARROWPHASEDIR)/SpuGatheringCollisionTask + +IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi) + +ifeq ("$(IBM_CELLSDK_VERSION)","3.0") + CELL_TOP ?= /opt/cell/sdk + CELL_SYSROOT := /opt/cell/sysroot +else + CELL_TOP ?= /opt/ibm/cell-sdk/prototype + CELL_SYSROOT := $(CELL_TOP)/sysroot +endif + + +USE_CCACHE=ccache +RM=rm -f +OUTDIR=./out +DEBUGFLAG=-DNDEBUG +LIBOUTDIR=../../lib/ibmsdk +COLLISIONDIR=../../src/BulletCollision +MATHDIR=../../src/LinearMath +ARCHITECTUREFLAG=-m$(__ARCH_BITS__) +ifeq "$(__ARCH_BITS__)" "64" + SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ -DUSE_ADDR64 +else + SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ +endif + +SPU_DEFFLAGS+=-DUSE_PE_BOX_BOX + +SPU_GCC=$(USE_CCACHE) /usr/bin/spu-gcc +SPU_INCLUDEDIR= -Ivectormath/scalar/cpp -I. -I$(CELL_SYSROOT)/usr/spu/include -I../../src -I$(NARROWPHASEDIR) +#SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -Os -c -include spu_intrinsics.h -include stdbool.h +SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -O3 -mbranch-hints -fomit-frame-pointer -ftree-vectorize -finline-functions -ftree-vect-loop-version -ftree-loop-optimize -ffast-math -fno-rtti -fno-exceptions -c -include spu_intrinsics.h -include stdbool.h + +SPU_LFLAGS= -Wl,-N +SPU_LIBRARIES=-lstdc++ +SPU_EMBED=/usr/bin/ppu-embedspu +SPU_AR=/usr/bin/ar +SYMBOLNAME=spu_program + +ifeq "$(__ARCH_BITS__)" "64" + PPU_DEFFLAGS= -DUSE_LIBSPE2 -DUSE_ADDR64 + PPU_GCC=$(USE_CCACHE) /usr/bin/ppu-gcc +else + PPU_DEFFLAGS= -DUSE_LIBSPE2 + PPU_GCC=$(USE_CCACHE) /usr/bin/ppu32-gcc +endif + +PPU_CFLAGS= $(ARCHITECTUREFLAG) $(DEBUGFLAG) -W -Wall -Winline -O3 -c -mabi=altivec -maltivec -include altivec.h -include stdbool.h +PPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/include -I../../src -I$(NARROWPHASEDIR) +PPU_LFLAGS= $(ARCHITECTUREFLAG) -Wl,-m,elf$(__ARCH_BITS__)ppc +PPU_LIBRARIES= -lstdc++ -lsupc++ -lgcc -lgcov -lspe2 -lpthread -L../../lib/ibmsdk -lbulletcollision -lbulletdynamics -lbulletmath -L$(CELL_SYSROOT)/usr/lib$(__ARCH_BITS__) -R$(CELL_SYSROOT)/usr/lib +PPU_AR=/usr/bin/ar + +MakeOut : +# rm -f -R $(OUTDIR) ; mkdir $(OUTDIR) + @echo "usage: make spu, make ppu, make all, or make clean" +# SPU +SpuTaskFile : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/SpuTaskFile.o $(SPU_TASKFILE).cpp + +boxBoxDistance : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +SpuFakeDma : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +SpuContactManifoldCollisionAlgorithm_spu : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuContactManifoldCollisionAlgorithm.cpp + +SpuCollisionShapes : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +SpuContactResult : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +#SpuGatheringCollisionTask : MakeOut +# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +SpuGjkPairDetector: MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +SpuMinkowskiPenetrationDepthSolver : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +SpuVoronoiSimplexSolver : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp + +#SpuLibspe2Support_spu : MakeOut +# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuLibspe2Support.cpp + +## SPU-Bullet +btPersistentManifold : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/NarrowPhaseCollision/$@.cpp + +btOptimizedBvh : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp + +btCollisionObject : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionDispatch/$@.cpp + +btTriangleCallback : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp + +btTriangleIndexVertexArray : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp + +btStridingMeshInterface : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp + +btAlignedAllocator : MakeOut + $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(MATHDIR)/$@.cpp + + +# PPU +SpuGatheringCollisionDispatcher : MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +SequentialThreadSupport: MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +SpuLibspe2Support: MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +btThreadSupportInterface: MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +SpuCollisionTaskProcess : MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +SpuContactManifoldCollisionAlgorithm : MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + +SpuSampleTaskProcess : MakeOut + $(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp + + + +spu : boxBoxDistance SpuFakeDma SpuContactManifoldCollisionAlgorithm_spu SpuContactResult SpuTaskFile \ + SpuGjkPairDetector SpuMinkowskiPenetrationDepthSolver SpuVoronoiSimplexSolver SpuCollisionShapes \ + btPersistentManifold btOptimizedBvh btCollisionObject btTriangleCallback btTriangleIndexVertexArray \ + btStridingMeshInterface btAlignedAllocator + $(SPU_GCC) -o $(OUTDIR)/spuCollision.elf \ + $(OUTDIR)/SpuTaskFile.o \ + $(OUTDIR)/SpuFakeDma.o \ + $(OUTDIR)/boxBoxDistance.o \ + $(OUTDIR)/SpuContactManifoldCollisionAlgorithm_spu.o \ + $(OUTDIR)/SpuContactResult.o \ + $(OUTDIR)/SpuCollisionShapes.o \ + $(OUTDIR)/SpuGjkPairDetector.o \ + $(OUTDIR)/SpuMinkowskiPenetrationDepthSolver.o \ + $(OUTDIR)/SpuVoronoiSimplexSolver.o \ + $(OUTDIR)/btPersistentManifold.o \ + $(OUTDIR)/btTriangleCallback.o \ + $(OUTDIR)/btTriangleIndexVertexArray.o \ + $(OUTDIR)/btStridingMeshInterface.o \ + $(OUTDIR)/btAlignedAllocator.o \ + $(SPU_LFLAGS) $(SPU_LIBRARIES) + +spu-embed : spu + $(SPU_EMBED) $(ARCHITECTUREFLAG) $(SYMBOLNAME) $(OUTDIR)/spuCollision.elf $(OUTDIR)/$@.o + $(SPU_AR) -qcs $(LIBOUTDIR)/libspu.a $(OUTDIR)/$@.o + + + +ppu : SpuGatheringCollisionDispatcher SpuCollisionTaskProcess btThreadSupportInterface \ + SpuLibspe2Support SpuContactManifoldCollisionAlgorithm SpuSampleTaskProcess + $(PPU_AR) -qcs $(LIBOUTDIR)/bulletmultithreaded.a \ + $(OUTDIR)/SpuCollisionTaskProcess.o \ + $(OUTDIR)/SpuSampleTaskProcess.o \ + $(OUTDIR)/SpuGatheringCollisionDispatcher.o \ + $(OUTDIR)/SpuLibspe2Support.o \ + $(OUTDIR)/btThreadSupportInterface.o \ + $(OUTDIR)/SpuContactManifoldCollisionAlgorithm.o + +all : spu-embed ppu + +clean: + $(RM) $(OUTDIR)/* ; $(RM) $(LIBOUTDIR)/libspu.a ; $(RM) $(LIBOUTDIR)/bulletmultithreaded.a + + + + diff --git a/extern/bullet2/BulletMultiThreaded/MiniCL.cpp b/extern/bullet2/BulletMultiThreaded/MiniCL.cpp new file mode 100644 index 00000000000..b7f5a699312 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/MiniCL.cpp @@ -0,0 +1,517 @@ +/* + Copyright (C) 2010 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + + +#include "MiniCL/cl.h" +#define __PHYSICS_COMMON_H__ 1 +#ifdef _WIN32 +#include "BulletMultiThreaded/Win32ThreadSupport.h" +#endif + +#include "BulletMultiThreaded/SequentialThreadSupport.h" +#include "MiniCLTaskScheduler.h" +#include "MiniCLTask/MiniCLTask.h" +#include "LinearMath/btMinMax.h" + +//#define DEBUG_MINICL_KERNELS 1 + + + + +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( + cl_device_id device , + cl_device_info param_name , + size_t param_value_size , + void * param_value , + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + + switch (param_name) + { + case CL_DEVICE_NAME: + { + char deviceName[] = "CPU"; + unsigned int nameLen = strlen(deviceName)+1; + assert(param_value_size>strlen(deviceName)); + if (nameLen < param_value_size) + { + const char* cpuName = "CPU"; + sprintf((char*)param_value,"%s",cpuName); + } else + { + printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); + } + break; + } + case CL_DEVICE_TYPE: + { + if (param_value_size>=sizeof(cl_device_type)) + { + cl_device_type* deviceType = (cl_device_type*)param_value; + *deviceType = CL_DEVICE_TYPE_CPU; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); + } + break; + } + case CL_DEVICE_MAX_COMPUTE_UNITS: + { + if (param_value_size>=sizeof(cl_uint)) + { + cl_uint* numUnits = (cl_uint*)param_value; + *numUnits= 4; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + } + + break; + } + case CL_DEVICE_MAX_WORK_ITEM_SIZES: + { + size_t workitem_size[3]; + + if (param_value_size>=sizeof(workitem_size)) + { + size_t* workItemSize = (size_t*)param_value; + workItemSize[0] = 64; + workItemSize[1] = 24; + workItemSize[2] = 16; + } else + { + printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + } + break; + } + case CL_DEVICE_MAX_CLOCK_FREQUENCY: + { + cl_uint* clock_frequency = (cl_uint*)param_value; + *clock_frequency = 3*1024; + break; + } + default: + { + printf("error: unsupported param_name:%d\n",param_name); + } + } + + + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + + + +CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0 +{ + return 0; +} + + +// Enqueued Commands APIs +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue , + cl_mem buffer , + cl_bool /* blocking_read */, + size_t offset , + size_t cb , + void * ptr , + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + + ///wait for all work items to be completed + scheduler->flush(); + + memcpy(ptr,(char*)buffer + offset,cb); + return 0; +} + + +CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + + return 0; +} + + +// Program Object APIs +CL_API_ENTRY cl_program +clCreateProgramWithSource(cl_context context , + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + *errcode_ret = CL_SUCCESS; + return (cl_program)context; +} + +CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue , + cl_mem buffer , + cl_bool /* blocking_read */, + size_t offset, + size_t cb , + const void * ptr , + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + + ///wait for all work items to be completed + scheduler->flush(); + + memcpy((char*)buffer + offset, ptr,cb); + return 0; +} + +CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue) +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue; + ///wait for all work items to be completed + scheduler->flush(); + return 0; +} + + +CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel clKernel , + cl_uint work_dim , + const size_t * /* global_work_offset */, + const size_t * global_work_size , + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0 +{ + + + MiniCLKernel* kernel = (MiniCLKernel*) clKernel; + for (unsigned int ii=0;iim_scheduler->getMaxNumOutstandingTasks(); + int numWorkItems = global_work_size[ii]; + +// //at minimum 64 work items per task +// int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask); + int numWorkItemsPerTask = numWorkItems / maxTask; + if (!numWorkItemsPerTask) numWorkItemsPerTask = 1; + + for (int t=0;tm_scheduler->issueTask(t, endIndex, kernel); + t = endIndex; + } + } +/* + + void* bla = 0; + + scheduler->issueTask(bla,2,3); + scheduler->flush(); + + */ + + return 0; +} + +#define LOCAL_BUF_SIZE 32768 +static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16]; +static int* spLocalBufCurr = NULL; +static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call +static void* localBufMalloc(int size) +{ + int size16 = (size + 15) >> 4; // in 16-byte units + if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE) + { // reset + spLocalBufCurr = sLocalMemBuf; + while((long)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes + sLocalBufUsed = 0; + } + void* ret = spLocalBufCurr; + spLocalBufCurr += size16 * 4; + sLocalBufUsed += size; + return ret; +} + + + +CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel , + cl_uint arg_index , + size_t arg_size , + const void * arg_value ) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLKernel* kernel = (MiniCLKernel* ) clKernel; + btAssert(arg_size <= MINICL_MAX_ARGLENGTH); + if (arg_index>MINI_CL_MAX_ARG) + { + printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG); + } else + { +// if (arg_size>=MINICL_MAX_ARGLENGTH) + if (arg_size != MINICL_MAX_ARGLENGTH) + { + printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH); + } + else + { + if(arg_value == NULL) + { // this is only for __local memory qualifier + void* ptr = localBufMalloc(arg_size); + kernel->m_argData[arg_index] = ptr; + } + else + { + memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size); + } + kernel->m_argSizes[arg_index] = arg_size; + if(arg_index >= kernel->m_numArgs) + { + kernel->m_numArgs = arg_index + 1; + kernel->updateLauncher(); + } + } + } + return 0; +} + +// Kernel Object APIs +CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program , + const char * kernel_name , + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program; + MiniCLKernel* kernel = new MiniCLKernel(); + int nameLen = strlen(kernel_name); + if(nameLen >= MINI_CL_MAX_KERNEL_NAME) + { + *errcode_ret = CL_INVALID_KERNEL_NAME; + return NULL; + } + strcpy(kernel->m_name, kernel_name); + kernel->m_numArgs = 0; + + //kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name); + //if (kernel->m_kernelProgramCommandId>=0) + //{ + // *errcode_ret = CL_SUCCESS; + //} else + //{ + // *errcode_ret = CL_INVALID_KERNEL_NAME; + //} + kernel->m_scheduler = scheduler; + if(kernel->registerSelf() == NULL) + { + *errcode_ret = CL_INVALID_KERNEL_NAME; + return NULL; + } + else + { + *errcode_ret = CL_SUCCESS; + } + + return (cl_kernel)kernel; + +} + + +CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (*pfn_notify)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0 +{ + return CL_SUCCESS; +} + +CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context , + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + return (cl_program)context; +} + + +// Memory Object APIs +CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, + cl_mem_flags flags , + size_t size, + void * host_ptr , + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + cl_mem buf = (cl_mem)malloc(size); + if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr) + { + memcpy(buf,host_ptr,size); + } + *errcode_ret = 0; + return buf; +} + +// Command Queue APIs +CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context , + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + *errcode_ret = 0; + return (cl_command_queue) context; +} + +extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, + cl_context_info param_name , + size_t param_value_size , + void * param_value, + size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + + switch (param_name) + { + case CL_CONTEXT_DEVICES: + { + if (!param_value_size) + { + *param_value_size_ret = 13; + } else + { + const char* testName = "MiniCL_Test."; + sprintf((char*)param_value,"%s",testName); + } + break; + }; + default: + { + printf("unsupported\n"); + } + } + + return 0; +} + +CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 +{ + int maxNumOutstandingTasks = 4; +// int maxNumOutstandingTasks = 2; +// int maxNumOutstandingTasks = 1; + gMiniCLNumOutstandingTasks = maxNumOutstandingTasks; + const int maxNumOfThreadSupports = 8; + static int sUniqueThreadSupportIndex = 0; + static char* sUniqueThreadSupportName[maxNumOfThreadSupports] = + { + "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" + }; + +#ifdef DEBUG_MINICL_KERNELS + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); +#else + +#if _WIN32 + btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports); + Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( +// "MiniCL", + sUniqueThreadSupportName[sUniqueThreadSupportIndex++], + processMiniCLTask, //processCollisionTask, + createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory, + maxNumOutstandingTasks)); +#else + ///todo: add posix thread support for other platforms + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); +#endif + +#endif //DEBUG_MINICL_KERNELS + + + MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); + + *errcode_ret = 0; + return (cl_context)scheduler; +} + +CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0 +{ + + MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context; + + btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface(); + delete scheduler; + delete threadSupport; + + return 0; +} +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0 +{ + return CL_SUCCESS; +} + + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel , + cl_device_id /* device */, + cl_kernel_work_group_info wgi/* param_name */, + size_t sz /* param_value_size */, + void * ptr /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 +{ + if((wgi == CL_KERNEL_WORK_GROUP_SIZE) + &&(sz == sizeof(int)) + &&(ptr != NULL)) + { + MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel; + MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler; + *((int*)ptr) = scheduler->getMaxNumOutstandingTasks(); + return CL_SUCCESS; + } + else + { + return CL_INVALID_VALUE; + } +} diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp new file mode 100644 index 00000000000..babb1d24af5 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp @@ -0,0 +1,74 @@ +/* +Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + + +#include "MiniCLTask.h" +#include "BulletMultiThreaded/PlatformDefinitions.h" +#include "BulletMultiThreaded/SpuFakeDma.h" +#include "LinearMath/btMinMax.h" +#include "MiniCLTask.h" +#include "BulletMultiThreaded/MiniCLTaskScheduler.h" + + +#ifdef __SPU__ +#include +#else +#include +#define spu_printf printf +#endif + +int gMiniCLNumOutstandingTasks = 0; + +struct MiniCLTask_LocalStoreMemory +{ + +}; + + +//-- MAIN METHOD +void processMiniCLTask(void* userPtr, void* lsMemory) +{ + // BT_PROFILE("processSampleTask"); + + MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory; + + MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr; + MiniCLTaskDesc& taskDesc = *taskDescPtr; + + for (unsigned int i=taskDesc.m_firstWorkUnit;im_launcher(&taskDesc, i); + } + +// printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit); + +} + + +#if defined(__CELLOS_LV2__) || defined (LIBSPE2) + +ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory gLocalStoreMemory); + +void* createMiniCLLocalStoreMemory() +{ + return &gLocalStoreMemory; +} +#else +void* createMiniCLLocalStoreMemory() +{ + return new MiniCLTask_LocalStoreMemory; +}; + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h new file mode 100644 index 00000000000..7e78be0855e --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h @@ -0,0 +1,62 @@ +/* +Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef MINICL__TASK_H +#define MINICL__TASK_H + +#include "BulletMultiThreaded/PlatformDefinitions.h" +#include "LinearMath/btScalar.h" + +#include "LinearMath/btAlignedAllocator.h" + + +#define MINICL_MAX_ARGLENGTH (sizeof(void*)) +#define MINI_CL_MAX_ARG 16 +#define MINI_CL_MAX_KERNEL_NAME 256 + +struct MiniCLKernel; + +ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + MiniCLTaskDesc() + { + for (int i=0;i + +#ifdef __SPU__ + + + +void SampleThreadFunc(void* userPtr,void* lsMemory) +{ + //do nothing + printf("hello world\n"); +} + + +void* SamplelsMemoryFunc() +{ + //don't create local store memory, just return 0 + return 0; +} + + +#else + + +#include "BulletMultiThreaded/btThreadSupportInterface.h" + +//# include "SPUAssert.h" +#include + +#include "MiniCL/cl_platform.h" + +extern "C" { + extern char SPU_SAMPLE_ELF_SYMBOL[]; +} + + +MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks) +:m_threadInterface(threadInterface), +m_maxNumOutstandingTasks(maxNumOutstandingTasks) +{ + + m_taskBusy.resize(m_maxNumOutstandingTasks); + m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks); + + m_kernels.resize(0); + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + + m_initialized = false; + + m_threadInterface->startSPU(); + + +} + +MiniCLTaskScheduler::~MiniCLTaskScheduler() +{ + m_threadInterface->stopSPU(); + +} + + + +void MiniCLTaskScheduler::initialize() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("MiniCLTaskScheduler::initialize()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + m_initialized = true; + +} + + +void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel) +{ + +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d\)n", m_currentTask); +#endif //DEBUG_SPU_TASK_SCHEDULING + + m_taskBusy[m_currentTask] = true; + m_numBusyTasks++; + + MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask]; + { + // send task description in event message + taskDesc.m_firstWorkUnit = firstWorkUnit; + taskDesc.m_lastWorkUnit = lastWorkUnit; + taskDesc.m_kernel = kernel; + //some bookkeeping to recognize finished tasks + taskDesc.m_taskId = m_currentTask; + +// for (int i=0;im_numArgs; i++) + { + taskDesc.m_argSizes[i] = kernel->m_argSizes[i]; + if (taskDesc.m_argSizes[i]) + { + taskDesc.m_argData[i] = kernel->m_argData[i]; +// memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]); + } + } + } + + + m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask); + + // if all tasks busy, wait for spu event to clear the task. + + if (m_numBusyTasks >= m_maxNumOutstandingTasks) + { + unsigned int taskId; + unsigned int outputSize; + + for (int i=0;iwaitForResponse(&taskId, &outputSize); + + //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize); + + postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + // find new task buffer + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + if (!m_taskBusy[i]) + { + m_currentTask = i; + break; + } + } +} + + +///Optional PPU-size post processing for each task +void MiniCLTaskScheduler::postProcess(int taskId, int outputSize) +{ + +} + + +void MiniCLTaskScheduler::flush() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("\nSpuCollisionTaskProcess::flush()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + + // all tasks are issued, wait for all tasks to be complete + while(m_numBusyTasks > 0) + { +// Consolidating SPU code + unsigned int taskId; + unsigned int outputSize; + + for (int i=0;iwaitForResponse(&taskId, &outputSize); + } + + //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize); + + postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + +} + + + +typedef void (*MiniCLKernelLauncher0)(int); +typedef void (*MiniCLKernelLauncher1)(void*, int); +typedef void (*MiniCLKernelLauncher2)(void*, void*, int); +typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); +typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int); + + +static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_launcher))(guid); +} +static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + guid); +} +static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + guid); +} +static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + guid); +} +static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + guid); +} +static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + guid); +} +static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + guid); +} +static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + guid); +} +static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + guid); +} +static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode))( taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + guid); +} +static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + guid); +} +static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + guid); +} +static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + guid); +} +static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + guid); +} +static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + taskDesc->m_argData[13], + guid); +} +static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + taskDesc->m_argData[13], + taskDesc->m_argData[14], + guid); +} +static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid) +{ + ((MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], + taskDesc->m_argData[1], + taskDesc->m_argData[2], + taskDesc->m_argData[3], + taskDesc->m_argData[4], + taskDesc->m_argData[5], + taskDesc->m_argData[6], + taskDesc->m_argData[7], + taskDesc->m_argData[8], + taskDesc->m_argData[9], + taskDesc->m_argData[10], + taskDesc->m_argData[11], + taskDesc->m_argData[12], + taskDesc->m_argData[13], + taskDesc->m_argData[14], + taskDesc->m_argData[15], + guid); +} + +static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] = +{ + kernelLauncher0, + kernelLauncher1, + kernelLauncher2, + kernelLauncher3, + kernelLauncher4, + kernelLauncher5, + kernelLauncher6, + kernelLauncher7, + kernelLauncher8, + kernelLauncher9, + kernelLauncher10, + kernelLauncher11, + kernelLauncher12, + kernelLauncher13, + kernelLauncher14, + kernelLauncher15, + kernelLauncher16 +}; + +void MiniCLKernel::updateLauncher() +{ + m_launcher = spLauncherList[m_numArgs]; +} + +struct MiniCLKernelDescEntry +{ + void* pCode; + char* pName; +}; +static MiniCLKernelDescEntry spKernelDesc[256]; +static int sNumKernelDesc = 0; + +MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, char* pName) +{ + for(int i = 0; i < sNumKernelDesc; i++) + { + if(!strcmp(pName, spKernelDesc[i].pName)) + { // already registered + btAssert(spKernelDesc[i].pCode == pCode); + return; + } + } + spKernelDesc[sNumKernelDesc].pCode = pCode; + spKernelDesc[sNumKernelDesc].pName = pName; + sNumKernelDesc++; +} + + +MiniCLKernel* MiniCLKernel::registerSelf() +{ + m_scheduler->registerKernel(this); + for(int i = 0; i < sNumKernelDesc; i++) + { + if(!strcmp(m_name, spKernelDesc[i].pName)) + { + m_pCode = spKernelDesc[i].pCode; + return this; + } + } + return NULL; +} + +#endif + + +#endif //USE_SAMPLE_PROCESS diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h new file mode 100644 index 00000000000..3061a713436 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h @@ -0,0 +1,194 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + + +#ifndef MINICL_TASK_SCHEDULER_H +#define MINICL_TASK_SCHEDULER_H + +#include + + +#include "BulletMultiThreaded/PlatformDefinitions.h" + +#include + +#include "LinearMath/btAlignedObjectArray.h" + + +#include "MiniCLTask/MiniCLTask.h" + +//just add your commands here, try to keep them globally unique for debugging purposes +#define CMD_SAMPLE_TASK_COMMAND 10 + +struct MiniCLKernel; + +/// MiniCLTaskScheduler handles SPU processing of collision pairs. +/// When PPU issues a task, it will look for completed task buffers +/// PPU will do postprocessing, dependent on workunit output (not likely) +class MiniCLTaskScheduler +{ + // track task buffers that are being used, and total busy tasks + btAlignedObjectArray m_taskBusy; + btAlignedObjectArray m_spuSampleTaskDesc; + + + btAlignedObjectArray m_kernels; + + + int m_numBusyTasks; + + // the current task and the current entry to insert a new work unit + int m_currentTask; + + bool m_initialized; + + void postProcess(int taskId, int outputSize); + + class btThreadSupportInterface* m_threadInterface; + + int m_maxNumOutstandingTasks; + + + +public: + MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks); + + ~MiniCLTaskScheduler(); + + ///call initialize in the beginning of the frame, before addCollisionPairToTask + void initialize(); + + void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel); + + ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished + void flush(); + + class btThreadSupportInterface* getThreadSupportInterface() + { + return m_threadInterface; + } + + int findProgramCommandIdByName(const char* programName) const; + + int getMaxNumOutstandingTasks() const + { + return m_maxNumOutstandingTasks; + } + + void registerKernel(MiniCLKernel* kernel) + { + m_kernels.push_back(kernel); + } +}; + +typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid); + +struct MiniCLKernel +{ + MiniCLTaskScheduler* m_scheduler; + +// int m_kernelProgramCommandId; + + char m_name[MINI_CL_MAX_KERNEL_NAME]; + unsigned int m_numArgs; + kernelLauncherCB m_launcher; + void* m_pCode; + void updateLauncher(); + MiniCLKernel* registerSelf(); + + void* m_argData[MINI_CL_MAX_ARG]; + int m_argSizes[MINI_CL_MAX_ARG]; +}; + + +#if defined(USE_LIBSPE2) && defined(__SPU__) +////////////////////MAIN///////////////////////////// +#include "../SpuLibspe2Support.h" +#include +#include +#include + +void * SamplelsMemoryFunc(); +void SampleThreadFunc(void* userPtr,void* lsMemory); + +//#define DEBUG_LIBSPE2_MAINLOOP + +int main(unsigned long long speid, addr64 argp, addr64 envp) +{ + printf("SPU is up \n"); + + ATTRIBUTE_ALIGNED128(btSpuStatus status); + ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ; + unsigned int received_message = Spu_Mailbox_Event_Nothing; + bool shutdown = false; + + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + status.m_status = Spu_Status_Free; + status.m_lsMemory.p = SamplelsMemoryFunc(); + + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + while (!shutdown) + { + received_message = spu_read_in_mbox(); + + + + switch(received_message) + { + case Spu_Mailbox_Event_Shutdown: + shutdown = true; + break; + case Spu_Mailbox_Event_Task: + // refresh the status +#ifdef DEBUG_LIBSPE2_MAINLOOP + printf("SPU recieved Task \n"); +#endif //DEBUG_LIBSPE2_MAINLOOP + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + btAssert(status.m_status==Spu_Status_Occupied); + + cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + SampleThreadFunc((void*)&taskDesc, reinterpret_cast (taskDesc.m_mainMemoryPtr) ); + break; + case Spu_Mailbox_Event_Nothing: + default: + break; + } + + // set to status free and wait for next task + status.m_status = Spu_Status_Free; + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + } + return 0; +} +////////////////////////////////////////////////////// +#endif + + + +#endif // MINICL_TASK_SCHEDULER_H + diff --git a/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h b/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h new file mode 100644 index 00000000000..16362f4bce3 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h @@ -0,0 +1,84 @@ +#ifndef TYPE_DEFINITIONS_H +#define TYPE_DEFINITIONS_H + +///This file provides some platform/compiler checks for common definitions + +#ifdef _WIN32 + +typedef union +{ + unsigned int u; + void *p; +} addr64; + +#define USE_WIN32_THREADING 1 + + #if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300) + #else + #endif //__MINGW32__ + + typedef unsigned char uint8_t; +#ifndef __PHYSICS_COMMON_H__ +#ifndef __BT_SKIP_UINT64_H + typedef unsigned long int uint64_t; +#endif //__BT_SKIP_UINT64_H + typedef unsigned int uint32_t; +#endif //__PHYSICS_COMMON_H__ + typedef unsigned short uint16_t; + + #include + #define memalign(alignment, size) malloc(size); + +#include //memcpy + + + + #include + #define spu_printf printf + +#else + #include + #include + #include //for memcpy + +#if defined (__CELLOS_LV2__) + // Playstation 3 Cell SDK +#include + +#else + // posix system + +#define USE_PTHREADS (1) + +#ifdef USE_LIBSPE2 +#include +#define spu_printf printf +#define DWORD unsigned int + + typedef union + { + unsigned long long ull; + unsigned int ui[2]; + void *p; + } addr64; + + +#else + +#include +#define spu_printf printf + +#endif // USE_LIBSPE2 + +#endif //__CELLOS_LV2__ + +#endif + + +/* Included here because we need uint*_t typedefs */ +#include "PpuAddressSpace.h" + +#endif //TYPE_DEFINITIONS_H + + + diff --git a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp new file mode 100644 index 00000000000..540f0dcf106 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp @@ -0,0 +1,249 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include +#include "PosixThreadSupport.h" +#ifdef USE_PTHREADS +#include +#include + +#include "SpuCollisionTaskProcess.h" +#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" + +#define checkPThreadFunction(returnValue) \ + if(0 != returnValue) { \ + printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \ + } + +// The number of threads should be equal to the number of available cores +// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor. + +// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication +// Setup and initialize SPU/CELL/Libspe2 +PosixThreadSupport::PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo) +{ + startThreads(threadConstructionInfo); +} + +// cleanup/shutdown Libspe2 +PosixThreadSupport::~PosixThreadSupport() +{ + stopSPU(); +} + +#if (defined (__APPLE__)) +#define NAMED_SEMAPHORES +#endif + +// this semaphore will signal, if and how many threads are finished with their work +static sem_t* mainSemaphore; + +static sem_t* createSem(const char* baseName) +{ + static int semCount = 0; +#ifdef NAMED_SEMAPHORES + /// Named semaphore begin + char name[32]; + snprintf(name, 32, "/%s-%d-%4.4d", baseName, getpid(), semCount++); + sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0); + if (tempSem != reinterpret_cast(SEM_FAILED)) + { + //printf("Created \"%s\" Semaphore %x\n", name, tempSem); + } + else + { + //printf("Error creating Semaphore %d\n", errno); + exit(-1); + } + /// Named semaphore end +#else + sem_t* tempSem = new sem_t; + checkPThreadFunction(sem_init(tempSem, 0, 0)); +#endif + return tempSem; +} + +static void destroySem(sem_t* semaphore) +{ +#ifdef NAMED_SEMAPHORES + checkPThreadFunction(sem_close(semaphore)); +#else + checkPThreadFunction(sem_destroy(semaphore)); + delete semaphore; +#endif +} + +static void *threadFunction(void *argument) +{ + + PosixThreadSupport::btSpuStatus* status = (PosixThreadSupport::btSpuStatus*)argument; + + + while (1) + { + checkPThreadFunction(sem_wait(status->startSemaphore)); + + void* userPtr = status->m_userPtr; + + if (userPtr) + { + btAssert(status->m_status); + status->m_userThreadFunc(userPtr,status->m_lsMemory); + status->m_status = 2; + checkPThreadFunction(sem_post(mainSemaphore)); + status->threadUsed++; + } else { + //exit Thread + status->m_status = 3; + checkPThreadFunction(sem_post(mainSemaphore)); + printf("Thread with taskId %i exiting\n",status->m_taskId); + break; + } + + } + + printf("Thread TERMINATED\n"); + return 0; + +} + +///send messages to SPUs +void PosixThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId) +{ + /// gMidphaseSPU.sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (uint32_t) &taskDesc); + + ///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished + + + + switch (uiCommand) + { + case CMD_GATHER_AND_PROCESS_PAIRLIST: + { + btSpuStatus& spuStatus = m_activeSpuStatus[taskId]; + btAssert(taskId >= 0); + btAssert(taskId < m_activeSpuStatus.size()); + + spuStatus.m_commandId = uiCommand; + spuStatus.m_status = 1; + spuStatus.m_userPtr = (void*)uiArgument0; + + // fire event to start new task + checkPThreadFunction(sem_post(spuStatus.startSemaphore)); + break; + } + default: + { + ///not implemented + btAssert(0); + } + + }; + + +} + + +///check for messages from SPUs +void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) +{ + ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response + + ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback' + + + btAssert(m_activeSpuStatus.size()); + + // wait for any of the threads to finish + checkPThreadFunction(sem_wait(mainSemaphore)); + + // get at least one thread which has finished + size_t last = -1; + + for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) { + if(2 == m_activeSpuStatus[t].m_status) { + last = t; + break; + } + } + + btSpuStatus& spuStatus = m_activeSpuStatus[last]; + + btAssert(spuStatus.m_status > 1); + spuStatus.m_status = 0; + + // need to find an active spu + btAssert(last >= 0); + + *puiArgument0 = spuStatus.m_taskId; + *puiArgument1 = spuStatus.m_status; +} + + + +void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstructionInfo) +{ + printf("%s creating %i threads.\n", __FUNCTION__, threadConstructionInfo.m_numThreads); + m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads); + + mainSemaphore = createSem("main"); + + for (int i=0;i < threadConstructionInfo.m_numThreads;i++) + { + printf("starting thread %d\n",i); + + btSpuStatus& spuStatus = m_activeSpuStatus[i]; + + spuStatus.startSemaphore = createSem("threadLocal"); + + checkPThreadFunction(pthread_create(&spuStatus.thread, NULL, &threadFunction, (void*)&spuStatus)); + + spuStatus.m_userPtr=0; + + spuStatus.m_taskId = i; + spuStatus.m_commandId = 0; + spuStatus.m_status = 0; + spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc(); + spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; + spuStatus.threadUsed = 0; + + printf("started thread %d \n",i); + + } + +} + +void PosixThreadSupport::startSPU() +{ +} + + +///tell the task scheduler we are done with the SPU tasks +void PosixThreadSupport::stopSPU() +{ + for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) { + btSpuStatus& spuStatus = m_activeSpuStatus[t]; + printf("%s: Thread %i used: %ld\n", __FUNCTION__, int(t), spuStatus.threadUsed); + + destroySem(spuStatus.startSemaphore); + checkPThreadFunction(pthread_cancel(spuStatus.thread)); + } + destroySem(mainSemaphore); + + m_activeSpuStatus.clear(); +} + +#endif // USE_PTHREADS + diff --git a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h new file mode 100644 index 00000000000..7cc49115b4b --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h @@ -0,0 +1,124 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include "LinearMath/btScalar.h" +#include "PlatformDefinitions.h" + +#ifdef USE_PTHREADS //platform specific defines are defined in PlatformDefinitions.h +#include +#include + +#ifndef POSIX_THREAD_SUPPORT_H +#define POSIX_THREAD_SUPPORT_H + +#include "LinearMath/btAlignedObjectArray.h" + +#include "btThreadSupportInterface.h" + + +typedef void (*PosixThreadFunc)(void* userPtr,void* lsMemory); +typedef void* (*PosixlsMemorySetupFunc)(); + +// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication +class PosixThreadSupport : public btThreadSupportInterface +{ +public: + typedef enum sStatus { + STATUS_BUSY, + STATUS_READY, + STATUS_FINISHED + } Status; + + // placeholder, until libspe2 support is there + struct btSpuStatus + { + uint32_t m_taskId; + uint32_t m_commandId; + uint32_t m_status; + + PosixThreadFunc m_userThreadFunc; + void* m_userPtr; //for taskDesc etc + void* m_lsMemory; //initialized using PosixLocalStoreMemorySetupFunc + + pthread_t thread; + sem_t* startSemaphore; + + unsigned long threadUsed; + }; +private: + + btAlignedObjectArray m_activeSpuStatus; +public: + ///Setup and initialize SPU/CELL/Libspe2 + + + + struct ThreadConstructionInfo + { + ThreadConstructionInfo(char* uniqueName, + PosixThreadFunc userThreadFunc, + PosixlsMemorySetupFunc lsMemoryFunc, + int numThreads=1, + int threadStackSize=65535 + ) + :m_uniqueName(uniqueName), + m_userThreadFunc(userThreadFunc), + m_lsMemoryFunc(lsMemoryFunc), + m_numThreads(numThreads), + m_threadStackSize(threadStackSize) + { + + } + + char* m_uniqueName; + PosixThreadFunc m_userThreadFunc; + PosixlsMemorySetupFunc m_lsMemoryFunc; + int m_numThreads; + int m_threadStackSize; + + }; + + PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo); + +///cleanup/shutdown Libspe2 + virtual ~PosixThreadSupport(); + + void startThreads(ThreadConstructionInfo& threadInfo); + + +///send messages to SPUs + virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1); + +///check for messages from SPUs + virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1); + +///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) + virtual void startSPU(); + +///tell the task scheduler we are done with the SPU tasks + virtual void stopSPU(); + + virtual void setNumTasks(int numTasks) {} + + virtual int getNumTasks() const + { + return m_activeSpuStatus.size(); + } +}; + +#endif // POSIX_THREAD_SUPPORT_H + +#endif // USE_PTHREADS diff --git a/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h b/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h new file mode 100644 index 00000000000..f36fdfb3cd7 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h @@ -0,0 +1,20 @@ +#ifndef __PPU_ADDRESS_SPACE_H +#define __PPU_ADDRESS_SPACE_H + + +#ifdef _WIN32 +//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions +#pragma warning (disable: 4311) +#pragma warning (disable: 4312) +#endif //_WIN32 + +#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64) +typedef uint64_t ppu_address_t; +#else + +typedef uint32_t ppu_address_t; + +#endif + +#endif + diff --git a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp new file mode 100644 index 00000000000..4e9c822bbc0 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp @@ -0,0 +1,93 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SequentialThreadSupport.h" + + +#include "SpuCollisionTaskProcess.h" +#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" + +SequentialThreadSupport::SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo) +{ + startThreads(threadConstructionInfo); +} + +///cleanup/shutdown Libspe2 +SequentialThreadSupport::~SequentialThreadSupport() +{ + stopSPU(); +} + +#include + +///send messages to SPUs +void SequentialThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId) +{ + switch (uiCommand) + { + case CMD_GATHER_AND_PROCESS_PAIRLIST: + { + btSpuStatus& spuStatus = m_activeSpuStatus[0]; + spuStatus.m_userPtr=(void*)uiArgument0; + spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory); + } + break; + default: + { + ///not implemented + btAssert(0 && "Not implemented"); + } + + }; + + +} + +///check for messages from SPUs +void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) +{ + btAssert(m_activeSpuStatus.size()); + btSpuStatus& spuStatus = m_activeSpuStatus[0]; + *puiArgument0 = spuStatus.m_taskId; + *puiArgument1 = spuStatus.m_status; +} + +void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& threadConstructionInfo) +{ + m_activeSpuStatus.resize(1); + printf("STS: Not starting any threads\n"); + btSpuStatus& spuStatus = m_activeSpuStatus[0]; + spuStatus.m_userPtr = 0; + spuStatus.m_taskId = 0; + spuStatus.m_commandId = 0; + spuStatus.m_status = 0; + spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc(); + spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; + printf("STS: Created local store at %p for task %s\n", spuStatus.m_lsMemory, threadConstructionInfo.m_uniqueName); +} + +void SequentialThreadSupport::startSPU() +{ +} + +void SequentialThreadSupport::stopSPU() +{ + m_activeSpuStatus.clear(); +} + +void SequentialThreadSupport::setNumTasks(int numTasks) +{ + printf("SequentialThreadSupport::setNumTasks(%d) is not implemented and has no effect\n",numTasks); +} diff --git a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h new file mode 100644 index 00000000000..4256ebd2aa9 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h @@ -0,0 +1,92 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "LinearMath/btScalar.h" +#include "PlatformDefinitions.h" + + +#ifndef SEQUENTIAL_THREAD_SUPPORT_H +#define SEQUENTIAL_THREAD_SUPPORT_H + +#include "LinearMath/btAlignedObjectArray.h" + +#include "btThreadSupportInterface.h" + +typedef void (*SequentialThreadFunc)(void* userPtr,void* lsMemory); +typedef void* (*SequentiallsMemorySetupFunc)(); + + + +///The SequentialThreadSupport is a portable non-parallel implementation of the btThreadSupportInterface +///This is useful for debugging and porting SPU Tasks to other platforms. +class SequentialThreadSupport : public btThreadSupportInterface +{ +public: + struct btSpuStatus + { + uint32_t m_taskId; + uint32_t m_commandId; + uint32_t m_status; + + SequentialThreadFunc m_userThreadFunc; + + void* m_userPtr; //for taskDesc etc + void* m_lsMemory; //initialized using SequentiallsMemorySetupFunc + }; +private: + btAlignedObjectArray m_activeSpuStatus; + btAlignedObjectArray m_completeHandles; +public: + struct SequentialThreadConstructionInfo + { + SequentialThreadConstructionInfo (char* uniqueName, + SequentialThreadFunc userThreadFunc, + SequentiallsMemorySetupFunc lsMemoryFunc + ) + :m_uniqueName(uniqueName), + m_userThreadFunc(userThreadFunc), + m_lsMemoryFunc(lsMemoryFunc) + { + + } + + char* m_uniqueName; + SequentialThreadFunc m_userThreadFunc; + SequentiallsMemorySetupFunc m_lsMemoryFunc; + }; + + SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo); + virtual ~SequentialThreadSupport(); + void startThreads(SequentialThreadConstructionInfo& threadInfo); +///send messages to SPUs + virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1); +///check for messages from SPUs + virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1); +///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) + virtual void startSPU(); +///tell the task scheduler we are done with the SPU tasks + virtual void stopSPU(); + + virtual void setNumTasks(int numTasks); + + virtual int getNumTasks() const + { + return 1; + } + +}; + +#endif //SEQUENTIAL_THREAD_SUPPORT_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp new file mode 100644 index 00000000000..182aa269478 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp @@ -0,0 +1,48 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuCollisionObjectWrapper.h" +#include "BulletCollision/CollisionShapes/btCollisionShape.h" + +SpuCollisionObjectWrapper::SpuCollisionObjectWrapper () +{ +} + +#ifndef __SPU__ +SpuCollisionObjectWrapper::SpuCollisionObjectWrapper (const btCollisionObject* collisionObject) +{ + m_shapeType = collisionObject->getCollisionShape()->getShapeType (); + m_collisionObjectPtr = (ppu_address_t)collisionObject; + m_margin = collisionObject->getCollisionShape()->getMargin (); +} +#endif + +int +SpuCollisionObjectWrapper::getShapeType () const +{ + return m_shapeType; +} + +float +SpuCollisionObjectWrapper::getCollisionMargin () const +{ + return m_margin; +} + +ppu_address_t +SpuCollisionObjectWrapper::getCollisionObjectPtr () const +{ + return m_collisionObjectPtr; +} diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h new file mode 100644 index 00000000000..36ea49209e2 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h @@ -0,0 +1,40 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPU_COLLISION_OBJECT_WRAPPER_H +#define SPU_COLLISION_OBJECT_WRAPPER_H + +#include "PlatformDefinitions.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" + +ATTRIBUTE_ALIGNED16(class) SpuCollisionObjectWrapper +{ +protected: + int m_shapeType; + float m_margin; + ppu_address_t m_collisionObjectPtr; + +public: + SpuCollisionObjectWrapper (); + + SpuCollisionObjectWrapper (const btCollisionObject* collisionObject); + + int getShapeType () const; + float getCollisionMargin () const; + ppu_address_t getCollisionObjectPtr () const; +}; + + +#endif //SPU_COLLISION_OBJECT_WRAPPER_H diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp new file mode 100644 index 00000000000..86eda8697d0 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp @@ -0,0 +1,318 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +//#define DEBUG_SPU_TASK_SCHEDULING 1 + + +//class OptimizedBvhNode; + +#include "SpuCollisionTaskProcess.h" + + + + +void SpuCollisionTaskProcess::setNumTasks(int maxNumTasks) +{ + if (int(m_maxNumOutstandingTasks) != maxNumTasks) + { + m_maxNumOutstandingTasks = maxNumTasks; + m_taskBusy.resize(m_maxNumOutstandingTasks); + m_spuGatherTaskDesc.resize(m_maxNumOutstandingTasks); + + for (int i = 0; i < m_taskBusy.size(); i++) + { + m_taskBusy[i] = false; + } + + ///re-allocate task memory buffers + if (m_workUnitTaskBuffers != 0) + { + btAlignedFree(m_workUnitTaskBuffers); + } + + m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128); + m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*6, 128); + } + +} + + + +SpuCollisionTaskProcess::SpuCollisionTaskProcess(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks) +:m_threadInterface(threadInterface), +m_maxNumOutstandingTasks(0) +{ + m_workUnitTaskBuffers = (unsigned char *)0; + setNumTasks(maxNumOutstandingTasks); + m_numBusyTasks = 0; + m_currentTask = 0; + m_currentPage = 0; + m_currentPageEntry = 0; + +#ifdef DEBUG_SpuCollisionTaskProcess + m_initialized = false; +#endif + + m_threadInterface->startSPU(); + + //printf("sizeof vec_float4: %d\n", sizeof(vec_float4)); + printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", int(sizeof(SpuGatherAndProcessWorkUnitInput))); + +} + +SpuCollisionTaskProcess::~SpuCollisionTaskProcess() +{ + + if (m_workUnitTaskBuffers != 0) + { + btAlignedFree(m_workUnitTaskBuffers); + m_workUnitTaskBuffers = 0; + } + + + + m_threadInterface->stopSPU(); + +} + + + +void SpuCollisionTaskProcess::initialize2(bool useEpa) +{ + +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("SpuCollisionTaskProcess::initialize()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + for (int i = 0; i < int (m_maxNumOutstandingTasks); i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + m_currentPage = 0; + m_currentPageEntry = 0; + m_useEpa = useEpa; + +#ifdef DEBUG_SpuCollisionTaskProcess + m_initialized = true; + btAssert(MIDPHASE_NUM_WORKUNITS_PER_TASK*sizeof(SpuGatherAndProcessWorkUnitInput) <= MIDPHASE_WORKUNIT_TASK_SIZE); +#endif +} + + +void SpuCollisionTaskProcess::issueTask2() +{ + +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("SpuCollisionTaskProcess::issueTask (m_currentTask= %d\n)", m_currentTask); +#endif //DEBUG_SPU_TASK_SCHEDULING + + m_taskBusy[m_currentTask] = true; + m_numBusyTasks++; + + + SpuGatherAndProcessPairsTaskDesc& taskDesc = m_spuGatherTaskDesc[m_currentTask]; + taskDesc.m_useEpa = m_useEpa; + + { + // send task description in event message + // no error checking here... + // but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS. + + taskDesc.m_inPairPtr = reinterpret_cast(MIDPHASE_TASK_PTR(m_currentTask)); + + taskDesc.taskId = m_currentTask; + taskDesc.numPages = m_currentPage+1; + taskDesc.numOnLastPage = m_currentPageEntry; + } + + + + m_threadInterface->sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc,m_currentTask); + + // if all tasks busy, wait for spu event to clear the task. + + + if (m_numBusyTasks >= m_maxNumOutstandingTasks) + { + unsigned int taskId; + unsigned int outputSize; + + + for (int i=0;i=0); + + + m_threadInterface->waitForResponse(&taskId, &outputSize); + +// printf("issueTask taskId %d completed, numBusy=%d\n",taskId,m_numBusyTasks); + + //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize); + + //postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + +} + +void SpuCollisionTaskProcess::addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex) +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("#"); +#endif //DEBUG_SPU_TASK_SCHEDULING + +#ifdef DEBUG_SpuCollisionTaskProcess + btAssert(m_initialized); + btAssert(m_workUnitTaskBuffers); + +#endif + + bool batch = true; + + if (batch) + { + if (m_currentPageEntry == MIDPHASE_NUM_WORKUNITS_PER_PAGE) + { + if (m_currentPage == MIDPHASE_NUM_WORKUNIT_PAGES-1) + { + // task buffer is full, issue current task. + // if all task buffers busy, this waits until SPU is done. + issueTask2(); + + // find new task buffer + for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++) + { + if (!m_taskBusy[i]) + { + m_currentTask = i; + //init the task data + + break; + } + } + + m_currentPage = 0; + } + else + { + m_currentPage++; + } + + m_currentPageEntry = 0; + } + } + + { + + + + SpuGatherAndProcessWorkUnitInput &wuInput = + *(reinterpret_cast + (MIDPHASE_ENTRY_PTR(m_currentTask, m_currentPage, m_currentPageEntry))); + + wuInput.m_pairArrayPtr = reinterpret_cast(pairArrayPtr); + wuInput.m_startIndex = startIndex; + wuInput.m_endIndex = endIndex; + + + + m_currentPageEntry++; + + if (!batch) + { + issueTask2(); + + // find new task buffer + for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++) + { + if (!m_taskBusy[i]) + { + m_currentTask = i; + //init the task data + + break; + } + } + + m_currentPage = 0; + m_currentPageEntry =0; + } + } +} + + +void +SpuCollisionTaskProcess::flush2() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("\nSpuCollisionTaskProcess::flush()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + // if there's a partially filled task buffer, submit that task + if (m_currentPage > 0 || m_currentPageEntry > 0) + { + issueTask2(); + } + + + // all tasks are issued, wait for all tasks to be complete + while(m_numBusyTasks > 0) + { + // Consolidating SPU code + unsigned int taskId=-1; + unsigned int outputSize; + + for (int i=0;i=0); + + + { + + // SPURS support. + m_threadInterface->waitForResponse(&taskId, &outputSize); + } +// printf("flush2 taskId %d completed, numBusy =%d \n",taskId,m_numBusyTasks); + //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize); + + //postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + +} diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h new file mode 100644 index 00000000000..2614be6c479 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h @@ -0,0 +1,163 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPU_COLLISION_TASK_PROCESS_H +#define SPU_COLLISION_TASK_PROCESS_H + +#include + +#include "LinearMath/btScalar.h" + +#include "PlatformDefinitions.h" +#include "LinearMath/btAlignedObjectArray.h" +#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" // for definitions processCollisionTask and createCollisionLocalStoreMemory + +#include "btThreadSupportInterface.h" + + +//#include "SPUAssert.h" +#include + + +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "BulletCollision/CollisionShapes/btCollisionShape.h" +#include "BulletCollision/CollisionShapes/btConvexShape.h" + +#include "LinearMath/btAlignedAllocator.h" + +#include + + +#define DEBUG_SpuCollisionTaskProcess 1 + + +#define CMD_GATHER_AND_PROCESS_PAIRLIST 1 + +class btCollisionObject; +class btPersistentManifold; +class btDispatcher; + + +/////Task Description for SPU collision detection +//struct SpuGatherAndProcessPairsTaskDesc +//{ +// uint64_t inPtr;//m_pairArrayPtr; +// //mutex variable +// uint32_t m_someMutexVariableInMainMemory; +// +// uint64_t m_dispatcher; +// +// uint32_t numOnLastPage; +// +// uint16_t numPages; +// uint16_t taskId; +// +// struct CollisionTask_LocalStoreMemory* m_lsMemory; +//} +// +//#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) +//__attribute__ ((aligned (16))) +//#endif +//; + + +///MidphaseWorkUnitInput stores individual primitive versus mesh collision detection input, to be processed by the SPU. +ATTRIBUTE_ALIGNED16(struct) SpuGatherAndProcessWorkUnitInput +{ + uint64_t m_pairArrayPtr; + int m_startIndex; + int m_endIndex; +}; + + + + +/// SpuCollisionTaskProcess handles SPU processing of collision pairs. +/// Maintains a set of task buffers. +/// When the task is full, the task is issued for SPUs to process. Contact output goes into btPersistentManifold +/// associated with each task. +/// When PPU issues a task, it will look for completed task buffers +/// PPU will do postprocessing, dependent on workunit output (not likely) +class SpuCollisionTaskProcess +{ + + unsigned char *m_workUnitTaskBuffers; + + + // track task buffers that are being used, and total busy tasks + btAlignedObjectArray m_taskBusy; + btAlignedObjectArray m_spuGatherTaskDesc; + + class btThreadSupportInterface* m_threadInterface; + + unsigned int m_maxNumOutstandingTasks; + + unsigned int m_numBusyTasks; + + // the current task and the current entry to insert a new work unit + unsigned int m_currentTask; + unsigned int m_currentPage; + unsigned int m_currentPageEntry; + + bool m_useEpa; + +#ifdef DEBUG_SpuCollisionTaskProcess + bool m_initialized; +#endif + void issueTask2(); + //void postProcess(unsigned int taskId, int outputSize); + +public: + SpuCollisionTaskProcess(btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks); + + ~SpuCollisionTaskProcess(); + + ///call initialize in the beginning of the frame, before addCollisionPairToTask + void initialize2(bool useEpa = false); + + ///batch up additional work to a current task for SPU processing. When batch is full, it issues the task. + void addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex); + + ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished + void flush2(); + + /// set the maximum number of SPU tasks allocated + void setNumTasks(int maxNumTasks); + + int getNumTasks() const + { + return m_maxNumOutstandingTasks; + } +}; + + + +#define MIDPHASE_TASK_PTR(task) (&m_workUnitTaskBuffers[0] + MIDPHASE_WORKUNIT_TASK_SIZE*task) +#define MIDPHASE_ENTRY_PTR(task,page,entry) (MIDPHASE_TASK_PTR(task) + MIDPHASE_WORKUNIT_PAGE_SIZE*page + sizeof(SpuGatherAndProcessWorkUnitInput)*entry) +#define MIDPHASE_OUTPUT_PTR(task) (&m_contactOutputBuffers[0] + MIDPHASE_MAX_CONTACT_BUFFER_SIZE*task) +#define MIDPHASE_TREENODES_PTR(task) (&m_complexShapeBuffers[0] + MIDPHASE_COMPLEX_SHAPE_BUFFER_SIZE*task) + + +#define MIDPHASE_WORKUNIT_PAGE_SIZE (16) +//#define MIDPHASE_WORKUNIT_PAGE_SIZE (128) + +#define MIDPHASE_NUM_WORKUNIT_PAGES 1 +#define MIDPHASE_WORKUNIT_TASK_SIZE (MIDPHASE_WORKUNIT_PAGE_SIZE*MIDPHASE_NUM_WORKUNIT_PAGES) +#define MIDPHASE_NUM_WORKUNITS_PER_PAGE (MIDPHASE_WORKUNIT_PAGE_SIZE / sizeof(SpuGatherAndProcessWorkUnitInput)) +#define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES) + + +#endif // SPU_COLLISION_TASK_PROCESS_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp new file mode 100644 index 00000000000..286b63191ee --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp @@ -0,0 +1,69 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuContactManifoldCollisionAlgorithm.h" +#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "BulletCollision/CollisionShapes/btCollisionShape.h" +#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h" + + + + +void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) +{ + btAssert(0); +} + +btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) +{ + btAssert(0); + return 1.f; +} + +#ifndef __SPU__ +SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1) +:btCollisionAlgorithm(ci) +#ifdef USE_SEPDISTANCE_UTIL +,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc()) +#endif //USE_SEPDISTANCE_UTIL +{ + m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1); + m_shapeType0 = body0->getCollisionShape()->getShapeType(); + m_shapeType1 = body1->getCollisionShape()->getShapeType(); + m_collisionMargin0 = body0->getCollisionShape()->getMargin(); + m_collisionMargin1 = body1->getCollisionShape()->getMargin(); + m_collisionObject0 = body0; + m_collisionObject1 = body1; + + if (body0->getCollisionShape()->isPolyhedral()) + { + btPolyhedralConvexShape* convex0 = (btPolyhedralConvexShape*)body0->getCollisionShape(); + m_shapeDimensions0 = convex0->getImplicitShapeDimensions(); + } + if (body1->getCollisionShape()->isPolyhedral()) + { + btPolyhedralConvexShape* convex1 = (btPolyhedralConvexShape*)body1->getCollisionShape(); + m_shapeDimensions1 = convex1->getImplicitShapeDimensions(); + } +} +#endif //__SPU__ + + +SpuContactManifoldCollisionAlgorithm::~SpuContactManifoldCollisionAlgorithm() +{ + if (m_manifoldPtr) + m_dispatcher->releaseManifold(m_manifoldPtr); +} diff --git a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h new file mode 100644 index 00000000000..151cb2c7966 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h @@ -0,0 +1,120 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H +#define SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H + +#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h" +#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" +#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h" +#include "BulletCollision/BroadphaseCollision/btDispatcher.h" +#include "LinearMath/btTransformUtil.h" + +class btPersistentManifold; + +//#define USE_SEPDISTANCE_UTIL 1 + +/// SpuContactManifoldCollisionAlgorithm provides contact manifold and should be processed on SPU. +ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm +{ + btVector3 m_shapeDimensions0; + btVector3 m_shapeDimensions1; + btPersistentManifold* m_manifoldPtr; + int m_shapeType0; + int m_shapeType1; + float m_collisionMargin0; + float m_collisionMargin1; + + btCollisionObject* m_collisionObject0; + btCollisionObject* m_collisionObject1; + + + + +public: + + virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); + + virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); + + + SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1); +#ifdef USE_SEPDISTANCE_UTIL + btConvexSeparatingDistanceUtil m_sepDistance; +#endif //USE_SEPDISTANCE_UTIL + + virtual ~SpuContactManifoldCollisionAlgorithm(); + + virtual void getAllContactManifolds(btManifoldArray& manifoldArray) + { + if (m_manifoldPtr) + manifoldArray.push_back(m_manifoldPtr); + } + + btPersistentManifold* getContactManifoldPtr() + { + return m_manifoldPtr; + } + + btCollisionObject* getCollisionObject0() + { + return m_collisionObject0; + } + + btCollisionObject* getCollisionObject1() + { + return m_collisionObject1; + } + + int getShapeType0() const + { + return m_shapeType0; + } + + int getShapeType1() const + { + return m_shapeType1; + } + float getCollisionMargin0() const + { + return m_collisionMargin0; + } + float getCollisionMargin1() const + { + return m_collisionMargin1; + } + + const btVector3& getShapeDimensions0() const + { + return m_shapeDimensions0; + } + + const btVector3& getShapeDimensions1() const + { + return m_shapeDimensions1; + } + + struct CreateFunc :public btCollisionAlgorithmCreateFunc + { + virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1) + { + void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm)); + return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1); + } + }; + +}; + +#endif //SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H diff --git a/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h b/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h new file mode 100644 index 00000000000..a0695744bd5 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h @@ -0,0 +1,110 @@ +#ifndef DOUBLE_BUFFER_H +#define DOUBLE_BUFFER_H + +#include "SpuFakeDma.h" +#include "LinearMath/btScalar.h" + + +///DoubleBuffer +template +class DoubleBuffer +{ +#if defined(__SPU__) || defined(USE_LIBSPE2) + ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ; + ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ; +#else + T m_buffer0[size]; + T m_buffer1[size]; +#endif + + T *m_frontBuffer; + T *m_backBuffer; + + unsigned int m_dmaTag; + bool m_dmaPending; +public: + bool isPending() const { return m_dmaPending;} + DoubleBuffer(); + + void init (); + + // dma get and put commands + void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag); + void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag); + + // gets pointer to a buffer + T *getFront(); + T *getBack(); + + // if back buffer dma was started, wait for it to complete + // then move back to front and vice versa + T *swapBuffers(); +}; + +template +DoubleBuffer::DoubleBuffer() +{ + init (); +} + +template +void DoubleBuffer::init() +{ + this->m_dmaPending = false; + this->m_frontBuffer = &this->m_buffer0[0]; + this->m_backBuffer = &this->m_buffer1[0]; +} + +template +void +DoubleBuffer::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag) +{ + m_dmaPending = true; + m_dmaTag = tag; + if (numBytes) + { + m_backBuffer = (T*)cellDmaLargeGetReadOnly(m_backBuffer, ea, numBytes, tag, 0, 0); + } +} + +template +void +DoubleBuffer::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag) +{ + m_dmaPending = true; + m_dmaTag = tag; + cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0); +} + +template +T * +DoubleBuffer::getFront() +{ + return m_frontBuffer; +} + +template +T * +DoubleBuffer::getBack() +{ + return m_backBuffer; +} + +template +T * +DoubleBuffer::swapBuffers() +{ + if (m_dmaPending) + { + cellDmaWaitTagStatusAll(1< //for btAssert +//Disabling memcpy sometimes helps debugging DMA + +#define USE_MEMCPY 1 +#ifdef USE_MEMCPY + +#endif + + +void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) +{ + +#if defined (__SPU__) || defined (USE_LIBSPE2) + cellDmaLargeGet(ls,ea,size,tag,tid,rid); + return ls; +#else + return (void*)(uint32_t)ea; +#endif +} + +void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) +{ +#if defined (__SPU__) || defined (USE_LIBSPE2) + mfc_get(ls,ea,size,tag,0,0); + return ls; +#else + return (void*)(uint32_t)ea; +#endif +} + + + + +void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) +{ +#if defined (__SPU__) || defined (USE_LIBSPE2) + cellDmaGet(ls,ea,size,tag,tid,rid); + return ls; +#else + return (void*)(uint32_t)ea; +#endif +} + + +///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) +int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) +{ + + btAssert(size<32); + + ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); + + + char* localStore = (char*)ls; + uint32_t i; + + + ///make sure last 4 bits are the same, for cellDmaSmallGet + uint32_t last4BitsOffset = ea & 0x0f; + char* tmpTarget = tmpBuffer + last4BitsOffset; + +#if defined (__SPU__) || defined (USE_LIBSPE2) + + int remainingSize = size; + +//#define FORCE_cellDmaUnalignedGet 1 +#ifdef FORCE_cellDmaUnalignedGet + cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0); +#else + char* remainingTmpTarget = tmpTarget; + uint64_t remainingEa = ea; + + while (remainingSize) + { + switch (remainingSize) + { + case 1: + case 2: + case 4: + case 8: + case 16: + { + mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0); + remainingSize=0; + break; + } + default: + { + //spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize); + int actualSize = 0; + + if (remainingSize > 16) + actualSize = 16; + else + if (remainingSize >8) + actualSize=8; + else + if (remainingSize >4) + actualSize=4; + else + if (remainingSize >2) + actualSize=2; + mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0); + remainingSize-=actualSize; + remainingTmpTarget+=actualSize; + remainingEa += actualSize; + } + } + } +#endif//FORCE_cellDmaUnalignedGet + +#else + char* mainMem = (char*)ea; + //copy into final destination +#ifdef USE_MEMCPY + + memcpy(tmpTarget,mainMem,size); +#else + for ( i=0;i +#include + +#define DMA_TAG(xfer) (xfer + 1) +#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) + +#else // !USE_LIBSPE2 + +#define DMA_TAG(xfer) (xfer + 1) +#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) + +#include + +#define DEBUG_DMA +#ifdef DEBUG_DMA +#define dUASSERT(a,b) if (!(a)) { printf(b);} +#define uintsize ppu_address_t + +#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\ + } \ + mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\ + } \ + mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \ + } \ + mfc_put(ls, ea, size, tag, tid, rid) +#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\ + } \ + mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() + +#else +#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid) +#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() +#endif // DEBUG_DMA + + + + + + + + +#endif // USE_LIBSPE2 +#else // !__SPU__ +//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc) +//Potential to add networked simulation using this interface + +#define DMA_TAG(a) (a) +#define DMA_MASK(a) (a) + + /// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) + int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + /// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) + int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + /// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) + void cellDmaWaitTagStatusAll(int ignore); + + +#endif //__CELLOS_LV2__ + +///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1) +int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size); + + +void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); +void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); +void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + + +#endif //FAKE_DMA_H diff --git a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp new file mode 100644 index 00000000000..ee0832f12e2 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp @@ -0,0 +1,251 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuGatheringCollisionDispatcher.h" +#include "SpuCollisionTaskProcess.h" + + +#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" +#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h" +#include "SpuContactManifoldCollisionAlgorithm.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "BulletCollision/CollisionShapes/btCollisionShape.h" +#include "LinearMath/btQuickprof.h" + + + + +SpuGatheringCollisionDispatcher::SpuGatheringCollisionDispatcher(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration) +:btCollisionDispatcher(collisionConfiguration), +m_spuCollisionTaskProcess(0), +m_threadInterface(threadInterface), +m_maxNumOutstandingTasks(maxNumOutstandingTasks) +{ + +} + + +bool SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,int proxyType1) +{ + bool supported0 = ( + (proxyType0 == BOX_SHAPE_PROXYTYPE) || + (proxyType0 == TRIANGLE_SHAPE_PROXYTYPE) || + (proxyType0 == SPHERE_SHAPE_PROXYTYPE) || + (proxyType0 == CAPSULE_SHAPE_PROXYTYPE) || + (proxyType0 == CYLINDER_SHAPE_PROXYTYPE) || +// (proxyType0 == CONE_SHAPE_PROXYTYPE) || + (proxyType0 == TRIANGLE_MESH_SHAPE_PROXYTYPE) || + (proxyType0 == CONVEX_HULL_SHAPE_PROXYTYPE)|| + (proxyType0 == STATIC_PLANE_PROXYTYPE)|| + (proxyType0 == COMPOUND_SHAPE_PROXYTYPE) + ); + + bool supported1 = ( + (proxyType1 == BOX_SHAPE_PROXYTYPE) || + (proxyType1 == TRIANGLE_SHAPE_PROXYTYPE) || + (proxyType1 == SPHERE_SHAPE_PROXYTYPE) || + (proxyType1 == CAPSULE_SHAPE_PROXYTYPE) || + (proxyType1 == CYLINDER_SHAPE_PROXYTYPE) || +// (proxyType1 == CONE_SHAPE_PROXYTYPE) || + (proxyType1 == TRIANGLE_MESH_SHAPE_PROXYTYPE) || + (proxyType1 == CONVEX_HULL_SHAPE_PROXYTYPE) || + (proxyType1 == STATIC_PLANE_PROXYTYPE) || + (proxyType1 == COMPOUND_SHAPE_PROXYTYPE) + ); + + + return supported0 && supported1; +} + + + +SpuGatheringCollisionDispatcher::~SpuGatheringCollisionDispatcher() +{ + if (m_spuCollisionTaskProcess) + delete m_spuCollisionTaskProcess; + +} + +#include "stdio.h" + + + +///interface for iterating all overlapping collision pairs, no matter how those pairs are stored (array, set, map etc) +///this is useful for the collision dispatcher. +class btSpuCollisionPairCallback : public btOverlapCallback +{ + const btDispatcherInfo& m_dispatchInfo; + SpuGatheringCollisionDispatcher* m_dispatcher; + +public: + + btSpuCollisionPairCallback(const btDispatcherInfo& dispatchInfo, SpuGatheringCollisionDispatcher* dispatcher) + :m_dispatchInfo(dispatchInfo), + m_dispatcher(dispatcher) + { + } + + virtual bool processOverlap(btBroadphasePair& collisionPair) + { + + + //PPU version + //(*m_dispatcher->getNearCallback())(collisionPair,*m_dispatcher,m_dispatchInfo); + + //only support discrete collision detection for now, we could fallback on PPU/unoptimized version for TOI/CCD + btAssert(m_dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE); + + //by default, Bullet will use this near callback + { + ///userInfo is used to determine if the SPU has to handle this case or not (skip PPU tasks) + if (!collisionPair.m_internalTmpValue) + { + collisionPair.m_internalTmpValue = 1; + } + if (!collisionPair.m_algorithm) + { + btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject; + btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject; + + btCollisionAlgorithmConstructionInfo ci; + ci.m_dispatcher1 = m_dispatcher; + ci.m_manifold = 0; + + if (m_dispatcher->needsCollision(colObj0,colObj1)) + { + int proxyType0 = colObj0->getCollisionShape()->getShapeType(); + int proxyType1 = colObj1->getCollisionShape()->getShapeType(); + if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1) + && (colObj0->getCollisionFlags() != btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) + && (colObj1->getCollisionFlags() != btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) + ) + { + int so = sizeof(SpuContactManifoldCollisionAlgorithm); +#ifdef ALLOCATE_SEPARATELY + void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so); +#else + void* mem = m_dispatcher->allocateCollisionAlgorithm(so); +#endif + collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1); + collisionPair.m_internalTmpValue = 2; + } else + { + collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1); + collisionPair.m_internalTmpValue = 3; + } + } + } + } + return false; + } +}; + +void SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo, btDispatcher* dispatcher) +{ + + if (dispatchInfo.m_enableSPU) + { + m_maxNumOutstandingTasks = m_threadInterface->getNumTasks(); + + { + BT_PROFILE("processAllOverlappingPairs"); + + if (!m_spuCollisionTaskProcess) + m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks); + + m_spuCollisionTaskProcess->setNumTasks(m_maxNumOutstandingTasks); + // printf("m_maxNumOutstandingTasks =%d\n",m_maxNumOutstandingTasks); + + m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa); + + + ///modified version of btCollisionDispatcher::dispatchAllCollisionPairs: + { + btSpuCollisionPairCallback collisionCallback(dispatchInfo,this); + + pairCache->processAllOverlappingPairs(&collisionCallback,dispatcher); + } + } + + //send one big batch + int numTotalPairs = pairCache->getNumOverlappingPairs(); + + btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr(); + int i; + { + int pairRange = SPU_BATCHSIZE_BROADPHASE_PAIRS; + if (numTotalPairs < (m_spuCollisionTaskProcess->getNumTasks()*SPU_BATCHSIZE_BROADPHASE_PAIRS)) + { + pairRange = (numTotalPairs/m_spuCollisionTaskProcess->getNumTasks())+1; + } + + BT_PROFILE("addWorkToTask"); + for (i=0;iaddWorkToTask(pairPtr,i,endIndex); + i = endIndex; + } + } + + { + BT_PROFILE("PPU fallback"); + //handle PPU fallback pairs + for (i=0;im_clientObject; + btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject; + + if (dispatcher->needsCollision(colObj0,colObj1)) + { + btManifoldResult contactPointResult(colObj0,colObj1); + + if (dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE) + { + //discrete collision detection query + collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult); + } else + { + //continuous collision detection query, time of impact (toi) + btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult); + if (dispatchInfo.m_timeOfImpact > toi) + dispatchInfo.m_timeOfImpact = toi; + + } + } + } + } + } + } + { + BT_PROFILE("flush2"); + //make sure all SPU work is done + m_spuCollisionTaskProcess->flush2(); + } + + } else + { + ///PPU fallback + ///!Need to make sure to clear all 'algorithms' when switching between SPU and PPU + btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher); + } +} diff --git a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h new file mode 100644 index 00000000000..7d5be88d71d --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h @@ -0,0 +1,72 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +#ifndef SPU_GATHERING_COLLISION__DISPATCHER_H +#define SPU_GATHERING_COLLISION__DISPATCHER_H + +#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h" + + +///Tuning value to optimized SPU utilization +///Too small value means Task overhead is large compared to computation (too fine granularity) +///Too big value might render some SPUs are idle, while a few other SPUs are doing all work. +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64 +#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 512 +//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024 + + + +class SpuCollisionTaskProcess; + +///SpuGatheringCollisionDispatcher can use SPU to gather and calculate collision detection +///Time of Impact, Closest Points and Penetration Depth. +class SpuGatheringCollisionDispatcher : public btCollisionDispatcher +{ + + SpuCollisionTaskProcess* m_spuCollisionTaskProcess; + +protected: + + class btThreadSupportInterface* m_threadInterface; + + unsigned int m_maxNumOutstandingTasks; + + +public: + + //can be used by SPU collision algorithms + SpuCollisionTaskProcess* getSpuCollisionTaskProcess() + { + return m_spuCollisionTaskProcess; + } + + SpuGatheringCollisionDispatcher (class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration); + + virtual ~SpuGatheringCollisionDispatcher(); + + bool supportsDispatchPairOnSpu(int proxyType0,int proxyType1); + + virtual void dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ; + +}; + + + +#endif //SPU_GATHERING_COLLISION__DISPATCHER_H + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp new file mode 100644 index 00000000000..a312450ed72 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp @@ -0,0 +1,257 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifdef USE_LIBSPE2 + +#include "SpuLibspe2Support.h" + + + + +//SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication +///Setup and initialize SPU/CELL/Libspe2 +SpuLibspe2Support::SpuLibspe2Support(spe_program_handle_t *speprog, int numThreads) +{ + this->program = speprog; + this->numThreads = ((numThreads <= spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1)) ? numThreads : spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1)); +} + +///cleanup/shutdown Libspe2 +SpuLibspe2Support::~SpuLibspe2Support() +{ + + stopSPU(); +} + + + +///send messages to SPUs +void SpuLibspe2Support::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1) +{ + spe_context_ptr_t context; + + switch (uiCommand) + { + case CMD_SAMPLE_TASK_COMMAND: + { + //get taskdescription + SpuSampleTaskDesc* taskDesc = (SpuSampleTaskDesc*) uiArgument0; + + btAssert(taskDesc->m_taskIdm_taskId]; + + //set data for spuStatus + spuStatus.m_commandId = uiCommand; + spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied" + spuStatus.m_taskDesc.p = taskDesc; + + //get context + context = data[taskDesc->m_taskId].context; + + + taskDesc->m_mainMemoryPtr = reinterpret_cast (spuStatus.m_lsMemory.p); + + + break; + } + case CMD_GATHER_AND_PROCESS_PAIRLIST: + { + //get taskdescription + SpuGatherAndProcessPairsTaskDesc* taskDesc = (SpuGatherAndProcessPairsTaskDesc*) uiArgument0; + + btAssert(taskDesc->taskIdtaskId]; + + //set data for spuStatus + spuStatus.m_commandId = uiCommand; + spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied" + spuStatus.m_taskDesc.p = taskDesc; + + //get context + context = data[taskDesc->taskId].context; + + + taskDesc->m_lsMemory = (CollisionTask_LocalStoreMemory*)spuStatus.m_lsMemory.p; + + break; + } + default: + { + ///not implemented + btAssert(0); + } + + }; + + + //write taskdescription in mailbox + unsigned int event = Spu_Mailbox_Event_Task; + spe_in_mbox_write(context, &event, 1, SPE_MBOX_ANY_NONBLOCKING); + +} + +///check for messages from SPUs +void SpuLibspe2Support::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) +{ + ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response + + ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback' + + btAssert(m_activeSpuStatus.size()); + + + int last = -1; + + //find an active spu/thread + while(last < 0) + { + for (int i=0;i=0); + + + + *puiArgument0 = spuStatus.m_taskId; + *puiArgument1 = spuStatus.m_status; + + +} + + +void SpuLibspe2Support::startSPU() +{ + this->internal_startSPU(); +} + + + +///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) +void SpuLibspe2Support::internal_startSPU() +{ + m_activeSpuStatus.resize(numThreads); + + + for (int i=0; i < numThreads; i++) + { + + if(data[i].context == NULL) + { + + /* Create context */ + if ((data[i].context = spe_context_create(0, NULL)) == NULL) + { + perror ("Failed creating context"); + exit(1); + } + + /* Load program into context */ + if(spe_program_load(data[i].context, this->program)) + { + perror ("Failed loading program"); + exit(1); + } + + m_activeSpuStatus[i].m_status = Spu_Status_Startup; + m_activeSpuStatus[i].m_taskId = i; + m_activeSpuStatus[i].m_commandId = 0; + m_activeSpuStatus[i].m_lsMemory.p = NULL; + + + data[i].entry = SPE_DEFAULT_ENTRY; + data[i].flags = 0; + data[i].argp.p = &m_activeSpuStatus[i]; + data[i].envp.p = NULL; + + /* Create thread for each SPE context */ + if (pthread_create(&data[i].pthread, NULL, &ppu_pthread_function, &(data[i]) )) + { + perror ("Failed creating thread"); + exit(1); + } + /* + else + { + printf("started thread %d\n",i); + }*/ + } + } + + + for (int i=0; i < numThreads; i++) + { + if(data[i].context != NULL) + { + while( m_activeSpuStatus[i].m_status == Spu_Status_Startup) + { + // wait for spu to set up + sched_yield(); + } + printf("Spu %d is ready\n", i); + } + } +} + +///tell the task scheduler we are done with the SPU tasks +void SpuLibspe2Support::stopSPU() +{ + // wait for all threads to finish + int i; + for ( i = 0; i < this->numThreads; i++ ) + { + + unsigned int event = Spu_Mailbox_Event_Shutdown; + spe_context_ptr_t context = data[i].context; + spe_in_mbox_write(context, &event, 1, SPE_MBOX_ALL_BLOCKING); + pthread_join (data[i].pthread, NULL); + + } + // close SPE program + spe_image_close(program); + // destroy SPE contexts + for ( i = 0; i < this->numThreads; i++ ) + { + if(data[i].context != NULL) + { + spe_context_destroy (data[i].context); + } + } + + m_activeSpuStatus.clear(); + +} + + + +#endif //USE_LIBSPE2 + diff --git a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h new file mode 100644 index 00000000000..a6d6baca47b --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h @@ -0,0 +1,180 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef SPU_LIBSPE2_SUPPORT_H +#define SPU_LIBSPE2_SUPPORT_H + +#include //for uint32_t etc. + +#ifdef USE_LIBSPE2 + +#include +#include +//#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" +#include "PlatformDefinitions.h" + + +//extern struct SpuGatherAndProcessPairsTaskDesc; + +enum +{ + Spu_Mailbox_Event_Nothing = 0, + Spu_Mailbox_Event_Task = 1, + Spu_Mailbox_Event_Shutdown = 2, + + Spu_Mailbox_Event_ForceDword = 0xFFFFFFFF + +}; + +enum +{ + Spu_Status_Free = 0, + Spu_Status_Occupied = 1, + Spu_Status_Startup = 2, + + Spu_Status_ForceDword = 0xFFFFFFFF + +}; + + +struct btSpuStatus +{ + uint32_t m_taskId; + uint32_t m_commandId; + uint32_t m_status; + + addr64 m_taskDesc; + addr64 m_lsMemory; + +} +__attribute__ ((aligned (128))) +; + + + +#ifndef __SPU__ + +#include "LinearMath/btAlignedObjectArray.h" +#include "SpuCollisionTaskProcess.h" +#include "SpuSampleTaskProcess.h" +#include "btThreadSupportInterface.h" +#include +#include +#include + +#define MAX_SPUS 4 + +typedef struct ppu_pthread_data +{ + spe_context_ptr_t context; + pthread_t pthread; + unsigned int entry; + unsigned int flags; + addr64 argp; + addr64 envp; + spe_stop_info_t stopinfo; +} ppu_pthread_data_t; + + +static void *ppu_pthread_function(void *arg) +{ + ppu_pthread_data_t * datap = (ppu_pthread_data_t *)arg; + /* + int rc; + do + {*/ + spe_context_run(datap->context, &datap->entry, datap->flags, datap->argp.p, datap->envp.p, &datap->stopinfo); + if (datap->stopinfo.stop_reason == SPE_EXIT) + { + if (datap->stopinfo.result.spe_exit_code != 0) + { + perror("FAILED: SPE returned a non-zero exit status: \n"); + exit(1); + } + } + else + { + perror("FAILED: SPE abnormally terminated\n"); + exit(1); + } + + + //} while (rc > 0); // loop until exit or error, and while any stop & signal + pthread_exit(NULL); +} + + + + + + +///SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication +class SpuLibspe2Support : public btThreadSupportInterface +{ + + btAlignedObjectArray m_activeSpuStatus; + +public: + //Setup and initialize SPU/CELL/Libspe2 + SpuLibspe2Support(spe_program_handle_t *speprog,int numThreads); + + // SPE program handle ptr. + spe_program_handle_t *program; + + // SPE program data + ppu_pthread_data_t data[MAX_SPUS]; + + //cleanup/shutdown Libspe2 + ~SpuLibspe2Support(); + + ///send messages to SPUs + void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1=0); + + //check for messages from SPUs + void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1); + + //start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) + virtual void startSPU(); + + //tell the task scheduler we are done with the SPU tasks + virtual void stopSPU(); + + virtual void setNumTasks(int numTasks) + { + //changing the number of tasks after initialization is not implemented (yet) + } + +private: + + ///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) + void internal_startSPU(); + + + + + int numThreads; + +}; + +#endif // NOT __SPU__ + +#endif //USE_LIBSPE2 + +#endif //SPU_LIBSPE2_SUPPORT_H + + + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h new file mode 100644 index 00000000000..9bc2ebf51ec --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h @@ -0,0 +1,172 @@ +/* + Copyright (C) 2006, 2008 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef __BOX_H__ +#define __BOX_H__ + + +#ifndef PE_REF +#define PE_REF(a) a& +#endif + +#include + +///only use a system-wide vectormath_aos.h on CELLOS_LV2 or if USE_SYSTEM_VECTORMATH +#if defined(__CELLOS_LV2__) || defined (USE_SYSTEM_VECTORMATH) +#include +#else +#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h" +#endif + + + +using namespace Vectormath::Aos; + +enum FeatureType { F, E, V }; + +//---------------------------------------------------------------------------- +// Box +//---------------------------------------------------------------------------- +///The Box is an internal class used by the boxBoxDistance calculation. +class Box +{ +public: + Vector3 half; + + inline Box() + {} + inline Box(PE_REF(Vector3) half_); + inline Box(float hx, float hy, float hz); + + inline void Set(PE_REF(Vector3) half_); + inline void Set(float hx, float hy, float hz); + + inline Vector3 GetAABB(const Matrix3& rotation) const; +}; + +inline +Box::Box(PE_REF(Vector3) half_) +{ + Set(half_); +} + +inline +Box::Box(float hx, float hy, float hz) +{ + Set(hx, hy, hz); +} + +inline +void +Box::Set(PE_REF(Vector3) half_) +{ + half = half_; +} + +inline +void +Box::Set(float hx, float hy, float hz) +{ + half = Vector3(hx, hy, hz); +} + +inline +Vector3 +Box::GetAABB(const Matrix3& rotation) const +{ + return absPerElem(rotation) * half; +} + +//------------------------------------------------------------------------------------------------- +// BoxPoint +//------------------------------------------------------------------------------------------------- + +///The BoxPoint class is an internally used class to contain feature information for boxBoxDistance calculation. +class BoxPoint +{ +public: + BoxPoint() : localPoint(0.0f) {} + + Point3 localPoint; + FeatureType featureType; + int featureIdx; + + inline void setVertexFeature(int plusX, int plusY, int plusZ); + inline void setEdgeFeature(int dim0, int plus0, int dim1, int plus1); + inline void setFaceFeature(int dim, int plus); + + inline void getVertexFeature(int & plusX, int & plusY, int & plusZ) const; + inline void getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const; + inline void getFaceFeature(int & dim, int & plus) const; +}; + +inline +void +BoxPoint::setVertexFeature(int plusX, int plusY, int plusZ) +{ + featureType = V; + featureIdx = plusX << 2 | plusY << 1 | plusZ; +} + +inline +void +BoxPoint::setEdgeFeature(int dim0, int plus0, int dim1, int plus1) +{ + featureType = E; + + if (dim0 > dim1) { + featureIdx = plus1 << 5 | dim1 << 3 | plus0 << 2 | dim0; + } else { + featureIdx = plus0 << 5 | dim0 << 3 | plus1 << 2 | dim1; + } +} + +inline +void +BoxPoint::setFaceFeature(int dim, int plus) +{ + featureType = F; + featureIdx = plus << 2 | dim; +} + +inline +void +BoxPoint::getVertexFeature(int & plusX, int & plusY, int & plusZ) const +{ + plusX = featureIdx >> 2; + plusY = featureIdx >> 1 & 1; + plusZ = featureIdx & 1; +} + +inline +void +BoxPoint::getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const +{ + plus0 = featureIdx >> 5; + dim0 = featureIdx >> 3 & 3; + plus1 = featureIdx >> 2 & 1; + dim1 = featureIdx & 3; +} + +inline +void +BoxPoint::getFaceFeature(int & dim, int & plus) const +{ + plus = featureIdx >> 2; + dim = featureIdx & 3; +} + +#endif /* __BOX_H__ */ diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp new file mode 100644 index 00000000000..dfcd8426695 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp @@ -0,0 +1,302 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include "SpuCollisionShapes.h" + +///not supported on IBM SDK, until we fix the alignment of btVector3 +#if defined (__CELLOS_LV2__) && defined (__SPU__) +#include +static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 ) +{ + vec_float4 result; + result = spu_mul( vec0, vec1 ); + result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result ); + return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result ); +} +#endif //__SPU__ + + +void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform) +{ + //calculate the aabb, given the types... + switch (shapeType) + { + case CYLINDER_SHAPE_PROXYTYPE: + /* fall through */ + case BOX_SHAPE_PROXYTYPE: + { + btScalar margin=convexShape->getMarginNV(); + btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); + halfExtents += btVector3(margin,margin,margin); + const btTransform& t = xform; + btMatrix3x3 abs_b = t.getBasis().absolute(); + btVector3 center = t.getOrigin(); + btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); + + aabbMin = center - extent; + aabbMax = center + extent; + break; + } + case CAPSULE_SHAPE_PROXYTYPE: + { + btScalar margin=convexShape->getMarginNV(); + btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); + //add the radius to y-axis to get full height + btScalar radius = halfExtents[0]; + halfExtents[1] += radius; + halfExtents += btVector3(margin,margin,margin); +#if 0 + int capsuleUpAxis = convexShape->getUpAxis(); + btScalar halfHeight = convexShape->getHalfHeight(); + btScalar radius = convexShape->getRadius(); + halfExtents[capsuleUpAxis] = radius + halfHeight; +#endif + const btTransform& t = xform; + btMatrix3x3 abs_b = t.getBasis().absolute(); + btVector3 center = t.getOrigin(); + btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); + + aabbMin = center - extent; + aabbMax = center + extent; + break; + } + case SPHERE_SHAPE_PROXYTYPE: + { + btScalar radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX(); + btScalar margin = radius + convexShape->getMarginNV(); + const btTransform& t = xform; + const btVector3& center = t.getOrigin(); + btVector3 extent(margin,margin,margin); + aabbMin = center - extent; + aabbMax = center + extent; + break; + } + case CONVEX_HULL_SHAPE_PROXYTYPE: + { + ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); + cellDmaGet(&convexHullShape0, convexShapePtr , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0; + const btTransform& t = xform; + btScalar margin = convexShape->getMarginNV(); + localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin); + //spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ()); + //spu_printf("SPU convex aabbMax=%f,%f,%f=\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ()); + break; + } + default: + { + // spu_printf("SPU: unsupported shapetype %d in AABB calculation\n"); + } + }; +} + +void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape) +{ + register int dmaSize; + register ppu_address_t dmaPpuAddress2; + + dmaSize = sizeof(btTriangleIndexVertexArray); + dmaPpuAddress2 = reinterpret_cast(triMeshShape->getMeshInterface()); + // spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2); +#ifdef __SPU__ + cellDmaGet(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + bvhMeshShape->gTriangleMeshInterfacePtr = &bvhMeshShape->gTriangleMeshInterfaceStorage; +#else + bvhMeshShape->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); +#endif + + //cellDmaWaitTagStatusAll(DMA_MASK(1)); + + ///now DMA over the BVH + + dmaSize = sizeof(btOptimizedBvh); + dmaPpuAddress2 = reinterpret_cast(triMeshShape->getOptimizedBvh()); + //spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",dmaPpuAddress2); + cellDmaGet(&bvhMeshShape->gOptimizedBvh, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); + //cellDmaWaitTagStatusAll(DMA_MASK(2)); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); +} + +void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag) +{ + cellDmaGet(IndexMesh, (ppu_address_t)&indexArray[index] , sizeof(btIndexedMesh), DMA_TAG(dmaTag), 0, 0); + +} + +void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag) +{ + cellDmaGet(subTreeHeaders, subTreePtr, batchSize * sizeof(btBvhSubtreeInfo), DMA_TAG(dmaTag), 0, 0); +} + +void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray& nodeArray, int dmaTag) +{ + cellDmaGet(nodes, reinterpret_cast(&nodeArray[subtree.m_rootNodeIndex]) , subtree.m_subtreeSize* sizeof(btQuantizedBvhNode), DMA_TAG(2), 0, 0); +} + +///getShapeTypeSize could easily be optimized, but it is not likely a bottleneck +int getShapeTypeSize(int shapeType) +{ + + + switch (shapeType) + { + case CYLINDER_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btCylinderShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + case BOX_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btBoxShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + case SPHERE_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btSphereShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + case TRIANGLE_MESH_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btBvhTriangleMeshShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + case CAPSULE_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btCapsuleShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + + case CONVEX_HULL_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btConvexHullShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + + case COMPOUND_SHAPE_PROXYTYPE: + { + int shapeSize = sizeof(btCompoundShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + case STATIC_PLANE_PROXYTYPE: + { + int shapeSize = sizeof(btStaticPlaneShape); + btAssert(shapeSize < MAX_SHAPE_SIZE); + return shapeSize; + } + + default: + btAssert(0); + //unsupported shapetype, please add here + return 0; + } +} + +void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU) +{ + convexVertexData->gNumConvexPoints = convexShapeSPU->getNumPoints(); + if (convexVertexData->gNumConvexPoints>MAX_NUM_SPU_CONVEX_POINTS) + { + btAssert(0); + // spu_printf("SPU: Error: MAX_NUM_SPU_CONVEX_POINTS(%d) exceeded: %d\n",MAX_NUM_SPU_CONVEX_POINTS,convexVertexData->gNumConvexPoints); + return; + } + + register int dmaSize = convexVertexData->gNumConvexPoints*sizeof(btVector3); + ppu_address_t pointsPPU = (ppu_address_t) convexShapeSPU->getUnscaledPoints(); + cellDmaGet(&convexVertexData->g_convexPointBuffer[0], pointsPPU , dmaSize, DMA_TAG(2), 0, 0); +} + +void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType) +{ + register int dmaSize = getShapeTypeSize(shapeType); + cellDmaGet(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0); + //cellDmaGetReadOnly(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0); + //cellDmaWaitTagStatusAll(DMA_MASK(dmaTag)); +} + +void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag) +{ + register int dmaSize; + register ppu_address_t dmaPpuAddress2; + int childShapeCount = spuCompoundShape->getNumChildShapes(); + dmaSize = childShapeCount * sizeof(btCompoundShapeChild); + dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList(); + cellDmaGet(&compoundShapeLocation->gSubshapes[0], dmaPpuAddress2, dmaSize, DMA_TAG(dmaTag), 0, 0); +} + +void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag) +{ + int childShapeCount = spuCompoundShape->getNumChildShapes(); + int i; + // DMA all the subshapes + for ( i = 0; i < childShapeCount; ++i) + { + btCompoundShapeChild& childShape = compoundShapeLocation->gSubshapes[i]; + dmaCollisionShape (&compoundShapeLocation->gSubshapeShape[i],(ppu_address_t)childShape.m_childShape, dmaTag, childShape.m_childShapeType); + } +} + + +void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex) +{ + + int curIndex = startNodeIndex; + int walkIterations = 0; +#ifdef BT_DEBUG + int subTreeSize = endNodeIndex - startNodeIndex; +#endif + + int escapeIndex; + + unsigned int aabbOverlap, isLeafNode; + + while (curIndex < endNodeIndex) + { + //catch bugs in tree data + btAssert (walkIterations < subTreeSize); + + walkIterations++; + aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + + if (isLeafNode && aabbOverlap) + { + //printf("overlap with node %d\n",rootNode->getTriangleIndex()); + nodeCallback->processNode(0,rootNode->getTriangleIndex()); + // spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex()); + } + + if (aabbOverlap || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + +} diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h new file mode 100644 index 00000000000..d369395e160 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h @@ -0,0 +1,126 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +#ifndef __SPU_COLLISION_SHAPES_H +#define __SPU_COLLISION_SHAPES_H + +#include "../SpuDoubleBuffer.h" + +#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" +#include "BulletCollision/CollisionShapes/btConvexInternalShape.h" +#include "BulletCollision/CollisionShapes/btCylinderShape.h" +#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h" + +#include "BulletCollision/CollisionShapes/btOptimizedBvh.h" +#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" +#include "BulletCollision/CollisionShapes/btSphereShape.h" + +#include "BulletCollision/CollisionShapes/btCapsuleShape.h" + +#include "BulletCollision/CollisionShapes/btConvexShape.h" +#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h" +#include "BulletCollision/CollisionShapes/btConvexHullShape.h" +#include "BulletCollision/CollisionShapes/btCompoundShape.h" + +#define MAX_NUM_SPU_CONVEX_POINTS 128 + +ATTRIBUTE_ALIGNED16(struct) SpuConvexPolyhedronVertexData +{ + void* gSpuConvexShapePtr; + btVector3* gConvexPoints; + int gNumConvexPoints; + int unused; + ATTRIBUTE_ALIGNED16(btVector3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]); +}; + +#define MAX_SHAPE_SIZE 256 + +ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory +{ + ATTRIBUTE_ALIGNED16(char collisionShape[MAX_SHAPE_SIZE]); +}; + +ATTRIBUTE_ALIGNED16(struct) CompoundShape_LocalStoreMemory +{ + // Compound data +#define MAX_SPU_COMPOUND_SUBSHAPES 16 + ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]); + ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]); +}; + +ATTRIBUTE_ALIGNED16(struct) bvhMeshShape_LocalStoreMemory +{ + //ATTRIBUTE_ALIGNED16(btOptimizedBvh gOptimizedBvh); + ATTRIBUTE_ALIGNED16(char gOptimizedBvh[sizeof(btOptimizedBvh)+16]); + btOptimizedBvh* getOptimizedBvh() + { + return (btOptimizedBvh*) gOptimizedBvh; + } + + ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray gTriangleMeshInterfaceStorage); + btTriangleIndexVertexArray* gTriangleMeshInterfacePtr; + ///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment + ATTRIBUTE_ALIGNED16(btIndexedMesh gIndexMesh); + #define MAX_SPU_SUBTREE_HEADERS 32 + //1024 + ATTRIBUTE_ALIGNED16(btBvhSubtreeInfo gSubtreeHeaders[MAX_SPU_SUBTREE_HEADERS]); + ATTRIBUTE_ALIGNED16(btQuantizedBvhNode gSubtreeNodes[MAX_SUBTREE_SIZE_IN_BYTES/sizeof(btQuantizedBvhNode)]); +}; + + +void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform); +void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape); +void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag); +void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag); +void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray& nodeArray, int dmaTag); + +int getShapeTypeSize(int shapeType); +void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU); +void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType); +void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag); +void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag); + + +#define USE_BRANCHFREE_TEST 1 +#ifdef USE_BRANCHFREE_TEST +SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(unsigned short int* aabbMin1,unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) +{ +#if defined(__CELLOS_LV2__) && defined (__SPU__) + vec_ushort8 vecMin = {aabbMin1[0],aabbMin2[0],aabbMin1[2],aabbMin2[2],aabbMin1[1],aabbMin2[1],0,0}; + vec_ushort8 vecMax = {aabbMax2[0],aabbMax1[0],aabbMax2[2],aabbMax1[2],aabbMax2[1],aabbMax1[1],0,0}; + vec_ushort8 isGt = spu_cmpgt(vecMin,vecMax); + return spu_extract(spu_gather(isGt),0)==0; + +#else + return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0]) + & (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2]) + & (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])), + 1, 0); +#endif +} +#else + +SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) +{ + unsigned int overlap = 1; + overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap; + overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? 0 : overlap; + overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? 0 : overlap; + return overlap; +} +#endif + +void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex); + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp new file mode 100644 index 00000000000..8e540d9297b --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp @@ -0,0 +1,242 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuContactResult.h" + +//#define DEBUG_SPU_COLLISION_DETECTION 1 + +#ifdef DEBUG_SPU_COLLISION_DETECTION +#ifndef __SPU__ +#include +#define spu_printf printf +#endif +#endif DEBUG_SPU_COLLISION_DETECTION + +SpuContactResult::SpuContactResult() +{ + m_manifoldAddress = 0; + m_spuManifold = NULL; + m_RequiresWriteBack = false; +} + + SpuContactResult::~SpuContactResult() +{ + g_manifoldDmaExport.swapBuffers(); +} + + ///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback; +inline btScalar calculateCombinedFriction(btScalar friction0,btScalar friction1) +{ + btScalar friction = friction0*friction1; + + const btScalar MAX_FRICTION = btScalar(10.); + + if (friction < -MAX_FRICTION) + friction = -MAX_FRICTION; + if (friction > MAX_FRICTION) + friction = MAX_FRICTION; + return friction; + +} + +inline btScalar calculateCombinedRestitution(btScalar restitution0,btScalar restitution1) +{ + return restitution0*restitution1; +} + + + + void SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped) + { + //spu_printf("SpuContactResult::setContactInfo ManifoldAddress: %lu\n", manifoldAddress); + m_rootWorldTransform0 = worldTrans0; + m_rootWorldTransform1 = worldTrans1; + m_manifoldAddress = manifoldAddress; + m_spuManifold = spuManifold; + + m_combinedFriction = calculateCombinedFriction(friction0,friction1); + m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1); + m_isSwapped = isSwapped; + } + + void SpuContactResult::setShapeIdentifiersA(int partId0,int index0) + { + + } + + void SpuContactResult::setShapeIdentifiersB(int partId1,int index1) + { + + } + + + + ///return true if it requires a dma transfer back +bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld, + const btVector3& pointInWorld, + float depth, + btPersistentManifold* manifoldPtr, + btTransform& transA, + btTransform& transB, + btScalar combinedFriction, + btScalar combinedRestitution, + bool isSwapped) +{ + +// float contactTreshold = manifoldPtr->getContactBreakingThreshold(); + + //spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactTreshold,manifoldPtr); + +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: contactTreshold %f\n",contactTreshold); +#endif //DEBUG_SPU_COLLISION_DETECTION + if (depth > manifoldPtr->getContactBreakingThreshold()) + return false; + + btVector3 pointA; + btVector3 localA; + btVector3 localB; + btVector3 normal; + + + if (isSwapped) + { + normal = normalOnBInWorld * -1; + pointA = pointInWorld + normal * depth; + localA = transA.invXform(pointA ); + localB = transB.invXform(pointInWorld); + } + else + { + normal = normalOnBInWorld; + pointA = pointInWorld + normal * depth; + localA = transA.invXform(pointA ); + localB = transB.invXform(pointInWorld); + } + + btManifoldPoint newPt(localA,localB,normal,depth); + newPt.m_positionWorldOnA = pointA; + newPt.m_positionWorldOnB = pointInWorld; + + newPt.m_combinedFriction = combinedFriction; + newPt.m_combinedRestitution = combinedRestitution; + + + int insertIndex = manifoldPtr->getCacheEntry(newPt); + if (insertIndex >= 0) + { + // we need to replace the current contact point, otherwise small errors will accumulate (spheres start rolling etc) + manifoldPtr->replaceContactPoint(newPt,insertIndex); + return true; + + } else + { + + /* + ///@todo: SPU callbacks, either immediate (local on the SPU), or deferred + //User can override friction and/or restitution + if (gContactAddedCallback && + //and if either of the two bodies requires custom material + ((m_body0->m_collisionFlags & btCollisionObject::customMaterialCallback) || + (m_body1->m_collisionFlags & btCollisionObject::customMaterialCallback))) + { + //experimental feature info, for per-triangle material etc. + (*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1); + } + */ + manifoldPtr->addManifoldPoint(newPt); + return true; + + } + return false; + +} + + +void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold) +{ + ///only write back the contact information on SPU. Other platforms avoid copying, and use the data in-place + ///see SpuFakeDma.cpp 'cellDmaLargeGetReadOnly' +#if defined (__SPU__) || defined (USE_LIBSPE2) + memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold)); + + g_manifoldDmaExport.swapBuffers(); + ppu_address_t mmAddr = (ppu_address_t)mmManifold; + g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9)); + // Should there be any kind of wait here? What if somebody tries to use this tag again? What if we call this function again really soon? + //no, the swapBuffers does the wait +#endif +} + +void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth) +{ +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth); + spu_printf("*** normal = %f,%f,%f\n",normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ()); + spu_printf("*** position = %f,%f,%f\n",pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ()); +#endif //DEBUG_SPU_COLLISION_DETECTION + + +#ifdef DEBUG_SPU_COLLISION_DETECTION + // int sman = sizeof(rage::phManifold); +// spu_printf("sizeof_manifold = %i\n",sman); +#endif //DEBUG_SPU_COLLISION_DETECTION + + btPersistentManifold* localManifold = m_spuManifold; + + btVector3 normalB(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ()); + btVector3 pointWrld(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ()); + + //process the contact point + const bool retVal = ManifoldResultAddContactPoint(normalB, + pointWrld, + depth, + localManifold, + m_rootWorldTransform0, + m_rootWorldTransform1, + m_combinedFriction, + m_combinedRestitution, + m_isSwapped); + m_RequiresWriteBack = m_RequiresWriteBack || retVal; +} + +void SpuContactResult::flush() +{ + + if (m_spuManifold && m_spuManifold->getNumContacts()) + { + m_spuManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1); + m_RequiresWriteBack = true; + } + + + if (m_RequiresWriteBack) + { +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n"); + spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts()); + spu_printf("Manifold address: %llu\n", m_manifoldAddress); +#endif //DEBUG_SPU_COLLISION_DETECTION + // spu_printf("writeDoubleBufferedManifold\n"); + writeDoubleBufferedManifold(m_spuManifold, (btPersistentManifold*)m_manifoldAddress); +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: Finished (Put) DMA\n"); +#endif //DEBUG_SPU_COLLISION_DETECTION + } + m_spuManifold = NULL; + m_RequiresWriteBack = false; +} + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h new file mode 100644 index 00000000000..394f56dcbd1 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h @@ -0,0 +1,106 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPU_CONTACT_RESULT2_H +#define SPU_CONTACT_RESULT2_H + + +#ifndef _WIN32 +#include +#endif + + + +#include "../SpuDoubleBuffer.h" + + +#include "LinearMath/btTransform.h" + + +#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h" +#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h" + +class btCollisionShape; + + +struct SpuCollisionPairInput +{ + ppu_address_t m_collisionShapes[2]; + btCollisionShape* m_spuCollisionShapes[2]; + + ppu_address_t m_persistentManifoldPtr; + btVector3 m_primitiveDimensions0; + btVector3 m_primitiveDimensions1; + int m_shapeType0; + int m_shapeType1; + float m_collisionMargin0; + float m_collisionMargin1; + + btTransform m_worldTransform0; + btTransform m_worldTransform1; + + bool m_isSwapped; + bool m_useEpa; +}; + + +struct SpuClosestPointInput : public btDiscreteCollisionDetectorInterface::ClosestPointInput +{ + struct SpuConvexPolyhedronVertexData* m_convexVertexData[2]; +}; + +///SpuContactResult exports the contact points using double-buffered DMA transfers, only when needed +///So when an existing contact point is duplicated, no transfer/refresh is performed. +class SpuContactResult : public btDiscreteCollisionDetectorInterface::Result +{ + btTransform m_rootWorldTransform0; + btTransform m_rootWorldTransform1; + ppu_address_t m_manifoldAddress; + + btPersistentManifold* m_spuManifold; + bool m_RequiresWriteBack; + btScalar m_combinedFriction; + btScalar m_combinedRestitution; + + bool m_isSwapped; + + DoubleBuffer g_manifoldDmaExport; + + public: + SpuContactResult(); + virtual ~SpuContactResult(); + + btPersistentManifold* GetSpuManifold() const + { + return m_spuManifold; + } + + virtual void setShapeIdentifiersA(int partId0,int index0); + virtual void setShapeIdentifiersB(int partId1,int index1); + + void setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction01, bool isSwapped); + + + void writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold); + + virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth); + + void flush(); +}; + + + +#endif //SPU_CONTACT_RESULT2_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h new file mode 100644 index 00000000000..449f19288c4 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h @@ -0,0 +1,51 @@ + +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef SPU_CONVEX_PENETRATION_DEPTH_H +#define SPU_CONVEX_PENETRATION_DEPTH_H + + + +class btStackAlloc; +class btIDebugDraw; +#include "BulletCollision/NarrowphaseCollision/btConvexPenetrationDepthSolver.h" + +#include "LinearMath/btTransform.h" + + +///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation. +class SpuConvexPenetrationDepthSolver : public btConvexPenetrationDepthSolver +{ +public: + + virtual ~SpuConvexPenetrationDepthSolver() {}; + virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver, + void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB, + btTransform& transA,const btTransform& transB, + btVector3& v, btVector3& pa, btVector3& pb, + class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc, + struct SpuConvexPolyhedronVertexData* convexVertexDataA, + struct SpuConvexPolyhedronVertexData* convexVertexDataB + ) const = 0; + + +}; + + + +#endif //SPU_CONVEX_PENETRATION_DEPTH_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp new file mode 100644 index 00000000000..c3dfaa793e3 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp @@ -0,0 +1,1381 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuGatheringCollisionTask.h" + +//#define DEBUG_SPU_COLLISION_DETECTION 1 +#include "../SpuDoubleBuffer.h" + +#include "../SpuCollisionTaskProcess.h" +#include "../SpuGatheringCollisionDispatcher.h" //for SPU_BATCHSIZE_BROADPHASE_PAIRS + +#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" +#include "../SpuContactManifoldCollisionAlgorithm.h" +#include "BulletCollision/CollisionDispatch/btCollisionObject.h" +#include "SpuContactResult.h" +#include "BulletCollision/CollisionShapes/btOptimizedBvh.h" +#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" +#include "BulletCollision/CollisionShapes/btSphereShape.h" +#include "BulletCollision/CollisionShapes/btConvexPointCloudShape.h" + +#include "BulletCollision/CollisionShapes/btCapsuleShape.h" + +#include "BulletCollision/CollisionShapes/btConvexShape.h" +#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h" +#include "BulletCollision/CollisionShapes/btConvexHullShape.h" +#include "BulletCollision/CollisionShapes/btCompoundShape.h" + +#include "SpuMinkowskiPenetrationDepthSolver.h" +//#include "SpuEpaPenetrationDepthSolver.h" +#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h" + + +#include "boxBoxDistance.h" +#include "BulletMultiThreaded/vectormath2bullet.h" +#include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData +#include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h" +#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h" +#include "BulletCollision/CollisionShapes/btTriangleShape.h" + +#ifdef __SPU__ +///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases +#ifndef USE_LIBSPE2 +#define USE_SOFTWARE_CACHE 1 +#endif +#endif //__SPU__ + +int gSkippedCol = 0; +int gProcessedCol = 0; + +//////////////////////////////////////////////// +/// software caching +#if USE_SOFTWARE_CACHE +#include +#include +#include +#include +#define SPE_CACHE_NWAY 4 +//#define SPE_CACHE_NSETS 32, 16 +#define SPE_CACHE_NSETS 8 +//#define SPE_CACHELINE_SIZE 512 +#define SPE_CACHELINE_SIZE 128 +#define SPE_CACHE_SET_TAGID(set) 15 +///make sure that spe_cache.h is below those defines! +#include "../Extras/software_cache/cache/include/spe_cache.h" + + +int g_CacheMisses=0; +int g_CacheHits=0; + +#if 0 // Added to allow cache misses and hits to be tracked, change this to 1 to restore unmodified version +#define spe_cache_read(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1) +#else +#define spe_cache_read(ea) \ +({ \ + int set, idx, line, byte; \ + _spe_cache_nway_lookup_(ea, set, idx); \ + \ + if (btUnlikely(idx < 0)) { \ + ++g_CacheMisses; \ + idx = _spe_cache_miss_(ea, set, -1); \ + spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \ + spu_mfcstat(MFC_TAG_UPDATE_ALL); \ + } \ + else \ + { \ + ++g_CacheHits; \ + } \ + line = _spe_cacheline_num_(set, idx); \ + byte = _spe_cacheline_byte_offset_(ea); \ + (void *) &spe_cache_mem[line + byte]; \ +}) + +#endif + +#endif // USE_SOFTWARE_CACHE + +bool gUseEpa = false; + +#ifdef USE_SN_TUNER +#include +#endif //USE_SN_TUNER + +#if defined (__SPU__) && !defined (USE_LIBSPE2) +#include +#elif defined (USE_LIBSPE2) +#define spu_printf(a) +#else +#define IGNORE_ALIGNMENT 1 +#include +#include +#define spu_printf printf + +#endif + +//int gNumConvexPoints0=0; + +///Make sure no destructors are called on this memory +struct CollisionTask_LocalStoreMemory +{ + ///This CollisionTask_LocalStoreMemory is mainly used for the SPU version, using explicit DMA + ///Other platforms can use other memory programming models. + + ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairsBuffer[SPU_BATCHSIZE_BROADPHASE_PAIRS]); + DoubleBuffer g_workUnitTaskBuffers; + ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgoBuffer [sizeof(SpuContactManifoldCollisionAlgorithm)+16]); + ATTRIBUTE_ALIGNED16(char gColObj0Buffer [sizeof(btCollisionObject)+16]); + ATTRIBUTE_ALIGNED16(char gColObj1Buffer [sizeof(btCollisionObject)+16]); + ///we reserve 32bit integer indices, even though they might be 16bit + ATTRIBUTE_ALIGNED16(int spuIndices[16]); + btPersistentManifold gPersistentManifoldBuffer; + CollisionShape_LocalStoreMemory gCollisionShapes[2]; + bvhMeshShape_LocalStoreMemory bvhShapeData; + SpuConvexPolyhedronVertexData convexVertexData[2]; + CompoundShape_LocalStoreMemory compoundShapeData[2]; + + ///The following pointers might either point into this local store memory, or to the original/other memory locations. + ///See SpuFakeDma for implementation of cellDmaSmallGetReadOnly. + btCollisionObject* m_lsColObj0Ptr; + btCollisionObject* m_lsColObj1Ptr; + btBroadphasePair* m_pairsPointer; + btPersistentManifold* m_lsManifoldPtr; + SpuContactManifoldCollisionAlgorithm* m_lsCollisionAlgorithmPtr; + + bool needsDmaPutContactManifoldAlgo; + + btCollisionObject* getColObj0() + { + return m_lsColObj0Ptr; + } + btCollisionObject* getColObj1() + { + return m_lsColObj1Ptr; + } + + + btBroadphasePair* getBroadphasePairPtr() + { + return m_pairsPointer; + } + + SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm() + { + return m_lsCollisionAlgorithmPtr; + } + + btPersistentManifold* getContactManifoldPtr() + { + return m_lsManifoldPtr; + } +}; + + +#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) + +ATTRIBUTE_ALIGNED16(CollisionTask_LocalStoreMemory gLocalStoreMemory); + +void* createCollisionLocalStoreMemory() +{ + return &gLocalStoreMemory; +} +#else +void* createCollisionLocalStoreMemory() +{ + return new CollisionTask_LocalStoreMemory; +} + +#endif + +void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts); + + +SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size) +{ +#if USE_SOFTWARE_CACHE + // Check for alignment requirements. We need to make sure the entire request fits within one cache line, + // so the first and last bytes should fall on the same cache line + btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK)); + + void* ls = spe_cache_read(ea); + memcpy(buffer, ls, size); +#else + stallingUnalignedDmaSmallGet(buffer,ea,size); +#endif +} + +SIMD_FORCE_INLINE void small_cache_read_triple( void* ls0, ppu_address_t ea0, + void* ls1, ppu_address_t ea1, + void* ls2, ppu_address_t ea2, + size_t size) +{ + btAssert(size<16); + ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]); + ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]); + ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]); + + uint32_t i; + + + ///make sure last 4 bits are the same, for cellDmaSmallGet + char* localStore0 = (char*)ls0; + uint32_t last4BitsOffset = ea0 & 0x0f; + char* tmpTarget0 = tmpBuffer0 + last4BitsOffset; +#ifdef __SPU__ + cellDmaSmallGet(tmpTarget0,ea0,size,DMA_TAG(1),0,0); +#else + tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0); +#endif + + + char* localStore1 = (char*)ls1; + last4BitsOffset = ea1 & 0x0f; + char* tmpTarget1 = tmpBuffer1 + last4BitsOffset; +#ifdef __SPU__ + cellDmaSmallGet(tmpTarget1,ea1,size,DMA_TAG(1),0,0); +#else + tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0); +#endif + + char* localStore2 = (char*)ls2; + last4BitsOffset = ea2 & 0x0f; + char* tmpTarget2 = tmpBuffer2 + last4BitsOffset; +#ifdef __SPU__ + cellDmaSmallGet(tmpTarget2,ea2,size,DMA_TAG(1),0,0); +#else + tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0); +#endif + + + cellDmaWaitTagStatusAll( DMA_MASK(1) ); + + //this is slowish, perhaps memcpy on SPU is smarter? + for (i=0; btLikely( ibvhShapeData.gIndexMesh.m_indexType == PHY_SHORT) + { + unsigned short int* indexBasePtr = (unsigned short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride); + ATTRIBUTE_ALIGNED16(unsigned short int tmpIndices[3]); + + small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0], + &tmpIndices[1],(ppu_address_t)&indexBasePtr[1], + &tmpIndices[2],(ppu_address_t)&indexBasePtr[2], + sizeof(unsigned short int)); + + m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]); + m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]); + m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]); + } else + { + unsigned int* indexBasePtr = (unsigned int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride); + + small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0], + &m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1], + &m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2], + sizeof(int)); + } + + // spu_printf("SPU index0=%d ,",spuIndices[0]); + // spu_printf("SPU index1=%d ,",spuIndices[1]); + // spu_printf("SPU index2=%d ,",spuIndices[2]); + // spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr); + + const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling(); + for (int j=2;btLikely( j>=0 );j--) + { + int graphicsindex = m_lsMemPtr->spuIndices[j]; + + // spu_printf("SPU index=%d ,",graphicsindex); + btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride); + // spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr); + + + ///handle un-aligned vertices... + + //another DMA for each vertex + small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0], + &spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1], + &spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2], + sizeof(btScalar)); + + m_tmpTriangleShape.getVertexPtr(j).setValue(spuUnscaledVertex[0]*meshScaling.getX(), + spuUnscaledVertex[1]*meshScaling.getY(), + spuUnscaledVertex[2]*meshScaling.getZ()); + + // spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z()); + } + + + SpuCollisionPairInput triangleConcaveInput(*m_wuInput); +// triangleConcaveInput.m_spuCollisionShapes[1] = &spuTriangleVertices[0]; + triangleConcaveInput.m_spuCollisionShapes[1] = &m_tmpTriangleShape; + triangleConcaveInput.m_shapeType1 = TRIANGLE_SHAPE_PROXYTYPE; + + m_spuContacts.setShapeIdentifiersB(subPart,triangleIndex); + + // m_spuContacts.flush(); + + ProcessSpuConvexConvexCollision(&triangleConcaveInput, m_lsMemPtr,m_spuContacts); + ///this flush should be automatic + // m_spuContacts.flush(); + } + +}; + + + +void btConvexPlaneCollideSingleContact (SpuCollisionPairInput* wuInput,CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts) +{ + + btConvexShape* convexShape = (btConvexShape*) wuInput->m_spuCollisionShapes[0]; + btStaticPlaneShape* planeShape = (btStaticPlaneShape*) wuInput->m_spuCollisionShapes[1]; + + bool hasCollision = false; + const btVector3& planeNormal = planeShape->getPlaneNormal(); + const btScalar& planeConstant = planeShape->getPlaneConstant(); + + + btTransform convexWorldTransform = wuInput->m_worldTransform0; + btTransform convexInPlaneTrans; + convexInPlaneTrans= wuInput->m_worldTransform1.inverse() * convexWorldTransform; + btTransform planeInConvex; + planeInConvex= convexWorldTransform.inverse() * wuInput->m_worldTransform1; + + //btVector3 vtx = convexShape->localGetSupportVertexWithoutMarginNonVirtual(planeInConvex.getBasis()*-planeNormal); + btVector3 vtx = convexShape->localGetSupportVertexNonVirtual(planeInConvex.getBasis()*-planeNormal); + + btVector3 vtxInPlane = convexInPlaneTrans(vtx); + btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant); + + btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal; + btVector3 vtxInPlaneWorld = wuInput->m_worldTransform1 * vtxInPlaneProjected; + + hasCollision = distance < lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold(); + //resultOut->setPersistentManifold(m_manifoldPtr); + if (hasCollision) + { + /// report a contact. internally this will be kept persistent, and contact reduction is done + btVector3 normalOnSurfaceB =wuInput->m_worldTransform1.getBasis() * planeNormal; + btVector3 pOnB = vtxInPlaneWorld; + spuContacts.addContactPoint(normalOnSurfaceB,pOnB,distance); + } +} + +void ProcessConvexPlaneSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) +{ + + register int dmaSize = 0; + register ppu_address_t dmaPpuAddress2; + btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr; + + ///DMA in the vertices for convex shapes + ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); + ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]); + + if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + // spu_printf("SPU: DMA btConvexHullShape\n"); + + dmaSize = sizeof(btConvexHullShape); + dmaPpuAddress2 = wuInput->m_collisionShapes[0]; + + cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + //cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + // spu_printf("SPU: DMA btConvexHullShape\n"); + dmaSize = sizeof(btConvexHullShape); + dmaPpuAddress2 = wuInput->m_collisionShapes[1]; + cellDmaGet(&convexHullShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + //cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0); + lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0]; + } + + + if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1); + lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1]; + } + + + btConvexPointCloudShape cpc0,cpc1; + + if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(2)); + lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0]; + btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0]; + const btVector3& localScaling = ch->getLocalScalingNV(); + cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling); + wuInput->m_spuCollisionShapes[0] = &cpc0; + } + + if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(2)); + lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0]; + btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1]; + const btVector3& localScaling = ch->getLocalScalingNV(); + cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling); + wuInput->m_spuCollisionShapes[1] = &cpc1; + + } + + +// const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0]; +// const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1]; +// int shapeType0 = wuInput->m_shapeType0; +// int shapeType1 = wuInput->m_shapeType1; + float marginA = wuInput->m_collisionMargin0; + float marginB = wuInput->m_collisionMargin1; + + SpuClosestPointInput cpInput; + cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0]; + cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1]; + cpInput.m_transformA = wuInput->m_worldTransform0; + cpInput.m_transformB = wuInput->m_worldTransform1; + float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold()); + cpInput.m_maximumDistanceSquared = sumMargin * sumMargin; + + ppu_address_t manifoldAddress = (ppu_address_t)manifold; + + btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr(); + //spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped); + spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(), + lsMemPtr->getColObj1()->getWorldTransform(), + lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(), + lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(), + wuInput->m_isSwapped); + + + btConvexPlaneCollideSingleContact(wuInput,lsMemPtr,spuContacts); + + + + +} + + + + +//////////////////////// +/// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc) +/////////////////// +void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) +{ + //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite + + btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1]; + //need the mesh interface, for access to triangle vertices + dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape); + + btVector3 aabbMin(-1,-400,-1); + btVector3 aabbMax(1,400,1); + + + //recalc aabbs + btTransform convexInTriangleSpace; + convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0; + btConvexInternalShape* convexShape = (btConvexInternalShape*)wuInput->m_spuCollisionShapes[0]; + + computeAabb (aabbMin, aabbMax, convexShape, wuInput->m_collisionShapes[0], wuInput->m_shapeType0, convexInTriangleSpace); + + + //CollisionShape* triangleShape = static_cast(triBody->m_collisionShape); + //convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax); + + // btScalar extraMargin = collisionMarginTriangle; + // btVector3 extra(extraMargin,extraMargin,extraMargin); + // aabbMax += extra; + // aabbMin -= extra; + + ///quantize query AABB + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0); + lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1); + + QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); + //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); + + BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); + + + spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts); + IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); + //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); + + // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); + //not likely to happen + if (subTrees.size() && indexArray.size() == 1) + { + ///DMA in the index info + dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + //display the headers + int numBatch = subTrees.size(); + for (int i=0;ibvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + // spu_printf("nextBatch = %d\n",nextBatch); + + for (int j=0;jbvhShapeData.gSubtreeHeaders[j]; + + unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap) + { + btAssert(subtree.m_subtreeSize); + + //dma the actual nodes of this subtree + dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); + cellDmaWaitTagStatusAll(DMA_MASK(2)); + + /* Walk this subtree */ + spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, + &lsMemPtr->bvhShapeData.gSubtreeNodes[0], + 0, + subtree.m_subtreeSize); + } + // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); + } + + // unsigned short int m_quantizedAabbMin[3]; + // unsigned short int m_quantizedAabbMax[3]; + // int m_rootNodeIndex; + // int m_subtreeSize; + i+=nextBatch; + } + + //pre-fetch first tree, then loop and double buffer + } + +} + + +int stats[11]={0,0,0,0,0,0,0,0,0,0,0}; +int degenerateStats[11]={0,0,0,0,0,0,0,0,0,0,0}; + + +//////////////////////// +/// Convex versus Convex collision detection (handles collision between sphere, box, cylinder, triangle, cone, convex polyhedron etc) +/////////////////// +void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) +{ + register int dmaSize; + register ppu_address_t dmaPpuAddress2; + +#ifdef DEBUG_SPU_COLLISION_DETECTION + //spu_printf("SPU: ProcessSpuConvexConvexCollision\n"); +#endif //DEBUG_SPU_COLLISION_DETECTION + //CollisionShape* shape0 = (CollisionShape*)wuInput->m_collisionShapes[0]; + //CollisionShape* shape1 = (CollisionShape*)wuInput->m_collisionShapes[1]; + btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr; + + bool genericGjk = true; + + if (genericGjk) + { + //try generic GJK + + + + //SpuConvexPenetrationDepthSolver* penetrationSolver=0; + btVoronoiSimplexSolver simplexSolver; + btGjkEpaPenetrationDepthSolver epaPenetrationSolver2; + + btConvexPenetrationDepthSolver* penetrationSolver = &epaPenetrationSolver2; + + //SpuMinkowskiPenetrationDepthSolver minkowskiPenetrationSolver; +#ifdef ENABLE_EPA + if (gUseEpa) + { + penetrationSolver = &epaPenetrationSolver2; + } else +#endif + { + //penetrationSolver = &minkowskiPenetrationSolver; + } + + + ///DMA in the vertices for convex shapes + ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); + ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]); + + if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + // spu_printf("SPU: DMA btConvexHullShape\n"); + + dmaSize = sizeof(btConvexHullShape); + dmaPpuAddress2 = wuInput->m_collisionShapes[0]; + + cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + //cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + // spu_printf("SPU: DMA btConvexHullShape\n"); + dmaSize = sizeof(btConvexHullShape); + dmaPpuAddress2 = wuInput->m_collisionShapes[1]; + cellDmaGet(&convexHullShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + //cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + + if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0); + lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0]; + } + + + if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1); + lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1]; + } + + + btConvexPointCloudShape cpc0,cpc1; + + if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(2)); + lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0]; + btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0]; + const btVector3& localScaling = ch->getLocalScalingNV(); + cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling); + wuInput->m_spuCollisionShapes[0] = &cpc0; + } + + if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) + { + cellDmaWaitTagStatusAll(DMA_MASK(2)); + lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0]; + btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1]; + const btVector3& localScaling = ch->getLocalScalingNV(); + cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling); + wuInput->m_spuCollisionShapes[1] = &cpc1; + + } + + + const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0]; + const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1]; + int shapeType0 = wuInput->m_shapeType0; + int shapeType1 = wuInput->m_shapeType1; + float marginA = wuInput->m_collisionMargin0; + float marginB = wuInput->m_collisionMargin1; + + SpuClosestPointInput cpInput; + cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0]; + cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1]; + cpInput.m_transformA = wuInput->m_worldTransform0; + cpInput.m_transformB = wuInput->m_worldTransform1; + float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold()); + cpInput.m_maximumDistanceSquared = sumMargin * sumMargin; + + ppu_address_t manifoldAddress = (ppu_address_t)manifold; + + btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr(); + //spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped); + spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(), + lsMemPtr->getColObj1()->getWorldTransform(), + lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(), + lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(), + wuInput->m_isSwapped); + + { + btGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&simplexSolver,penetrationSolver);//&vsSolver,penetrationSolver); + gjk.getClosestPoints(cpInput,spuContacts,0);//,debugDraw); + + stats[gjk.m_lastUsedMethod]++; + degenerateStats[gjk.m_degenerateSimplex]++; + +#ifdef USE_SEPDISTANCE_UTIL + btScalar sepDist = gjk.getCachedSeparatingDistance()+spuManifold->getContactBreakingThreshold(); + lsMemPtr->getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(gjk.getCachedSeparatingAxis(),sepDist,wuInput->m_worldTransform0,wuInput->m_worldTransform1); + lsMemPtr->needsDmaPutContactManifoldAlgo = true; +#endif //USE_SEPDISTANCE_UTIL + + } + + } + + +} + + +template void DoSwap(T& a, T& b) +{ + char tmp[sizeof(T)]; + memcpy(tmp, &a, sizeof(T)); + memcpy(&a, &b, sizeof(T)); + memcpy(&b, tmp, sizeof(T)); +} + +SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem) +{ + register int dmaSize; + register ppu_address_t dmaPpuAddress2; + + dmaSize = sizeof(btCollisionObject);//btTransform); + dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0(); + lsMem.m_lsColObj0Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj0Buffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + + dmaSize = sizeof(btCollisionObject);//btTransform); + dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1(); + lsMem.m_lsColObj1Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj1Buffer, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); + + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + btCollisionObject* ob0 = lsMem.getColObj0(); + btCollisionObject* ob1 = lsMem.getColObj1(); + + collisionPairInput.m_worldTransform0 = ob0->getWorldTransform(); + collisionPairInput.m_worldTransform1 = ob1->getWorldTransform(); +} + + + +void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem, + SpuContactResult &spuContacts, + ppu_address_t collisionShape0Ptr, void* collisionShape0Loc, + ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true) +{ + + if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) + && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) + { + if (dmaShapes) + { + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + } + + btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc; + btConvexInternalShape* spuConvexShape1 = (btConvexInternalShape*)collisionShape1Loc; + + btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions(); + btVector3 dim1 = spuConvexShape1->getImplicitShapeDimensions(); + + collisionPairInput.m_primitiveDimensions0 = dim0; + collisionPairInput.m_primitiveDimensions1 = dim1; + collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr; + collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr; + collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0; + collisionPairInput.m_spuCollisionShapes[1] = spuConvexShape1; + ProcessSpuConvexConvexCollision(&collisionPairInput,&lsMem,spuContacts); + } + else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) && + btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1)) + { + //snPause(); + + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + // Both are compounds, do N^2 CD for now + ///@todo: add some AABB-based pruning (probably not -> slower) + + btCompoundShape* spuCompoundShape0 = (btCompoundShape*)collisionShape0Loc; + btCompoundShape* spuCompoundShape1 = (btCompoundShape*)collisionShape1Loc; + + dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape0, 1); + dmaCompoundShapeInfo (&lsMem.compoundShapeData[1], spuCompoundShape1, 2); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + + dmaCompoundSubShapes (&lsMem.compoundShapeData[0], spuCompoundShape0, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaCompoundSubShapes (&lsMem.compoundShapeData[1], spuCompoundShape1, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + int childShapeCount0 = spuCompoundShape0->getNumChildShapes(); + int childShapeCount1 = spuCompoundShape1->getNumChildShapes(); + + // Start the N^2 + for (int i = 0; i < childShapeCount0; ++i) + { + btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i]; + btAssert(!btBroadphaseProxy::isCompound(childShape0.m_childShapeType)); + + for (int j = 0; j < childShapeCount1; ++j) + { + btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j]; + btAssert(!btBroadphaseProxy::isCompound(childShape1.m_childShapeType)); + + + /* Create a new collision pair input struct using the two child shapes */ + SpuCollisionPairInput cinput (collisionPairInput); + + cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape0.m_transform; + cinput.m_shapeType0 = childShape0.m_childShapeType; + cinput.m_collisionMargin0 = childShape0.m_childMargin; + + cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape1.m_transform; + cinput.m_shapeType1 = childShape1.m_childShapeType; + cinput.m_collisionMargin1 = childShape1.m_childMargin; + /* Recursively call handleCollisionPair () with new collision pair input */ + + handleCollisionPair(cinput, lsMem, spuContacts, + (ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], + (ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false); + } + } + } + else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) ) + { + //snPause(); + + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + // object 0 compound, object 1 non-compound + btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape0Loc; + dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + int childShapeCount = spuCompoundShape->getNumChildShapes(); + + for (int i = 0; i < childShapeCount; ++i) + { + btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i]; + btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType)); + // Dma the child shape + dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + SpuCollisionPairInput cinput (collisionPairInput); + cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape.m_transform; + cinput.m_shapeType0 = childShape.m_childShapeType; + cinput.m_collisionMargin0 = childShape.m_childMargin; + + handleCollisionPair(cinput, lsMem, spuContacts, + (ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], + collisionShape1Ptr, collisionShape1Loc, false); + } + } + else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1) ) + { + //snPause(); + + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + // object 0 non-compound, object 1 compound + btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape1Loc; + dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + int childShapeCount = spuCompoundShape->getNumChildShapes(); + + for (int i = 0; i < childShapeCount; ++i) + { + btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i]; + btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType)); + // Dma the child shape + dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + SpuCollisionPairInput cinput (collisionPairInput); + cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape.m_transform; + cinput.m_shapeType1 = childShape.m_childShapeType; + cinput.m_collisionMargin1 = childShape.m_childMargin; + handleCollisionPair(cinput, lsMem, spuContacts, + collisionShape0Ptr, collisionShape0Loc, + (ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], false); + } + + } + else + { + //a non-convex shape is involved + bool handleConvexConcave = false; + + //snPause(); + + if (btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType0) && + btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) + { + // Swap stuff + DoSwap(collisionShape0Ptr, collisionShape1Ptr); + DoSwap(collisionShape0Loc, collisionShape1Loc); + DoSwap(collisionPairInput.m_shapeType0, collisionPairInput.m_shapeType1); + DoSwap(collisionPairInput.m_worldTransform0, collisionPairInput.m_worldTransform1); + DoSwap(collisionPairInput.m_collisionMargin0, collisionPairInput.m_collisionMargin1); + + collisionPairInput.m_isSwapped = true; + } + + if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)&& + btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType1)) + { + handleConvexConcave = true; + } + if (handleConvexConcave) + { + if (dmaShapes) + { + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + } + + if (collisionPairInput.m_shapeType1 == STATIC_PLANE_PROXYTYPE) + { + btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc; + btStaticPlaneShape* planeShape= (btStaticPlaneShape*)collisionShape1Loc; + + btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions(); + collisionPairInput.m_primitiveDimensions0 = dim0; + collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr; + collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr; + collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0; + collisionPairInput.m_spuCollisionShapes[1] = planeShape; + + ProcessConvexPlaneSpuCollision(&collisionPairInput,&lsMem,spuContacts); + } else + { + btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc; + btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc; + + btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions(); + collisionPairInput.m_primitiveDimensions0 = dim0; + collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr; + collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr; + collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0; + collisionPairInput.m_spuCollisionShapes[1] = trimeshShape; + + ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts); + } + } + + } + + spuContacts.flush(); + +} + + +void processCollisionTask(void* userPtr, void* lsMemPtr) +{ + + SpuGatherAndProcessPairsTaskDesc* taskDescPtr = (SpuGatherAndProcessPairsTaskDesc*)userPtr; + SpuGatherAndProcessPairsTaskDesc& taskDesc = *taskDescPtr; + CollisionTask_LocalStoreMemory* colMemPtr = (CollisionTask_LocalStoreMemory*)lsMemPtr; + CollisionTask_LocalStoreMemory& lsMem = *(colMemPtr); + + gUseEpa = taskDesc.m_useEpa; + + // spu_printf("taskDescPtr=%llx\n",taskDescPtr); + + SpuContactResult spuContacts; + + //////////////////// + + ppu_address_t dmaInPtr = taskDesc.m_inPairPtr; + unsigned int numPages = taskDesc.numPages; + unsigned int numOnLastPage = taskDesc.numOnLastPage; + + // prefetch first set of inputs and wait + lsMem.g_workUnitTaskBuffers.init(); + + unsigned int nextNumOnPage = (numPages > 1)? MIDPHASE_NUM_WORKUNITS_PER_PAGE : numOnLastPage; + lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3)); + dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE; + + + register unsigned char *inputPtr; + register unsigned int numOnPage; + register unsigned int j; + SpuGatherAndProcessWorkUnitInput* wuInputs; + register int dmaSize; + register ppu_address_t dmaPpuAddress; + register ppu_address_t dmaPpuAddress2; + + int numPairs; + register int p; + SpuCollisionPairInput collisionPairInput; + + for (unsigned int i = 0; btLikely(i < numPages); i++) + { + + // wait for back buffer dma and swap buffers + inputPtr = lsMem.g_workUnitTaskBuffers.swapBuffers(); + + // number on current page is number prefetched last iteration + numOnPage = nextNumOnPage; + + + // prefetch next set of inputs +#if MIDPHASE_NUM_WORKUNIT_PAGES > 2 + if ( btLikely( i < numPages-1 ) ) +#else + if ( btUnlikely( i < numPages-1 ) ) +#endif + { + nextNumOnPage = (i == numPages-2)? numOnLastPage : MIDPHASE_NUM_WORKUNITS_PER_PAGE; + lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3)); + dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE; + } + + wuInputs = reinterpret_cast(inputPtr); + + + for (j = 0; btLikely( j < numOnPage ); j++) + { +#ifdef DEBUG_SPU_COLLISION_DETECTION + // printMidphaseInput(&wuInputs[j]); +#endif //DEBUG_SPU_COLLISION_DETECTION + + + numPairs = wuInputs[j].m_endIndex - wuInputs[j].m_startIndex; + + if ( btLikely( numPairs ) ) + { + dmaSize = numPairs*sizeof(btBroadphasePair); + dmaPpuAddress = wuInputs[j].m_pairArrayPtr+wuInputs[j].m_startIndex * sizeof(btBroadphasePair); + lsMem.m_pairsPointer = (btBroadphasePair*)cellDmaGetReadOnly(&lsMem.gBroadphasePairsBuffer, dmaPpuAddress , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + for (p=0;pm_userInfo = %d\n",pair.m_userInfo); + spu_printf("pair->m_algorithm = %d\n",pair.m_algorithm); + spu_printf("pair->m_pProxy0 = %d\n",pair.m_pProxy0); + spu_printf("pair->m_pProxy1 = %d\n",pair.m_pProxy1); +#endif //DEBUG_SPU_COLLISION_DETECTION + + if (pair.m_internalTmpValue == 2 && pair.m_algorithm && pair.m_pProxy0 && pair.m_pProxy1) + { + dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm); + dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; + lsMem.m_lsCollisionAlgorithmPtr = (SpuContactManifoldCollisionAlgorithm*)cellDmaGetReadOnly(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + lsMem.needsDmaPutContactManifoldAlgo = false; + + collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.getlocalCollisionAlgorithm()->getContactManifoldPtr(); + collisionPairInput.m_isSwapped = false; + + if (1) + { + + ///can wait on the combined DMA_MASK, or dma on the same tag + + +#ifdef DEBUG_SPU_COLLISION_DETECTION + // spu_printf("SPU collisionPairInput->m_shapeType0 = %d\n",collisionPairInput->m_shapeType0); + // spu_printf("SPU collisionPairInput->m_shapeType1 = %d\n",collisionPairInput->m_shapeType1); +#endif //DEBUG_SPU_COLLISION_DETECTION + + + dmaSize = sizeof(btPersistentManifold); + + dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr; + lsMem.m_lsManifoldPtr = (btPersistentManifold*)cellDmaGetReadOnly(&lsMem.gPersistentManifoldBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + + collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0(); + collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1(); + collisionPairInput.m_collisionMargin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + collisionPairInput.m_collisionMargin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); + + + + //??cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + if (1) + { + //snPause(); + + // Get the collision objects + dmaAndSetupCollisionObjects(collisionPairInput, lsMem); + + if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive()) + { + + lsMem.needsDmaPutContactManifoldAlgo = true; +#ifdef USE_SEPDISTANCE_UTIL + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.updateSeparatingDistance(collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1); +#endif //USE_SEPDISTANCE_UTIL + +#define USE_DEDICATED_BOX_BOX 1 +#ifdef USE_DEDICATED_BOX_BOX + bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&& + (lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE)); + if (boxbox) + { + //spu_printf("boxbox dist = %f\n",distance); + btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr(); + btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; + ppu_address_t manifoldAddress = (ppu_address_t)manifold; + + spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(), + lsMem.getColObj1()->getWorldTransform(), + lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(), + lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(), + collisionPairInput.m_isSwapped); + + + //float distance=0.f; + btVector3 normalInB; + + + if (//!gUseEpa && +#ifdef USE_SEPDISTANCE_UTIL + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f +#else + 1 +#endif + ) + { +//#define USE_PE_BOX_BOX 1 +#ifdef USE_PE_BOX_BOX + { + + //getCollisionMargin0 + btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); + btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0); + btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1); + + Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ()); + Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()); + Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()); + Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()); + Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()); + + Transform3 transformA(vmMatrix0,vmPos0); + Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ()); + Transform3 transformB(vmMatrix1,vmPos1); + BoxPoint resultClosestBoxPointA; + BoxPoint resultClosestBoxPointB; + Vector3 resultNormal; +#ifdef USE_SEPDISTANCE_UTIL + float distanceThreshold = FLT_MAX +#else + float distanceThreshold = 0.f; +#endif + + + distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold); + + normalInB = -getBtVector3(resultNormal); + + if(distance < spuManifold->getContactBreakingThreshold()) + { + btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint)); + + spuContacts.addContactPoint( + normalInB, + pointOnB, + distance); + } + } +#else + { + + btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0(); + btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1(); + btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0); + btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1); + + + btBoxShape box0(shapeDim0); + btBoxShape box1(shapeDim1); + + struct SpuBridgeContactCollector : public btDiscreteCollisionDetectorInterface::Result + { + SpuContactResult& m_spuContacts; + + virtual void setShapeIdentifiersA(int partId0,int index0) + { + m_spuContacts.setShapeIdentifiersA(partId0,index0); + } + virtual void setShapeIdentifiersB(int partId1,int index1) + { + m_spuContacts.setShapeIdentifiersB(partId1,index1); + } + virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth) + { + m_spuContacts.addContactPoint(normalOnBInWorld,pointInWorld,depth); + } + + SpuBridgeContactCollector(SpuContactResult& spuContacts) + :m_spuContacts(spuContacts) + { + + } + }; + + SpuBridgeContactCollector bridgeOutput(spuContacts); + + btDiscreteCollisionDetectorInterface::ClosestPointInput input; + input.m_maximumDistanceSquared = BT_LARGE_FLOAT; + input.m_transformA = collisionPairInput.m_worldTransform0; + input.m_transformB = collisionPairInput.m_worldTransform1; + + btBoxBoxDetector detector(&box0,&box1); + + detector.getClosestPoints(input,bridgeOutput,0); + + } +#endif //USE_PE_BOX_BOX + + lsMem.needsDmaPutContactManifoldAlgo = true; +#ifdef USE_SEPDISTANCE_UTIL + btScalar sepDist2 = distance+spuManifold->getContactBreakingThreshold(); + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(normalInB,sepDist2,collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1); +#endif //USE_SEPDISTANCE_UTIL + gProcessedCol++; + } else + { + gSkippedCol++; + } + + spuContacts.flush(); + + + } else +#endif //USE_DEDICATED_BOX_BOX + { + if ( +#ifdef USE_SEPDISTANCE_UTIL + lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f +#else + 1 +#endif //USE_SEPDISTANCE_UTIL + ) + { + handleCollisionPair(collisionPairInput, lsMem, spuContacts, + (ppu_address_t)lsMem.getColObj0()->getRootCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape, + (ppu_address_t)lsMem.getColObj1()->getRootCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape); + } else + { + //spu_printf("boxbox dist = %f\n",distance); + btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr(); + btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; + ppu_address_t manifoldAddress = (ppu_address_t)manifold; + + spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(), + lsMem.getColObj1()->getWorldTransform(), + lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(), + lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(), + collisionPairInput.m_isSwapped); + + spuContacts.flush(); + } + } + + } + + } + } + +#ifdef USE_SEPDISTANCE_UTIL +#if defined (__SPU__) || defined (USE_LIBSPE2) + if (lsMem.needsDmaPutContactManifoldAlgo) + { + dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm); + dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; + cellDmaLargePut(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } +#endif +#endif //#ifdef USE_SEPDISTANCE_UTIL + + } + } + } + } //end for (j = 0; j < numOnPage; j++) + + }// for + + + + return; +} + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h new file mode 100644 index 00000000000..bbaa555ee1b --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h @@ -0,0 +1,140 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPU_GATHERING_COLLISION_TASK_H +#define SPU_GATHERING_COLLISION_TASK_H + +#include "../PlatformDefinitions.h" +//#define DEBUG_SPU_COLLISION_DETECTION 1 + + +///Task Description for SPU collision detection +struct SpuGatherAndProcessPairsTaskDesc +{ + ppu_address_t m_inPairPtr;//m_pairArrayPtr; + //mutex variable + uint32_t m_someMutexVariableInMainMemory; + + ppu_address_t m_dispatcher; + + uint32_t numOnLastPage; + + uint16_t numPages; + uint16_t taskId; + bool m_useEpa; + + struct CollisionTask_LocalStoreMemory* m_lsMemory; +} + +#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) +__attribute__ ((aligned (128))) +#endif +; + + +void processCollisionTask(void* userPtr, void* lsMemory); + +void* createCollisionLocalStoreMemory(); + + +#if defined(USE_LIBSPE2) && defined(__SPU__) +#include "../SpuLibspe2Support.h" +#include +#include +#include + +//#define DEBUG_LIBSPE2_SPU_TASK + + + +int main(unsigned long long speid, addr64 argp, addr64 envp) +{ + printf("SPU: hello \n"); + + ATTRIBUTE_ALIGNED128(btSpuStatus status); + ATTRIBUTE_ALIGNED16( SpuGatherAndProcessPairsTaskDesc taskDesc ) ; + unsigned int received_message = Spu_Mailbox_Event_Nothing; + bool shutdown = false; + + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + status.m_status = Spu_Status_Free; + status.m_lsMemory.p = createCollisionLocalStoreMemory(); + + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + while ( btLikely( !shutdown ) ) + { + + received_message = spu_read_in_mbox(); + + if( btLikely( received_message == Spu_Mailbox_Event_Task )) + { +#ifdef DEBUG_LIBSPE2_SPU_TASK + printf("SPU: received Spu_Mailbox_Event_Task\n"); +#endif //DEBUG_LIBSPE2_SPU_TASK + + // refresh the status + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + btAssert(status.m_status==Spu_Status_Occupied); + + cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuGatherAndProcessPairsTaskDesc), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); +#ifdef DEBUG_LIBSPE2_SPU_TASK + printf("SPU:processCollisionTask\n"); +#endif //DEBUG_LIBSPE2_SPU_TASK + processCollisionTask((void*)&taskDesc, taskDesc.m_lsMemory); + +#ifdef DEBUG_LIBSPE2_SPU_TASK + printf("SPU:finished processCollisionTask\n"); +#endif //DEBUG_LIBSPE2_SPU_TASK + } + else + { +#ifdef DEBUG_LIBSPE2_SPU_TASK + printf("SPU: received ShutDown\n"); +#endif //DEBUG_LIBSPE2_SPU_TASK + if( btLikely( received_message == Spu_Mailbox_Event_Shutdown ) ) + { + shutdown = true; + } + else + { + //printf("SPU - Sth. recieved\n"); + } + } + + // set to status free and wait for next task + status.m_status = Spu_Status_Free; + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + } + + printf("SPU: shutdown\n"); + return 0; +} +#endif // USE_LIBSPE2 + + +#endif //SPU_GATHERING_COLLISION_TASK_H + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h new file mode 100644 index 00000000000..8b89de03f59 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h @@ -0,0 +1,19 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + + + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp new file mode 100644 index 00000000000..9f7e64dd1b3 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp @@ -0,0 +1,348 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuMinkowskiPenetrationDepthSolver.h" +#include "SpuContactResult.h" +#include "SpuPreferredPenetrationDirections.h" +#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h" +#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h" +#include "SpuCollisionShapes.h" + +#define NUM_UNITSPHERE_POINTS 42 +static btVector3 sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = +{ +btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)), +btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)), +btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)), +btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)), +btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)), +btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)), +btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)), +btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)), +btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)), +btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)), +btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)), +btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)), +btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)), +btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)), +btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)), +btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)), +btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)), +btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)), +btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)), +btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)), +btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)), +btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)), +btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)), +btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)), +btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)), +btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)), +btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)), +btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)), +btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)), +btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)), +btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)), +btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)), +btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)), +btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)), +btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)), +btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)), +btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)), +btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)), +btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)), +btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)), +btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)), +btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654)) +}; + + +bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( btSimplexSolverInterface& simplexSolver, + const btConvexShape* convexA,const btConvexShape* convexB, + const btTransform& transA,const btTransform& transB, + btVector3& v, btVector3& pa, btVector3& pb, + class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc) +{ +#if 0 + (void)stackAlloc; + (void)v; + + + struct btIntermediateResult : public SpuContactResult + { + + btIntermediateResult():m_hasResult(false) + { + } + + btVector3 m_normalOnBInWorld; + btVector3 m_pointInWorld; + btScalar m_depth; + bool m_hasResult; + + virtual void setShapeIdentifiersA(int partId0,int index0) + { + (void)partId0; + (void)index0; + } + + virtual void setShapeIdentifiersB(int partId1,int index1) + { + (void)partId1; + (void)index1; + } + void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth) + { + m_normalOnBInWorld = normalOnBInWorld; + m_pointInWorld = pointInWorld; + m_depth = depth; + m_hasResult = true; + } + }; + + //just take fixed number of orientation, and sample the penetration depth in that direction + btScalar minProj = btScalar(BT_LARGE_FLOAT); + btVector3 minNorm(0.f,0.f,0.f); + btVector3 minVertex; + btVector3 minA,minB; + btVector3 seperatingAxisInA,seperatingAxisInB; + btVector3 pInA,qInB,pWorld,qWorld,w; + +//#define USE_BATCHED_SUPPORT 1 +#ifdef USE_BATCHED_SUPPORT + + btVector3 supportVerticesABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2]; + btVector3 supportVerticesBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2]; + btVector3 seperatingAxisInABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2]; + btVector3 seperatingAxisInBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2]; + int i; + + int numSampleDirections = NUM_UNITSPHERE_POINTS; + + for (i=0;igetNumPreferredPenetrationDirections(); + if (numPDA) + { + for (int i=0;igetPreferredPenetrationDirection(i,norm); + norm = transA.getBasis() * norm; + sPenetrationDirections[numSampleDirections] = norm; + seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis(); + seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis(); + numSampleDirections++; + } + } + } + + { + int numPDB = convexB->getNumPreferredPenetrationDirections(); + if (numPDB) + { + for (int i=0;igetPreferredPenetrationDirection(i,norm); + norm = transB.getBasis() * norm; + sPenetrationDirections[numSampleDirections] = norm; + seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis(); + seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis(); + numSampleDirections++; + } + } + } + + + + convexA->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInABatch,supportVerticesABatch,numSampleDirections); + convexB->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInBBatch,supportVerticesBBatch,numSampleDirections); + + for (i=0;ilocalGetSupportVertexWithoutMarginNonVirtual( seperatingAxisInA);//, NULL); + qInB = convexB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);//, NULL); + + // pInA = convexA->localGetSupportingVertexWithoutMargin(seperatingAxisInA); + // qInB = convexB->localGetSupportingVertexWithoutMargin(seperatingAxisInB); + + pWorld = transA(pInA); + qWorld = transB(qInB); + w = qWorld - pWorld; + btScalar delta = norm.dot(w); + //find smallest delta + if (delta < minProj) + { + minProj = delta; + minNorm = norm; + minA = pWorld; + minB = qWorld; + } + } +#endif //USE_BATCHED_SUPPORT + + //add the margins + + minA += minNorm*marginA; + minB -= minNorm*marginB; + //no penetration + if (minProj < btScalar(0.)) + return false; + + minProj += (marginA + marginB) + btScalar(1.00); + + + + + +//#define DEBUG_DRAW 1 +#ifdef DEBUG_DRAW + if (debugDraw) + { + btVector3 color(0,1,0); + debugDraw->drawLine(minA,minB,color); + color = btVector3 (1,1,1); + btVector3 vec = minB-minA; + btScalar prj2 = minNorm.dot(vec); + debugDraw->drawLine(minA,minA+(minNorm*minProj),color); + + } +#endif //DEBUG_DRAW + + + btGjkPairDetector gjkdet(convexA,convexB,&simplexSolver,0); + + btScalar offsetDist = minProj; + btVector3 offset = minNorm * offsetDist; + + + SpuClosestPointInput input; + input.m_convexVertexData[0] = convexVertexDataA; + input.m_convexVertexData[1] = convexVertexDataB; + btVector3 newOrg = transA.getOrigin() + offset; + + btTransform displacedTrans = transA; + displacedTrans.setOrigin(newOrg); + + input.m_transformA = displacedTrans; + input.m_transformB = transB; + input.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);//minProj; + + btIntermediateResult res; + gjkdet.getClosestPoints(input,res,0); + + btScalar correctedMinNorm = minProj - res.m_depth; + + + //the penetration depth is over-estimated, relax it + btScalar penetration_relaxation= btScalar(1.); + minNorm*=penetration_relaxation; + + if (res.m_hasResult) + { + + pa = res.m_pointInWorld - minNorm * correctedMinNorm; + pb = res.m_pointInWorld; + +#ifdef DEBUG_DRAW + if (debugDraw) + { + btVector3 color(1,0,0); + debugDraw->drawLine(pa,pb,color); + } +#endif//DEBUG_DRAW + + + } else { + // could not seperate shapes + //btAssert (false); + } + return res.m_hasResult; +#endif + return false; +} + + + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h new file mode 100644 index 00000000000..18ad223ed36 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h @@ -0,0 +1,48 @@ + +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef MINKOWSKI_PENETRATION_DEPTH_SOLVER_H +#define MINKOWSKI_PENETRATION_DEPTH_SOLVER_H + + +#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h" + +class btStackAlloc; +class btIDebugDraw; +class btVoronoiSimplexSolver; +class btConvexShape; + +///MinkowskiPenetrationDepthSolver implements bruteforce penetration depth estimation. +///Implementation is based on sampling the depth using support mapping, and using GJK step to get the witness points. +class SpuMinkowskiPenetrationDepthSolver : public btConvexPenetrationDepthSolver +{ +public: + SpuMinkowskiPenetrationDepthSolver() {} + virtual ~SpuMinkowskiPenetrationDepthSolver() {}; + + virtual bool calcPenDepth( btSimplexSolverInterface& simplexSolver, + const btConvexShape* convexA,const btConvexShape* convexB, + const btTransform& transA,const btTransform& transB, + btVector3& v, btVector3& pa, btVector3& pb, + class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc + ); + + +}; + + +#endif //MINKOWSKI_PENETRATION_DEPTH_SOLVER_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h new file mode 100644 index 00000000000..774a0cb2eb1 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h @@ -0,0 +1,70 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef _SPU_PREFERRED_PENETRATION_DIRECTIONS_H +#define _SPU_PREFERRED_PENETRATION_DIRECTIONS_H + + +#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" + +int spuGetNumPreferredPenetrationDirections(int shapeType, void* shape) +{ + switch (shapeType) + { + case TRIANGLE_SHAPE_PROXYTYPE: + { + return 2; + //spu_printf("2\n"); + break; + } + default: + { +#if __ASSERT + spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType); +#endif // __ASSERT + } + } + + return 0; +} + +void spuGetPreferredPenetrationDirection(int shapeType, void* shape, int index, btVector3& penetrationVector) +{ + + + switch (shapeType) + { + case TRIANGLE_SHAPE_PROXYTYPE: + { + btVector3* vertices = (btVector3*)shape; + ///calcNormal + penetrationVector = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]); + penetrationVector.normalize(); + if (index) + penetrationVector *= btScalar(-1.); + break; + } + default: + { + +#if __ASSERT + spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType); +#endif // __ASSERT + } + } + +} + +#endif //_SPU_PREFERRED_PENETRATION_DIRECTIONS_H diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp new file mode 100644 index 00000000000..30642a39294 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp @@ -0,0 +1,1155 @@ +/* + Copyright (C) 2006, 2008 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + + +#include "Box.h" + +static inline float sqr( float a ) +{ + return (a * a); +} + +enum BoxSepAxisType +{ + A_AXIS, B_AXIS, CROSS_AXIS +}; + +//------------------------------------------------------------------------------------------------- +// voronoiTol: bevels Voronoi planes slightly which helps when features are parallel. +//------------------------------------------------------------------------------------------------- + +static const float voronoiTol = -1.0e-5f; + +//------------------------------------------------------------------------------------------------- +// separating axis tests: gaps along each axis are computed, and the axis with the maximum +// gap is stored. cross product axes are normalized. +//------------------------------------------------------------------------------------------------- + +#define AaxisTest( dim, letter, first ) \ +{ \ + if ( first ) \ + { \ + maxGap = gap = gapsA.get##letter(); \ + if ( gap > distanceThreshold ) return gap; \ + axisType = A_AXIS; \ + faceDimA = dim; \ + axisA = identity.getCol##dim(); \ + } \ + else \ + { \ + gap = gapsA.get##letter(); \ + if ( gap > distanceThreshold ) return gap; \ + else if ( gap > maxGap ) \ + { \ + maxGap = gap; \ + axisType = A_AXIS; \ + faceDimA = dim; \ + axisA = identity.getCol##dim(); \ + } \ + } \ +} + + +#define BaxisTest( dim, letter ) \ +{ \ + gap = gapsB.get##letter(); \ + if ( gap > distanceThreshold ) return gap; \ + else if ( gap > maxGap ) \ + { \ + maxGap = gap; \ + axisType = B_AXIS; \ + faceDimB = dim; \ + axisB = identity.getCol##dim(); \ + } \ +} + +#define CrossAxisTest( dima, dimb, letterb ) \ +{ \ + const float lsqr_tolerance = 1.0e-30f; \ + float lsqr; \ + \ + lsqr = lsqrs.getCol##dima().get##letterb(); \ + \ + if ( lsqr > lsqr_tolerance ) \ + { \ + float l_recip = 1.0f / sqrtf( lsqr ); \ + gap = float(gapsAxB.getCol##dima().get##letterb()) * l_recip; \ + \ + if ( gap > distanceThreshold ) \ + { \ + return gap; \ + } \ + \ + if ( gap > maxGap ) \ + { \ + maxGap = gap; \ + axisType = CROSS_AXIS; \ + edgeDimA = dima; \ + edgeDimB = dimb; \ + axisA = cross(identity.getCol##dima(),matrixAB.getCol##dimb()) * l_recip; \ + } \ + } \ +} + +//------------------------------------------------------------------------------------------------- +// tests whether a vertex of box B and a face of box A are the closest features +//------------------------------------------------------------------------------------------------- + +inline +float +VertexBFaceATest( + bool & inVoronoi, + float & t0, + float & t1, + const Vector3 & hA, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesB ) +{ + // compute a corner of box B in A's coordinate system + + Vector3 corner = + Vector3( faceOffsetAB + matrixAB.getCol0() * scalesB.getX() + matrixAB.getCol1() * scalesB.getY() ); + + // compute the parameters of the point on A, closest to this corner + + t0 = corner[0]; + t1 = corner[1]; + + if ( t0 > hA[0] ) + t0 = hA[0]; + else if ( t0 < -hA[0] ) + t0 = -hA[0]; + if ( t1 > hA[1] ) + t1 = hA[1]; + else if ( t1 < -hA[1] ) + t1 = -hA[1]; + + // do the Voronoi test: already know the point on B is in the Voronoi region of the + // point on A, check the reverse. + + Vector3 facePointB = + Vector3( mulPerElem( faceOffsetBA + matrixBA.getCol0() * t0 + matrixBA.getCol1() * t1 - scalesB, signsB ) ); + + inVoronoi = ( ( facePointB[0] >= voronoiTol * facePointB[2] ) && + ( facePointB[1] >= voronoiTol * facePointB[0] ) && + ( facePointB[2] >= voronoiTol * facePointB[1] ) ); + + return (sqr( corner[0] - t0 ) + sqr( corner[1] - t1 ) + sqr( corner[2] )); +} + +#define VertexBFaceA_SetNewMin() \ +{ \ + minDistSqr = distSqr; \ + localPointA.setX(t0); \ + localPointA.setY(t1); \ + localPointB.setX( scalesB.getX() ); \ + localPointB.setY( scalesB.getY() ); \ + featureA = F; \ + featureB = V; \ +} + +void +VertexBFaceATests( + bool & done, + float & minDistSqr, + Point3 & localPointA, + Point3 & localPointB, + FeatureType & featureA, + FeatureType & featureB, + const Vector3 & hA, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesB, + bool first ) +{ + + float t0, t1; + float distSqr; + + distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsB, scalesB ); + + if ( first ) { + VertexBFaceA_SetNewMin(); + } else { + if ( distSqr < minDistSqr ) { + VertexBFaceA_SetNewMin(); + } + } + + if ( done ) + return; + + signsB.setX( -signsB.getX() ); + scalesB.setX( -scalesB.getX() ); + + distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsB, scalesB ); + + if ( distSqr < minDistSqr ) { + VertexBFaceA_SetNewMin(); + } + + if ( done ) + return; + + signsB.setY( -signsB.getY() ); + scalesB.setY( -scalesB.getY() ); + + distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsB, scalesB ); + + if ( distSqr < minDistSqr ) { + VertexBFaceA_SetNewMin(); + } + + if ( done ) + return; + + signsB.setX( -signsB.getX() ); + scalesB.setX( -scalesB.getX() ); + + distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsB, scalesB ); + + if ( distSqr < minDistSqr ) { + VertexBFaceA_SetNewMin(); + } +} + +//------------------------------------------------------------------------------------------------- +// VertexAFaceBTest: tests whether a vertex of box A and a face of box B are the closest features +//------------------------------------------------------------------------------------------------- + +inline +float +VertexAFaceBTest( + bool & inVoronoi, + float & t0, + float & t1, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) scalesA ) +{ + Vector3 corner = + Vector3( faceOffsetBA + matrixBA.getCol0() * scalesA.getX() + matrixBA.getCol1() * scalesA.getY() ); + + t0 = corner[0]; + t1 = corner[1]; + + if ( t0 > hB[0] ) + t0 = hB[0]; + else if ( t0 < -hB[0] ) + t0 = -hB[0]; + if ( t1 > hB[1] ) + t1 = hB[1]; + else if ( t1 < -hB[1] ) + t1 = -hB[1]; + + Vector3 facePointA = + Vector3( mulPerElem( faceOffsetAB + matrixAB.getCol0() * t0 + matrixAB.getCol1() * t1 - scalesA, signsA ) ); + + inVoronoi = ( ( facePointA[0] >= voronoiTol * facePointA[2] ) && + ( facePointA[1] >= voronoiTol * facePointA[0] ) && + ( facePointA[2] >= voronoiTol * facePointA[1] ) ); + + return (sqr( corner[0] - t0 ) + sqr( corner[1] - t1 ) + sqr( corner[2] )); +} + +#define VertexAFaceB_SetNewMin() \ +{ \ + minDistSqr = distSqr; \ + localPointB.setX(t0); \ + localPointB.setY(t1); \ + localPointA.setX( scalesA.getX() ); \ + localPointA.setY( scalesA.getY() ); \ + featureA = V; \ + featureB = F; \ +} + +void +VertexAFaceBTests( + bool & done, + float & minDistSqr, + Point3 & localPointA, + Point3 & localPointB, + FeatureType & featureA, + FeatureType & featureB, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) scalesA, + bool first ) +{ + float t0, t1; + float distSqr; + + distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, scalesA ); + + if ( first ) { + VertexAFaceB_SetNewMin(); + } else { + if ( distSqr < minDistSqr ) { + VertexAFaceB_SetNewMin(); + } + } + + if ( done ) + return; + + signsA.setX( -signsA.getX() ); + scalesA.setX( -scalesA.getX() ); + + distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, scalesA ); + + if ( distSqr < minDistSqr ) { + VertexAFaceB_SetNewMin(); + } + + if ( done ) + return; + + signsA.setY( -signsA.getY() ); + scalesA.setY( -scalesA.getY() ); + + distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, scalesA ); + + if ( distSqr < minDistSqr ) { + VertexAFaceB_SetNewMin(); + } + + if ( done ) + return; + + signsA.setX( -signsA.getX() ); + scalesA.setX( -scalesA.getX() ); + + distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, scalesA ); + + if ( distSqr < minDistSqr ) { + VertexAFaceB_SetNewMin(); + } +} + +//------------------------------------------------------------------------------------------------- +// EdgeEdgeTest: +// +// tests whether a pair of edges are the closest features +// +// note on the shorthand: +// 'a' & 'b' refer to the edges. +// 'c' is the dimension of the axis that points from the face center to the edge Center +// 'd' is the dimension of the edge Direction +// the dimension of the face normal is 2 +//------------------------------------------------------------------------------------------------- + +#define EdgeEdgeTest( ac, ac_letter, ad, ad_letter, bc, bc_letter, bd, bd_letter ) \ +{ \ + Vector3 edgeOffsetAB; \ + Vector3 edgeOffsetBA; \ + \ + edgeOffsetAB = faceOffsetAB + matrixAB.getCol##bc() * scalesB.get##bc_letter(); \ + edgeOffsetAB.set##ac_letter( edgeOffsetAB.get##ac_letter() - scalesA.get##ac_letter() ); \ + \ + edgeOffsetBA = faceOffsetBA + matrixBA.getCol##ac() * scalesA.get##ac_letter(); \ + edgeOffsetBA.set##bc_letter( edgeOffsetBA.get##bc_letter() - scalesB.get##bc_letter() ); \ + \ + float dirDot = matrixAB.getCol##bd().get##ad_letter(); \ + float denom = 1.0f - dirDot*dirDot; \ + float edgeOffsetAB_ad = edgeOffsetAB.get##ad_letter(); \ + float edgeOffsetBA_bd = edgeOffsetBA.get##bd_letter(); \ + \ + if ( denom == 0.0f ) \ + { \ + tA = 0.0f; \ + } \ + else \ + { \ + tA = ( edgeOffsetAB_ad + edgeOffsetBA_bd * dirDot ) / denom; \ + } \ + \ + if ( tA < -hA[ad] ) tA = -hA[ad]; \ + else if ( tA > hA[ad] ) tA = hA[ad]; \ + \ + tB = tA * dirDot + edgeOffsetBA_bd; \ + \ + if ( tB < -hB[bd] ) \ + { \ + tB = -hB[bd]; \ + tA = tB * dirDot + edgeOffsetAB_ad; \ + \ + if ( tA < -hA[ad] ) tA = -hA[ad]; \ + else if ( tA > hA[ad] ) tA = hA[ad]; \ + } \ + else if ( tB > hB[bd] ) \ + { \ + tB = hB[bd]; \ + tA = tB * dirDot + edgeOffsetAB_ad; \ + \ + if ( tA < -hA[ad] ) tA = -hA[ad]; \ + else if ( tA > hA[ad] ) tA = hA[ad]; \ + } \ + \ + Vector3 edgeOffAB = Vector3( mulPerElem( edgeOffsetAB + matrixAB.getCol##bd() * tB, signsA ) );\ + Vector3 edgeOffBA = Vector3( mulPerElem( edgeOffsetBA + matrixBA.getCol##ad() * tA, signsB ) );\ + \ + inVoronoi = ( edgeOffAB[ac] >= voronoiTol * edgeOffAB[2] ) && \ + ( edgeOffAB[2] >= voronoiTol * edgeOffAB[ac] ) && \ + ( edgeOffBA[bc] >= voronoiTol * edgeOffBA[2] ) && \ + ( edgeOffBA[2] >= voronoiTol * edgeOffBA[bc] ); \ + \ + edgeOffAB[ad] -= tA; \ + edgeOffBA[bd] -= tB; \ + \ + return dot(edgeOffAB,edgeOffAB); \ +} + +float +EdgeEdgeTest_0101( + bool & inVoronoi, + float & tA, + float & tB, + const Vector3 & hA, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesA, + PE_REF(Vector3) scalesB ) +{ + EdgeEdgeTest( 0, X, 1, Y, 0, X, 1, Y ); +} + +float +EdgeEdgeTest_0110( + bool & inVoronoi, + float & tA, + float & tB, + const Vector3 & hA, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesA, + PE_REF(Vector3) scalesB ) +{ + EdgeEdgeTest( 0, X, 1, Y, 1, Y, 0, X ); +} + +float +EdgeEdgeTest_1001( + bool & inVoronoi, + float & tA, + float & tB, + const Vector3 & hA, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesA, + PE_REF(Vector3) scalesB ) +{ + EdgeEdgeTest( 1, Y, 0, X, 0, X, 1, Y ); +} + +float +EdgeEdgeTest_1010( + bool & inVoronoi, + float & tA, + float & tB, + const Vector3 & hA, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesA, + PE_REF(Vector3) scalesB ) +{ + EdgeEdgeTest( 1, Y, 0, X, 1, Y, 0, X ); +} + +#define EdgeEdge_SetNewMin( ac_letter, ad_letter, bc_letter, bd_letter ) \ +{ \ + minDistSqr = distSqr; \ + localPointA.set##ac_letter(scalesA.get##ac_letter()); \ + localPointA.set##ad_letter(tA); \ + localPointB.set##bc_letter(scalesB.get##bc_letter()); \ + localPointB.set##bd_letter(tB); \ + otherFaceDimA = testOtherFaceDimA; \ + otherFaceDimB = testOtherFaceDimB; \ + featureA = E; \ + featureB = E; \ +} + +void +EdgeEdgeTests( + bool & done, + float & minDistSqr, + Point3 & localPointA, + Point3 & localPointB, + int & otherFaceDimA, + int & otherFaceDimB, + FeatureType & featureA, + FeatureType & featureB, + const Vector3 & hA, + const Vector3 & hB, + PE_REF(Vector3) faceOffsetAB, + PE_REF(Vector3) faceOffsetBA, + const Matrix3 & matrixAB, + const Matrix3 & matrixBA, + PE_REF(Vector3) signsA, + PE_REF(Vector3) signsB, + PE_REF(Vector3) scalesA, + PE_REF(Vector3) scalesB, + bool first ) +{ + + float distSqr; + float tA, tB; + + int testOtherFaceDimA, testOtherFaceDimB; + + testOtherFaceDimA = 0; + testOtherFaceDimB = 0; + + distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( first ) { + EdgeEdge_SetNewMin( X, Y, X, Y ); + } else { + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, X, Y ); + } + } + + if ( done ) + return; + + signsA.setX( -signsA.getX() ); + scalesA.setX( -scalesA.getX() ); + + distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, X, Y ); + } + + if ( done ) + return; + + signsB.setX( -signsB.getX() ); + scalesB.setX( -scalesB.getX() ); + + distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, X, Y ); + } + + if ( done ) + return; + + signsA.setX( -signsA.getX() ); + scalesA.setX( -scalesA.getX() ); + + distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, X, Y ); + } + + if ( done ) + return; + + testOtherFaceDimA = 1; + testOtherFaceDimB = 0; + signsB.setX( -signsB.getX() ); + scalesB.setX( -scalesB.getX() ); + + distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, X, Y ); + } + + if ( done ) + return; + + signsA.setY( -signsA.getY() ); + scalesA.setY( -scalesA.getY() ); + + distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, X, Y ); + } + + if ( done ) + return; + + signsB.setX( -signsB.getX() ); + scalesB.setX( -scalesB.getX() ); + + distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, X, Y ); + } + + if ( done ) + return; + + signsA.setY( -signsA.getY() ); + scalesA.setY( -scalesA.getY() ); + + distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, X, Y ); + } + + if ( done ) + return; + + testOtherFaceDimA = 0; + testOtherFaceDimB = 1; + signsB.setX( -signsB.getX() ); + scalesB.setX( -scalesB.getX() ); + + distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, Y, X ); + } + + if ( done ) + return; + + signsA.setX( -signsA.getX() ); + scalesA.setX( -scalesA.getX() ); + + distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, Y, X ); + } + + if ( done ) + return; + + signsB.setY( -signsB.getY() ); + scalesB.setY( -scalesB.getY() ); + + distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, Y, X ); + } + + if ( done ) + return; + + signsA.setX( -signsA.getX() ); + scalesA.setX( -scalesA.getX() ); + + distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( X, Y, Y, X ); + } + + if ( done ) + return; + + testOtherFaceDimA = 1; + testOtherFaceDimB = 1; + signsB.setY( -signsB.getY() ); + scalesB.setY( -scalesB.getY() ); + + distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, Y, X ); + } + + if ( done ) + return; + + signsA.setY( -signsA.getY() ); + scalesA.setY( -scalesA.getY() ); + + distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, Y, X ); + } + + if ( done ) + return; + + signsB.setY( -signsB.getY() ); + scalesB.setY( -scalesB.getY() ); + + distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, Y, X ); + } + + if ( done ) + return; + + signsA.setY( -signsA.getY() ); + scalesA.setY( -scalesA.getY() ); + + distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA, + matrixAB, matrixBA, signsA, signsB, scalesA, scalesB ); + + if ( distSqr < minDistSqr ) { + EdgeEdge_SetNewMin( Y, X, Y, X ); + } +} + +float +boxBoxDistance( + Vector3& normal, + BoxPoint& boxPointA, + BoxPoint& boxPointB, + PE_REF(Box) boxA, const Transform3& transformA, + PE_REF(Box) boxB, const Transform3& transformB, + float distanceThreshold ) +{ + Matrix3 identity; + identity = Matrix3::identity(); + Vector3 ident[3]; + ident[0] = identity.getCol0(); + ident[1] = identity.getCol1(); + ident[2] = identity.getCol2(); + + // get relative transformations + + Transform3 transformAB, transformBA; + Matrix3 matrixAB, matrixBA; + Vector3 offsetAB, offsetBA; + + transformAB = orthoInverse(transformA) * transformB; + transformBA = orthoInverse(transformAB); + + matrixAB = transformAB.getUpper3x3(); + offsetAB = transformAB.getTranslation(); + matrixBA = transformBA.getUpper3x3(); + offsetBA = transformBA.getTranslation(); + + Matrix3 absMatrixAB = absPerElem(matrixAB); + Matrix3 absMatrixBA = absPerElem(matrixBA); + + // find separating axis with largest gap between projections + + BoxSepAxisType axisType; + Vector3 axisA(0.0f), axisB(0.0f); + float gap, maxGap; + int faceDimA = 0, faceDimB = 0, edgeDimA = 0, edgeDimB = 0; + + // face axes + + Vector3 gapsA = absPerElem(offsetAB) - boxA.half - absMatrixAB * boxB.half; + + AaxisTest(0,X,true); + AaxisTest(1,Y,false); + AaxisTest(2,Z,false); + + Vector3 gapsB = absPerElem(offsetBA) - boxB.half - absMatrixBA * boxA.half; + + BaxisTest(0,X); + BaxisTest(1,Y); + BaxisTest(2,Z); + + // cross product axes + + // ŠOÏ‚ª‚O‚Ì‚Æ‚«‚Ì‘Îô + absMatrixAB += Matrix3(1.0e-5f); + absMatrixBA += Matrix3(1.0e-5f); + + Matrix3 lsqrs, projOffset, projAhalf, projBhalf; + + lsqrs.setCol0( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) + + mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) ); + lsqrs.setCol1( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) + + mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) ); + lsqrs.setCol2( mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) + + mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) ); + + projOffset.setCol0(matrixBA.getCol1() * offsetAB.getZ() - matrixBA.getCol2() * offsetAB.getY()); + projOffset.setCol1(matrixBA.getCol2() * offsetAB.getX() - matrixBA.getCol0() * offsetAB.getZ()); + projOffset.setCol2(matrixBA.getCol0() * offsetAB.getY() - matrixBA.getCol1() * offsetAB.getX()); + + projAhalf.setCol0(absMatrixBA.getCol1() * boxA.half.getZ() + absMatrixBA.getCol2() * boxA.half.getY()); + projAhalf.setCol1(absMatrixBA.getCol2() * boxA.half.getX() + absMatrixBA.getCol0() * boxA.half.getZ()); + projAhalf.setCol2(absMatrixBA.getCol0() * boxA.half.getY() + absMatrixBA.getCol1() * boxA.half.getX()); + + projBhalf.setCol0(absMatrixAB.getCol1() * boxB.half.getZ() + absMatrixAB.getCol2() * boxB.half.getY()); + projBhalf.setCol1(absMatrixAB.getCol2() * boxB.half.getX() + absMatrixAB.getCol0() * boxB.half.getZ()); + projBhalf.setCol2(absMatrixAB.getCol0() * boxB.half.getY() + absMatrixAB.getCol1() * boxB.half.getX()); + + Matrix3 gapsAxB = absPerElem(projOffset) - projAhalf - transpose(projBhalf); + + CrossAxisTest(0,0,X); + CrossAxisTest(0,1,Y); + CrossAxisTest(0,2,Z); + CrossAxisTest(1,0,X); + CrossAxisTest(1,1,Y); + CrossAxisTest(1,2,Z); + CrossAxisTest(2,0,X); + CrossAxisTest(2,1,Y); + CrossAxisTest(2,2,Z); + + // need to pick the face on each box whose normal best matches the separating axis. + // will transform vectors to be in the coordinate system of this face to simplify things later. + // for this, a permutation matrix can be used, which the next section computes. + + int dimA[3], dimB[3]; + + if ( axisType == A_AXIS ) { + if ( dot(axisA,offsetAB) < 0.0f ) + axisA = -axisA; + axisB = matrixBA * -axisA; + + Vector3 absAxisB = Vector3(absPerElem(axisB)); + + if ( ( absAxisB[0] > absAxisB[1] ) && ( absAxisB[0] > absAxisB[2] ) ) + faceDimB = 0; + else if ( absAxisB[1] > absAxisB[2] ) + faceDimB = 1; + else + faceDimB = 2; + } else if ( axisType == B_AXIS ) { + if ( dot(axisB,offsetBA) < 0.0f ) + axisB = -axisB; + axisA = matrixAB * -axisB; + + Vector3 absAxisA = Vector3(absPerElem(axisA)); + + if ( ( absAxisA[0] > absAxisA[1] ) && ( absAxisA[0] > absAxisA[2] ) ) + faceDimA = 0; + else if ( absAxisA[1] > absAxisA[2] ) + faceDimA = 1; + else + faceDimA = 2; + } + + if ( axisType == CROSS_AXIS ) { + if ( dot(axisA,offsetAB) < 0.0f ) + axisA = -axisA; + axisB = matrixBA * -axisA; + + Vector3 absAxisA = Vector3(absPerElem(axisA)); + Vector3 absAxisB = Vector3(absPerElem(axisB)); + + dimA[1] = edgeDimA; + dimB[1] = edgeDimB; + + if ( edgeDimA == 0 ) { + if ( absAxisA[1] > absAxisA[2] ) { + dimA[0] = 2; + dimA[2] = 1; + } else { + dimA[0] = 1; + dimA[2] = 2; + } + } else if ( edgeDimA == 1 ) { + if ( absAxisA[2] > absAxisA[0] ) { + dimA[0] = 0; + dimA[2] = 2; + } else { + dimA[0] = 2; + dimA[2] = 0; + } + } else { + if ( absAxisA[0] > absAxisA[1] ) { + dimA[0] = 1; + dimA[2] = 0; + } else { + dimA[0] = 0; + dimA[2] = 1; + } + } + + if ( edgeDimB == 0 ) { + if ( absAxisB[1] > absAxisB[2] ) { + dimB[0] = 2; + dimB[2] = 1; + } else { + dimB[0] = 1; + dimB[2] = 2; + } + } else if ( edgeDimB == 1 ) { + if ( absAxisB[2] > absAxisB[0] ) { + dimB[0] = 0; + dimB[2] = 2; + } else { + dimB[0] = 2; + dimB[2] = 0; + } + } else { + if ( absAxisB[0] > absAxisB[1] ) { + dimB[0] = 1; + dimB[2] = 0; + } else { + dimB[0] = 0; + dimB[2] = 1; + } + } + } else { + dimA[2] = faceDimA; + dimA[0] = (faceDimA+1)%3; + dimA[1] = (faceDimA+2)%3; + dimB[2] = faceDimB; + dimB[0] = (faceDimB+1)%3; + dimB[1] = (faceDimB+2)%3; + } + + Matrix3 aperm_col, bperm_col; + + aperm_col.setCol0(ident[dimA[0]]); + aperm_col.setCol1(ident[dimA[1]]); + aperm_col.setCol2(ident[dimA[2]]); + + bperm_col.setCol0(ident[dimB[0]]); + bperm_col.setCol1(ident[dimB[1]]); + bperm_col.setCol2(ident[dimB[2]]); + + Matrix3 aperm_row, bperm_row; + + aperm_row = transpose(aperm_col); + bperm_row = transpose(bperm_col); + + // permute all box parameters to be in the face coordinate systems + + Matrix3 matrixAB_perm = aperm_row * matrixAB * bperm_col; + Matrix3 matrixBA_perm = transpose(matrixAB_perm); + + Vector3 offsetAB_perm, offsetBA_perm; + + offsetAB_perm = aperm_row * offsetAB; + offsetBA_perm = bperm_row * offsetBA; + + Vector3 halfA_perm, halfB_perm; + + halfA_perm = aperm_row * boxA.half; + halfB_perm = bperm_row * boxB.half; + + // compute the vector between the centers of each face, in each face's coordinate frame + + Vector3 signsA_perm, signsB_perm, scalesA_perm, scalesB_perm, faceOffsetAB_perm, faceOffsetBA_perm; + + signsA_perm = copySignPerElem(Vector3(1.0f),aperm_row * axisA); + signsB_perm = copySignPerElem(Vector3(1.0f),bperm_row * axisB); + scalesA_perm = mulPerElem( signsA_perm, halfA_perm ); + scalesB_perm = mulPerElem( signsB_perm, halfB_perm ); + + faceOffsetAB_perm = offsetAB_perm + matrixAB_perm.getCol2() * scalesB_perm.getZ(); + faceOffsetAB_perm.setZ( faceOffsetAB_perm.getZ() - scalesA_perm.getZ() ); + + faceOffsetBA_perm = offsetBA_perm + matrixBA_perm.getCol2() * scalesA_perm.getZ(); + faceOffsetBA_perm.setZ( faceOffsetBA_perm.getZ() - scalesB_perm.getZ() ); + + if ( maxGap < 0.0f ) { + // if boxes overlap, this will separate the faces for finding points of penetration. + + faceOffsetAB_perm -= aperm_row * axisA * maxGap * 1.01f; + faceOffsetBA_perm -= bperm_row * axisB * maxGap * 1.01f; + } + + // for each vertex/face or edge/edge pair of the two faces, find the closest points. + // + // these points each have an associated box feature (vertex, edge, or face). if each + // point is in the external Voronoi region of the other's feature, they are the + // closest points of the boxes, and the algorithm can exit. + // + // the feature pairs are arranged so that in the general case, the first test will + // succeed. degenerate cases (parallel faces) may require up to all tests in the + // worst case. + // + // if for some reason no case passes the Voronoi test, the features with the minimum + // distance are returned. + + Point3 localPointA_perm, localPointB_perm; + float minDistSqr; + bool done; + + Vector3 hA_perm( halfA_perm ), hB_perm( halfB_perm ); + + localPointA_perm.setZ( scalesA_perm.getZ() ); + localPointB_perm.setZ( scalesB_perm.getZ() ); + scalesA_perm.setZ(0.0f); + scalesB_perm.setZ(0.0f); + + int otherFaceDimA, otherFaceDimB; + FeatureType featureA, featureB; + + if ( axisType == CROSS_AXIS ) { + EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm, + otherFaceDimA, otherFaceDimB, featureA, featureB, + hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm, + scalesA_perm, scalesB_perm, true ); + + if ( !done ) { + VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm, + featureA, featureB, + hA_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false ); + + if ( !done ) { + VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm, + featureA, featureB, + hB_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false ); + } + } + } else if ( axisType == B_AXIS ) { + VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm, + featureA, featureB, + hB_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, true ); + + if ( !done ) { + VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm, + featureA, featureB, + hA_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false ); + + if ( !done ) { + EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm, + otherFaceDimA, otherFaceDimB, featureA, featureB, + hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm, + scalesA_perm, scalesB_perm, false ); + } + } + } else { + VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm, + featureA, featureB, + hA_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, true ); + + if ( !done ) { + VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm, + featureA, featureB, + hB_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false ); + + if ( !done ) { + EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm, + otherFaceDimA, otherFaceDimB, featureA, featureB, + hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm, + matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm, + scalesA_perm, scalesB_perm, false ); + } + } + } + + // convert local points from face-local to box-local coordinate system + + boxPointA.localPoint = Point3( aperm_col * Vector3( localPointA_perm ) ); + boxPointB.localPoint = Point3( bperm_col * Vector3( localPointB_perm ) ); + + // find which features of the boxes are involved. + // the only feature pairs which occur in this function are VF, FV, and EE, even though the + // closest points might actually lie on sub-features, as in a VF contact might be used for + // what's actually a VV contact. this means some feature pairs could possibly seem distinct + // from others, although their contact positions are the same. don't know yet whether this + // matters. + + int sA[3], sB[3]; + + sA[0] = boxPointA.localPoint.getX() > 0.0f; + sA[1] = boxPointA.localPoint.getY() > 0.0f; + sA[2] = boxPointA.localPoint.getZ() > 0.0f; + + sB[0] = boxPointB.localPoint.getX() > 0.0f; + sB[1] = boxPointB.localPoint.getY() > 0.0f; + sB[2] = boxPointB.localPoint.getZ() > 0.0f; + + if ( featureA == F ) { + boxPointA.setFaceFeature( dimA[2], sA[dimA[2]] ); + } else if ( featureA == E ) { + boxPointA.setEdgeFeature( dimA[2], sA[dimA[2]], dimA[otherFaceDimA], sA[dimA[otherFaceDimA]] ); + } else { + boxPointA.setVertexFeature( sA[0], sA[1], sA[2] ); + } + + if ( featureB == F ) { + boxPointB.setFaceFeature( dimB[2], sB[dimB[2]] ); + } else if ( featureB == E ) { + boxPointB.setEdgeFeature( dimB[2], sB[dimB[2]], dimB[otherFaceDimB], sB[dimB[otherFaceDimB]] ); + } else { + boxPointB.setVertexFeature( sB[0], sB[1], sB[2] ); + } + + normal = transformA * axisA; + + if ( maxGap < 0.0f ) { + return (maxGap); + } else { + return (sqrtf( minDistSqr )); + } +} diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h new file mode 100644 index 00000000000..c58e257c026 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h @@ -0,0 +1,66 @@ +/* + Copyright (C) 2006, 2008 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + + +#ifndef __BOXBOXDISTANCE_H__ +#define __BOXBOXDISTANCE_H__ + + +#include "Box.h" + +using namespace Vectormath::Aos; + +//--------------------------------------------------------------------------- +// boxBoxDistance: +// +// description: +// this computes info that can be used for the collision response of two boxes. when the boxes +// do not overlap, the points are set to the closest points of the boxes, and a positive +// distance between them is returned. if the boxes do overlap, a negative distance is returned +// and the points are set to two points that would touch after the boxes are translated apart. +// the contact normal gives the direction to repel or separate the boxes when they touch or +// overlap (it's being approximated here as one of the 15 "separating axis" directions). +// +// returns: +// positive or negative distance between two boxes. +// +// args: +// Vector3& normal: set to a unit contact normal pointing from box A to box B. +// +// BoxPoint& boxPointA, BoxPoint& boxPointB: +// set to a closest point or point of penetration on each box. +// +// Box boxA, Box boxB: +// boxes, represented as 3 half-widths +// +// const Transform3& transformA, const Transform3& transformB: +// box transformations, in world coordinates +// +// float distanceThreshold: +// the algorithm will exit early if it finds that the boxes are more distant than this +// threshold, and not compute a contact normal or points. if this distance returned +// exceeds the threshold, all the other output data may not have been computed. by +// default, this is set to MAX_FLOAT so it will have no effect. +// +//--------------------------------------------------------------------------- + +float +boxBoxDistance(Vector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB, + PE_REF(Box) boxA, const Transform3 & transformA, PE_REF(Box) boxB, + const Transform3 & transformB, + float distanceThreshold = FLT_MAX ); + +#endif /* __BOXBOXDISTANCE_H__ */ diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt new file mode 100644 index 00000000000..5b4a907058f --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt @@ -0,0 +1 @@ +Empty placeholder for future Libspe2 SPU task diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp new file mode 100644 index 00000000000..fe61955572f --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp @@ -0,0 +1,214 @@ +/* +Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + + +#include "SpuSampleTask.h" +#include "BulletDynamics/Dynamics/btRigidBody.h" +#include "../PlatformDefinitions.h" +#include "../SpuFakeDma.h" +#include "LinearMath/btMinMax.h" + +#ifdef __SPU__ +#include +#else +#include +#define spu_printf printf +#endif + +#define MAX_NUM_BODIES 8192 + +struct SampleTask_LocalStoreMemory +{ + ATTRIBUTE_ALIGNED16(char gLocalRigidBody [sizeof(btRigidBody)+16]); + ATTRIBUTE_ALIGNED16(void* gPointerArray[MAX_NUM_BODIES]); + +}; + + + + +//-- MAIN METHOD +void processSampleTask(void* userPtr, void* lsMemory) +{ + // BT_PROFILE("processSampleTask"); + + SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory; + + SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr; + SpuSampleTaskDesc& taskDesc = *taskDescPtr; + + switch (taskDesc.m_sampleCommand) + { + case CMD_SAMPLE_INTEGRATE_BODIES: + { + btTransform predictedTrans; + btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr; + + int batchSize = taskDesc.m_sampleValue; + if (batchSize>MAX_NUM_BODIES) + { + spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n"); + break; + } + int dmaArraySize = batchSize*sizeof(void*); + + uint64_t ppuArrayAddress = reinterpret_cast(eaPtr); + + // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize); + + if (dmaArraySize>=16) + { + cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } else + { + stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize); + } + + + for ( int i=0;igLocalRigidBody[0]; + void* shortAdd = localMemory->gPointerArray[i]; + uint64_t ppuRigidBodyAddress = reinterpret_cast(shortAdd); + + // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr); + + int dmaBodySize = sizeof(btRigidBody); + + cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + float timeStep = 1.f/60.f; + + btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj); + if (body) + { + if (body->isActive() && (!body->isStaticOrKinematicObject())) + { + body->predictIntegratedTransform(timeStep, predictedTrans); + body->proceedToTransform( predictedTrans); + void* ptr = (void*)localPtr; + // spu_printf("cellDmaLargePut from %llx to LS %llx\n",ptr,ppuRigidBodyAddress); + + cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + } + } + + } + break; + } + + + case CMD_SAMPLE_PREDICT_MOTION_BODIES: + { + btTransform predictedTrans; + btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr; + + int batchSize = taskDesc.m_sampleValue; + int dmaArraySize = batchSize*sizeof(void*); + + if (batchSize>MAX_NUM_BODIES) + { + spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n"); + break; + } + + uint64_t ppuArrayAddress = reinterpret_cast(eaPtr); + + // spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize); + + if (dmaArraySize>=16) + { + cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } else + { + stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize); + } + + + for ( int i=0;igLocalRigidBody[0]; + void* shortAdd = localMemory->gPointerArray[i]; + uint64_t ppuRigidBodyAddress = reinterpret_cast(shortAdd); + + // spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr); + + int dmaBodySize = sizeof(btRigidBody); + + cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + float timeStep = 1.f/60.f; + + btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj); + if (body) + { + if (!body->isStaticOrKinematicObject()) + { + if (body->isActive()) + { + body->integrateVelocities( timeStep); + //damping + body->applyDamping(timeStep); + + body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform()); + + void* ptr = (void*)localPtr; + cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + } + } + } + + } + break; + } + + + + default: + { + + } + }; +} + + +#if defined(__CELLOS_LV2__) || defined (LIBSPE2) + +ATTRIBUTE_ALIGNED16(SampleTask_LocalStoreMemory gLocalStoreMemory); + +void* createSampleLocalStoreMemory() +{ + return &gLocalStoreMemory; +} +#else +void* createSampleLocalStoreMemory() +{ + return new SampleTask_LocalStoreMemory; +}; + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h new file mode 100644 index 00000000000..c8ebdfd6232 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h @@ -0,0 +1,54 @@ +/* +Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef SPU_SAMPLE_TASK_H +#define SPU_SAMPLE_TASK_H + +#include "../PlatformDefinitions.h" +#include "LinearMath/btScalar.h" +#include "LinearMath/btVector3.h" +#include "LinearMath/btMatrix3x3.h" + +#include "LinearMath/btAlignedAllocator.h" + + +enum +{ + CMD_SAMPLE_INTEGRATE_BODIES = 1, + CMD_SAMPLE_PREDICT_MOTION_BODIES +}; + + + +ATTRIBUTE_ALIGNED16(struct) SpuSampleTaskDesc +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + uint32_t m_sampleCommand; + uint32_t m_taskId; + + uint64_t m_mainMemoryPtr; + int m_sampleValue; + + +}; + + +void processSampleTask(void* userPtr, void* lsMemory); +void* createSampleLocalStoreMemory(); + + +#endif //SPU_SAMPLE_TASK_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt new file mode 100644 index 00000000000..5b4a907058f --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt @@ -0,0 +1 @@ +Empty placeholder for future Libspe2 SPU task diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp new file mode 100644 index 00000000000..11cb9e7c3f5 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp @@ -0,0 +1,222 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//#define __CELLOS_LV2__ 1 + +#define USE_SAMPLE_PROCESS 1 +#ifdef USE_SAMPLE_PROCESS + + +#include "SpuSampleTaskProcess.h" +#include + +#ifdef __SPU__ + + + +void SampleThreadFunc(void* userPtr,void* lsMemory) +{ + //do nothing + printf("hello world\n"); +} + + +void* SamplelsMemoryFunc() +{ + //don't create local store memory, just return 0 + return 0; +} + + +#else + + +#include "btThreadSupportInterface.h" + +//# include "SPUAssert.h" +#include + + + +extern "C" { + extern char SPU_SAMPLE_ELF_SYMBOL[]; +} + + + + + +SpuSampleTaskProcess::SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks) +:m_threadInterface(threadInterface), +m_maxNumOutstandingTasks(maxNumOutstandingTasks) +{ + + m_taskBusy.resize(m_maxNumOutstandingTasks); + m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks); + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + + m_initialized = false; + + m_threadInterface->startSPU(); + + +} + +SpuSampleTaskProcess::~SpuSampleTaskProcess() +{ + m_threadInterface->stopSPU(); + +} + + + +void SpuSampleTaskProcess::initialize() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("SpuSampleTaskProcess::initialize()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + m_initialized = true; + +} + + +void SpuSampleTaskProcess::issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand) +{ + +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("SpuSampleTaskProcess::issueTask (m_currentTask= %d\)n", m_currentTask); +#endif //DEBUG_SPU_TASK_SCHEDULING + + m_taskBusy[m_currentTask] = true; + m_numBusyTasks++; + + SpuSampleTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask]; + { + // send task description in event message + // no error checking here... + // but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS. + + taskDesc.m_mainMemoryPtr = reinterpret_cast(sampleMainMemPtr); + taskDesc.m_sampleValue = sampleValue; + taskDesc.m_sampleCommand = sampleCommand; + + //some bookkeeping to recognize finished tasks + taskDesc.m_taskId = m_currentTask; + } + + + m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask); + + // if all tasks busy, wait for spu event to clear the task. + + if (m_numBusyTasks >= m_maxNumOutstandingTasks) + { + unsigned int taskId; + unsigned int outputSize; + + for (int i=0;iwaitForResponse(&taskId, &outputSize); + + //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize); + + postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + // find new task buffer + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + if (!m_taskBusy[i]) + { + m_currentTask = i; + break; + } + } +} + + +///Optional PPU-size post processing for each task +void SpuSampleTaskProcess::postProcess(int taskId, int outputSize) +{ + +} + + +void SpuSampleTaskProcess::flush() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("\nSpuCollisionTaskProcess::flush()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + + // all tasks are issued, wait for all tasks to be complete + while(m_numBusyTasks > 0) + { +// Consolidating SPU code + unsigned int taskId; + unsigned int outputSize; + + for (int i=0;iwaitForResponse(&taskId, &outputSize); + } + + //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize); + + postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } + + +} + +#endif + + +#endif //USE_SAMPLE_PROCESS diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h new file mode 100644 index 00000000000..d733a9a8528 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h @@ -0,0 +1,153 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPU_SAMPLE_TASK_PROCESS_H +#define SPU_SAMPLE_TASK_PROCESS_H + +#include + + +#include "PlatformDefinitions.h" + +#include + +#include "LinearMath/btAlignedObjectArray.h" + + +#include "SpuSampleTask/SpuSampleTask.h" + + +//just add your commands here, try to keep them globally unique for debugging purposes +#define CMD_SAMPLE_TASK_COMMAND 10 + + + +/// SpuSampleTaskProcess handles SPU processing of collision pairs. +/// When PPU issues a task, it will look for completed task buffers +/// PPU will do postprocessing, dependent on workunit output (not likely) +class SpuSampleTaskProcess +{ + // track task buffers that are being used, and total busy tasks + btAlignedObjectArray m_taskBusy; + btAlignedObjectArraym_spuSampleTaskDesc; + + int m_numBusyTasks; + + // the current task and the current entry to insert a new work unit + int m_currentTask; + + bool m_initialized; + + void postProcess(int taskId, int outputSize); + + class btThreadSupportInterface* m_threadInterface; + + int m_maxNumOutstandingTasks; + + + +public: + SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks); + + ~SpuSampleTaskProcess(); + + ///call initialize in the beginning of the frame, before addCollisionPairToTask + void initialize(); + + void issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand); + + ///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished + void flush(); +}; + + +#if defined(USE_LIBSPE2) && defined(__SPU__) +////////////////////MAIN///////////////////////////// +#include "../SpuLibspe2Support.h" +#include +#include +#include + +void * SamplelsMemoryFunc(); +void SampleThreadFunc(void* userPtr,void* lsMemory); + +//#define DEBUG_LIBSPE2_MAINLOOP + +int main(unsigned long long speid, addr64 argp, addr64 envp) +{ + printf("SPU is up \n"); + + ATTRIBUTE_ALIGNED128(btSpuStatus status); + ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ; + unsigned int received_message = Spu_Mailbox_Event_Nothing; + bool shutdown = false; + + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + status.m_status = Spu_Status_Free; + status.m_lsMemory.p = SamplelsMemoryFunc(); + + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + while (!shutdown) + { + received_message = spu_read_in_mbox(); + + + + switch(received_message) + { + case Spu_Mailbox_Event_Shutdown: + shutdown = true; + break; + case Spu_Mailbox_Event_Task: + // refresh the status +#ifdef DEBUG_LIBSPE2_MAINLOOP + printf("SPU recieved Task \n"); +#endif //DEBUG_LIBSPE2_MAINLOOP + cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + btAssert(status.m_status==Spu_Status_Occupied); + + cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + SampleThreadFunc((void*)&taskDesc, reinterpret_cast (taskDesc.m_mainMemoryPtr) ); + break; + case Spu_Mailbox_Event_Nothing: + default: + break; + } + + // set to status free and wait for next task + status.m_status = Spu_Status_Free; + cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(3)); + + + } + return 0; +} +////////////////////////////////////////////////////// +#endif + + + +#endif // SPU_SAMPLE_TASK_PROCESS_H + diff --git a/extern/bullet2/BulletMultiThreaded/SpuSync.h b/extern/bullet2/BulletMultiThreaded/SpuSync.h new file mode 100644 index 00000000000..b90d0fcbfd4 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/SpuSync.h @@ -0,0 +1,148 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2007 Starbreeze Studios + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +Written by: Marten Svanfeldt +*/ + +#ifndef SPU_SYNC_H +#define SPU_SYNC_H + + +#include "PlatformDefinitions.h" + + +#if defined(WIN32) + +#define WIN32_LEAN_AND_MEAN +#ifdef _XBOX +#include +#else +#include +#endif + +///The btSpinlock is a structure to allow multi-platform synchronization. This allows to port the SPU tasks to other platforms. +class btSpinlock +{ +public: + //typedef volatile LONG SpinVariable; + typedef CRITICAL_SECTION SpinVariable; + + btSpinlock (SpinVariable* var) + : spinVariable (var) + {} + + void Init () + { + //*spinVariable = 0; + InitializeCriticalSection(spinVariable); + } + + void Lock () + { + EnterCriticalSection(spinVariable); + } + + void Unlock () + { + LeaveCriticalSection(spinVariable); + } + +private: + SpinVariable* spinVariable; +}; + + +#elif defined (__CELLOS_LV2__) + +//#include +#include + +///The btSpinlock is a structure to allow multi-platform synchronization. This allows to port the SPU tasks to other platforms. +class btSpinlock +{ +public: + typedef CellSyncMutex SpinVariable; + + btSpinlock (SpinVariable* var) + : spinVariable (var) + {} + + void Init () + { +#ifndef __SPU__ + //*spinVariable = 1; + cellSyncMutexInitialize(spinVariable); +#endif + } + + + + void Lock () + { +#ifdef __SPU__ + // lock semaphore + /*while (cellAtomicTestAndDecr32(atomic_buf, (uint64_t)spinVariable) == 0) + { + + };*/ + cellSyncMutexLock((uint64_t)spinVariable); +#endif + } + + void Unlock () + { +#ifdef __SPU__ + //cellAtomicIncr32(atomic_buf, (uint64_t)spinVariable); + cellSyncMutexUnlock((uint64_t)spinVariable); +#endif + } + + +private: + SpinVariable* spinVariable; + ATTRIBUTE_ALIGNED128(uint32_t atomic_buf[32]); +}; + +#else +//create a dummy implementation (without any locking) useful for serial processing +class btSpinlock +{ +public: + typedef int SpinVariable; + + btSpinlock (SpinVariable* var) + : spinVariable (var) + {} + + void Init () + { + } + + void Lock () + { + } + + void Unlock () + { + } + +private: + SpinVariable* spinVariable; +}; + + +#endif + + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp new file mode 100644 index 00000000000..42b60a460e0 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp @@ -0,0 +1,262 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "Win32ThreadSupport.h" + +#ifdef USE_WIN32_THREADING + +#include + +#include "SpuCollisionTaskProcess.h" + +#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" + + + +///The number of threads should be equal to the number of available cores +///@todo: each worker should be linked to a single core, using SetThreadIdealProcessor. + +///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication +///Setup and initialize SPU/CELL/Libspe2 +Win32ThreadSupport::Win32ThreadSupport(const Win32ThreadConstructionInfo & threadConstructionInfo) +{ + m_maxNumTasks = threadConstructionInfo.m_numThreads; + startThreads(threadConstructionInfo); +} + +///cleanup/shutdown Libspe2 +Win32ThreadSupport::~Win32ThreadSupport() +{ + stopSPU(); +} + + + + +#include + +DWORD WINAPI Thread_no_1( LPVOID lpParam ) +{ + + Win32ThreadSupport::btSpuStatus* status = (Win32ThreadSupport::btSpuStatus*)lpParam; + + + while (1) + { + WaitForSingleObject(status->m_eventStartHandle,INFINITE); + + void* userPtr = status->m_userPtr; + + if (userPtr) + { + btAssert(status->m_status); + status->m_userThreadFunc(userPtr,status->m_lsMemory); + status->m_status = 2; + SetEvent(status->m_eventCompletetHandle); + } else + { + //exit Thread + status->m_status = 3; + SetEvent(status->m_eventCompletetHandle); + printf("Thread with taskId %i with handle %p exiting\n",status->m_taskId, status->m_threadHandle); + break; + } + + } + + printf("Thread TERMINATED\n"); + return 0; + +} + +///send messages to SPUs +void Win32ThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId) +{ + /// gMidphaseSPU.sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc); + + ///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished + + + + switch (uiCommand) + { + case CMD_GATHER_AND_PROCESS_PAIRLIST: + { + + +//#define SINGLE_THREADED 1 +#ifdef SINGLE_THREADED + + btSpuStatus& spuStatus = m_activeSpuStatus[0]; + spuStatus.m_userPtr=(void*)uiArgument0; + spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory); + HANDLE handle =0; +#else + + + btSpuStatus& spuStatus = m_activeSpuStatus[taskId]; + btAssert(taskId>=0); + btAssert(int(taskId) 1); + spuStatus.m_status = 0; + + ///need to find an active spu + btAssert(last>=0); + +#else + last=0; + btSpuStatus& spuStatus = m_activeSpuStatus[last]; +#endif //SINGLE_THREADED + + + + *puiArgument0 = spuStatus.m_taskId; + *puiArgument1 = spuStatus.m_status; + + +} + + + +void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo) +{ + + m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads); + m_completeHandles.resize(threadConstructionInfo.m_numThreads); + + m_maxNumTasks = threadConstructionInfo.m_numThreads; + + for (int i=0;i0) + { + WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE); + } + + + spuStatus.m_userPtr = 0; + SetEvent(spuStatus.m_eventStartHandle); + WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE); + + CloseHandle(spuStatus.m_eventCompletetHandle); + CloseHandle(spuStatus.m_eventStartHandle); + CloseHandle(spuStatus.m_threadHandle); + } + + m_activeSpuStatus.clear(); + m_completeHandles.clear(); + +} + +#endif //USE_WIN32_THREADING diff --git a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h new file mode 100644 index 00000000000..c61ad901c07 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h @@ -0,0 +1,132 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "LinearMath/btScalar.h" +#include "PlatformDefinitions.h" + +#ifdef USE_WIN32_THREADING //platform specific defines are defined in PlatformDefinitions.h + +#ifndef WIN32_THREAD_SUPPORT_H +#define WIN32_THREAD_SUPPORT_H + +#include "LinearMath/btAlignedObjectArray.h" + +#include "btThreadSupportInterface.h" + + +typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory); +typedef void* (*Win32lsMemorySetupFunc)(); + + + + + + +///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication +class Win32ThreadSupport : public btThreadSupportInterface +{ +public: + ///placeholder, until libspe2 support is there + struct btSpuStatus + { + uint32_t m_taskId; + uint32_t m_commandId; + uint32_t m_status; + + Win32ThreadFunc m_userThreadFunc; + void* m_userPtr; //for taskDesc etc + void* m_lsMemory; //initialized using Win32LocalStoreMemorySetupFunc + + void* m_threadHandle; //this one is calling 'Win32ThreadFunc' + + void* m_eventStartHandle; + char m_eventStartHandleName[32]; + + void* m_eventCompletetHandle; + char m_eventCompletetHandleName[32]; + + + }; +private: + + btAlignedObjectArray m_activeSpuStatus; + btAlignedObjectArray m_completeHandles; + + int m_maxNumTasks; +public: + ///Setup and initialize SPU/CELL/Libspe2 + + struct Win32ThreadConstructionInfo + { + Win32ThreadConstructionInfo(char* uniqueName, + Win32ThreadFunc userThreadFunc, + Win32lsMemorySetupFunc lsMemoryFunc, + int numThreads=1, + int threadStackSize=65535 + ) + :m_uniqueName(uniqueName), + m_userThreadFunc(userThreadFunc), + m_lsMemoryFunc(lsMemoryFunc), + m_numThreads(numThreads), + m_threadStackSize(threadStackSize) + { + + } + + char* m_uniqueName; + Win32ThreadFunc m_userThreadFunc; + Win32lsMemorySetupFunc m_lsMemoryFunc; + int m_numThreads; + int m_threadStackSize; + + }; + + + + Win32ThreadSupport(const Win32ThreadConstructionInfo& threadConstructionInfo); + +///cleanup/shutdown Libspe2 + virtual ~Win32ThreadSupport(); + + void startThreads(const Win32ThreadConstructionInfo& threadInfo); + + +///send messages to SPUs + virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1); + +///check for messages from SPUs + virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1); + +///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) + virtual void startSPU(); + +///tell the task scheduler we are done with the SPU tasks + virtual void stopSPU(); + + virtual void setNumTasks(int numTasks) + { + m_maxNumTasks = numTasks; + } + + virtual int getNumTasks() const + { + return m_maxNumTasks; + } + +}; + +#endif //WIN32_THREAD_SUPPORT_H + +#endif //USE_WIN32_THREADING diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp new file mode 100644 index 00000000000..84a5e59f0af --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp @@ -0,0 +1,590 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///The 3 following lines include the CPU implementation of the kernels, keep them in this order. +#include "BulletMultiThreaded/btGpuDefines.h" +#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h" +#include "BulletMultiThreaded/btGpuUtilsSharedCode.h" + + + +#include "LinearMath/btAlignedAllocator.h" +#include "LinearMath/btQuickprof.h" +#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" + + + +#include "btGpuDefines.h" +#include "btGpuUtilsSharedDefs.h" + +#include "btGpu3DGridBroadphaseSharedDefs.h" + +#include "btGpu3DGridBroadphase.h" +#include //for memset + + +#include + + + +static bt3DGridBroadphaseParams s3DGridBroadphaseParams; + + + +btGpu3DGridBroadphase::btGpu3DGridBroadphase( const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell, + btScalar cellFactorAABB) : + btSimpleBroadphase(maxSmallProxies, +// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache), + new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache), + m_bInitialized(false), + m_numBodies(0) +{ + _initialize(worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, + maxSmallProxies, maxLargeProxies, maxPairsPerBody, + maxBodiesPerCell, cellFactorAABB); +} + + + +btGpu3DGridBroadphase::btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache, + const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell, + btScalar cellFactorAABB) : + btSimpleBroadphase(maxSmallProxies, overlappingPairCache), + m_bInitialized(false), + m_numBodies(0) +{ + _initialize(worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, + maxSmallProxies, maxLargeProxies, maxPairsPerBody, + maxBodiesPerCell, cellFactorAABB); +} + + + +btGpu3DGridBroadphase::~btGpu3DGridBroadphase() +{ + //btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache + assert(m_bInitialized); + _finalize(); +} + + + +void btGpu3DGridBroadphase::_initialize( const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell, + btScalar cellFactorAABB) +{ + // set various paramerers + m_ownsPairCache = true; + m_params.m_gridSizeX = gridSizeX; + m_params.m_gridSizeY = gridSizeY; + m_params.m_gridSizeZ = gridSizeZ; + m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ; + btVector3 w_org = worldAabbMin; + m_params.m_worldOriginX = w_org.getX(); + m_params.m_worldOriginY = w_org.getY(); + m_params.m_worldOriginZ = w_org.getZ(); + btVector3 w_size = worldAabbMax - worldAabbMin; + m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX; + m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY; + m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ; + m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ); + m_maxRadius *= btScalar(0.5f); + m_params.m_numBodies = m_numBodies; + m_params.m_maxBodiesPerCell = maxBodiesPerCell; + + m_numLargeHandles = 0; + m_maxLargeHandles = maxLargeProxies; + + m_maxPairsPerBody = maxPairsPerBody; + + m_cellFactorAABB = cellFactorAABB; + + m_LastLargeHandleIndex = -1; + + assert(!m_bInitialized); + // allocate host storage + m_hBodiesHash = new unsigned int[m_maxHandles * 2]; + memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int)); + + m_hCellStart = new unsigned int[m_params.m_numCells]; + memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int)); + + m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2]; + // --------------- for now, init with m_maxPairsPerBody for each body + m_hPairBuffStartCurr[0] = 0; + m_hPairBuffStartCurr[1] = 0; + for(int i = 1; i <= m_maxHandles; i++) + { + m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody; + m_hPairBuffStartCurr[i * 2 + 1] = 0; + } + //---------------- + unsigned int numAABB = m_maxHandles + m_maxLargeHandles; + m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max + + m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody]; + memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed? + + m_hPairScan = new unsigned int[m_maxHandles + 1]; + + m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody]; + +// large proxies + + // allocate handles buffer and put all handles on free list + m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16); + m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles]; + m_firstFreeLargeHandle = 0; + { + for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++) + { + m_pLargeHandles[i].SetNextFree(i + 1); + m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i; + } + m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0); + } + +// debug data + m_numPairsAdded = 0; + m_numOverflows = 0; + + m_bInitialized = true; +} + + + +void btGpu3DGridBroadphase::_finalize() +{ + assert(m_bInitialized); + delete [] m_hBodiesHash; + delete [] m_hCellStart; + delete [] m_hPairBuffStartCurr; + delete [] m_hAABB; + delete [] m_hPairBuff; + delete [] m_hPairScan; + delete [] m_hPairOut; + btAlignedFree(m_pLargeHandlesRawPtr); + m_bInitialized = false; +} + + + +void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher) +{ + if(m_numHandles <= 0) + { + BT_PROFILE("addLarge2LargePairsToCache"); + addLarge2LargePairsToCache(dispatcher); + return; + } + // update constants + setParameters(&m_params); + // prepare AABB array + prepareAABB(); + // calculate hash + calcHashAABB(); + // sort bodies based on hash + sortHash(); + // find start of each cell + findCellStart(); + // findOverlappingPairs (small/small) + findOverlappingPairs(); + // findOverlappingPairs (small/large) + findPairsLarge(); + // add pairs to CPU cache + computePairCacheChanges(); + scanOverlappingPairBuff(); + squeezeOverlappingPairBuff(); + addPairsToCache(dispatcher); + // find and add large/large pairs to CPU cache + addLarge2LargePairsToCache(dispatcher); + return; +} + + + +void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher) +{ + m_numPairsAdded = 0; + m_numPairsRemoved = 0; + for(int i = 0; i < m_numHandles; i++) + { + unsigned int num = m_hPairScan[i+1] - m_hPairScan[i]; + if(!num) + { + continue; + } + unsigned int* pInp = m_hPairOut + m_hPairScan[i]; + unsigned int index0 = m_hAABB[i * 2].uw; + btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0]; + for(unsigned int j = 0; j < num; j++) + { + unsigned int indx1_s = pInp[j]; + unsigned int index1 = indx1_s & (~BT_3DGRID_PAIR_ANY_FLG); + btSimpleBroadphaseProxy* proxy1; + if(index1 < (unsigned int)m_maxHandles) + { + proxy1 = &m_pHandles[index1]; + } + else + { + index1 -= m_maxHandles; + btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles)); + proxy1 = &m_pLargeHandles[index1]; + } + if(indx1_s & BT_3DGRID_PAIR_NEW_FLG) + { + m_pairCache->addOverlappingPair(proxy0,proxy1); + m_numPairsAdded++; + } + else + { + m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); + m_numPairsRemoved++; + } + } + } +} + + + +btBroadphaseProxy* btGpu3DGridBroadphase::createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy) +{ + btBroadphaseProxy* proxy; + bool bIsLarge = isLargeProxy(aabbMin, aabbMax); + if(bIsLarge) + { + if (m_numLargeHandles >= m_maxLargeHandles) + { + ///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell) + btAssert(0); + return 0; //should never happen, but don't let the game crash ;-) + } + btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2])); + int newHandleIndex = allocLargeHandle(); + proxy = new (&m_pLargeHandles[newHandleIndex])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy); + } + else + { + proxy = btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy); + } + return proxy; +} + + + +void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher) +{ + bool bIsLarge = isLargeProxy(proxy); + if(bIsLarge) + { + + btSimpleBroadphaseProxy* proxy0 = static_cast(proxy); + freeLargeHandle(proxy0); + m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher); + } + else + { + btSimpleBroadphase::destroyProxy(proxy, dispatcher); + } + return; +} + + + +void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher) +{ + m_hPairBuffStartCurr[0] = 0; + m_hPairBuffStartCurr[1] = 0; + for(int i = 1; i <= m_maxHandles; i++) + { + m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody; + m_hPairBuffStartCurr[i * 2 + 1] = 0; + } +} + + + +bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax) +{ + btVector3 diag = aabbMax - aabbMin; + + ///use the bounding sphere radius of this bounding box, to include rotation + btScalar radius = diag.length() * btScalar(0.5f); + radius *= m_cellFactorAABB; // user-defined factor + + return (radius > m_maxRadius); +} + + + +bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy) +{ + return (proxy->getUid() >= (m_maxHandles+2)); +} + + + +void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher) +{ + int i,j; + if (m_numLargeHandles <= 0) + { + return; + } + int new_largest_index = -1; + for(i = 0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; + if(!proxy0->m_clientObject) + { + continue; + } + new_largest_index = i; + for(j = i + 1; j <= m_LastLargeHandleIndex; j++) + { + btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[j]; + if(!proxy1->m_clientObject) + { + continue; + } + btAssert(proxy0 != proxy1); + btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0); + btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1); + if(aabbOverlap(p0,p1)) + { + if (!m_pairCache->findPair(proxy0,proxy1)) + { + m_pairCache->addOverlappingPair(proxy0,proxy1); + } + } + else + { + if(m_pairCache->findPair(proxy0,proxy1)) + { + m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); + } + } + } + } + m_LastLargeHandleIndex = new_largest_index; + return; +} + + + +void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback) +{ + btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback); + for (int i=0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i]; + if(!proxy->m_clientObject) + { + continue; + } + rayCallback.process(proxy); + } +} + + + +// +// overrides for CPU version +// + + + +void btGpu3DGridBroadphase::prepareAABB() +{ + BT_PROFILE("prepareAABB"); + bt3DGrid3F1U* pBB = m_hAABB; + int i; + int new_largest_index = -1; + unsigned int num_small = 0; + for(i = 0; i <= m_LastHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i]; + if(!proxy0->m_clientObject) + { + continue; + } + new_largest_index = i; + pBB->fx = proxy0->m_aabbMin.getX(); + pBB->fy = proxy0->m_aabbMin.getY(); + pBB->fz = proxy0->m_aabbMin.getZ(); + pBB->uw = i; + pBB++; + pBB->fx = proxy0->m_aabbMax.getX(); + pBB->fy = proxy0->m_aabbMax.getY(); + pBB->fz = proxy0->m_aabbMax.getZ(); + pBB->uw = num_small; + pBB++; + num_small++; + } + m_LastHandleIndex = new_largest_index; + new_largest_index = -1; + unsigned int num_large = 0; + for(i = 0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; + if(!proxy0->m_clientObject) + { + continue; + } + new_largest_index = i; + pBB->fx = proxy0->m_aabbMin.getX(); + pBB->fy = proxy0->m_aabbMin.getY(); + pBB->fz = proxy0->m_aabbMin.getZ(); + pBB->uw = i + m_maxHandles; + pBB++; + pBB->fx = proxy0->m_aabbMax.getX(); + pBB->fy = proxy0->m_aabbMax.getY(); + pBB->fz = proxy0->m_aabbMax.getZ(); + pBB->uw = num_large + m_maxHandles; + pBB++; + num_large++; + } + m_LastLargeHandleIndex = new_largest_index; + // paranoid checks + btAssert(num_small == m_numHandles); + btAssert(num_large == m_numLargeHandles); + return; +} + + + +void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams) +{ + s3DGridBroadphaseParams = *hostParams; + return; +} + + + +void btGpu3DGridBroadphase::calcHashAABB() +{ + BT_PROFILE("bt3DGrid_calcHashAABB"); + btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles); + return; +} + + + +void btGpu3DGridBroadphase::sortHash() +{ + class bt3DGridHashKey + { + public: + unsigned int hash; + unsigned int index; + void quickSort(bt3DGridHashKey* pData, int lo, int hi) + { + int i=lo, j=hi; + bt3DGridHashKey x = pData[(lo+hi)/2]; + do + { + while(pData[i].hash > x.hash) i++; + while(x.hash > pData[j].hash) j--; + if(i <= j) + { + bt3DGridHashKey t = pData[i]; + pData[i] = pData[j]; + pData[j] = t; + i++; j--; + } + } while(i <= j); + if(lo < j) pData->quickSort(pData, lo, j); + if(i < hi) pData->quickSort(pData, i, hi); + } + }; + BT_PROFILE("bt3DGrid_sortHash"); + bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash; + pHash->quickSort(pHash, 0, m_numHandles - 1); + return; +} + + + +void btGpu3DGridBroadphase::findCellStart() +{ + BT_PROFILE("bt3DGrid_findCellStart"); + btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells); + return; +} + + + +void btGpu3DGridBroadphase::findOverlappingPairs() +{ + BT_PROFILE("bt3DGrid_findOverlappingPairs"); + btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles); + return; +} + + + +void btGpu3DGridBroadphase::findPairsLarge() +{ + BT_PROFILE("bt3DGrid_findPairsLarge"); + btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles); + return; +} + + + +void btGpu3DGridBroadphase::computePairCacheChanges() +{ + BT_PROFILE("bt3DGrid_computePairCacheChanges"); + btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hAABB, m_numHandles); + return; +} + + + +void btGpu3DGridBroadphase::scanOverlappingPairBuff() +{ + BT_PROFILE("bt3DGrid_scanOverlappingPairBuff"); + m_hPairScan[0] = 0; + for(int i = 1; i <= m_numHandles; i++) + { + unsigned int delta = m_hPairScan[i]; + m_hPairScan[i] = m_hPairScan[i-1] + delta; + } + return; +} + + + +void btGpu3DGridBroadphase::squeezeOverlappingPairBuff() +{ + BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff"); + btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_hAABB, m_numHandles); + return; +} + + + +#include "btGpu3DGridBroadphaseSharedCode.h" + + diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h new file mode 100644 index 00000000000..1d49a0557ae --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h @@ -0,0 +1,138 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +#ifndef BTGPU3DGRIDBROADPHASE_H +#define BTGPU3DGRIDBROADPHASE_H + +//---------------------------------------------------------------------------------------- + +#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" + +#include "btGpu3DGridBroadphaseSharedTypes.h" + +//---------------------------------------------------------------------------------------- + +///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs + +class btGpu3DGridBroadphase : public btSimpleBroadphase +{ +protected: + bool m_bInitialized; + unsigned int m_numBodies; + unsigned int m_numCells; + unsigned int m_maxPairsPerBody; + btScalar m_cellFactorAABB; + unsigned int m_maxBodiesPerCell; + bt3DGridBroadphaseParams m_params; + btScalar m_maxRadius; + // CPU data + unsigned int* m_hBodiesHash; + unsigned int* m_hCellStart; + unsigned int* m_hPairBuffStartCurr; + bt3DGrid3F1U* m_hAABB; + unsigned int* m_hPairBuff; + unsigned int* m_hPairScan; + unsigned int* m_hPairOut; +// large proxies + int m_numLargeHandles; + int m_maxLargeHandles; + int m_LastLargeHandleIndex; + btSimpleBroadphaseProxy* m_pLargeHandles; + void* m_pLargeHandlesRawPtr; + int m_firstFreeLargeHandle; + int allocLargeHandle() + { + btAssert(m_numLargeHandles < m_maxLargeHandles); + int freeLargeHandle = m_firstFreeLargeHandle; + m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree(); + m_numLargeHandles++; + if(freeLargeHandle > m_LastLargeHandleIndex) + { + m_LastLargeHandleIndex = freeLargeHandle; + } + return freeLargeHandle; + } + void freeLargeHandle(btSimpleBroadphaseProxy* proxy) + { + int handle = int(proxy - m_pLargeHandles); + btAssert((handle >= 0) && (handle < m_maxHandles)); + if(handle == m_LastLargeHandleIndex) + { + m_LastLargeHandleIndex--; + } + proxy->SetNextFree(m_firstFreeLargeHandle); + m_firstFreeLargeHandle = handle; + proxy->m_clientObject = 0; + m_numLargeHandles--; + } + bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax); + bool isLargeProxy(btBroadphaseProxy* proxy); +// debug + unsigned int m_numPairsAdded; + unsigned int m_numPairsRemoved; + unsigned int m_numOverflows; +// +public: + btGpu3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell = 8, + btScalar cellFactorAABB = btScalar(1.0f)); + btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache, + const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell = 8, + btScalar cellFactorAABB = btScalar(1.0f)); + virtual ~btGpu3DGridBroadphase(); + virtual void calculateOverlappingPairs(btDispatcher* dispatcher); + + virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy); + virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher); + virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback); + virtual void resetPool(btDispatcher* dispatcher); + +protected: + void _initialize( const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell = 8, + btScalar cellFactorAABB = btScalar(1.0f)); + void _finalize(); + void addPairsToCache(btDispatcher* dispatcher); + void addLarge2LargePairsToCache(btDispatcher* dispatcher); + +// overrides for CPU version + virtual void setParameters(bt3DGridBroadphaseParams* hostParams); + virtual void prepareAABB(); + virtual void calcHashAABB(); + virtual void sortHash(); + virtual void findCellStart(); + virtual void findOverlappingPairs(); + virtual void findPairsLarge(); + virtual void computePairCacheChanges(); + virtual void scanOverlappingPairBuff(); + virtual void squeezeOverlappingPairBuff(); +}; + +//---------------------------------------------------------------------------------------- + +#endif //BTGPU3DGRIDBROADPHASE_H + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h new file mode 100644 index 00000000000..e0afb87bb82 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h @@ -0,0 +1,430 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +// K E R N E L F U N C T I O N S +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + +// calculate position in uniform grid +BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p) +{ + int3 gridPos; + gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX); + gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY); + gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ); + return gridPos; +} // bt3DGrid_calcGridPos() + +//---------------------------------------------------------------------------------------- + +// calculate address in grid from position (clamping to edges) +BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos) +{ + gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1)); + gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1)); + gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1)); + return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x; +} // bt3DGrid_calcGridHash() + +//---------------------------------------------------------------------------------------- + +// calculate grid hash value for each body using its AABB +BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + bt3DGrid3F1U bbMin = pAABB[index*2]; + bt3DGrid3F1U bbMax = pAABB[index*2 + 1]; + float4 pos; + pos.x = (bbMin.fx + bbMax.fx) * 0.5f; + pos.y = (bbMin.fy + bbMax.fy) * 0.5f; + pos.z = (bbMin.fz + bbMax.fz) * 0.5f; + // get address in grid + int3 gridPos = bt3DGrid_calcGridPos(pos); + uint gridHash = bt3DGrid_calcGridHash(gridPos); + // store grid hash and body index + pHash[index] = BT_GPU_make_uint2(gridHash, index); +} // calcHashAABBD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + // Load hash data into shared memory so that we can look + // at neighboring body's hash value without loading + // two hash values per thread + BT_GPU___shared__ uint sharedHash[257]; + sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x; + if((index > 0) && (BT_GPU_threadIdx.x == 0)) + { + // first thread in block must load neighbor body hash + volatile uint2 prevData = pHash[index-1]; + sharedHash[0] = prevData.x; + } + BT_GPU___syncthreads(); + if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x])) + { + cellStart[sortedData.x] = index; + } +} // findCellStartD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1) +{ + return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && + (min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && + (min0.fz <= max1.fz)&& (min1.fz <= max0.fz); +} // cudaTestAABBOverlap() + +//---------------------------------------------------------------------------------------- + +BT_GPU___device__ void findPairsInCell( int3 gridPos, + uint index, + uint2* pHash, + uint* pCellStart, + bt3DGrid3F1U* pAABB, + uint* pPairBuff, + uint2* pPairBuffStartCurr, + uint numBodies) +{ + if ( (gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1) + || (gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1) + || (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1)) + { + return; + } + uint gridHash = bt3DGrid_calcGridHash(gridPos); + // get start of bucket for this cell + uint bucketStart = pCellStart[gridHash]; + if (bucketStart == 0xffffffff) + { + return; // cell empty + } + // iterate over bodies in this cell + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); + bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1); + uint handleIndex = min0.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + uint curr_max = start_curr_next.x - start - 1; + uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell; + bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd; + for(uint index2 = bucketStart; index2 < bucketEnd; index2++) + { + uint2 cellData = pHash[index2]; + if (cellData.x != gridHash) + { + break; // no longer in same bucket + } + uint unsorted_indx2 = cellData.y; + if (unsorted_indx2 < unsorted_indx) // check not colliding with self + { + bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2); + bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1); + if(cudaTestAABBOverlap(min0, max0, min1, max1)) + { + uint handleIndex2 = min1.uw; + uint k; + for(k = 0; k < curr; k++) + { + uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG; + break; + } + } + if(k == curr) + { + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG; + curr++; + } + } + } + } + pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr); + return; +} // findPairsInCell() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void findOverlappingPairsD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, + uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2); + bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1); + float4 pos; + pos.x = (bbMin.fx + bbMax.fx) * 0.5f; + pos.y = (bbMin.fy + bbMax.fy) * 0.5f; + pos.z = (bbMin.fz + bbMax.fz) * 0.5f; + // get address in grid + int3 gridPos = bt3DGrid_calcGridPos(pos); + // examine only neighbouring cells + for(int z=-1; z<=1; z++) { + for(int y=-1; y<=1; y++) { + for(int x=-1; x<=1; x++) { + findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies); + } + } + } +} // findOverlappingPairsD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void findPairsLargeD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, + uint2* pPairBuffStartCurr, uint numBodies, uint numLarge) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); + bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1); + uint handleIndex = min0.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + uint curr_max = start_curr_next.x - start - 1; + for(uint i = 0; i < numLarge; i++) + { + uint indx2 = numBodies + i; + bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2); + bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1); + if(cudaTestAABBOverlap(min0, max0, min1, max1)) + { + uint k; + uint handleIndex2 = min1.uw; + for(k = 0; k < curr; k++) + { + uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG; + break; + } + } + if(k == curr) + { + pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG; + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + curr++; + } + } + } + pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr); + return; +} // findPairsLargeD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, + uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + bt3DGrid3F1U bbMin = pAABB[index * 2]; + uint handleIndex = bbMin.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint *pInp = pPairBuff + start; + uint num_changes = 0; + for(uint k = 0; k < curr; k++, pInp++) + { + if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG)) + { + num_changes++; + } + } + pPairScan[index+1] = num_changes; +} // computePairCacheChangesD() + +//---------------------------------------------------------------------------------------- + +BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, + uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies) +{ + int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x; + if(index >= (int)numBodies) + { + return; + } + bt3DGrid3F1U bbMin = pAABB[index * 2]; + uint handleIndex = bbMin.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint* pInp = pPairBuff + start; + uint* pOut = pPairOut + pPairScan[index]; + uint* pOut2 = pInp; + uint num = 0; + for(uint k = 0; k < curr; k++, pInp++) + { + if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG)) + { + *pOut = *pInp; + pOut++; + } + if((*pInp) & BT_3DGRID_PAIR_ANY_FLG) + { + *pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG); + pOut2++; + num++; + } + } + pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num); +} // squeezeOverlappingPairBuffD() + + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +// E N D O F K E R N E L F U N C T I O N S +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + +extern "C" +{ + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + // execute the kernel + BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies)); + // check if kernel invocation generated an error + BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed"); +} // calcHashAABB() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint))); + BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD"); +} // findCellStart() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies)) +{ +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U))); +#endif + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD"); +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex)); +#endif +} // findOverlappingPairs() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)) +{ +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U))); +#endif + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge)); + BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD"); +#if B_CUDA_USE_TEX + BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex)); +#endif +} // findPairsLarge() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD"); +} // computePairCacheChanges() + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)) +{ + int numThreads, numBlocks; + BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); + BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies)); + BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD"); +} // btCuda_squeezeOverlappingPairBuff() + +//------------------------------------------------------------------------------------------------ + +} // extern "C" + +//------------------------------------------------------------------------------------------------ +//------------------------------------------------------------------------------------------------ +//------------------------------------------------------------------------------------------------ diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h new file mode 100644 index 00000000000..607bda7edfd --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h @@ -0,0 +1,61 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +// Shared definitions for GPU-based 3D Grid collision detection broadphase + +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// Keep this file free from Bullet headers +// it is included into both CUDA and CPU code +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +//---------------------------------------------------------------------------------------- + +#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H +#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H + +//---------------------------------------------------------------------------------------- + +#include "btGpu3DGridBroadphaseSharedTypes.h" + +//---------------------------------------------------------------------------------------- + +extern "C" +{ + +//---------------------------------------------------------------------------------------- + +void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies); + +void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells); + +void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies); + +void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge); + +void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies); + +void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies); + + +//---------------------------------------------------------------------------------------- + +} // extern "C" + +//---------------------------------------------------------------------------------------- + +#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H + diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h new file mode 100644 index 00000000000..616a40094ca --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h @@ -0,0 +1,67 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +// Shared definitions for GPU-based 3D Grid collision detection broadphase + +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// Keep this file free from Bullet headers +// it is included into both CUDA and CPU code +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +//---------------------------------------------------------------------------------------- + +#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H +#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H + +//---------------------------------------------------------------------------------------- + +#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000) +#define BT_3DGRID_PAIR_NEW_FLG (0x20000000) +#define BT_3DGRID_PAIR_ANY_FLG (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG) + +//---------------------------------------------------------------------------------------- + +struct bt3DGridBroadphaseParams +{ + unsigned int m_gridSizeX; + unsigned int m_gridSizeY; + unsigned int m_gridSizeZ; + unsigned int m_numCells; + float m_worldOriginX; + float m_worldOriginY; + float m_worldOriginZ; + float m_cellSizeX; + float m_cellSizeY; + float m_cellSizeZ; + unsigned int m_numBodies; + unsigned int m_maxBodiesPerCell; +}; + +//---------------------------------------------------------------------------------------- + +struct bt3DGrid3F1U +{ + float fx; + float fy; + float fz; + unsigned int uw; +}; + +//---------------------------------------------------------------------------------------- + +#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H + diff --git a/extern/bullet2/BulletMultiThreaded/btGpuDefines.h b/extern/bullet2/BulletMultiThreaded/btGpuDefines.h new file mode 100644 index 00000000000..f9315ab6496 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/btGpuDefines.h @@ -0,0 +1,211 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + + +// definitions for "GPU on CPU" code + + +#ifndef BT_GPU_DEFINES_H +#define BT_GPU_DEFINES_H + +typedef unsigned int uint; + +struct int2 +{ + int x, y; +}; + +struct uint2 +{ + unsigned int x, y; +}; + +struct int3 +{ + int x, y, z; +}; + +struct uint3 +{ + unsigned int x, y, z; +}; + +struct float4 +{ + float x, y, z, w; +}; + +struct float3 +{ + float x, y, z; +}; + + +#define BT_GPU___device__ inline +#define BT_GPU___devdata__ +#define BT_GPU___constant__ +#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b)) +#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b)) +#define BT_GPU_params s3DGridBroadphaseParams +#define BT_GPU___mul24(a, b) ((a)*(b)) +#define BT_GPU___global__ inline +#define BT_GPU___shared__ static +#define BT_GPU___syncthreads() +#define CUDART_PI_F SIMD_PI + +static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y) +{ + uint2 t; t.x = x; t.y = y; return t; +} +#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y) + +static inline int3 bt3dGrid_make_int3(int x, int y, int z) +{ + int3 t; t.x = x; t.y = y; t.z = z; return t; +} +#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z) + +static inline float3 bt3dGrid_make_float3(float x, float y, float z) +{ + float3 t; t.x = x; t.y = y; t.z = z; return t; +} +#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z) + +static inline float3 bt3dGrid_make_float34(float4 f) +{ + float3 t; t.x = f.x; t.y = f.y; t.z = f.z; return t; +} +#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f) + +static inline float3 bt3dGrid_make_float31(float f) +{ + float3 t; t.x = t.y = t.z = f; return t; +} +#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x) + +static inline float4 bt3dGrid_make_float42(float3 v, float f) +{ + float4 t; t.x = v.x; t.y = v.y; t.z = v.z; t.w = f; return t; +} +#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b) + +static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d) +{ + float4 t; t.x = a; t.y = b; t.z = c; t.w = d; return t; +} +#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d) + +inline int3 operator+(int3 a, int3 b) +{ + return bt3dGrid_make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +} + +inline float4 operator+(const float4& a, const float4& b) +{ + float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r; +} +inline float4 operator*(const float4& a, float fact) +{ + float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r; +} +inline float4 operator*(float fact, float4& a) +{ + return (a * fact); +} +inline float4& operator*=(float4& a, float fact) +{ + a = fact * a; + return a; +} +inline float4& operator+=(float4& a, const float4& b) +{ + a = a + b; + return a; +} + +inline float3 operator+(const float3& a, const float3& b) +{ + float3 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; return r; +} +inline float3 operator-(const float3& a, const float3& b) +{ + float3 r; r.x = a.x-b.x; r.y = a.y-b.y; r.z = a.z-b.z; return r; +} +static inline float bt3dGrid_dot(float3& a, float3& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z; +} +#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b) + +static inline float bt3dGrid_dot4(float4& a, float4& b) +{ + return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w; +} +#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b) + +static inline float3 bt3dGrid_cross(const float3& a, const float3& b) +{ + float3 r; r.x = a.y*b.z-a.z*b.y; r.y = -a.x*b.z+a.z*b.x; r.z = a.x*b.y-a.y*b.x; return r; +} +#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b) + + +inline float3 operator*(const float3& a, float fact) +{ + float3 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; return r; +} + + +inline float3& operator+=(float3& a, const float3& b) +{ + a = a + b; + return a; +} +inline float3& operator-=(float3& a, const float3& b) +{ + a = a - b; + return a; +} +inline float3& operator*=(float3& a, float fact) +{ + a = a * fact; + return a; +} +inline float3 operator-(const float3& v) +{ + float3 r; r.x = -v.x; r.y = -v.y; r.z = -v.z; return r; +} + + +#define BT_GPU_FETCH(a, b) a[b] +#define BT_GPU_FETCH4(a, b) a[b] +#define BT_GPU_PREF(func) btGpu_##func +#define BT_GPU_SAFE_CALL(func) func +#define BT_GPU_Memset memset +#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c) +#define BT_GPU_BindTexture(a, b, c, d) +#define BT_GPU_UnbindTexture(a) + +static uint2 s_blockIdx, s_blockDim, s_threadIdx; +#define BT_GPU_blockIdx s_blockIdx +#define BT_GPU_blockDim s_blockDim +#define BT_GPU_threadIdx s_threadIdx +#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb //for uint32_t etc. +#include "PlatformDefinitions.h" +#include "PpuAddressSpace.h" + +class btThreadSupportInterface +{ +public: + + virtual ~btThreadSupportInterface(); + +///send messages to SPUs + virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1) =0; + +///check for messages from SPUs + virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0; + +///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) + virtual void startSPU() =0; + +///tell the task scheduler we are done with the SPU tasks + virtual void stopSPU()=0; + + ///tell the task scheduler to use no more than numTasks tasks + virtual void setNumTasks(int numTasks)=0; + + virtual int getNumTasks() const = 0; + +}; + +#endif //THREAD_SUPPORT_INTERFACE_H + diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h new file mode 100644 index 00000000000..c5eeeebd7a1 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h @@ -0,0 +1,225 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _BOOLINVEC_H +#define _BOOLINVEC_H + +#include +namespace Vectormath { + +class floatInVec; + +//-------------------------------------------------------------------------------------------------- +// boolInVec class +// + +class boolInVec +{ +private: + unsigned int mData; + +public: + // Default constructor; does no initialization + // + inline boolInVec( ) { }; + + // Construct from a value converted from float + // + inline boolInVec(floatInVec vec); + + // Explicit cast from bool + // + explicit inline boolInVec(bool scalar); + + // Explicit cast to bool + // + inline bool getAsBool() const; + +#ifndef _VECTORMATH_NO_SCALAR_CAST + // Implicit cast to bool + // + inline operator bool() const; +#endif + + // Boolean negation operator + // + inline const boolInVec operator ! () const; + + // Assignment operator + // + inline boolInVec& operator = (boolInVec vec); + + // Boolean and assignment operator + // + inline boolInVec& operator &= (boolInVec vec); + + // Boolean exclusive or assignment operator + // + inline boolInVec& operator ^= (boolInVec vec); + + // Boolean or assignment operator + // + inline boolInVec& operator |= (boolInVec vec); + +}; + +// Equal operator +// +inline const boolInVec operator == (boolInVec vec0, boolInVec vec1); + +// Not equal operator +// +inline const boolInVec operator != (boolInVec vec0, boolInVec vec1); + +// And operator +// +inline const boolInVec operator & (boolInVec vec0, boolInVec vec1); + +// Exclusive or operator +// +inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1); + +// Or operator +// +inline const boolInVec operator | (boolInVec vec0, boolInVec vec1); + +// Conditionally select between two values +// +inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1); + + +} // namespace Vectormath + + +//-------------------------------------------------------------------------------------------------- +// boolInVec implementation +// + +#include "floatInVec.h" + +namespace Vectormath { + +inline +boolInVec::boolInVec(floatInVec vec) +{ + *this = (vec != floatInVec(0.0f)); +} + +inline +boolInVec::boolInVec(bool scalar) +{ + mData = -(int)scalar; +} + +inline +bool +boolInVec::getAsBool() const +{ + return (mData > 0); +} + +#ifndef _VECTORMATH_NO_SCALAR_CAST +inline +boolInVec::operator bool() const +{ + return getAsBool(); +} +#endif + +inline +const boolInVec +boolInVec::operator ! () const +{ + return boolInVec(!mData); +} + +inline +boolInVec& +boolInVec::operator = (boolInVec vec) +{ + mData = vec.mData; + return *this; +} + +inline +boolInVec& +boolInVec::operator &= (boolInVec vec) +{ + *this = *this & vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator ^= (boolInVec vec) +{ + *this = *this ^ vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator |= (boolInVec vec) +{ + *this = *this | vec; + return *this; +} + +inline +const boolInVec +operator == (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() == vec1.getAsBool()); +} + +inline +const boolInVec +operator != (boolInVec vec0, boolInVec vec1) +{ + return !(vec0 == vec1); +} + +inline +const boolInVec +operator & (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() & vec1.getAsBool()); +} + +inline +const boolInVec +operator | (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() | vec1.getAsBool()); +} + +inline +const boolInVec +operator ^ (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() ^ vec1.getAsBool()); +} + +inline +const boolInVec +select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1) +{ + return (select_vec1.getAsBool() == 0) ? vec0 : vec1; +} + +} // namespace Vectormath + +#endif // boolInVec_h diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h new file mode 100644 index 00000000000..12d89e43d3e --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h @@ -0,0 +1,343 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ +#ifndef _FLOATINVEC_H +#define _FLOATINVEC_H + +#include +namespace Vectormath { + +class boolInVec; + +//-------------------------------------------------------------------------------------------------- +// floatInVec class +// + +// A class representing a scalar float value contained in a vector register +// This class does not support fastmath +class floatInVec +{ +private: + float mData; + +public: + // Default constructor; does no initialization + // + inline floatInVec( ) { }; + + // Construct from a value converted from bool + // + inline floatInVec(boolInVec vec); + + // Explicit cast from float + // + explicit inline floatInVec(float scalar); + + // Explicit cast to float + // + inline float getAsFloat() const; + +#ifndef _VECTORMATH_NO_SCALAR_CAST + // Implicit cast to float + // + inline operator float() const; +#endif + + // Post increment (add 1.0f) + // + inline const floatInVec operator ++ (int); + + // Post decrement (subtract 1.0f) + // + inline const floatInVec operator -- (int); + + // Pre increment (add 1.0f) + // + inline floatInVec& operator ++ (); + + // Pre decrement (subtract 1.0f) + // + inline floatInVec& operator -- (); + + // Negation operator + // + inline const floatInVec operator - () const; + + // Assignment operator + // + inline floatInVec& operator = (floatInVec vec); + + // Multiplication assignment operator + // + inline floatInVec& operator *= (floatInVec vec); + + // Division assignment operator + // + inline floatInVec& operator /= (floatInVec vec); + + // Addition assignment operator + // + inline floatInVec& operator += (floatInVec vec); + + // Subtraction assignment operator + // + inline floatInVec& operator -= (floatInVec vec); + +}; + +// Multiplication operator +// +inline const floatInVec operator * (floatInVec vec0, floatInVec vec1); + +// Division operator +// +inline const floatInVec operator / (floatInVec vec0, floatInVec vec1); + +// Addition operator +// +inline const floatInVec operator + (floatInVec vec0, floatInVec vec1); + +// Subtraction operator +// +inline const floatInVec operator - (floatInVec vec0, floatInVec vec1); + +// Less than operator +// +inline const boolInVec operator < (floatInVec vec0, floatInVec vec1); + +// Less than or equal operator +// +inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1); + +// Greater than operator +// +inline const boolInVec operator > (floatInVec vec0, floatInVec vec1); + +// Greater than or equal operator +// +inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1); + +// Equal operator +// +inline const boolInVec operator == (floatInVec vec0, floatInVec vec1); + +// Not equal operator +// +inline const boolInVec operator != (floatInVec vec0, floatInVec vec1); + +// Conditionally select between two values +// +inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1); + + +} // namespace Vectormath + + +//-------------------------------------------------------------------------------------------------- +// floatInVec implementation +// + +#include "boolInVec.h" + +namespace Vectormath { + +inline +floatInVec::floatInVec(boolInVec vec) +{ + mData = float(vec.getAsBool()); +} + +inline +floatInVec::floatInVec(float scalar) +{ + mData = scalar; +} + +inline +float +floatInVec::getAsFloat() const +{ + return mData; +} + +#ifndef _VECTORMATH_NO_SCALAR_CAST +inline +floatInVec::operator float() const +{ + return getAsFloat(); +} +#endif + +inline +const floatInVec +floatInVec::operator ++ (int) +{ + float olddata = mData; + operator ++(); + return floatInVec(olddata); +} + +inline +const floatInVec +floatInVec::operator -- (int) +{ + float olddata = mData; + operator --(); + return floatInVec(olddata); +} + +inline +floatInVec& +floatInVec::operator ++ () +{ + *this += floatInVec(1.0f); + return *this; +} + +inline +floatInVec& +floatInVec::operator -- () +{ + *this -= floatInVec(1.0f); + return *this; +} + +inline +const floatInVec +floatInVec::operator - () const +{ + return floatInVec(-mData); +} + +inline +floatInVec& +floatInVec::operator = (floatInVec vec) +{ + mData = vec.mData; + return *this; +} + +inline +floatInVec& +floatInVec::operator *= (floatInVec vec) +{ + *this = *this * vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator /= (floatInVec vec) +{ + *this = *this / vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator += (floatInVec vec) +{ + *this = *this + vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator -= (floatInVec vec) +{ + *this = *this - vec; + return *this; +} + +inline +const floatInVec +operator * (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() * vec1.getAsFloat()); +} + +inline +const floatInVec +operator / (floatInVec num, floatInVec den) +{ + return floatInVec(num.getAsFloat() / den.getAsFloat()); +} + +inline +const floatInVec +operator + (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() + vec1.getAsFloat()); +} + +inline +const floatInVec +operator - (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() - vec1.getAsFloat()); +} + +inline +const boolInVec +operator < (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() < vec1.getAsFloat()); +} + +inline +const boolInVec +operator <= (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 > vec1); +} + +inline +const boolInVec +operator > (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() > vec1.getAsFloat()); +} + +inline +const boolInVec +operator >= (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 < vec1); +} + +inline +const boolInVec +operator == (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() == vec1.getAsFloat()); +} + +inline +const boolInVec +operator != (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 == vec1); +} + +inline +const floatInVec +select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1) +{ + return (select_vec1.getAsBool() == 0) ? vec0 : vec1; +} + +} // namespace Vectormath + +#endif // floatInVec_h diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h new file mode 100644 index 00000000000..e103243d1e0 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h @@ -0,0 +1,1630 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_MAT_AOS_CPP_H +#define _VECTORMATH_MAT_AOS_CPP_H + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// Constants + +#define _VECTORMATH_PI_OVER_2 1.570796327f + +//----------------------------------------------------------------------------- +// Definitions + +inline Matrix3::Matrix3( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; +} + +inline Matrix3::Matrix3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +inline Matrix3::Matrix3( const Quat & unitQuat ) +{ + float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2; + qx = unitQuat.getX(); + qy = unitQuat.getY(); + qz = unitQuat.getZ(); + qw = unitQuat.getW(); + qx2 = ( qx + qx ); + qy2 = ( qy + qy ); + qz2 = ( qz + qz ); + qxqx2 = ( qx * qx2 ); + qxqy2 = ( qx * qy2 ); + qxqz2 = ( qx * qz2 ); + qxqw2 = ( qw * qx2 ); + qyqy2 = ( qy * qy2 ); + qyqz2 = ( qy * qz2 ); + qyqw2 = ( qw * qy2 ); + qzqz2 = ( qz * qz2 ); + qzqw2 = ( qw * qz2 ); + mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) ); + mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) ); + mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) ); +} + +inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; +} + +inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + return *this; +} + +inline Matrix3 & Matrix3::setElem( int col, int row, float val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Matrix3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Matrix3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Matrix3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Matrix3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Matrix3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::getRow( int row ) const +{ + return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) ); +} + +inline Vector3 & Matrix3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + return *this; +} + +inline const Matrix3 transpose( const Matrix3 & mat ) +{ + return Matrix3( + Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ), + Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ), + Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() ) + ); +} + +inline const Matrix3 inverse( const Matrix3 & mat ) +{ + Vector3 tmp0, tmp1, tmp2; + float detinv; + tmp0 = cross( mat.getCol1(), mat.getCol2() ); + tmp1 = cross( mat.getCol2(), mat.getCol0() ); + tmp2 = cross( mat.getCol0(), mat.getCol1() ); + detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) ); + return Matrix3( + Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ), + Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ), + Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) ) + ); +} + +inline float determinant( const Matrix3 & mat ) +{ + return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); +} + +inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ) + ); +} + +inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix3 Matrix3::operator -( ) const +{ + return Matrix3( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ) + ); +} + +inline const Matrix3 absPerElem( const Matrix3 & mat ) +{ + return Matrix3( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::operator *( float scalar ) const +{ + return Matrix3( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ) + ); +} + +inline Matrix3 & Matrix3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ) +{ + return mat * scalar; +} + +inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const +{ + return Vector3( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ); +} + +inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const +{ + return Matrix3( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) +{ + return Matrix3( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::identity( ) +{ + return Matrix3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3::xAxis( ), + Vector3( 0.0f, c, s ), + Vector3( 0.0f, -s, c ) + ); +} + +inline const Matrix3 Matrix3::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3( c, 0.0f, -s ), + Vector3::yAxis( ), + Vector3( s, 0.0f, c ) + ); +} + +inline const Matrix3 Matrix3::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3( c, s, 0.0f ), + Vector3( -s, c, 0.0f ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Matrix3( + Vector3( ( cZ * cY ), ( sZ * cY ), -sY ), + Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ), + Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec ) +{ + float x, y, z, s, c, oneMinusC, xy, yz, zx; + s = sinf( radians ); + c = cosf( radians ); + x = unitVec.getX(); + y = unitVec.getY(); + z = unitVec.getZ(); + xy = ( x * y ); + yz = ( y * z ); + zx = ( z * x ); + oneMinusC = ( 1.0f - c ); + return Matrix3( + Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ), + Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ), + Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( const Quat & unitQuat ) +{ + return Matrix3( unitQuat ); +} + +inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec ) +{ + return Matrix3( + Vector3( scaleVec.getX(), 0.0f, 0.0f ), + Vector3( 0.0f, scaleVec.getY(), 0.0f ), + Vector3( 0.0f, 0.0f, scaleVec.getZ() ) + ); +} + +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ) +{ + return Matrix3( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ) + ); +} + +inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ) +{ + return Matrix3( + mulPerElem( mat.getCol0(), scaleVec ), + mulPerElem( mat.getCol1(), scaleVec ), + mulPerElem( mat.getCol2(), scaleVec ) + ); +} + +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix3 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); +} + +inline void print( const Matrix3 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Matrix4::Matrix4( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; +} + +inline Matrix4::Matrix4( float scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +inline Matrix4::Matrix4( const Transform3 & mat ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( mat.getCol3(), 1.0f ); +} + +inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec ) +{ + Matrix3 mat; + mat = Matrix3( unitQuat ); + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Matrix4 & Matrix4::setElem( int col, int row, float val ) +{ + Vector4 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Matrix4::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector4 Matrix4::getCol0( ) const +{ + return mCol0; +} + +inline const Vector4 Matrix4::getCol1( ) const +{ + return mCol1; +} + +inline const Vector4 Matrix4::getCol2( ) const +{ + return mCol2; +} + +inline const Vector4 Matrix4::getCol3( ) const +{ + return mCol3; +} + +inline const Vector4 Matrix4::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector4 & Matrix4::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; + return *this; +} + +inline const Matrix4 transpose( const Matrix4 & mat ) +{ + return Matrix4( + Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ), + Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ), + Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ), + Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() ) + ); +} + +inline const Matrix4 inverse( const Matrix4 & mat ) +{ + Vector4 res0, res1, res2, res3; + float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv; + mA = mat.getCol0().getX(); + mB = mat.getCol0().getY(); + mC = mat.getCol0().getZ(); + mD = mat.getCol0().getW(); + mE = mat.getCol1().getX(); + mF = mat.getCol1().getY(); + mG = mat.getCol1().getZ(); + mH = mat.getCol1().getW(); + mI = mat.getCol2().getX(); + mJ = mat.getCol2().getY(); + mK = mat.getCol2().getZ(); + mL = mat.getCol2().getW(); + mM = mat.getCol3().getX(); + mN = mat.getCol3().getY(); + mO = mat.getCol3().getZ(); + mP = mat.getCol3().getW(); + tmp0 = ( ( mK * mD ) - ( mC * mL ) ); + tmp1 = ( ( mO * mH ) - ( mG * mP ) ); + tmp2 = ( ( mB * mK ) - ( mJ * mC ) ); + tmp3 = ( ( mF * mO ) - ( mN * mG ) ); + tmp4 = ( ( mJ * mD ) - ( mB * mL ) ); + tmp5 = ( ( mN * mH ) - ( mF * mP ) ); + res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) ); + res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) ); + res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) ); + res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) ); + detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) ); + res1.setX( ( mI * tmp1 ) ); + res1.setY( ( mM * tmp0 ) ); + res1.setZ( ( mA * tmp1 ) ); + res1.setW( ( mE * tmp0 ) ); + res3.setX( ( mI * tmp3 ) ); + res3.setY( ( mM * tmp2 ) ); + res3.setZ( ( mA * tmp3 ) ); + res3.setW( ( mE * tmp2 ) ); + res2.setX( ( mI * tmp5 ) ); + res2.setY( ( mM * tmp4 ) ); + res2.setZ( ( mA * tmp5 ) ); + res2.setW( ( mE * tmp4 ) ); + tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); + tmp1 = ( ( mM * mF ) - ( mE * mN ) ); + tmp2 = ( ( mI * mD ) - ( mA * mL ) ); + tmp3 = ( ( mM * mH ) - ( mE * mP ) ); + tmp4 = ( ( mI * mC ) - ( mA * mK ) ); + tmp5 = ( ( mM * mG ) - ( mE * mO ) ); + res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) ); + res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) ); + res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) ); + res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) ); + res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) ); + res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) ); + res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) ); + res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) ); + res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) ); + res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) ); + res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) ); + res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) ); + return Matrix4( + ( res0 * detInv ), + ( res1 * detInv ), + ( res2 * detInv ), + ( res3 * detInv ) + ); +} + +inline const Matrix4 affineInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( inverse( affineMat ) ); +} + +inline const Matrix4 orthoInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( orthoInverse( affineMat ) ); +} + +inline float determinant( const Matrix4 & mat ) +{ + float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + mA = mat.getCol0().getX(); + mB = mat.getCol0().getY(); + mC = mat.getCol0().getZ(); + mD = mat.getCol0().getW(); + mE = mat.getCol1().getX(); + mF = mat.getCol1().getY(); + mG = mat.getCol1().getZ(); + mH = mat.getCol1().getW(); + mI = mat.getCol2().getX(); + mJ = mat.getCol2().getY(); + mK = mat.getCol2().getZ(); + mL = mat.getCol2().getW(); + mM = mat.getCol3().getX(); + mN = mat.getCol3().getY(); + mO = mat.getCol3().getZ(); + mP = mat.getCol3().getW(); + tmp0 = ( ( mK * mD ) - ( mC * mL ) ); + tmp1 = ( ( mO * mH ) - ( mG * mP ) ); + tmp2 = ( ( mB * mK ) - ( mJ * mC ) ); + tmp3 = ( ( mF * mO ) - ( mN * mG ) ); + tmp4 = ( ( mJ * mD ) - ( mB * mL ) ); + tmp5 = ( ( mN * mH ) - ( mF * mP ) ); + dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ); + dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ); + dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ); + dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ); + return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) ); +} + +inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ), + ( mCol3 + mat.mCol3 ) + ); +} + +inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ), + ( mCol3 - mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix4 Matrix4::operator -( ) const +{ + return Matrix4( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ), + ( -mCol3 ) + ); +} + +inline const Matrix4 absPerElem( const Matrix4 & mat ) +{ + return Matrix4( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ), + absPerElem( mat.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::operator *( float scalar ) const +{ + return Matrix4( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ), + ( mCol3 * scalar ) + ); +} + +inline Matrix4 & Matrix4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ) +{ + return mat * scalar; +} + +inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const +{ + return Vector4( + ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ), + ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ), + ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ), + ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) ) + ); +} + +inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const +{ + return Vector4( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ), + ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ); +} + +inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const +{ + return Vector4( + ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ), + ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ), + ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ), + ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() ) + ); +} + +inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const +{ + return Matrix4( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ), + ( *this * mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const +{ + return Matrix4( + ( *this * tfrm.getCol0() ), + ( *this * tfrm.getCol1() ), + ( *this * tfrm.getCol2() ), + ( *this * Point3( tfrm.getCol3() ) ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) +{ + return Matrix4( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ), + mulPerElem( mat0.getCol3(), mat1.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::identity( ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) +{ + mCol0.setXYZ( mat3.getCol0() ); + mCol1.setXYZ( mat3.getCol1() ); + mCol2.setXYZ( mat3.getCol2() ); + return *this; +} + +inline const Matrix3 Matrix4::getUpper3x3( ) const +{ + return Matrix3( + mCol0.getXYZ( ), + mCol1.getXYZ( ), + mCol2.getXYZ( ) + ); +} + +inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec ) +{ + mCol3.setXYZ( translateVec ); + return *this; +} + +inline const Vector3 Matrix4::getTranslation( ) const +{ + return mCol3.getXYZ( ); +} + +inline const Matrix4 Matrix4::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4::xAxis( ), + Vector4( 0.0f, c, s, 0.0f ), + Vector4( 0.0f, -s, c, 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4( c, 0.0f, -s, 0.0f ), + Vector4::yAxis( ), + Vector4( s, 0.0f, c, 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4( c, s, 0.0f, 0.0f ), + Vector4( -s, c, 0.0f, 0.0f ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Matrix4( + Vector4( ( cZ * cY ), ( sZ * cY ), -sY, 0.0f ), + Vector4( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f ), + Vector4( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec ) +{ + float x, y, z, s, c, oneMinusC, xy, yz, zx; + s = sinf( radians ); + c = cosf( radians ); + x = unitVec.getX(); + y = unitVec.getY(); + z = unitVec.getZ(); + xy = ( x * y ); + yz = ( y * z ); + zx = ( z * x ); + oneMinusC = ( 1.0f - c ); + return Matrix4( + Vector4( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f ), + Vector4( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f ), + Vector4( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( const Quat & unitQuat ) +{ + return Matrix4( Transform3::rotation( unitQuat ) ); +} + +inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec ) +{ + return Matrix4( + Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec ) +{ + return Matrix4( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ), + mat.getCol3() + ); +} + +inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat ) +{ + Vector4 scale4; + scale4 = Vector4( scaleVec, 1.0f ); + return Matrix4( + mulPerElem( mat.getCol0(), scale4 ), + mulPerElem( mat.getCol1(), scale4 ), + mulPerElem( mat.getCol2(), scale4 ), + mulPerElem( mat.getCol3(), scale4 ) + ); +} + +inline const Matrix4 Matrix4::translation( const Vector3 & translateVec ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4( translateVec, 1.0f ) + ); +} + +inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec ) +{ + Matrix4 m4EyeFrame; + Vector3 v3X, v3Y, v3Z; + v3Y = normalize( upVec ); + v3Z = normalize( ( eyePos - lookAtPos ) ); + v3X = normalize( cross( v3Y, v3Z ) ); + v3Y = cross( v3Z, v3X ); + m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) ); + return orthoInverse( m4EyeFrame ); +} + +inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) +{ + float f, rangeInv; + f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) ); + rangeInv = ( 1.0f / ( zNear - zFar ) ); + return Matrix4( + Vector4( ( f / aspect ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, f, 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f ), + Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f ) + ); +} + +inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2; + sum_rl = ( right + left ); + sum_tb = ( top + bottom ); + sum_nf = ( zNear + zFar ); + inv_rl = ( 1.0f / ( right - left ) ); + inv_tb = ( 1.0f / ( top - bottom ) ); + inv_nf = ( 1.0f / ( zNear - zFar ) ); + n2 = ( zNear + zNear ); + return Matrix4( + Vector4( ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f ), + Vector4( ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ), + Vector4( 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f ) + ); +} + +inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf; + sum_rl = ( right + left ); + sum_tb = ( top + bottom ); + sum_nf = ( zNear + zFar ); + inv_rl = ( 1.0f / ( right - left ) ); + inv_tb = ( 1.0f / ( top - bottom ) ); + inv_nf = ( 1.0f / ( zNear - zFar ) ); + return Matrix4( + Vector4( ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ), + Vector4( ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f ) + ); +} + +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix4 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); + print( mat.getRow( 3 ) ); +} + +inline void print( const Matrix4 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Transform3::Transform3( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; +} + +inline Transform3::Transform3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec ) +{ + this->setUpper3x3( tfrm ); + this->setTranslation( translateVec ); +} + +inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec ) +{ + this->setUpper3x3( Matrix3( unitQuat ) ); + this->setTranslation( translateVec ); +} + +inline Transform3 & Transform3::setCol0( const Vector3 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Transform3 & Transform3::setCol1( const Vector3 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Transform3 & Transform3::setCol2( const Vector3 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Transform3 & Transform3::setCol3( const Vector3 & _col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Transform3 & Transform3::setCol( int col, const Vector3 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Transform3 & Transform3::setRow( int row, const Vector4 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Transform3 & Transform3::setElem( int col, int row, float val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Transform3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Transform3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Transform3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Transform3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Transform3::getCol3( ) const +{ + return mCol3; +} + +inline const Vector3 Transform3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Transform3::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector3 & Transform3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Transform3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Transform3 & Transform3::operator =( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; + return *this; +} + +inline const Transform3 inverse( const Transform3 & tfrm ) +{ + Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2; + float detinv; + tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() ); + tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() ); + tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() ); + detinv = ( 1.0f / dot( tfrm.getCol2(), tmp2 ) ); + inv0 = Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ); + inv1 = Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ); + inv2 = Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) ); + return Transform3( + inv0, + inv1, + inv2, + Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) + ); +} + +inline const Transform3 orthoInverse( const Transform3 & tfrm ) +{ + Vector3 inv0, inv1, inv2; + inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() ); + inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() ); + inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() ); + return Transform3( + inv0, + inv1, + inv2, + Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) + ); +} + +inline const Transform3 absPerElem( const Transform3 & tfrm ) +{ + return Transform3( + absPerElem( tfrm.getCol0() ), + absPerElem( tfrm.getCol1() ), + absPerElem( tfrm.getCol2() ), + absPerElem( tfrm.getCol3() ) + ); +} + +inline const Vector3 Transform3::operator *( const Vector3 & vec ) const +{ + return Vector3( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ); +} + +inline const Point3 Transform3::operator *( const Point3 & pnt ) const +{ + return Point3( + ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ), + ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ), + ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ) + ); +} + +inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const +{ + return Transform3( + ( *this * tfrm.mCol0 ), + ( *this * tfrm.mCol1 ), + ( *this * tfrm.mCol2 ), + Vector3( ( *this * Point3( tfrm.mCol3 ) ) ) + ); +} + +inline Transform3 & Transform3::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) +{ + return Transform3( + mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ), + mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ), + mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ), + mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() ) + ); +} + +inline const Transform3 Transform3::identity( ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) +{ + mCol0 = tfrm.getCol0(); + mCol1 = tfrm.getCol1(); + mCol2 = tfrm.getCol2(); + return *this; +} + +inline const Matrix3 Transform3::getUpper3x3( ) const +{ + return Matrix3( mCol0, mCol1, mCol2 ); +} + +inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec ) +{ + mCol3 = translateVec; + return *this; +} + +inline const Vector3 Transform3::getTranslation( ) const +{ + return mCol3; +} + +inline const Transform3 Transform3::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3::xAxis( ), + Vector3( 0.0f, c, s ), + Vector3( 0.0f, -s, c ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3( c, 0.0f, -s ), + Vector3::yAxis( ), + Vector3( s, 0.0f, c ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3( c, s, 0.0f ), + Vector3( -s, c, 0.0f ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Transform3( + Vector3( ( cZ * cY ), ( sZ * cY ), -sY ), + Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ), + Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec ) +{ + return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::rotation( const Quat & unitQuat ) +{ + return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::scale( const Vector3 & scaleVec ) +{ + return Transform3( + Vector3( scaleVec.getX(), 0.0f, 0.0f ), + Vector3( 0.0f, scaleVec.getY(), 0.0f ), + Vector3( 0.0f, 0.0f, scaleVec.getZ() ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec ) +{ + return Transform3( + ( tfrm.getCol0() * scaleVec.getX( ) ), + ( tfrm.getCol1() * scaleVec.getY( ) ), + ( tfrm.getCol2() * scaleVec.getZ( ) ), + tfrm.getCol3() + ); +} + +inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm ) +{ + return Transform3( + mulPerElem( tfrm.getCol0(), scaleVec ), + mulPerElem( tfrm.getCol1(), scaleVec ), + mulPerElem( tfrm.getCol2(), scaleVec ), + mulPerElem( tfrm.getCol3(), scaleVec ) + ); +} + +inline const Transform3 Transform3::translation( const Vector3 & translateVec ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + translateVec + ); +} + +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Transform3 & tfrm ) +{ + print( tfrm.getRow( 0 ) ); + print( tfrm.getRow( 1 ) ); + print( tfrm.getRow( 2 ) ); +} + +inline void print( const Transform3 & tfrm, const char * name ) +{ + printf("%s:\n", name); + print( tfrm ); +} + +#endif + +inline Quat::Quat( const Matrix3 & tfrm ) +{ + float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw; + int negTrace, ZgtX, ZgtY, YgtX; + int largestXorY, largestYorZ, largestZorX; + + xx = tfrm.getCol0().getX(); + yx = tfrm.getCol0().getY(); + zx = tfrm.getCol0().getZ(); + xy = tfrm.getCol1().getX(); + yy = tfrm.getCol1().getY(); + zy = tfrm.getCol1().getZ(); + xz = tfrm.getCol2().getX(); + yz = tfrm.getCol2().getY(); + zz = tfrm.getCol2().getZ(); + + trace = ( ( xx + yy ) + zz ); + + negTrace = ( trace < 0.0f ); + ZgtX = zz > xx; + ZgtY = zz > yy; + YgtX = yy > xx; + largestXorY = ( !ZgtX || !ZgtY ) && negTrace; + largestYorZ = ( YgtX || ZgtX ) && negTrace; + largestZorX = ( ZgtY || !YgtX ) && negTrace; + + if ( largestXorY ) + { + zz = -zz; + xy = -xy; + } + if ( largestYorZ ) + { + xx = -xx; + yz = -yz; + } + if ( largestZorX ) + { + yy = -yy; + zx = -zx; + } + + radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); + scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) ); + + tmpx = ( ( zy - yz ) * scale ); + tmpy = ( ( xz - zx ) * scale ); + tmpz = ( ( yx - xy ) * scale ); + tmpw = ( radicand * scale ); + qx = tmpx; + qy = tmpy; + qz = tmpz; + qw = tmpw; + + if ( largestXorY ) + { + qx = tmpw; + qy = tmpz; + qz = tmpy; + qw = tmpx; + } + if ( largestYorZ ) + { + tmpx = qx; + tmpz = qz; + qx = qy; + qy = tmpx; + qz = qw; + qw = tmpz; + } + + mX = qx; + mY = qy; + mZ = qz; + mW = qw; +} + +inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 ) +{ + return Matrix3( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ) + ); +} + +inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 ) +{ + return Matrix4( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ), + ( tfrm0 * tfrm1.getW( ) ) + ); +} + +inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ) +{ + return Vector3( + ( ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() ) ), + ( ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() ) ), + ( ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() ) ) + ); +} + +inline const Matrix3 crossMatrix( const Vector3 & vec ) +{ + return Matrix3( + Vector3( 0.0f, vec.getZ(), -vec.getY() ), + Vector3( -vec.getZ(), 0.0f, vec.getX() ), + Vector3( vec.getY(), -vec.getX(), 0.0f ) + ); +} + +inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ) +{ + return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); +} + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h new file mode 100644 index 00000000000..764e01708f9 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h @@ -0,0 +1,433 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_QUAT_AOS_CPP_H +#define _VECTORMATH_QUAT_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +inline Quat::Quat( const Quat & quat ) +{ + mX = quat.mX; + mY = quat.mY; + mZ = quat.mZ; + mW = quat.mW; +} + +inline Quat::Quat( float _x, float _y, float _z, float _w ) +{ + mX = _x; + mY = _y; + mZ = _z; + mW = _w; +} + +inline Quat::Quat( const Vector3 & xyz, float _w ) +{ + this->setXYZ( xyz ); + this->setW( _w ); +} + +inline Quat::Quat( const Vector4 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + mW = vec.getW(); +} + +inline Quat::Quat( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; + mW = scalar; +} + +inline const Quat Quat::identity( ) +{ + return Quat( 0.0f, 0.0f, 0.0f, 1.0f ); +} + +inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ) +{ + return ( quat0 + ( ( quat1 - quat0 ) * t ) ); +} + +inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ) +{ + Quat start; + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitQuat0, unitQuat1 ); + if ( cosAngle < 0.0f ) { + cosAngle = -cosAngle; + start = ( -unitQuat0 ); + } else { + start = unitQuat0; + } + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) ); +} + +inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ) +{ + Quat tmp0, tmp1; + tmp0 = slerp( t, unitQuat0, unitQuat3 ); + tmp1 = slerp( t, unitQuat1, unitQuat2 ); + return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 ); +} + +inline void loadXYZW( Quat & quat, const float * fptr ) +{ + quat = Quat( fptr[0], fptr[1], fptr[2], fptr[3] ); +} + +inline void storeXYZW( const Quat & quat, float * fptr ) +{ + fptr[0] = quat.getX(); + fptr[1] = quat.getY(); + fptr[2] = quat.getZ(); + fptr[3] = quat.getW(); +} + +inline Quat & Quat::operator =( const Quat & quat ) +{ + mX = quat.mX; + mY = quat.mY; + mZ = quat.mZ; + mW = quat.mW; + return *this; +} + +inline Quat & Quat::setXYZ( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + return *this; +} + +inline const Vector3 Quat::getXYZ( ) const +{ + return Vector3( mX, mY, mZ ); +} + +inline Quat & Quat::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Quat::getX( ) const +{ + return mX; +} + +inline Quat & Quat::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Quat::getY( ) const +{ + return mY; +} + +inline Quat & Quat::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Quat::getZ( ) const +{ + return mZ; +} + +inline Quat & Quat::setW( float _w ) +{ + mW = _w; + return *this; +} + +inline float Quat::getW( ) const +{ + return mW; +} + +inline Quat & Quat::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Quat::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Quat::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Quat::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Quat Quat::operator +( const Quat & quat ) const +{ + return Quat( + ( mX + quat.mX ), + ( mY + quat.mY ), + ( mZ + quat.mZ ), + ( mW + quat.mW ) + ); +} + +inline const Quat Quat::operator -( const Quat & quat ) const +{ + return Quat( + ( mX - quat.mX ), + ( mY - quat.mY ), + ( mZ - quat.mZ ), + ( mW - quat.mW ) + ); +} + +inline const Quat Quat::operator *( float scalar ) const +{ + return Quat( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ), + ( mW * scalar ) + ); +} + +inline Quat & Quat::operator +=( const Quat & quat ) +{ + *this = *this + quat; + return *this; +} + +inline Quat & Quat::operator -=( const Quat & quat ) +{ + *this = *this - quat; + return *this; +} + +inline Quat & Quat::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Quat Quat::operator /( float scalar ) const +{ + return Quat( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ), + ( mW / scalar ) + ); +} + +inline Quat & Quat::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Quat Quat::operator -( ) const +{ + return Quat( + -mX, + -mY, + -mZ, + -mW + ); +} + +inline const Quat operator *( float scalar, const Quat & quat ) +{ + return quat * scalar; +} + +inline float dot( const Quat & quat0, const Quat & quat1 ) +{ + float result; + result = ( quat0.getX() * quat1.getX() ); + result = ( result + ( quat0.getY() * quat1.getY() ) ); + result = ( result + ( quat0.getZ() * quat1.getZ() ) ); + result = ( result + ( quat0.getW() * quat1.getW() ) ); + return result; +} + +inline float norm( const Quat & quat ) +{ + float result; + result = ( quat.getX() * quat.getX() ); + result = ( result + ( quat.getY() * quat.getY() ) ); + result = ( result + ( quat.getZ() * quat.getZ() ) ); + result = ( result + ( quat.getW() * quat.getW() ) ); + return result; +} + +inline float length( const Quat & quat ) +{ + return ::sqrtf( norm( quat ) ); +} + +inline const Quat normalize( const Quat & quat ) +{ + float lenSqr, lenInv; + lenSqr = norm( quat ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Quat( + ( quat.getX() * lenInv ), + ( quat.getY() * lenInv ), + ( quat.getZ() * lenInv ), + ( quat.getW() * lenInv ) + ); +} + +inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ) +{ + float cosHalfAngleX2, recipCosHalfAngleX2; + cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) ); + recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 ); + return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) ); +} + +inline const Quat Quat::rotation( float radians, const Vector3 & unitVec ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( ( unitVec * s ), c ); +} + +inline const Quat Quat::rotationX( float radians ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( s, 0.0f, 0.0f, c ); +} + +inline const Quat Quat::rotationY( float radians ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( 0.0f, s, 0.0f, c ); +} + +inline const Quat Quat::rotationZ( float radians ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( 0.0f, 0.0f, s, c ); +} + +inline const Quat Quat::operator *( const Quat & quat ) const +{ + return Quat( + ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ), + ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ), + ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ), + ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) ) + ); +} + +inline Quat & Quat::operator *=( const Quat & quat ) +{ + *this = *this * quat; + return *this; +} + +inline const Vector3 rotate( const Quat & quat, const Vector3 & vec ) +{ + float tmpX, tmpY, tmpZ, tmpW; + tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) ); + tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) ); + tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) ); + tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) ); + return Vector3( + ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ), + ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ), + ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) ) + ); +} + +inline const Quat conj( const Quat & quat ) +{ + return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() ); +} + +inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ) +{ + return Quat( + ( select1 )? quat1.getX() : quat0.getX(), + ( select1 )? quat1.getY() : quat0.getY(), + ( select1 )? quat1.getZ() : quat0.getZ(), + ( select1 )? quat1.getW() : quat0.getW() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Quat & quat ) +{ + printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); +} + +inline void print( const Quat & quat, const char * name ) +{ + printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h new file mode 100644 index 00000000000..46d4d6b3e5c --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h @@ -0,0 +1,1426 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_VEC_AOS_CPP_H +#define _VECTORMATH_VEC_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Constants + +#define _VECTORMATH_SLERP_TOL 0.999f + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +inline Vector3::Vector3( const Vector3 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; +} + +inline Vector3::Vector3( float _x, float _y, float _z ) +{ + mX = _x; + mY = _y; + mZ = _z; +} + +inline Vector3::Vector3( const Point3 & pnt ) +{ + mX = pnt.getX(); + mY = pnt.getY(); + mZ = pnt.getZ(); +} + +inline Vector3::Vector3( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; +} + +inline const Vector3 Vector3::xAxis( ) +{ + return Vector3( 1.0f, 0.0f, 0.0f ); +} + +inline const Vector3 Vector3::yAxis( ) +{ + return Vector3( 0.0f, 1.0f, 0.0f ); +} + +inline const Vector3 Vector3::zAxis( ) +{ + return Vector3( 0.0f, 0.0f, 1.0f ); +} + +inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ) +{ + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitVec0, unitVec1 ); + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) ); +} + +inline void loadXYZ( Vector3 & vec, const float * fptr ) +{ + vec = Vector3( fptr[0], fptr[1], fptr[2] ); +} + +inline void storeXYZ( const Vector3 & vec, float * fptr ) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); +} + +inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Vector3 & Vector3::operator =( const Vector3 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + return *this; +} + +inline Vector3 & Vector3::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Vector3::getX( ) const +{ + return mX; +} + +inline Vector3 & Vector3::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Vector3::getY( ) const +{ + return mY; +} + +inline Vector3 & Vector3::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Vector3::getZ( ) const +{ + return mZ; +} + +inline Vector3 & Vector3::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Vector3::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Vector3::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Vector3::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector3 Vector3::operator +( const Vector3 & vec ) const +{ + return Vector3( + ( mX + vec.mX ), + ( mY + vec.mY ), + ( mZ + vec.mZ ) + ); +} + +inline const Vector3 Vector3::operator -( const Vector3 & vec ) const +{ + return Vector3( + ( mX - vec.mX ), + ( mY - vec.mY ), + ( mZ - vec.mZ ) + ); +} + +inline const Point3 Vector3::operator +( const Point3 & pnt ) const +{ + return Point3( + ( mX + pnt.getX() ), + ( mY + pnt.getY() ), + ( mZ + pnt.getZ() ) + ); +} + +inline const Vector3 Vector3::operator *( float scalar ) const +{ + return Vector3( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ) + ); +} + +inline Vector3 & Vector3::operator +=( const Vector3 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector3 & Vector3::operator -=( const Vector3 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector3 & Vector3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector3 Vector3::operator /( float scalar ) const +{ + return Vector3( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ) + ); +} + +inline Vector3 & Vector3::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector3 Vector3::operator -( ) const +{ + return Vector3( + -mX, + -mY, + -mZ + ); +} + +inline const Vector3 operator *( float scalar, const Vector3 & vec ) +{ + return vec * scalar; +} + +inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec0.getX() * vec1.getX() ), + ( vec0.getY() * vec1.getY() ), + ( vec0.getZ() * vec1.getZ() ) + ); +} + +inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec0.getX() / vec1.getX() ), + ( vec0.getY() / vec1.getY() ), + ( vec0.getZ() / vec1.getZ() ) + ); +} + +inline const Vector3 recipPerElem( const Vector3 & vec ) +{ + return Vector3( + ( 1.0f / vec.getX() ), + ( 1.0f / vec.getY() ), + ( 1.0f / vec.getZ() ) + ); +} + +inline const Vector3 sqrtPerElem( const Vector3 & vec ) +{ + return Vector3( + sqrtf( vec.getX() ), + sqrtf( vec.getY() ), + sqrtf( vec.getZ() ) + ); +} + +inline const Vector3 rsqrtPerElem( const Vector3 & vec ) +{ + return Vector3( + ( 1.0f / sqrtf( vec.getX() ) ), + ( 1.0f / sqrtf( vec.getY() ) ), + ( 1.0f / sqrtf( vec.getZ() ) ) + ); +} + +inline const Vector3 absPerElem( const Vector3 & vec ) +{ + return Vector3( + fabsf( vec.getX() ), + fabsf( vec.getY() ), + fabsf( vec.getZ() ) + ); +} + +inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ), + ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ), + ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ) + ); +} + +inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ() + ); +} + +inline float maxElem( const Vector3 & vec ) +{ + float result; + result = (vec.getX() > vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() > result)? vec.getZ() : result; + return result; +} + +inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ() + ); +} + +inline float minElem( const Vector3 & vec ) +{ + float result; + result = (vec.getX() < vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() < result)? vec.getZ() : result; + return result; +} + +inline float sum( const Vector3 & vec ) +{ + float result; + result = ( vec.getX() + vec.getY() ); + result = ( result + vec.getZ() ); + return result; +} + +inline float dot( const Vector3 & vec0, const Vector3 & vec1 ) +{ + float result; + result = ( vec0.getX() * vec1.getX() ); + result = ( result + ( vec0.getY() * vec1.getY() ) ); + result = ( result + ( vec0.getZ() * vec1.getZ() ) ); + return result; +} + +inline float lengthSqr( const Vector3 & vec ) +{ + float result; + result = ( vec.getX() * vec.getX() ); + result = ( result + ( vec.getY() * vec.getY() ) ); + result = ( result + ( vec.getZ() * vec.getZ() ) ); + return result; +} + +inline float length( const Vector3 & vec ) +{ + return ::sqrtf( lengthSqr( vec ) ); +} + +inline const Vector3 normalize( const Vector3 & vec ) +{ + float lenSqr, lenInv; + lenSqr = lengthSqr( vec ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Vector3( + ( vec.getX() * lenInv ), + ( vec.getY() * lenInv ), + ( vec.getZ() * lenInv ) + ); +} + +inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ), + ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ), + ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) ) + ); +} + +inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ) +{ + return Vector3( + ( select1 )? vec1.getX() : vec0.getX(), + ( select1 )? vec1.getY() : vec0.getY(), + ( select1 )? vec1.getZ() : vec0.getZ() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector3 & vec ) +{ + printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() ); +} + +inline void print( const Vector3 & vec, const char * name ) +{ + printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() ); +} + +#endif + +inline Vector4::Vector4( const Vector4 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + mW = vec.mW; +} + +inline Vector4::Vector4( float _x, float _y, float _z, float _w ) +{ + mX = _x; + mY = _y; + mZ = _z; + mW = _w; +} + +inline Vector4::Vector4( const Vector3 & xyz, float _w ) +{ + this->setXYZ( xyz ); + this->setW( _w ); +} + +inline Vector4::Vector4( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + mW = 0.0f; +} + +inline Vector4::Vector4( const Point3 & pnt ) +{ + mX = pnt.getX(); + mY = pnt.getY(); + mZ = pnt.getZ(); + mW = 1.0f; +} + +inline Vector4::Vector4( const Quat & quat ) +{ + mX = quat.getX(); + mY = quat.getY(); + mZ = quat.getZ(); + mW = quat.getW(); +} + +inline Vector4::Vector4( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; + mW = scalar; +} + +inline const Vector4 Vector4::xAxis( ) +{ + return Vector4( 1.0f, 0.0f, 0.0f, 0.0f ); +} + +inline const Vector4 Vector4::yAxis( ) +{ + return Vector4( 0.0f, 1.0f, 0.0f, 0.0f ); +} + +inline const Vector4 Vector4::zAxis( ) +{ + return Vector4( 0.0f, 0.0f, 1.0f, 0.0f ); +} + +inline const Vector4 Vector4::wAxis( ) +{ + return Vector4( 0.0f, 0.0f, 0.0f, 1.0f ); +} + +inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ) +{ + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitVec0, unitVec1 ); + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) ); +} + +inline void loadXYZW( Vector4 & vec, const float * fptr ) +{ + vec = Vector4( fptr[0], fptr[1], fptr[2], fptr[3] ); +} + +inline void storeXYZW( const Vector4 & vec, float * fptr ) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); + fptr[3] = vec.getW(); +} + +inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 4; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 4; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Vector4 & Vector4::operator =( const Vector4 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + mW = vec.mW; + return *this; +} + +inline Vector4 & Vector4::setXYZ( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + return *this; +} + +inline const Vector3 Vector4::getXYZ( ) const +{ + return Vector3( mX, mY, mZ ); +} + +inline Vector4 & Vector4::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Vector4::getX( ) const +{ + return mX; +} + +inline Vector4 & Vector4::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Vector4::getY( ) const +{ + return mY; +} + +inline Vector4 & Vector4::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Vector4::getZ( ) const +{ + return mZ; +} + +inline Vector4 & Vector4::setW( float _w ) +{ + mW = _w; + return *this; +} + +inline float Vector4::getW( ) const +{ + return mW; +} + +inline Vector4 & Vector4::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Vector4::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Vector4::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Vector4::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector4 Vector4::operator +( const Vector4 & vec ) const +{ + return Vector4( + ( mX + vec.mX ), + ( mY + vec.mY ), + ( mZ + vec.mZ ), + ( mW + vec.mW ) + ); +} + +inline const Vector4 Vector4::operator -( const Vector4 & vec ) const +{ + return Vector4( + ( mX - vec.mX ), + ( mY - vec.mY ), + ( mZ - vec.mZ ), + ( mW - vec.mW ) + ); +} + +inline const Vector4 Vector4::operator *( float scalar ) const +{ + return Vector4( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ), + ( mW * scalar ) + ); +} + +inline Vector4 & Vector4::operator +=( const Vector4 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector4 & Vector4::operator -=( const Vector4 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector4 & Vector4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector4 Vector4::operator /( float scalar ) const +{ + return Vector4( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ), + ( mW / scalar ) + ); +} + +inline Vector4 & Vector4::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector4 Vector4::operator -( ) const +{ + return Vector4( + -mX, + -mY, + -mZ, + -mW + ); +} + +inline const Vector4 operator *( float scalar, const Vector4 & vec ) +{ + return vec * scalar; +} + +inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec0.getX() * vec1.getX() ), + ( vec0.getY() * vec1.getY() ), + ( vec0.getZ() * vec1.getZ() ), + ( vec0.getW() * vec1.getW() ) + ); +} + +inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec0.getX() / vec1.getX() ), + ( vec0.getY() / vec1.getY() ), + ( vec0.getZ() / vec1.getZ() ), + ( vec0.getW() / vec1.getW() ) + ); +} + +inline const Vector4 recipPerElem( const Vector4 & vec ) +{ + return Vector4( + ( 1.0f / vec.getX() ), + ( 1.0f / vec.getY() ), + ( 1.0f / vec.getZ() ), + ( 1.0f / vec.getW() ) + ); +} + +inline const Vector4 sqrtPerElem( const Vector4 & vec ) +{ + return Vector4( + sqrtf( vec.getX() ), + sqrtf( vec.getY() ), + sqrtf( vec.getZ() ), + sqrtf( vec.getW() ) + ); +} + +inline const Vector4 rsqrtPerElem( const Vector4 & vec ) +{ + return Vector4( + ( 1.0f / sqrtf( vec.getX() ) ), + ( 1.0f / sqrtf( vec.getY() ) ), + ( 1.0f / sqrtf( vec.getZ() ) ), + ( 1.0f / sqrtf( vec.getW() ) ) + ); +} + +inline const Vector4 absPerElem( const Vector4 & vec ) +{ + return Vector4( + fabsf( vec.getX() ), + fabsf( vec.getY() ), + fabsf( vec.getZ() ), + fabsf( vec.getW() ) + ); +} + +inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ), + ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ), + ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ), + ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() ) + ); +} + +inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(), + (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW() + ); +} + +inline float maxElem( const Vector4 & vec ) +{ + float result; + result = (vec.getX() > vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() > result)? vec.getZ() : result; + result = (vec.getW() > result)? vec.getW() : result; + return result; +} + +inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(), + (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW() + ); +} + +inline float minElem( const Vector4 & vec ) +{ + float result; + result = (vec.getX() < vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() < result)? vec.getZ() : result; + result = (vec.getW() < result)? vec.getW() : result; + return result; +} + +inline float sum( const Vector4 & vec ) +{ + float result; + result = ( vec.getX() + vec.getY() ); + result = ( result + vec.getZ() ); + result = ( result + vec.getW() ); + return result; +} + +inline float dot( const Vector4 & vec0, const Vector4 & vec1 ) +{ + float result; + result = ( vec0.getX() * vec1.getX() ); + result = ( result + ( vec0.getY() * vec1.getY() ) ); + result = ( result + ( vec0.getZ() * vec1.getZ() ) ); + result = ( result + ( vec0.getW() * vec1.getW() ) ); + return result; +} + +inline float lengthSqr( const Vector4 & vec ) +{ + float result; + result = ( vec.getX() * vec.getX() ); + result = ( result + ( vec.getY() * vec.getY() ) ); + result = ( result + ( vec.getZ() * vec.getZ() ) ); + result = ( result + ( vec.getW() * vec.getW() ) ); + return result; +} + +inline float length( const Vector4 & vec ) +{ + return ::sqrtf( lengthSqr( vec ) ); +} + +inline const Vector4 normalize( const Vector4 & vec ) +{ + float lenSqr, lenInv; + lenSqr = lengthSqr( vec ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Vector4( + ( vec.getX() * lenInv ), + ( vec.getY() * lenInv ), + ( vec.getZ() * lenInv ), + ( vec.getW() * lenInv ) + ); +} + +inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 ) +{ + return Vector4( + ( select1 )? vec1.getX() : vec0.getX(), + ( select1 )? vec1.getY() : vec0.getY(), + ( select1 )? vec1.getZ() : vec0.getZ(), + ( select1 )? vec1.getW() : vec0.getW() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector4 & vec ) +{ + printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); +} + +inline void print( const Vector4 & vec, const char * name ) +{ + printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); +} + +#endif + +inline Point3::Point3( const Point3 & pnt ) +{ + mX = pnt.mX; + mY = pnt.mY; + mZ = pnt.mZ; +} + +inline Point3::Point3( float _x, float _y, float _z ) +{ + mX = _x; + mY = _y; + mZ = _z; +} + +inline Point3::Point3( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); +} + +inline Point3::Point3( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; +} + +inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ) +{ + return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); +} + +inline void loadXYZ( Point3 & pnt, const float * fptr ) +{ + pnt = Point3( fptr[0], fptr[1], fptr[2] ); +} + +inline void storeXYZ( const Point3 & pnt, float * fptr ) +{ + fptr[0] = pnt.getX(); + fptr[1] = pnt.getY(); + fptr[2] = pnt.getZ(); +} + +inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Point3 & Point3::operator =( const Point3 & pnt ) +{ + mX = pnt.mX; + mY = pnt.mY; + mZ = pnt.mZ; + return *this; +} + +inline Point3 & Point3::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Point3::getX( ) const +{ + return mX; +} + +inline Point3 & Point3::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Point3::getY( ) const +{ + return mY; +} + +inline Point3 & Point3::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Point3::getZ( ) const +{ + return mZ; +} + +inline Point3 & Point3::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Point3::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Point3::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Point3::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector3 Point3::operator -( const Point3 & pnt ) const +{ + return Vector3( + ( mX - pnt.mX ), + ( mY - pnt.mY ), + ( mZ - pnt.mZ ) + ); +} + +inline const Point3 Point3::operator +( const Vector3 & vec ) const +{ + return Point3( + ( mX + vec.getX() ), + ( mY + vec.getY() ), + ( mZ + vec.getZ() ) + ); +} + +inline const Point3 Point3::operator -( const Vector3 & vec ) const +{ + return Point3( + ( mX - vec.getX() ), + ( mY - vec.getY() ), + ( mZ - vec.getZ() ) + ); +} + +inline Point3 & Point3::operator +=( const Vector3 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Point3 & Point3::operator -=( const Vector3 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt0.getX() * pnt1.getX() ), + ( pnt0.getY() * pnt1.getY() ), + ( pnt0.getZ() * pnt1.getZ() ) + ); +} + +inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt0.getX() / pnt1.getX() ), + ( pnt0.getY() / pnt1.getY() ), + ( pnt0.getZ() / pnt1.getZ() ) + ); +} + +inline const Point3 recipPerElem( const Point3 & pnt ) +{ + return Point3( + ( 1.0f / pnt.getX() ), + ( 1.0f / pnt.getY() ), + ( 1.0f / pnt.getZ() ) + ); +} + +inline const Point3 sqrtPerElem( const Point3 & pnt ) +{ + return Point3( + sqrtf( pnt.getX() ), + sqrtf( pnt.getY() ), + sqrtf( pnt.getZ() ) + ); +} + +inline const Point3 rsqrtPerElem( const Point3 & pnt ) +{ + return Point3( + ( 1.0f / sqrtf( pnt.getX() ) ), + ( 1.0f / sqrtf( pnt.getY() ) ), + ( 1.0f / sqrtf( pnt.getZ() ) ) + ); +} + +inline const Point3 absPerElem( const Point3 & pnt ) +{ + return Point3( + fabsf( pnt.getX() ), + fabsf( pnt.getY() ), + fabsf( pnt.getZ() ) + ); +} + +inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ), + ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ), + ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() ) + ); +} + +inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(), + (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(), + (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ() + ); +} + +inline float maxElem( const Point3 & pnt ) +{ + float result; + result = (pnt.getX() > pnt.getY())? pnt.getX() : pnt.getY(); + result = (pnt.getZ() > result)? pnt.getZ() : result; + return result; +} + +inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(), + (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(), + (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ() + ); +} + +inline float minElem( const Point3 & pnt ) +{ + float result; + result = (pnt.getX() < pnt.getY())? pnt.getX() : pnt.getY(); + result = (pnt.getZ() < result)? pnt.getZ() : result; + return result; +} + +inline float sum( const Point3 & pnt ) +{ + float result; + result = ( pnt.getX() + pnt.getY() ); + result = ( result + pnt.getZ() ); + return result; +} + +inline const Point3 scale( const Point3 & pnt, float scaleVal ) +{ + return mulPerElem( pnt, Point3( scaleVal ) ); +} + +inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ) +{ + return mulPerElem( pnt, Point3( scaleVec ) ); +} + +inline float projection( const Point3 & pnt, const Vector3 & unitVec ) +{ + float result; + result = ( pnt.getX() * unitVec.getX() ); + result = ( result + ( pnt.getY() * unitVec.getY() ) ); + result = ( result + ( pnt.getZ() * unitVec.getZ() ) ); + return result; +} + +inline float distSqrFromOrigin( const Point3 & pnt ) +{ + return lengthSqr( Vector3( pnt ) ); +} + +inline float distFromOrigin( const Point3 & pnt ) +{ + return length( Vector3( pnt ) ); +} + +inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return lengthSqr( ( pnt1 - pnt0 ) ); +} + +inline float dist( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return length( ( pnt1 - pnt0 ) ); +} + +inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 ) +{ + return Point3( + ( select1 )? pnt1.getX() : pnt0.getX(), + ( select1 )? pnt1.getY() : pnt0.getY(), + ( select1 )? pnt1.getZ() : pnt0.getZ() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Point3 & pnt ) +{ + printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() ); +} + +inline void print( const Point3 & pnt, const char * name ) +{ + printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h new file mode 100644 index 00000000000..d00456dfeb4 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h @@ -0,0 +1,1872 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_AOS_CPP_H +#define _VECTORMATH_AOS_CPP_H + +#include + +#ifdef _VECTORMATH_DEBUG +#include +#endif + +namespace Vectormath { + +namespace Aos { + +//----------------------------------------------------------------------------- +// Forward Declarations +// + +class Vector3; +class Vector4; +class Point3; +class Quat; +class Matrix3; +class Matrix4; +class Transform3; + +// A 3-D vector in array-of-structures format +// +class Vector3 +{ + float mX; + float mY; + float mZ; +#ifndef __GNUC__ + float d; +#endif + +public: + // Default constructor; does no initialization + // + inline Vector3( ) { }; + + // Copy a 3-D vector + // + inline Vector3( const Vector3 & vec ); + + // Construct a 3-D vector from x, y, and z elements + // + inline Vector3( float x, float y, float z ); + + // Copy elements from a 3-D point into a 3-D vector + // + explicit inline Vector3( const Point3 & pnt ); + + // Set all elements of a 3-D vector to the same scalar value + // + explicit inline Vector3( float scalar ); + + // Assign one 3-D vector to another + // + inline Vector3 & operator =( const Vector3 & vec ); + + // Set the x element of a 3-D vector + // + inline Vector3 & setX( float x ); + + // Set the y element of a 3-D vector + // + inline Vector3 & setY( float y ); + + // Set the z element of a 3-D vector + // + inline Vector3 & setZ( float z ); + + // Get the x element of a 3-D vector + // + inline float getX( ) const; + + // Get the y element of a 3-D vector + // + inline float getY( ) const; + + // Get the z element of a 3-D vector + // + inline float getZ( ) const; + + // Set an x, y, or z element of a 3-D vector by index + // + inline Vector3 & setElem( int idx, float value ); + + // Get an x, y, or z element of a 3-D vector by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two 3-D vectors + // + inline const Vector3 operator +( const Vector3 & vec ) const; + + // Subtract a 3-D vector from another 3-D vector + // + inline const Vector3 operator -( const Vector3 & vec ) const; + + // Add a 3-D vector to a 3-D point + // + inline const Point3 operator +( const Point3 & pnt ) const; + + // Multiply a 3-D vector by a scalar + // + inline const Vector3 operator *( float scalar ) const; + + // Divide a 3-D vector by a scalar + // + inline const Vector3 operator /( float scalar ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Vector3 & operator +=( const Vector3 & vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Vector3 & operator -=( const Vector3 & vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector3 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector3 & operator /=( float scalar ); + + // Negate all elements of a 3-D vector + // + inline const Vector3 operator -( ) const; + + // Construct x axis + // + static inline const Vector3 xAxis( ); + + // Construct y axis + // + static inline const Vector3 yAxis( ); + + // Construct z axis + // + static inline const Vector3 zAxis( ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a 3-D vector by a scalar +// +inline const Vector3 operator *( float scalar, const Vector3 & vec ); + +// Multiply two 3-D vectors per element +// +inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Divide two 3-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Compute the reciprocal of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector3 recipPerElem( const Vector3 & vec ); + +// Compute the square root of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Vector3 sqrtPerElem( const Vector3 & vec ); + +// Compute the reciprocal square root of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Vector3 rsqrtPerElem( const Vector3 & vec ); + +// Compute the absolute value of a 3-D vector per element +// +inline const Vector3 absPerElem( const Vector3 & vec ); + +// Copy sign from one 3-D vector to another, per element +// +inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Maximum of two 3-D vectors per element +// +inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Minimum of two 3-D vectors per element +// +inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Maximum element of a 3-D vector +// +inline float maxElem( const Vector3 & vec ); + +// Minimum element of a 3-D vector +// +inline float minElem( const Vector3 & vec ); + +// Compute the sum of all elements of a 3-D vector +// +inline float sum( const Vector3 & vec ); + +// Compute the dot product of two 3-D vectors +// +inline float dot( const Vector3 & vec0, const Vector3 & vec1 ); + +// Compute the square of the length of a 3-D vector +// +inline float lengthSqr( const Vector3 & vec ); + +// Compute the length of a 3-D vector +// +inline float length( const Vector3 & vec ); + +// Normalize a 3-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector3 normalize( const Vector3 & vec ); + +// Compute cross product of two 3-D vectors +// +inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ); + +// Outer product of two 3-D vectors +// +inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 ); + +// Pre-multiply a row vector by a 3x3 matrix +// +inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ); + +// Cross-product matrix of a 3-D vector +// +inline const Matrix3 crossMatrix( const Vector3 & vec ); + +// Create cross-product matrix and multiply +// NOTE: +// Faster than separately creating a cross-product matrix and multiplying. +// +inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ); + +// Linear interpolation between two 3-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ); + +// Spherical linear interpolation between two 3-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ); + +// Conditionally select between two 3-D vectors +// +inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ); + +// Load x, y, and z elements from the first three words of a float array. +// +// +inline void loadXYZ( Vector3 & vec, const float * fptr ); + +// Store x, y, and z elements of a 3-D vector in the first three words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZ( const Vector3 & vec, float * fptr ); + +// Load three-half-floats as a 3-D vector +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ); + +// Store a 3-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 & vec ); + +// Print a 3-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 & vec, const char * name ); + +#endif + +// A 4-D vector in array-of-structures format +// +class Vector4 +{ + float mX; + float mY; + float mZ; + float mW; + +public: + // Default constructor; does no initialization + // + inline Vector4( ) { }; + + // Copy a 4-D vector + // + inline Vector4( const Vector4 & vec ); + + // Construct a 4-D vector from x, y, z, and w elements + // + inline Vector4( float x, float y, float z, float w ); + + // Construct a 4-D vector from a 3-D vector and a scalar + // + inline Vector4( const Vector3 & xyz, float w ); + + // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 + // + explicit inline Vector4( const Vector3 & vec ); + + // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 + // + explicit inline Vector4( const Point3 & pnt ); + + // Copy elements from a quaternion into a 4-D vector + // + explicit inline Vector4( const Quat & quat ); + + // Set all elements of a 4-D vector to the same scalar value + // + explicit inline Vector4( float scalar ); + + // Assign one 4-D vector to another + // + inline Vector4 & operator =( const Vector4 & vec ); + + // Set the x, y, and z elements of a 4-D vector + // NOTE: + // This function does not change the w element. + // + inline Vector4 & setXYZ( const Vector3 & vec ); + + // Get the x, y, and z elements of a 4-D vector + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a 4-D vector + // + inline Vector4 & setX( float x ); + + // Set the y element of a 4-D vector + // + inline Vector4 & setY( float y ); + + // Set the z element of a 4-D vector + // + inline Vector4 & setZ( float z ); + + // Set the w element of a 4-D vector + // + inline Vector4 & setW( float w ); + + // Get the x element of a 4-D vector + // + inline float getX( ) const; + + // Get the y element of a 4-D vector + // + inline float getY( ) const; + + // Get the z element of a 4-D vector + // + inline float getZ( ) const; + + // Get the w element of a 4-D vector + // + inline float getW( ) const; + + // Set an x, y, z, or w element of a 4-D vector by index + // + inline Vector4 & setElem( int idx, float value ); + + // Get an x, y, z, or w element of a 4-D vector by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two 4-D vectors + // + inline const Vector4 operator +( const Vector4 & vec ) const; + + // Subtract a 4-D vector from another 4-D vector + // + inline const Vector4 operator -( const Vector4 & vec ) const; + + // Multiply a 4-D vector by a scalar + // + inline const Vector4 operator *( float scalar ) const; + + // Divide a 4-D vector by a scalar + // + inline const Vector4 operator /( float scalar ) const; + + // Perform compound assignment and addition with a 4-D vector + // + inline Vector4 & operator +=( const Vector4 & vec ); + + // Perform compound assignment and subtraction by a 4-D vector + // + inline Vector4 & operator -=( const Vector4 & vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector4 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector4 & operator /=( float scalar ); + + // Negate all elements of a 4-D vector + // + inline const Vector4 operator -( ) const; + + // Construct x axis + // + static inline const Vector4 xAxis( ); + + // Construct y axis + // + static inline const Vector4 yAxis( ); + + // Construct z axis + // + static inline const Vector4 zAxis( ); + + // Construct w axis + // + static inline const Vector4 wAxis( ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a 4-D vector by a scalar +// +inline const Vector4 operator *( float scalar, const Vector4 & vec ); + +// Multiply two 4-D vectors per element +// +inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Divide two 4-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Compute the reciprocal of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector4 recipPerElem( const Vector4 & vec ); + +// Compute the square root of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Vector4 sqrtPerElem( const Vector4 & vec ); + +// Compute the reciprocal square root of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Vector4 rsqrtPerElem( const Vector4 & vec ); + +// Compute the absolute value of a 4-D vector per element +// +inline const Vector4 absPerElem( const Vector4 & vec ); + +// Copy sign from one 4-D vector to another, per element +// +inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Maximum of two 4-D vectors per element +// +inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Minimum of two 4-D vectors per element +// +inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Maximum element of a 4-D vector +// +inline float maxElem( const Vector4 & vec ); + +// Minimum element of a 4-D vector +// +inline float minElem( const Vector4 & vec ); + +// Compute the sum of all elements of a 4-D vector +// +inline float sum( const Vector4 & vec ); + +// Compute the dot product of two 4-D vectors +// +inline float dot( const Vector4 & vec0, const Vector4 & vec1 ); + +// Compute the square of the length of a 4-D vector +// +inline float lengthSqr( const Vector4 & vec ); + +// Compute the length of a 4-D vector +// +inline float length( const Vector4 & vec ); + +// Normalize a 4-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector4 normalize( const Vector4 & vec ); + +// Outer product of two 4-D vectors +// +inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 ); + +// Linear interpolation between two 4-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ); + +// Spherical linear interpolation between two 4-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ); + +// Conditionally select between two 4-D vectors +// +inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 ); + +// Load x, y, z, and w elements from the first four words of a float array. +// +// +inline void loadXYZW( Vector4 & vec, const float * fptr ); + +// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZW( const Vector4 & vec, float * fptr ); + +// Load four-half-floats as a 4-D vector +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ); + +// Store a 4-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 & vec ); + +// Print a 4-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 & vec, const char * name ); + +#endif + +// A 3-D point in array-of-structures format +// +class Point3 +{ + float mX; + float mY; + float mZ; +#ifndef __GNUC__ + float d; +#endif + +public: + // Default constructor; does no initialization + // + inline Point3( ) { }; + + // Copy a 3-D point + // + inline Point3( const Point3 & pnt ); + + // Construct a 3-D point from x, y, and z elements + // + inline Point3( float x, float y, float z ); + + // Copy elements from a 3-D vector into a 3-D point + // + explicit inline Point3( const Vector3 & vec ); + + // Set all elements of a 3-D point to the same scalar value + // + explicit inline Point3( float scalar ); + + // Assign one 3-D point to another + // + inline Point3 & operator =( const Point3 & pnt ); + + // Set the x element of a 3-D point + // + inline Point3 & setX( float x ); + + // Set the y element of a 3-D point + // + inline Point3 & setY( float y ); + + // Set the z element of a 3-D point + // + inline Point3 & setZ( float z ); + + // Get the x element of a 3-D point + // + inline float getX( ) const; + + // Get the y element of a 3-D point + // + inline float getY( ) const; + + // Get the z element of a 3-D point + // + inline float getZ( ) const; + + // Set an x, y, or z element of a 3-D point by index + // + inline Point3 & setElem( int idx, float value ); + + // Get an x, y, or z element of a 3-D point by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Subtract a 3-D point from another 3-D point + // + inline const Vector3 operator -( const Point3 & pnt ) const; + + // Add a 3-D point to a 3-D vector + // + inline const Point3 operator +( const Vector3 & vec ) const; + + // Subtract a 3-D vector from a 3-D point + // + inline const Point3 operator -( const Vector3 & vec ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Point3 & operator +=( const Vector3 & vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Point3 & operator -=( const Vector3 & vec ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply two 3-D points per element +// +inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Divide two 3-D points per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Compute the reciprocal of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Point3 recipPerElem( const Point3 & pnt ); + +// Compute the square root of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Point3 sqrtPerElem( const Point3 & pnt ); + +// Compute the reciprocal square root of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Point3 rsqrtPerElem( const Point3 & pnt ); + +// Compute the absolute value of a 3-D point per element +// +inline const Point3 absPerElem( const Point3 & pnt ); + +// Copy sign from one 3-D point to another, per element +// +inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Maximum of two 3-D points per element +// +inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Minimum of two 3-D points per element +// +inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Maximum element of a 3-D point +// +inline float maxElem( const Point3 & pnt ); + +// Minimum element of a 3-D point +// +inline float minElem( const Point3 & pnt ); + +// Compute the sum of all elements of a 3-D point +// +inline float sum( const Point3 & pnt ); + +// Apply uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 & pnt, float scaleVal ); + +// Apply non-uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ); + +// Scalar projection of a 3-D point on a unit-length 3-D vector +// +inline float projection( const Point3 & pnt, const Vector3 & unitVec ); + +// Compute the square of the distance of a 3-D point from the coordinate-system origin +// +inline float distSqrFromOrigin( const Point3 & pnt ); + +// Compute the distance of a 3-D point from the coordinate-system origin +// +inline float distFromOrigin( const Point3 & pnt ); + +// Compute the square of the distance between two 3-D points +// +inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ); + +// Compute the distance between two 3-D points +// +inline float dist( const Point3 & pnt0, const Point3 & pnt1 ); + +// Linear interpolation between two 3-D points +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ); + +// Conditionally select between two 3-D points +// +inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 ); + +// Load x, y, and z elements from the first three words of a float array. +// +// +inline void loadXYZ( Point3 & pnt, const float * fptr ); + +// Store x, y, and z elements of a 3-D point in the first three words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZ( const Point3 & pnt, float * fptr ); + +// Load three-half-floats as a 3-D point +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr ); + +// Store a 3-D point as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D point +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 & pnt ); + +// Print a 3-D point and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 & pnt, const char * name ); + +#endif + +// A quaternion in array-of-structures format +// +class Quat +{ + float mX; + float mY; + float mZ; + float mW; + +public: + // Default constructor; does no initialization + // + inline Quat( ) { }; + + // Copy a quaternion + // + inline Quat( const Quat & quat ); + + // Construct a quaternion from x, y, z, and w elements + // + inline Quat( float x, float y, float z, float w ); + + // Construct a quaternion from a 3-D vector and a scalar + // + inline Quat( const Vector3 & xyz, float w ); + + // Copy elements from a 4-D vector into a quaternion + // + explicit inline Quat( const Vector4 & vec ); + + // Convert a rotation matrix to a unit-length quaternion + // + explicit inline Quat( const Matrix3 & rotMat ); + + // Set all elements of a quaternion to the same scalar value + // + explicit inline Quat( float scalar ); + + // Assign one quaternion to another + // + inline Quat & operator =( const Quat & quat ); + + // Set the x, y, and z elements of a quaternion + // NOTE: + // This function does not change the w element. + // + inline Quat & setXYZ( const Vector3 & vec ); + + // Get the x, y, and z elements of a quaternion + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a quaternion + // + inline Quat & setX( float x ); + + // Set the y element of a quaternion + // + inline Quat & setY( float y ); + + // Set the z element of a quaternion + // + inline Quat & setZ( float z ); + + // Set the w element of a quaternion + // + inline Quat & setW( float w ); + + // Get the x element of a quaternion + // + inline float getX( ) const; + + // Get the y element of a quaternion + // + inline float getY( ) const; + + // Get the z element of a quaternion + // + inline float getZ( ) const; + + // Get the w element of a quaternion + // + inline float getW( ) const; + + // Set an x, y, z, or w element of a quaternion by index + // + inline Quat & setElem( int idx, float value ); + + // Get an x, y, z, or w element of a quaternion by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two quaternions + // + inline const Quat operator +( const Quat & quat ) const; + + // Subtract a quaternion from another quaternion + // + inline const Quat operator -( const Quat & quat ) const; + + // Multiply two quaternions + // + inline const Quat operator *( const Quat & quat ) const; + + // Multiply a quaternion by a scalar + // + inline const Quat operator *( float scalar ) const; + + // Divide a quaternion by a scalar + // + inline const Quat operator /( float scalar ) const; + + // Perform compound assignment and addition with a quaternion + // + inline Quat & operator +=( const Quat & quat ); + + // Perform compound assignment and subtraction by a quaternion + // + inline Quat & operator -=( const Quat & quat ); + + // Perform compound assignment and multiplication by a quaternion + // + inline Quat & operator *=( const Quat & quat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Quat & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Quat & operator /=( float scalar ); + + // Negate all elements of a quaternion + // + inline const Quat operator -( ) const; + + // Construct an identity quaternion + // + static inline const Quat identity( ); + + // Construct a quaternion to rotate between two unit-length 3-D vectors + // NOTE: + // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions. + // + static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ); + + // Construct a quaternion to rotate around a unit-length 3-D vector + // + static inline const Quat rotation( float radians, const Vector3 & unitVec ); + + // Construct a quaternion to rotate around the x axis + // + static inline const Quat rotationX( float radians ); + + // Construct a quaternion to rotate around the y axis + // + static inline const Quat rotationY( float radians ); + + // Construct a quaternion to rotate around the z axis + // + static inline const Quat rotationZ( float radians ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a quaternion by a scalar +// +inline const Quat operator *( float scalar, const Quat & quat ); + +// Compute the conjugate of a quaternion +// +inline const Quat conj( const Quat & quat ); + +// Use a unit-length quaternion to rotate a 3-D vector +// +inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec ); + +// Compute the dot product of two quaternions +// +inline float dot( const Quat & quat0, const Quat & quat1 ); + +// Compute the norm of a quaternion +// +inline float norm( const Quat & quat ); + +// Compute the length of a quaternion +// +inline float length( const Quat & quat ); + +// Normalize a quaternion +// NOTE: +// The result is unpredictable when all elements of quat are at or near zero. +// +inline const Quat normalize( const Quat & quat ); + +// Linear interpolation between two quaternions +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ); + +// Spherical linear interpolation between two quaternions +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ); + +// Spherical quadrangle interpolation +// +inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ); + +// Conditionally select between two quaternions +// +inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ); + +// Load x, y, z, and w elements from the first four words of a float array. +// +// +inline void loadXYZW( Quat & quat, const float * fptr ); + +// Store x, y, z, and w elements of a quaternion in the first four words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZW( const Quat & quat, float * fptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a quaternion +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat & quat ); + +// Print a quaternion and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat & quat, const char * name ); + +#endif + +// A 3x3 matrix in array-of-structures format +// +class Matrix3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + +public: + // Default constructor; does no initialization + // + inline Matrix3( ) { }; + + // Copy a 3x3 matrix + // + inline Matrix3( const Matrix3 & mat ); + + // Construct a 3x3 matrix containing the specified columns + // + inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 ); + + // Construct a 3x3 rotation matrix from a unit-length quaternion + // + explicit inline Matrix3( const Quat & unitQuat ); + + // Set all elements of a 3x3 matrix to the same scalar value + // + explicit inline Matrix3( float scalar ); + + // Assign one 3x3 matrix to another + // + inline Matrix3 & operator =( const Matrix3 & mat ); + + // Set column 0 of a 3x3 matrix + // + inline Matrix3 & setCol0( const Vector3 & col0 ); + + // Set column 1 of a 3x3 matrix + // + inline Matrix3 & setCol1( const Vector3 & col1 ); + + // Set column 2 of a 3x3 matrix + // + inline Matrix3 & setCol2( const Vector3 & col2 ); + + // Get column 0 of a 3x3 matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x3 matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x3 matrix + // + inline const Vector3 getCol2( ) const; + + // Set the column of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setCol( int col, const Vector3 & vec ); + + // Set the row of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setRow( int row, const Vector3 & vec ); + + // Get the column of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x3 matrix referred to by column and row indices + // + inline Matrix3 & setElem( int col, int row, float val ); + + // Get the element of a 3x3 matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Add two 3x3 matrices + // + inline const Matrix3 operator +( const Matrix3 & mat ) const; + + // Subtract a 3x3 matrix from another 3x3 matrix + // + inline const Matrix3 operator -( const Matrix3 & mat ) const; + + // Negate all elements of a 3x3 matrix + // + inline const Matrix3 operator -( ) const; + + // Multiply a 3x3 matrix by a scalar + // + inline const Matrix3 operator *( float scalar ) const; + + // Multiply a 3x3 matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 & vec ) const; + + // Multiply two 3x3 matrices + // + inline const Matrix3 operator *( const Matrix3 & mat ) const; + + // Perform compound assignment and addition with a 3x3 matrix + // + inline Matrix3 & operator +=( const Matrix3 & mat ); + + // Perform compound assignment and subtraction by a 3x3 matrix + // + inline Matrix3 & operator -=( const Matrix3 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix3 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a 3x3 matrix + // + inline Matrix3 & operator *=( const Matrix3 & mat ); + + // Construct an identity 3x3 matrix + // + static inline const Matrix3 identity( ); + + // Construct a 3x3 matrix to rotate around the x axis + // + static inline const Matrix3 rotationX( float radians ); + + // Construct a 3x3 matrix to rotate around the y axis + // + static inline const Matrix3 rotationY( float radians ); + + // Construct a 3x3 matrix to rotate around the z axis + // + static inline const Matrix3 rotationZ( float radians ); + + // Construct a 3x3 matrix to rotate around the x, y, and z axes + // + static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix3 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix3 rotation( const Quat & unitQuat ); + + // Construct a 3x3 matrix to perform scaling + // + static inline const Matrix3 scale( const Vector3 & scaleVec ); + +}; +// Multiply a 3x3 matrix by a scalar +// +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ); + +// Append (post-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ); + +// Multiply two 3x3 matrices per element +// +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); + +// Compute the absolute value of a 3x3 matrix per element +// +inline const Matrix3 absPerElem( const Matrix3 & mat ); + +// Transpose of a 3x3 matrix +// +inline const Matrix3 transpose( const Matrix3 & mat ); + +// Compute the inverse of a 3x3 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix3 inverse( const Matrix3 & mat ); + +// Determinant of a 3x3 matrix +// +inline float determinant( const Matrix3 & mat ); + +// Conditionally select between two 3x3 matrices +// +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x3 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat ); + +// Print a 3x3 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat, const char * name ); + +#endif + +// A 4x4 matrix in array-of-structures format +// +class Matrix4 +{ + Vector4 mCol0; + Vector4 mCol1; + Vector4 mCol2; + Vector4 mCol3; + +public: + // Default constructor; does no initialization + // + inline Matrix4( ) { }; + + // Copy a 4x4 matrix + // + inline Matrix4( const Matrix4 & mat ); + + // Construct a 4x4 matrix containing the specified columns + // + inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 ); + + // Construct a 4x4 matrix from a 3x4 transformation matrix + // + explicit inline Matrix4( const Transform3 & mat ); + + // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector + // + inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec ); + + // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector + // + inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec ); + + // Set all elements of a 4x4 matrix to the same scalar value + // + explicit inline Matrix4( float scalar ); + + // Assign one 4x4 matrix to another + // + inline Matrix4 & operator =( const Matrix4 & mat ); + + // Set the upper-left 3x3 submatrix + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 4x4 matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setTranslation( const Vector3 & translateVec ); + + // Get the translation component of a 4x4 matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 4x4 matrix + // + inline Matrix4 & setCol0( const Vector4 & col0 ); + + // Set column 1 of a 4x4 matrix + // + inline Matrix4 & setCol1( const Vector4 & col1 ); + + // Set column 2 of a 4x4 matrix + // + inline Matrix4 & setCol2( const Vector4 & col2 ); + + // Set column 3 of a 4x4 matrix + // + inline Matrix4 & setCol3( const Vector4 & col3 ); + + // Get column 0 of a 4x4 matrix + // + inline const Vector4 getCol0( ) const; + + // Get column 1 of a 4x4 matrix + // + inline const Vector4 getCol1( ) const; + + // Get column 2 of a 4x4 matrix + // + inline const Vector4 getCol2( ) const; + + // Get column 3 of a 4x4 matrix + // + inline const Vector4 getCol3( ) const; + + // Set the column of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setCol( int col, const Vector4 & vec ); + + // Set the row of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setRow( int row, const Vector4 & vec ); + + // Get the column of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getCol( int col ) const; + + // Get the row of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector4 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector4 operator []( int col ) const; + + // Set the element of a 4x4 matrix referred to by column and row indices + // + inline Matrix4 & setElem( int col, int row, float val ); + + // Get the element of a 4x4 matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Add two 4x4 matrices + // + inline const Matrix4 operator +( const Matrix4 & mat ) const; + + // Subtract a 4x4 matrix from another 4x4 matrix + // + inline const Matrix4 operator -( const Matrix4 & mat ) const; + + // Negate all elements of a 4x4 matrix + // + inline const Matrix4 operator -( ) const; + + // Multiply a 4x4 matrix by a scalar + // + inline const Matrix4 operator *( float scalar ) const; + + // Multiply a 4x4 matrix by a 4-D vector + // + inline const Vector4 operator *( const Vector4 & vec ) const; + + // Multiply a 4x4 matrix by a 3-D vector + // + inline const Vector4 operator *( const Vector3 & vec ) const; + + // Multiply a 4x4 matrix by a 3-D point + // + inline const Vector4 operator *( const Point3 & pnt ) const; + + // Multiply two 4x4 matrices + // + inline const Matrix4 operator *( const Matrix4 & mat ) const; + + // Multiply a 4x4 matrix by a 3x4 transformation matrix + // + inline const Matrix4 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and addition with a 4x4 matrix + // + inline Matrix4 & operator +=( const Matrix4 & mat ); + + // Perform compound assignment and subtraction by a 4x4 matrix + // + inline Matrix4 & operator -=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix4 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a 4x4 matrix + // + inline Matrix4 & operator *=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Matrix4 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 4x4 matrix + // + static inline const Matrix4 identity( ); + + // Construct a 4x4 matrix to rotate around the x axis + // + static inline const Matrix4 rotationX( float radians ); + + // Construct a 4x4 matrix to rotate around the y axis + // + static inline const Matrix4 rotationY( float radians ); + + // Construct a 4x4 matrix to rotate around the z axis + // + static inline const Matrix4 rotationZ( float radians ); + + // Construct a 4x4 matrix to rotate around the x, y, and z axes + // + static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix4 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix4 rotation( const Quat & unitQuat ); + + // Construct a 4x4 matrix to perform scaling + // + static inline const Matrix4 scale( const Vector3 & scaleVec ); + + // Construct a 4x4 matrix to perform translation + // + static inline const Matrix4 translation( const Vector3 & translateVec ); + + // Construct viewing matrix based on eye position, position looked at, and up direction + // + static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec ); + + // Construct a perspective projection matrix + // + static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); + + // Construct a perspective projection matrix based on frustum + // + static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); + + // Construct an orthographic projection matrix + // + static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); + +}; +// Multiply a 4x4 matrix by a scalar +// +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ); + +// Append (post-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat ); + +// Multiply two 4x4 matrices per element +// +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); + +// Compute the absolute value of a 4x4 matrix per element +// +inline const Matrix4 absPerElem( const Matrix4 & mat ); + +// Transpose of a 4x4 matrix +// +inline const Matrix4 transpose( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 inverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 affineInverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. +// +inline const Matrix4 orthoInverse( const Matrix4 & mat ); + +// Determinant of a 4x4 matrix +// +inline float determinant( const Matrix4 & mat ); + +// Conditionally select between two 4x4 matrices +// +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4x4 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat ); + +// Print a 4x4 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat, const char * name ); + +#endif + +// A 3x4 transformation matrix in array-of-structures format +// +class Transform3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + Vector3 mCol3; + +public: + // Default constructor; does no initialization + // + inline Transform3( ) { }; + + // Copy a 3x4 transformation matrix + // + inline Transform3( const Transform3 & tfrm ); + + // Construct a 3x4 transformation matrix containing the specified columns + // + inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 ); + + // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector + // + inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec ); + + // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector + // + inline Transform3( const Quat & unitQuat, const Vector3 & translateVec ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value + // + explicit inline Transform3( float scalar ); + + // Assign one 3x4 transformation matrix to another + // + inline Transform3 & operator =( const Transform3 & tfrm ); + + // Set the upper-left 3x3 submatrix + // + inline Transform3 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // + inline Transform3 & setTranslation( const Vector3 & translateVec ); + + // Get the translation component of a 3x4 transformation matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 3x4 transformation matrix + // + inline Transform3 & setCol0( const Vector3 & col0 ); + + // Set column 1 of a 3x4 transformation matrix + // + inline Transform3 & setCol1( const Vector3 & col1 ); + + // Set column 2 of a 3x4 transformation matrix + // + inline Transform3 & setCol2( const Vector3 & col2 ); + + // Set column 3 of a 3x4 transformation matrix + // + inline Transform3 & setCol3( const Vector3 & col3 ); + + // Get column 0 of a 3x4 transformation matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x4 transformation matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x4 transformation matrix + // + inline const Vector3 getCol2( ) const; + + // Get column 3 of a 3x4 transformation matrix + // + inline const Vector3 getCol3( ) const; + + // Set the column of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setCol( int col, const Vector3 & vec ); + + // Set the row of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setRow( int row, const Vector4 & vec ); + + // Get the column of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x4 transformation matrix referred to by column and row indices + // + inline Transform3 & setElem( int col, int row, float val ); + + // Get the element of a 3x4 transformation matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Multiply a 3x4 transformation matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 & vec ) const; + + // Multiply a 3x4 transformation matrix by a 3-D point + // + inline const Point3 operator *( const Point3 & pnt ) const; + + // Multiply two 3x4 transformation matrices + // + inline const Transform3 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Transform3 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 3x4 transformation matrix + // + static inline const Transform3 identity( ); + + // Construct a 3x4 transformation matrix to rotate around the x axis + // + static inline const Transform3 rotationX( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis + // + static inline const Transform3 rotationY( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis + // + static inline const Transform3 rotationZ( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes + // + static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector + // + static inline const Transform3 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Transform3 rotation( const Quat & unitQuat ); + + // Construct a 3x4 transformation matrix to perform scaling + // + static inline const Transform3 scale( const Vector3 & scaleVec ); + + // Construct a 3x4 transformation matrix to perform translation + // + static inline const Transform3 translation( const Vector3 & translateVec ); + +}; +// Append (post-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm ); + +// Multiply two 3x4 transformation matrices per element +// +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); + +// Compute the absolute value of a 3x4 transformation matrix per element +// +inline const Transform3 absPerElem( const Transform3 & tfrm ); + +// Inverse of a 3x4 transformation matrix +// NOTE: +// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0. +// +inline const Transform3 inverse( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. +// +inline const Transform3 orthoInverse( const Transform3 & tfrm ); + +// Conditionally select between two 3x4 transformation matrices +// +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x4 transformation matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm ); + +// Print a 3x4 transformation matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm, const char * name ); + +#endif + +} // namespace Aos +} // namespace Vectormath + +#include "vec_aos.h" +#include "quat_aos.h" +#include "mat_aos.h" + +#endif diff --git a/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h b/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h new file mode 100644 index 00000000000..5a4944a5500 --- /dev/null +++ b/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h @@ -0,0 +1,80 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef AOS_VECTORMATH_BULLET_CONVERT_H +#define AOS_VECTORMATH_BULLET_CONVERT_H + + +///only use a system-wide vectormath_aos.h on CELLOS_LV2 or if USE_SYSTEM_VECTORMATH +#if defined(__CELLOS_LV2__) || defined (USE_SYSTEM_VECTORMATH) +#include +#else +#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h" +#endif + +#include "LinearMath/btVector3.h" +#include "LinearMath/btQuaternion.h" +#include "LinearMath/btMatrix3x3.h" + +inline Vectormath::Aos::Vector3 getVmVector3(const btVector3& bulletVec) +{ + return Vectormath::Aos::Vector3(bulletVec.getX(),bulletVec.getY(),bulletVec.getZ()); +} + +inline btVector3 getBtVector3(const Vectormath::Aos::Vector3& vmVec) +{ + return btVector3(vmVec.getX(),vmVec.getY(),vmVec.getZ()); +} +inline btVector3 getBtVector3(const Vectormath::Aos::Point3& vmVec) +{ + return btVector3(vmVec.getX(),vmVec.getY(),vmVec.getZ()); +} + +inline Vectormath::Aos::Quat getVmQuat(const btQuaternion& bulletQuat) +{ + Vectormath::Aos::Quat vmQuat(bulletQuat.getX(),bulletQuat.getY(),bulletQuat.getZ(),bulletQuat.getW()); + return vmQuat; +} + +inline btQuaternion getBtQuat(const Vectormath::Aos::Quat& vmQuat) +{ + return btQuaternion (vmQuat.getX(),vmQuat.getY(),vmQuat.getZ(),vmQuat.getW()); +} + +inline Vectormath::Aos::Matrix3 getVmMatrix3(const btMatrix3x3& btMat) +{ + Vectormath::Aos::Matrix3 mat( + getVmVector3(btMat.getColumn(0)), + getVmVector3(btMat.getColumn(1)), + getVmVector3(btMat.getColumn(2))); + return mat; +} + + +#endif //AOS_VECTORMATH_BULLET_CONVERT_H -- cgit v1.2.3