68 files changed, 0 insertions, 17495 deletions
diff --git a/extern/bullet2/BulletMultiThreaded/CMakeLists.txt b/extern/bullet2/BulletMultiThreaded/CMakeLists.txt
deleted file mode 100644
index 90f970afbfd..00000000000
--- a/extern/bullet2/BulletMultiThreaded/CMakeLists.txt
+++ /dev/null
@@ -1,92 +0,0 @@
-INCLUDE_DIRECTORIES(
-	${BULLET_PHYSICS_SOURCE_DIR}/src
-	${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/vectormath/scalar/cpp
-)
-
-ADD_LIBRARY(BulletMultiThreaded
-		PlatformDefinitions.h
-		SpuFakeDma.cpp
-		SpuFakeDma.h
-		SpuDoubleBuffer.h
-		SpuLibspe2Support.cpp
-		SpuLibspe2Support.h
-		btThreadSupportInterface.cpp
-		btThreadSupportInterface.h
-		
-		Win32ThreadSupport.cpp
-		Win32ThreadSupport.h
-		PosixThreadSupport.cpp
-		PosixThreadSupport.h
-		SequentialThreadSupport.cpp
-		SequentialThreadSupport.h
-		SpuSampleTaskProcess.h
-		SpuSampleTaskProcess.cpp
-
-		SpuCollisionObjectWrapper.cpp 
-		SpuCollisionObjectWrapper.h 
-		SpuCollisionTaskProcess.h
-		SpuCollisionTaskProcess.cpp
-		SpuGatheringCollisionDispatcher.h
-		SpuGatheringCollisionDispatcher.cpp
-		SpuContactManifoldCollisionAlgorithm.cpp
-		SpuContactManifoldCollisionAlgorithm.h
-		
-		btParallelConstraintSolver.cpp
-		btParallelConstraintSolver.h
-		
-		SpuNarrowPhaseCollisionTask/Box.h
-		SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
-		SpuNarrowPhaseCollisionTask/boxBoxDistance.h
-		SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
-		SpuNarrowPhaseCollisionTask/SpuContactResult.h
-		SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
-		SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
-		SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
-		SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
-		SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
-		SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
-		SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
-		SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
-
-
-#Some GPU related stuff, mainly CUDA and perhaps OpenCL
-		btGpu3DGridBroadphase.cpp
-		btGpu3DGridBroadphase.h
-		btGpu3DGridBroadphaseSharedCode.h
-		btGpu3DGridBroadphaseSharedDefs.h
-		btGpu3DGridBroadphaseSharedTypes.h
-		btGpuDefines.h
-		btGpuUtilsSharedCode.h
-		btGpuUtilsSharedDefs.h
-
-#MiniCL provides a small subset of OpenCL
-		MiniCL.cpp
-		MiniCLTaskScheduler.cpp
-		MiniCLTaskScheduler.h
-		MiniCLTask/MiniCLTask.cpp
-		MiniCLTask/MiniCLTask.h
-		../MiniCL/cl.h
-		../MiniCL/cl_gl.h
-		../MiniCL/cl_platform.h
-		../MiniCL/cl_MiniCL_Defs.h
-)
-
-IF (BUILD_SHARED_LIBS)
-	TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletCollision)
-ENDIF (BUILD_SHARED_LIBS)
-
-IF (INSTALL_LIBS)
-	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
-		#INSTALL of other files requires CMake 2.6
-		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-			IF(INSTALL_EXTRA_LIBS)
-				IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-					INSTALL(TARGETS BulletMultiThreaded DESTINATION .)
-				ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-				INSTALL(TARGETS BulletMultiThreaded DESTINATION lib)
-					INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
-				ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-			ENDIF (INSTALL_EXTRA_LIBS)
-		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
-ENDIF (INSTALL_LIBS)
-\ No newline at end of file
diff --git a/extern/bullet2/BulletMultiThreaded/Makefile.original b/extern/bullet2/BulletMultiThreaded/Makefile.original
deleted file mode 100644
index 1edc9811f8d..00000000000
--- a/extern/bullet2/BulletMultiThreaded/Makefile.original
+++ /dev/null
@@ -1,187 +0,0 @@
-__ARCH_BITS__ := 32
-
-# define macros
-NARROWPHASEDIR=./SpuNarrowPhaseCollisionTask
-SPU_TASKFILE=$(NARROWPHASEDIR)/SpuGatheringCollisionTask
-
-IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
-
-ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
-        CELL_TOP ?= /opt/cell/sdk
-	CELL_SYSROOT := /opt/cell/sysroot
-else
-        CELL_TOP ?= /opt/ibm/cell-sdk/prototype
-	CELL_SYSROOT := $(CELL_TOP)/sysroot
-endif
-
-
-USE_CCACHE=ccache
-RM=rm -f 
-OUTDIR=./out
-DEBUGFLAG=-DNDEBUG
-LIBOUTDIR=../../lib/ibmsdk
-COLLISIONDIR=../../src/BulletCollision
-MATHDIR=../../src/LinearMath
-ARCHITECTUREFLAG=-m$(__ARCH_BITS__)
-ifeq "$(__ARCH_BITS__)" "64"
-  SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ -DUSE_ADDR64
-else
-  SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__
-endif
-
-SPU_DEFFLAGS+=-DUSE_PE_BOX_BOX
-
-SPU_GCC=$(USE_CCACHE) /usr/bin/spu-gcc
-SPU_INCLUDEDIR= -Ivectormath/scalar/cpp -I. -I$(CELL_SYSROOT)/usr/spu/include -I../../src -I$(NARROWPHASEDIR)
-#SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -Os -c -include spu_intrinsics.h -include stdbool.h
-SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -O3 -mbranch-hints -fomit-frame-pointer -ftree-vectorize -finline-functions -ftree-vect-loop-version -ftree-loop-optimize -ffast-math -fno-rtti -fno-exceptions -c -include spu_intrinsics.h -include stdbool.h
-
-SPU_LFLAGS= -Wl,-N
-SPU_LIBRARIES=-lstdc++
-SPU_EMBED=/usr/bin/ppu-embedspu
-SPU_AR=/usr/bin/ar
-SYMBOLNAME=spu_program
-
-ifeq "$(__ARCH_BITS__)" "64"
-  PPU_DEFFLAGS= -DUSE_LIBSPE2 -DUSE_ADDR64
-  PPU_GCC=$(USE_CCACHE) /usr/bin/ppu-gcc
-else
-  PPU_DEFFLAGS= -DUSE_LIBSPE2
-  PPU_GCC=$(USE_CCACHE) /usr/bin/ppu32-gcc
-endif
-
-PPU_CFLAGS= $(ARCHITECTUREFLAG) $(DEBUGFLAG) -W -Wall -Winline -O3 -c -mabi=altivec -maltivec -include altivec.h -include stdbool.h
-PPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/include -I../../src -I$(NARROWPHASEDIR)
-PPU_LFLAGS= $(ARCHITECTUREFLAG) -Wl,-m,elf$(__ARCH_BITS__)ppc
-PPU_LIBRARIES= -lstdc++ -lsupc++ -lgcc -lgcov -lspe2 -lpthread -L../../lib/ibmsdk -lbulletcollision -lbulletdynamics -lbulletmath -L$(CELL_SYSROOT)/usr/lib$(__ARCH_BITS__) -R$(CELL_SYSROOT)/usr/lib
-PPU_AR=/usr/bin/ar
-
-MakeOut :
-#	rm -f -R $(OUTDIR) ; mkdir $(OUTDIR)
-	@echo "usage: make spu, make ppu, make all, or make clean"
-# SPU
-SpuTaskFile : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/SpuTaskFile.o $(SPU_TASKFILE).cpp
-
-boxBoxDistance : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuFakeDma : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuContactManifoldCollisionAlgorithm_spu : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuContactManifoldCollisionAlgorithm.cpp
-
-SpuCollisionShapes : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuContactResult : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-#SpuGatheringCollisionTask : MakeOut
-#	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuGjkPairDetector: MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuMinkowskiPenetrationDepthSolver : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuVoronoiSimplexSolver : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-#SpuLibspe2Support_spu : MakeOut
-#	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuLibspe2Support.cpp
-
-## SPU-Bullet
-btPersistentManifold : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/NarrowPhaseCollision/$@.cpp
-
-btOptimizedBvh : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btCollisionObject : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionDispatch/$@.cpp
-
-btTriangleCallback : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btTriangleIndexVertexArray : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btStridingMeshInterface : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btAlignedAllocator : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(MATHDIR)/$@.cpp
-
-
-# PPU
-SpuGatheringCollisionDispatcher : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SequentialThreadSupport: MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuLibspe2Support: MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-btThreadSupportInterface: MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuCollisionTaskProcess : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuContactManifoldCollisionAlgorithm : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-	
-SpuSampleTaskProcess : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-	
-
-
-spu : boxBoxDistance SpuFakeDma SpuContactManifoldCollisionAlgorithm_spu SpuContactResult SpuTaskFile \
-      SpuGjkPairDetector SpuMinkowskiPenetrationDepthSolver SpuVoronoiSimplexSolver SpuCollisionShapes \
-      btPersistentManifold btOptimizedBvh btCollisionObject btTriangleCallback btTriangleIndexVertexArray \
-      btStridingMeshInterface btAlignedAllocator
-	$(SPU_GCC) -o $(OUTDIR)/spuCollision.elf \
-                            $(OUTDIR)/SpuTaskFile.o \
-                            $(OUTDIR)/SpuFakeDma.o \
-			    $(OUTDIR)/boxBoxDistance.o \
-                            $(OUTDIR)/SpuContactManifoldCollisionAlgorithm_spu.o \
-                            $(OUTDIR)/SpuContactResult.o \
-                            $(OUTDIR)/SpuCollisionShapes.o \
-                            $(OUTDIR)/SpuGjkPairDetector.o \
-                            $(OUTDIR)/SpuMinkowskiPenetrationDepthSolver.o \
-                            $(OUTDIR)/SpuVoronoiSimplexSolver.o \
-                            $(OUTDIR)/btPersistentManifold.o \
-                            $(OUTDIR)/btTriangleCallback.o \
-                            $(OUTDIR)/btTriangleIndexVertexArray.o \
-                            $(OUTDIR)/btStridingMeshInterface.o \
-                            $(OUTDIR)/btAlignedAllocator.o \
-                            $(SPU_LFLAGS) $(SPU_LIBRARIES)
-
-spu-embed : spu
-	$(SPU_EMBED) $(ARCHITECTUREFLAG) $(SYMBOLNAME) $(OUTDIR)/spuCollision.elf $(OUTDIR)/$@.o
-	$(SPU_AR) -qcs $(LIBOUTDIR)/libspu.a $(OUTDIR)/$@.o
-
-
-
-ppu : SpuGatheringCollisionDispatcher SpuCollisionTaskProcess btThreadSupportInterface \
-      SpuLibspe2Support SpuContactManifoldCollisionAlgorithm SpuSampleTaskProcess
-	$(PPU_AR) -qcs $(LIBOUTDIR)/bulletmultithreaded.a \
-                                                          $(OUTDIR)/SpuCollisionTaskProcess.o \
-                                                          $(OUTDIR)/SpuSampleTaskProcess.o \
-                                                          $(OUTDIR)/SpuGatheringCollisionDispatcher.o \
-                                                          $(OUTDIR)/SpuLibspe2Support.o \
-                                                          $(OUTDIR)/btThreadSupportInterface.o \
-							  $(OUTDIR)/SpuContactManifoldCollisionAlgorithm.o
-
-all : spu-embed ppu 
-
-clean:
-	$(RM) $(OUTDIR)/* ; $(RM) $(LIBOUTDIR)/libspu.a ; $(RM) $(LIBOUTDIR)/bulletmultithreaded.a
-
-
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCL.cpp b/extern/bullet2/BulletMultiThreaded/MiniCL.cpp
deleted file mode 100644
index b7f5a699312..00000000000
--- a/extern/bullet2/BulletMultiThreaded/MiniCL.cpp
+++ /dev/null
@@ -1,517 +0,0 @@
-/*
-   Copyright (C) 2010 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-
-#include "MiniCL/cl.h"
-#define __PHYSICS_COMMON_H__ 1
-#ifdef _WIN32
-#include "BulletMultiThreaded/Win32ThreadSupport.h"
-#endif
-
-#include "BulletMultiThreaded/SequentialThreadSupport.h"
-#include "MiniCLTaskScheduler.h"
-#include "MiniCLTask/MiniCLTask.h"
-#include "LinearMath/btMinMax.h"
-
-//#define DEBUG_MINICL_KERNELS 1
-
-
-
-
-CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
-	cl_device_id            device ,
-	cl_device_info          param_name ,
-	size_t                  param_value_size ,
-	void *                  param_value ,
-	size_t *                /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
-{
-
-	switch (param_name)
-	{
-	case CL_DEVICE_NAME:
-		{
-			char deviceName[] = "CPU";
-			unsigned int nameLen = strlen(deviceName)+1;
-			assert(param_value_size>strlen(deviceName));
-			if (nameLen < param_value_size)
-			{
-				const char* cpuName = "CPU";
-				sprintf((char*)param_value,"%s",cpuName);
-			} else
-			{
-				printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
-			}
-			break;
-		}
-	case CL_DEVICE_TYPE:
-		{
-			if (param_value_size>=sizeof(cl_device_type))
-			{
-				cl_device_type* deviceType = (cl_device_type*)param_value;
-				*deviceType = CL_DEVICE_TYPE_CPU;
-			} else
-			{
-				printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
-			}
-			break;
-		}
-	case CL_DEVICE_MAX_COMPUTE_UNITS:
-		{
-			if (param_value_size>=sizeof(cl_uint))
-			{
-				cl_uint* numUnits = (cl_uint*)param_value;
-				*numUnits= 4;
-			} else
-			{
-				printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
-			}
-
-			break;
-		}
-	case CL_DEVICE_MAX_WORK_ITEM_SIZES:
-		{
-			size_t workitem_size[3];
-
-			if (param_value_size>=sizeof(workitem_size))
-			{
-				size_t* workItemSize = (size_t*)param_value;
-				workItemSize[0] = 64;
-				workItemSize[1] = 24;
-				workItemSize[2] = 16;
-			} else
-			{
-				printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
-			}
-			break;
-		}
-	case CL_DEVICE_MAX_CLOCK_FREQUENCY:
-		{
-			 cl_uint* clock_frequency = (cl_uint*)param_value;
-			 *clock_frequency = 3*1024;
-			break;
-		}
-	default:
-		{
-			printf("error: unsupported param_name:%d\n",param_name);
-		}
-	}
-
-
-	return 0;
-}
-
-CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
-{
-	return 0;
-}
-
-
-
-CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
-{
-	return 0;
-}
-
-CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
-{
-	return 0;
-}
-
-CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0
-{
-	return 0;
-}
-
-
-// Enqueued Commands APIs
-CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue     command_queue ,
-                    cl_mem               buffer ,
-                    cl_bool             /* blocking_read */,
-                    size_t               offset ,
-                    size_t               cb , 
-                    void *               ptr ,
-                    cl_uint             /* num_events_in_wait_list */,
-                    const cl_event *    /* event_wait_list */,
-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
-{
-	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
-
-	///wait for all work items to be completed
-	scheduler->flush();
-
-	memcpy(ptr,(char*)buffer + offset,cb);
-	return 0;
-}
-
-
-CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program            /* program */,
-                      cl_device_id          /* device */,
-                      cl_program_build_info /* param_name */,
-                      size_t                /* param_value_size */,
-                      void *                /* param_value */,
-                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
-{
-
-	return 0;
-}
-
-
-// Program Object APIs
-CL_API_ENTRY cl_program
-clCreateProgramWithSource(cl_context         context ,
-                          cl_uint           /* count */,
-                          const char **     /* strings */,
-                          const size_t *    /* lengths */,
-                          cl_int *          errcode_ret ) CL_API_SUFFIX__VERSION_1_0
-{
-	*errcode_ret = CL_SUCCESS;
-	return (cl_program)context;
-}
-
-CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue     command_queue ,
-                    cl_mem               buffer ,
-                    cl_bool             /* blocking_read */,
-                    size_t              offset,
-                    size_t               cb , 
-                    const void *         ptr ,
-                    cl_uint             /* num_events_in_wait_list */,
-                    const cl_event *    /* event_wait_list */,
-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
-{
-	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
-
-	///wait for all work items to be completed
-	scheduler->flush();
-
-	memcpy((char*)buffer + offset, ptr,cb);
-	return 0;
-}
-
-CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue  command_queue)
-{
-	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
-	///wait for all work items to be completed
-	scheduler->flush();
-	return 0;
-}
-
-
-CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
-                       cl_kernel         clKernel ,
-                       cl_uint           work_dim ,
-                       const size_t *   /* global_work_offset */,
-                       const size_t *    global_work_size ,
-                       const size_t *   /* local_work_size */,
-                       cl_uint          /* num_events_in_wait_list */,
-                       const cl_event * /* event_wait_list */,
-                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0
-{
-
-	
-	MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
-	for (unsigned int ii=0;ii<work_dim;ii++)
-	{
-		int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
-		int numWorkItems = global_work_size[ii];
-
-//		//at minimum 64 work items per task
-//		int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
-		int numWorkItemsPerTask = numWorkItems / maxTask;
-		if (!numWorkItemsPerTask) numWorkItemsPerTask = 1;
-
-		for (int t=0;t<numWorkItems;)
-		{
-			//Performance Hint: tweak this number during benchmarking
-			int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
-			kernel->m_scheduler->issueTask(t, endIndex, kernel);
-			t = endIndex;
-		}
-	}
-/*
-
-	void* bla = 0;
-
-	scheduler->issueTask(bla,2,3);
-	scheduler->flush();
-
-	*/
-
-	return 0;
-}
-
-#define LOCAL_BUF_SIZE 32768
-static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
-static int* spLocalBufCurr = NULL;
-static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
-static void* localBufMalloc(int size)
-{
-	int size16 = (size + 15) >> 4; // in 16-byte units
-	if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
-	{ // reset
-		spLocalBufCurr = sLocalMemBuf;
-		while((long)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
-		sLocalBufUsed = 0;
-	}
-	void* ret = spLocalBufCurr;
-	spLocalBufCurr += size16 * 4;
-	sLocalBufUsed += size;
-	return ret;
-}
-
-
-
-CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel    clKernel ,
-               cl_uint      arg_index ,
-               size_t       arg_size ,
-               const void *  arg_value ) CL_API_SUFFIX__VERSION_1_0
-{
-	MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
-	btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
-	if (arg_index>MINI_CL_MAX_ARG)
-	{
-		printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
-	} else
-	{
-//		if (arg_size>=MINICL_MAX_ARGLENGTH)
-		if (arg_size != MINICL_MAX_ARGLENGTH)
-		{
-			printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
-		} 
-		else
-		{
-			if(arg_value == NULL)
-			{	// this is only for __local memory qualifier
-				void* ptr = localBufMalloc(arg_size);
-				kernel->m_argData[arg_index] = ptr;
-			}
-			else
-			{
-				memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
-			}
-			kernel->m_argSizes[arg_index] = arg_size;
-			if(arg_index >= kernel->m_numArgs)
-			{
-				kernel->m_numArgs = arg_index + 1;
-				kernel->updateLauncher();
-			}
-		}
-	}
-	return 0;
-}
-
-// Kernel Object APIs
-CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program       program ,
-               const char *     kernel_name ,
-               cl_int *         errcode_ret ) CL_API_SUFFIX__VERSION_1_0
-{
-	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
-	MiniCLKernel* kernel = new MiniCLKernel();
-	int nameLen = strlen(kernel_name);
-	if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
-	{
-		*errcode_ret = CL_INVALID_KERNEL_NAME;
-		return NULL;
-	}
-	strcpy(kernel->m_name, kernel_name);
-	kernel->m_numArgs = 0;
-
-	//kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
-	//if (kernel->m_kernelProgramCommandId>=0)
-	//{
-	//	*errcode_ret = CL_SUCCESS;
-	//} else
-	//{
-	//	*errcode_ret = CL_INVALID_KERNEL_NAME;
-	//}
-	kernel->m_scheduler = scheduler;
-	if(kernel->registerSelf() == NULL)
-	{
-		*errcode_ret = CL_INVALID_KERNEL_NAME;
-		return NULL;
-	}
-	else
-	{
-		*errcode_ret = CL_SUCCESS;
-	}
-
-	return (cl_kernel)kernel;
-
-}
-
-
-CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program           /* program */,
-               cl_uint              /* num_devices */,
-               const cl_device_id * /* device_list */,
-               const char *         /* options */, 
-               void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
-               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0
-{
-	return CL_SUCCESS;
-}
-
-CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context                     context ,
-                          cl_uint                        /* num_devices */,
-                          const cl_device_id *           /* device_list */,
-                          const size_t *                 /* lengths */,
-                          const unsigned char **         /* binaries */,
-                          cl_int *                       /* binary_status */,
-                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
-{
-	return (cl_program)context;
-}
-
-
-// Memory Object APIs
-CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context   /* context */,
-               cl_mem_flags flags ,
-               size_t       size,
-               void *       host_ptr ,
-               cl_int *     errcode_ret ) CL_API_SUFFIX__VERSION_1_0
-{
-	cl_mem buf = (cl_mem)malloc(size);
-	if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
-	{
-		memcpy(buf,host_ptr,size);
-	}
-	*errcode_ret = 0;
-	return buf;
-}
-
-// Command Queue APIs
-CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context                      context , 
-                     cl_device_id                   /* device */, 
-                     cl_command_queue_properties    /* properties */,
-                     cl_int *                        errcode_ret ) CL_API_SUFFIX__VERSION_1_0
-{
-	*errcode_ret = 0;
-	return (cl_command_queue) context;
-}
-
-extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context         /* context */, 
-                 cl_context_info    param_name , 
-                 size_t             param_value_size , 
-                 void *             param_value, 
-                 size_t *           param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
-{
-
-	switch (param_name)
-	{
-	case CL_CONTEXT_DEVICES:
-		{
-			if (!param_value_size)
-			{
-				*param_value_size_ret = 13;
-			} else
-			{
-				const char* testName = "MiniCL_Test.";
-				sprintf((char*)param_value,"%s",testName);
-			}
-			break;
-		};
-	default:
-		{
-			printf("unsupported\n");
-		}
-	}
-	
-	return 0;
-}
-
-CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
-                        cl_device_type          /* device_type */,
-                        void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
-                        void *                  /* user_data */,
-                        cl_int *                 errcode_ret ) CL_API_SUFFIX__VERSION_1_0
-{
-	int maxNumOutstandingTasks = 4;
-//	int maxNumOutstandingTasks = 2;
-//	int maxNumOutstandingTasks = 1;
-	gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
-	const int maxNumOfThreadSupports = 8;
-	static int sUniqueThreadSupportIndex = 0;
-	static char* sUniqueThreadSupportName[maxNumOfThreadSupports] = 
-	{
-		"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" 
-	};
-
-#ifdef DEBUG_MINICL_KERNELS
-	SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
-	SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
-#else
-
-#if _WIN32
-	btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
-	Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
-//								"MiniCL",
-								sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
-								processMiniCLTask, //processCollisionTask,
-								createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
-								maxNumOutstandingTasks));
-#else
-	///todo: add posix thread support for other platforms
-	SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
-	SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
-#endif
-
-#endif //DEBUG_MINICL_KERNELS
-	
-	
-	MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
-
-	*errcode_ret = 0;
-	return (cl_context)scheduler;
-}
-
-CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context  context ) CL_API_SUFFIX__VERSION_1_0
-{
-
-	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
-	
-	btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
-	delete scheduler;
-	delete threadSupport;
-	
-	return 0;
-}
-extern CL_API_ENTRY cl_int CL_API_CALL
-clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
-{
-	return CL_SUCCESS;
-}
-
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelWorkGroupInfo(cl_kernel                   kernel ,
-                         cl_device_id               /* device */,
-                         cl_kernel_work_group_info  wgi/* param_name */,
-                         size_t   sz                  /* param_value_size */,
-                         void *     ptr                /* param_value */,
-                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
-{
-	if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
-	 &&(sz == sizeof(int))
-	 &&(ptr != NULL))
-	{
-		MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
-		MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
-		*((int*)ptr) = scheduler->getMaxNumOutstandingTasks();
-		return CL_SUCCESS;
-	}
-	else
-	{
-		return CL_INVALID_VALUE;
-	}
-}
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
deleted file mode 100644
index babb1d24af5..00000000000
--- a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-
-#include "MiniCLTask.h"
-#include "BulletMultiThreaded/PlatformDefinitions.h"
-#include "BulletMultiThreaded/SpuFakeDma.h"
-#include "LinearMath/btMinMax.h"
-#include "MiniCLTask.h"
-#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
-
-
-#ifdef __SPU__
-#include <spu_printf.h>
-#else
-#include <stdio.h>
-#define spu_printf printf
-#endif
-
-int gMiniCLNumOutstandingTasks = 0;
-
-struct MiniCLTask_LocalStoreMemory
-{
-	
-};
-
-
-//-- MAIN METHOD
-void processMiniCLTask(void* userPtr, void* lsMemory)
-{
-	//	BT_PROFILE("processSampleTask");
-
-	MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory;
-
-	MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr;
-	MiniCLTaskDesc& taskDesc = *taskDescPtr;
-
-	for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
-	{
-		taskDesc.m_kernel->m_launcher(&taskDesc, i);
-	}
-
-//	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
-	
-}
-
-
-#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
-
-ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory	gLocalStoreMemory);
-
-void* createMiniCLLocalStoreMemory()
-{
-	return &gLocalStoreMemory;
-}
-#else
-void* createMiniCLLocalStoreMemory()
-{
-	return new MiniCLTask_LocalStoreMemory;
-};
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h b/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
deleted file mode 100644
index 7e78be0855e..00000000000
--- a/extern/bullet2/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef MINICL__TASK_H
-#define MINICL__TASK_H
-
-#include "BulletMultiThreaded/PlatformDefinitions.h"
-#include "LinearMath/btScalar.h"
-
-#include "LinearMath/btAlignedAllocator.h"
-
-
-// Size in bytes of one argument slot: the size of a pointer.
-#define MINICL_MAX_ARGLENGTH (sizeof(void*))
-// Number of entries in the m_argData / m_argSizes arrays, i.e. the most arguments a kernel can carry.
-#define MINI_CL_MAX_ARG 16
-// Capacity, in chars, of a kernel name buffer.
-#define MINI_CL_MAX_KERNEL_NAME 256
-
-struct MiniCLKernel;
-
-/// Describes one scheduled task: the kernel to run, the range of work-unit indices it covers,
-/// and the argument pointers handed to the kernel.
-ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
-{
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	/// Puts every member into a defined state (zero / null), so a descriptor that has not been
-	/// filled in yet never exposes indeterminate values.
-	MiniCLTaskDesc()
-		:m_taskId(0),
-		m_firstWorkUnit(0),
-		m_lastWorkUnit(0),
-		m_kernel(0)
-	{
-		for (int i=0;i<MINI_CL_MAX_ARG;i++)
-		{
-			m_argData[i]=0;
-			m_argSizes[i]=0;
-		}
-	}
-
-	/// index of the scheduler slot this descriptor belongs to
-	uint32_t		m_taskId;
-
-	/// work units [m_firstWorkUnit, m_lastWorkUnit) are executed for this task
-	uint32_t		m_firstWorkUnit;
-	uint32_t		m_lastWorkUnit;
-
-	/// kernel whose launcher is called for every work unit
-	MiniCLKernel*	m_kernel;
-
-	/// per-argument pointer and size; a size of 0 marks an argument that was not set
-	void*			m_argData[MINI_CL_MAX_ARG];
-	int				m_argSizes[MINI_CL_MAX_ARG];
-};
-
-// Global counter defined in MiniCLTask.cpp, where it starts at 0.
-extern "C" int gMiniCLNumOutstandingTasks;
-
-
-// Runs the kernel launcher for every work unit of the MiniCLTaskDesc that userPtr points to.
-void	processMiniCLTask(void* userPtr, void* lsMemory);
-// Returns a local store block (a static instance or a heap allocation, depending on the platform macros).
-void*	createMiniCLLocalStoreMemory();
-
-
-#endif //MINICL__TASK_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp
deleted file mode 100644
index 7adee88d245..00000000000
--- a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.cpp
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//#define __CELLOS_LV2__ 1
-#define __BT_SKIP_UINT64_H 1
-
-#define USE_SAMPLE_PROCESS 1
-#ifdef USE_SAMPLE_PROCESS
-
-
-#include "MiniCLTaskScheduler.h"
-#include <stdio.h>
-
-#ifdef __SPU__
-
-
-
-// SPU-side placeholder task entry point.
-// Both arguments are ignored; the only effect is a greeting on standard output.
-void	SampleThreadFunc(void* userPtr,void* lsMemory)
-{
-	printf("hello world\n");
-}
-
-
-// SPU-side placeholder: no local store memory is set up, a null pointer is handed back instead.
-void*	SamplelsMemoryFunc()
-{
-	return 0;
-}
-
-
-#else
-
-
-#include "BulletMultiThreaded/btThreadSupportInterface.h"
-
-//#	include "SPUAssert.h"
-#include <string.h>
-
-#include "MiniCL/cl_platform.h"
-
-extern "C" {
-	extern char SPU_SAMPLE_ELF_SYMBOL[];
-}
-
-
-/// Sizes the per-slot bookkeeping for maxNumOutstandingTasks tasks, marks every slot idle,
-/// empties the kernel list and starts the thread interface.
-/// m_initialized stays false until initialize() is called.
-MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface,  int maxNumOutstandingTasks)
-:m_threadInterface(threadInterface),
-m_maxNumOutstandingTasks(maxNumOutstandingTasks)
-{
-	m_initialized = false;
-	m_numBusyTasks = 0;
-	m_currentTask = 0;
-
-	m_kernels.resize(0);
-	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
-	m_taskBusy.resize(m_maxNumOutstandingTasks);
-
-	// no slot has a task in flight yet
-	int slot = m_maxNumOutstandingTasks;
-	while (slot-- > 0)
-	{
-		m_taskBusy[slot] = false;
-	}
-
-	m_threadInterface->startSPU();
-}
-
-/// Stops the thread interface. The interface object itself is not deleted here.
-MiniCLTaskScheduler::~MiniCLTaskScheduler()
-{
-	m_threadInterface->stopSPU();
-}
-
-
-
-/// Resets the bookkeeping: all slots idle, no busy tasks, slot 0 selected as the current task,
-/// and the scheduler flagged as initialized.
-void	MiniCLTaskScheduler::initialize()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("MiniCLTaskScheduler::initialize()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	m_currentTask = 0;
-	m_numBusyTasks = 0;
-	for (int slot = m_maxNumOutstandingTasks - 1; slot >= 0; --slot)
-	{
-		m_taskBusy[slot] = false;
-	}
-	m_initialized = true;
-}
-
-
-/// Issues one task that runs `kernel` over the work units [firstWorkUnit, lastWorkUnit).
-/// The descriptor of the current slot is filled in and sent to the thread interface. If that
-/// leaves every slot busy, the call blocks until one task reports completion. Finally the first
-/// idle slot becomes the current one for the next call.
-void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel)
-{
-
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d)\n", m_currentTask);
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	// the argument arrays of both kernel and descriptor hold MINI_CL_MAX_ARG entries
-	btAssert(kernel->m_numArgs <= MINI_CL_MAX_ARG);
-
-	m_taskBusy[m_currentTask] = true;
-	m_numBusyTasks++;
-
-	MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
-	{
-		// send task description in event message
-		taskDesc.m_firstWorkUnit = firstWorkUnit;
-		taskDesc.m_lastWorkUnit = lastWorkUnit;
-		taskDesc.m_kernel = kernel;
-		//some bookkeeping to recognize finished tasks
-		taskDesc.m_taskId = m_currentTask;
-		
-		// copy the kernel's arguments; an entry with size 0 keeps whatever pointer the slot held before
-		for (unsigned int i=0; i < kernel->m_numArgs; i++)
-		{
-			taskDesc.m_argSizes[i] = kernel->m_argSizes[i];
-			if (taskDesc.m_argSizes[i])
-			{
-				taskDesc.m_argData[i] = kernel->m_argData[i];
-			}
-		}
-	}
-
-
-	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
-
-	// if all tasks busy, wait for spu event to clear the task.
-	
-	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
-	{
-		// defined start values, in case no slot is busy or the interface leaves them untouched
-		unsigned int taskId = 0;
-		unsigned int outputSize = 0;
-
-		// pre-seed taskId with the first busy slot; waitForResponse reports the finished task through it
-		for (int i=0;i<m_maxNumOutstandingTasks;i++)
-		{
-			if (m_taskBusy[i])
-			{
-				taskId = i;
-				break;
-			}
-		}
-		m_threadInterface->waitForResponse(&taskId, &outputSize);
-
-		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
-
-		postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-
-	// find new task buffer
-	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
-	{
-		if (!m_taskBusy[i])
-		{
-			m_currentTask = i;
-			break;
-		}
-	}
-}
-
-
-///Optional PPU-side post processing for each task.
-///issueTask() and flush() call it with the task id and output size reported by waitForResponse;
-///the body is empty, so it currently does nothing.
-void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
-{
-
-}
-
-
-/// Blocks until every issued task has reported completion.
-/// Each response is passed to postProcess(), its slot is marked idle and the busy count drops by one.
-void MiniCLTaskScheduler::flush()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("\nMiniCLTaskScheduler::flush()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-	
-
-	// all tasks are issued, wait for all tasks to be complete
-	while(m_numBusyTasks > 0)
-	{
-		// defined start values, in case no slot is flagged busy or the interface leaves them untouched
-		unsigned int taskId = 0;
-		unsigned int outputSize = 0;
-
-		// pre-seed taskId with the first busy slot; waitForResponse reports the finished task through it
-		for (int i=0;i<m_maxNumOutstandingTasks;i++)
-		{
-			if (m_taskBusy[i])
-			{
-				taskId = i;
-				break;
-			}
-		}
-
-		m_threadInterface->waitForResponse(&taskId, &outputSize);
-
-		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
-
-		postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-
-
-}
-
-
-
-// Kernel entry point signatures, one per argument count from 0 to 16:
-// MiniCLKernelLauncherN takes N void* arguments followed by the int work-item id.
-typedef void (*MiniCLKernelLauncher0)(int);
-typedef void (*MiniCLKernelLauncher1)(void*, int);
-typedef void (*MiniCLKernelLauncher2)(void*, void*, int);
-typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
-
-
-// Trampoline for kernels without arguments: calls the kernel code with just the work-item id.
-// The callee has to be m_pCode (the kernel's code pointer), as in every other trampoline.
-// m_launcher is the trampoline itself, so casting and calling it would re-enter this function
-// through a mismatched signature.
-static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid)
-{
-	((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_pCode))(guid);
-}
-// Trampolines for kernels taking 1 to 16 arguments. kernelLauncherN casts the kernel's code
-// pointer to the N-argument entry point type and calls it with the first N argument pointers
-// of the task descriptor, followed by the work-item id.
-static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher1 entry = (MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], guid);
-}
-static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher2 entry = (MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], guid);
-}
-static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher3 entry = (MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], guid);
-}
-static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher4 entry = (MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], guid);
-}
-static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher5 entry = (MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], guid);
-}
-static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher6 entry = (MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], guid);
-}
-static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher7 entry = (MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], guid);
-}
-static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher8 entry = (MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], guid);
-}
-static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher9 entry = (MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], guid);
-}
-static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher10 entry = (MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9], guid);
-}
-static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher11 entry = (MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9],
-		arg[10], guid);
-}
-static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher12 entry = (MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9],
-		arg[10], arg[11], guid);
-}
-static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher13 entry = (MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9],
-		arg[10], arg[11], arg[12], guid);
-}
-static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher14 entry = (MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9],
-		arg[10], arg[11], arg[12], arg[13], guid);
-}
-static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher15 entry = (MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9],
-		arg[10], arg[11], arg[12], arg[13], arg[14], guid);
-}
-static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid)
-{
-	void** arg = taskDesc->m_argData;
-	MiniCLKernelLauncher16 entry = (MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode);
-	entry(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8], arg[9],
-		arg[10], arg[11], arg[12], arg[13], arg[14], arg[15], guid);
-}
-
-// Trampoline lookup table indexed by argument count: entry N is kernelLauncherN.
-// It has MINI_CL_MAX_ARG+1 entries because 0 arguments is a valid count as well.
-static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] = 
-{
-	kernelLauncher0,
-	kernelLauncher1,
-	kernelLauncher2,
-	kernelLauncher3,
-	kernelLauncher4,
-	kernelLauncher5,
-	kernelLauncher6,
-	kernelLauncher7,
-	kernelLauncher8,
-	kernelLauncher9,
-	kernelLauncher10,
-	kernelLauncher11,
-	kernelLauncher12,
-	kernelLauncher13,
-	kernelLauncher14,
-	kernelLauncher15,
-	kernelLauncher16
-};
-
-/// Points m_launcher at the trampoline in spLauncherList that matches the current m_numArgs.
-/// An argument count beyond MINI_CL_MAX_ARG has no trampoline: it asserts in debug builds and
-/// leaves m_launcher null instead of reading past the end of the table.
-void MiniCLKernel::updateLauncher()
-{
-	// m_numArgs is unsigned, so only the upper bound needs checking
-	btAssert(m_numArgs <= MINI_CL_MAX_ARG);
-	if (m_numArgs > MINI_CL_MAX_ARG)
-	{
-		m_launcher = 0;
-		return;
-	}
-	m_launcher = spLauncherList[m_numArgs];
-}
-
-// One registry entry: a kernel's code pointer together with the name it is looked up by.
-struct MiniCLKernelDescEntry
-{
-	void* pCode;
-	char* pName;
-};
-// File-local kernel registry: a fixed array of 256 entries, of which the first sNumKernelDesc are in use.
-static MiniCLKernelDescEntry spKernelDesc[256];
-static int sNumKernelDesc = 0;
-
-/// Registers a kernel's code pointer under pName in the file-local registry.
-/// A name that is already present is not added again (debug builds assert that the code pointer
-/// matches). Only the pName pointer is stored, not a copy of the string.
-/// A full registry rejects the entry: it asserts in debug builds and returns without writing.
-MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, char* pName)
-{
-	for(int i = 0; i < sNumKernelDesc; i++)
-	{
-		if(!strcmp(pName, spKernelDesc[i].pName))
-		{	// already registered
-			btAssert(spKernelDesc[i].pCode == pCode);
-			return; 
-		}
-	}
-
-	// never write past the end of the fixed-size registry
-	const int capacity = int(sizeof(spKernelDesc) / sizeof(spKernelDesc[0]));
-	if (sNumKernelDesc >= capacity)
-	{
-		btAssert(0);
-		return;
-	}
-
-	spKernelDesc[sNumKernelDesc].pCode = pCode;
-	spKernelDesc[sNumKernelDesc].pName = pName;
-	sNumKernelDesc++;
-}
-
-
-/// Adds this kernel to its scheduler, then resolves m_pCode by looking m_name up in the registry.
-/// Returns this when the name is found and NULL otherwise; the scheduler registration happens
-/// in both cases.
-MiniCLKernel* MiniCLKernel::registerSelf()
-{
-	m_scheduler->registerKernel(this);
-
-	MiniCLKernelDescEntry* entry = spKernelDesc;
-	MiniCLKernelDescEntry* const end = spKernelDesc + sNumKernelDesc;
-	for (; entry != end; ++entry)
-	{
-		if (strcmp(m_name, entry->pName) == 0)
-		{
-			m_pCode = entry->pCode;
-			return this;
-		}
-	}
-	return NULL;
-}
-
-#endif
-
-
-#endif //USE_SAMPLE_PROCESS
diff --git a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h b/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h
deleted file mode 100644
index 3061a713436..00000000000
--- a/extern/bullet2/BulletMultiThreaded/MiniCLTaskScheduler.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-
-#ifndef MINICL_TASK_SCHEDULER_H
-#define MINICL_TASK_SCHEDULER_H
-
-#include <assert.h>
-
-
-#include "BulletMultiThreaded/PlatformDefinitions.h"
-
-#include <stdlib.h>
-
-#include "LinearMath/btAlignedObjectArray.h"
-
-
-#include "MiniCLTask/MiniCLTask.h"
-
-//just add your commands here, try to keep them globally unique for debugging purposes
-#define CMD_SAMPLE_TASK_COMMAND 10
-
-struct MiniCLKernel;
-
-/// MiniCLTaskScheduler hands MiniCL kernel tasks to a btThreadSupportInterface.
-/// It owns a fixed number of task slots: issueTask() fills the current slot and sends it off,
-/// and blocks for a completed task when all slots are busy; flush() waits for everything outstanding.
-/// postProcess() is called for every completed task.
-class MiniCLTaskScheduler
-{
-	// track task buffers that are being used, and total busy tasks
-	btAlignedObjectArray<bool>	m_taskBusy;
-	// one task descriptor per slot, indexed like m_taskBusy
-	btAlignedObjectArray<MiniCLTaskDesc>	m_spuSampleTaskDesc;
-
-
-	// kernels added through registerKernel()
-	btAlignedObjectArray<const MiniCLKernel*>	m_kernels;
-
-
-	int   m_numBusyTasks;
-
-	// the current task and the current entry to insert a new work unit
-	int   m_currentTask;
-
-	// false after construction, set to true by initialize()
-	bool m_initialized;
-
-	void postProcess(int taskId, int outputSize);
-	
-	// not owned: the scheduler calls startSPU()/stopSPU() on it but does not delete it
-	class	btThreadSupportInterface*	m_threadInterface;
-
-	// number of task slots
-	int	m_maxNumOutstandingTasks;
-
-
-
-public:
-	MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface, int maxNumOutstandingTasks);
-	
-	~MiniCLTaskScheduler();
-	
-	///call initialize in the beginning of the frame, before the first issueTask
-	void initialize();
-
-	///run kernel over the work units [firstWorkUnit, lastWorkUnit); may block while all task slots are busy
-	void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel);
-
-	///call flush to wait until all issued tasks have finished
-	void flush();
-
-	class	btThreadSupportInterface*	getThreadSupportInterface()
-	{
-		return m_threadInterface;
-	}
-
-	// NOTE(review): declared here, but MiniCLTaskScheduler.cpp does not appear to define it - confirm before calling
-	int	findProgramCommandIdByName(const char* programName) const;
-
-	int getMaxNumOutstandingTasks() const
-	{
-		return m_maxNumOutstandingTasks;
-	}
-
-	///appends kernel to the scheduler's kernel list; no check for duplicates is made
-	void registerKernel(MiniCLKernel* kernel)
-	{
-		m_kernels.push_back(kernel);
-	}
-};
-
-// Signature of the trampolines that unpack a task descriptor's arguments and call the kernel code
-// for one work item (guid).
-typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid);
-
-// A kernel as seen by the scheduler: name, code pointer, argument list and matching trampoline.
-// The struct declares no constructor, so the members hold whatever the creator put there.
-struct	MiniCLKernel
-{
-	// scheduler that registerSelf() registers this kernel with
-	MiniCLTaskScheduler* m_scheduler;
-	
-//	int	m_kernelProgramCommandId;
-
-	// name used by registerSelf() to look up m_pCode
-	char	m_name[MINI_CL_MAX_KERNEL_NAME];
-	// number of valid entries in m_argData / m_argSizes
-	unsigned int	m_numArgs;
-	// trampoline selected by updateLauncher() according to m_numArgs
-	kernelLauncherCB	m_launcher;
-	// kernel entry point, filled in by registerSelf() when the name is found
-	void* m_pCode;
-	void updateLauncher();
-	MiniCLKernel* registerSelf();
-
-	void*	m_argData[MINI_CL_MAX_ARG];
-	int				m_argSizes[MINI_CL_MAX_ARG];
-};
-
-
-#if defined(USE_LIBSPE2) && defined(__SPU__)
-////////////////////MAIN/////////////////////////////
-#include "../SpuLibspe2Support.h"
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-#include <SpuFakeDma.h>
-
-void * SamplelsMemoryFunc();
-void SampleThreadFunc(void* userPtr,void* lsMemory);
-
-//#define DEBUG_LIBSPE2_MAINLOOP
-
-// SPU-side main loop, only compiled when both USE_LIBSPE2 and __SPU__ are defined.
-// It publishes a free status (plus the local store pointer) to the address in argp, then handles
-// mailbox messages until a shutdown event arrives. After every message the status is written back as free.
-// NOTE(review): SpuSampleTaskDesc and its m_mainMemoryPtr member are not declared in this header or in
-// MiniCLTask.h; this looks copied from the SPU sample task code - confirm it still builds for SPU targets.
-int main(unsigned long long speid, addr64 argp, addr64 envp)
-{
-	printf("SPU is up \n");
-	
-	ATTRIBUTE_ALIGNED128(btSpuStatus status);
-	ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
-	unsigned int received_message = Spu_Mailbox_Event_Nothing;
-        bool shutdown = false;
-
-	// fetch the status block that argp refers to
-	cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-	cellDmaWaitTagStatusAll(DMA_MASK(3));
-
-	status.m_status = Spu_Status_Free;
-	status.m_lsMemory.p = SamplelsMemoryFunc();
-
-	// write the updated status block back
-	cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-	cellDmaWaitTagStatusAll(DMA_MASK(3));
-	
-	
-	while (!shutdown)
-	{
-		received_message = spu_read_in_mbox();
-		
-
-		
-		switch(received_message)
-		{
-		case Spu_Mailbox_Event_Shutdown:
-			shutdown = true;
-			break; 
-		case Spu_Mailbox_Event_Task:
-			// refresh the status
-#ifdef DEBUG_LIBSPE2_MAINLOOP
-			printf("SPU recieved Task \n");
-#endif //DEBUG_LIBSPE2_MAINLOOP
-			cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-			cellDmaWaitTagStatusAll(DMA_MASK(3));
-		
-			btAssert(status.m_status==Spu_Status_Occupied);
-			
-			// fetch the task descriptor the status block points at, then run the task
-			cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
-			cellDmaWaitTagStatusAll(DMA_MASK(3));
-			
-			SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
-			break;
-		case Spu_Mailbox_Event_Nothing:
-		default:
-			break;
-		}
-
-		// set to status free and wait for next task
-		status.m_status = Spu_Status_Free;
-		cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-		cellDmaWaitTagStatusAll(DMA_MASK(3));		
-				
-		
-  	}
-  	return 0;
-}
-//////////////////////////////////////////////////////
-#endif
-
-
-
-#endif // MINICL_TASK_SCHEDULER_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h b/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h
deleted file mode 100644
index 16362f4bce3..00000000000
--- a/extern/bullet2/BulletMultiThreaded/PlatformDefinitions.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef TYPE_DEFINITIONS_H
-#define TYPE_DEFINITIONS_H
-
-///This file provides some platform/compiler checks for common definitions
-
-#ifdef _WIN32
-
-// Windows variant of addr64: the same storage viewed either as an unsigned int or as a pointer.
-// NOTE(review): despite the name, 'u' is a plain unsigned int here - confirm no caller expects 64 bits.
-typedef union
-{
-  unsigned int u;
-  void *p;
-} addr64;
-
-#define USE_WIN32_THREADING 1
-
-		#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
-		#else
-		#endif //__MINGW32__
-
-		// Fixed-width integer typedefs for Windows builds, which do not include <stdint.h> in this branch.
-		typedef unsigned char     uint8_t;
-#ifndef __PHYSICS_COMMON_H__
-#ifndef __BT_SKIP_UINT64_H
-		// 'unsigned long' is only 32 bits wide on Windows (LLP64), so a genuine 64-bit type is required
-#if defined(_MSC_VER)
-		typedef unsigned __int64  uint64_t;
-#else
-		typedef unsigned long long uint64_t;
-#endif //_MSC_VER
-#endif //__BT_SKIP_UINT64_H
-		typedef unsigned int      uint32_t;
-#endif //__PHYSICS_COMMON_H__
-		typedef unsigned short    uint16_t;
-
-		#include <malloc.h>
-		// Stand-in for memalign: plain malloc, the requested alignment is ignored.
-		// No trailing ';' in the expansion, so the macro also works inside an expression.
-		#define memalign(alignment, size) malloc(size)
-			
-#include <string.h> //memcpy
-
-		
-
-		#include <stdio.h>		
-		#define spu_printf printf
-		
-#else
-		#include <stdint.h>
-		#include <stdlib.h>
-		#include <string.h> //for memcpy
-
-#if defined	(__CELLOS_LV2__)
-	// Playstation 3 Cell SDK
-#include <spu_printf.h>
-		
-#else
-	// posix system
-
-#define USE_PTHREADS    (1)
-
-#ifdef USE_LIBSPE2
-#include <stdio.h>		
-#define spu_printf printf	
-#define DWORD unsigned int
-		
-			// libspe2 variant of addr64: one value readable as a 64-bit integer, as two
-			// unsigned ints, or as a pointer.
-			typedef union
-			{
-			  unsigned long long ull;
-			  unsigned int ui[2];
-			  void *p;
-			} addr64;
-		
-		
-#else
-
-#include <stdio.h>		
-#define spu_printf printf	
-
-#endif // USE_LIBSPE2
-	
-#endif	//__CELLOS_LV2__
-	
-#endif
-
-
-/* Included here because we need uint*_t typedefs */
-#include "PpuAddressSpace.h"
-
-#endif //TYPE_DEFINITIONS_H
-
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp
deleted file mode 100644
index 540f0dcf106..00000000000
--- a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include <stdio.h>
-#include "PosixThreadSupport.h"
-#ifdef USE_PTHREADS
-#include <errno.h>
-#include <unistd.h>
-
-#include "SpuCollisionTaskProcess.h"
-#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
-
-// Runs a pthread/semaphore call and prints a diagnostic (result and errno) when it returns non-zero.
-// The argument is evaluated exactly once: it is usually a call with side effects, which must not
-// be repeated just to print its result.
-// The do/while(0) wrapper makes the macro a single statement, safe in unbraced if/else.
-#define checkPThreadFunction(returnValue) \
-    do { \
-        const int pthreadCallResult = (returnValue); \
-        if(0 != pthreadCallResult) { \
-            printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, pthreadCallResult, errno); \
-        } \
-    } while(0)
-
-// The number of threads should be equal to the number of available cores
-// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
-
-// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
-// Setup and initialize SPU/CELL/Libspe2
-/// Creates the worker threads described by threadConstructionInfo right away (see startThreads).
-PosixThreadSupport::PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo)
-{
-	startThreads(threadConstructionInfo);
-}
-
-// cleanup/shutdown Libspe2
-/// Shuts the worker threads down by delegating to stopSPU().
-PosixThreadSupport::~PosixThreadSupport()
-{
-	stopSPU();
-}
-
-#if (defined (__APPLE__))
-#define NAMED_SEMAPHORES
-#endif
-
-// this semaphore will signal, if and how many threads are finished with their work
-static sem_t* mainSemaphore;
-
-/// Creates a semaphore with an initial count of 0.
-/// With NAMED_SEMAPHORES a named semaphore is opened under "/<baseName>-<pid>-<counter>";
-/// failure to open it is fatal. Otherwise an unnamed semaphore is allocated with new and
-/// initialised with sem_init. Release the result with destroySem().
-static sem_t* createSem(const char* baseName)
-{
-	static int semCount = 0;
-#ifdef NAMED_SEMAPHORES
-	/// Named semaphore begin
-	char name[32];
-	snprintf(name, 32, "/%s-%d-%4.4d", baseName, getpid(), semCount++); 
-	sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
-	if (tempSem == reinterpret_cast<sem_t *>(SEM_FAILED))
-	{
-		printf("Error creating semaphore \"%s\": %d\n", name, errno);
-		exit(-1);
-	}
-	// Drop the name immediately: the open handle stays valid until sem_close, and no named
-	// semaphore is left behind in the system once this process is gone.
-	checkPThreadFunction(sem_unlink(name));
-	/// Named semaphore end
-#else
-	sem_t* tempSem = new sem_t;
-	checkPThreadFunction(sem_init(tempSem, 0, 0));
-#endif
-	return tempSem;
-}
-
-/// Releases a semaphore obtained from createSem(): unnamed ones are destroyed and deleted,
-/// named ones are closed.
-static void destroySem(sem_t* semaphore)
-{
-#ifndef NAMED_SEMAPHORES
-	checkPThreadFunction(sem_destroy(semaphore));
-	delete semaphore;
-#else
-	checkPThreadFunction(sem_close(semaphore));
-#endif	
-}
-
-/// Worker thread body. argument is the thread's btSpuStatus record.
-/// The thread sleeps on its start semaphore. When woken with a user pointer set, it runs the user
-/// function, marks the record finished (m_status 2) and posts the main semaphore. When woken
-/// without a user pointer, it marks the record with m_status 3, posts the main semaphore and exits.
-static void *threadFunction(void *argument) 
-{
-
-	PosixThreadSupport::btSpuStatus* status = (PosixThreadSupport::btSpuStatus*)argument;
-
-	
-	while (1)
-	{
-		checkPThreadFunction(sem_wait(status->startSemaphore));
-		
-		void* userPtr = status->m_userPtr;
-
-		if (userPtr)
-		{
-			btAssert(status->m_status);
-			status->m_userThreadFunc(userPtr,status->m_lsMemory);
-			// update the usage counter before completion is signalled, so it is final
-			// by the time the waiting thread sees the task as finished
-			status->threadUsed++;
-			status->m_status = 2;
-			checkPThreadFunction(sem_post(mainSemaphore));
-		} else {
-			//exit Thread
-			status->m_status = 3;
-			checkPThreadFunction(sem_post(mainSemaphore));
-			printf("Thread with taskId %i exiting\n",status->m_taskId);
-			break;
-		}
-		
-	}
-
-	printf("Thread TERMINATED\n");
-	return 0;
-
-}
-
-/// Hands a task to the worker thread with index taskId.
-/// Only CMD_GATHER_AND_PROCESS_PAIRLIST is implemented: the thread's status record receives the
-/// command and the user pointer (uiArgument0), is marked busy (m_status 1), and the thread is
-/// woken through its start semaphore. Any other command asserts.
-void PosixThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
-{
-	switch (uiCommand)
-	{
-	case 	CMD_GATHER_AND_PROCESS_PAIRLIST:
-		{
-			// check the index before it is used; taskId is unsigned, so only the upper bound matters
-			btAssert(taskId < uint32_t(m_activeSpuStatus.size()));
-			btSpuStatus&	spuStatus = m_activeSpuStatus[taskId];
-
-			spuStatus.m_commandId = uiCommand;
-			spuStatus.m_status = 1;
-			spuStatus.m_userPtr = (void*)uiArgument0;
-
-			// fire event to start new task
-			checkPThreadFunction(sem_post(spuStatus.startSemaphore));
-			break;
-		}
-	default:
-		{
-			///not implemented
-			btAssert(0);
-		}
-
-	};
-
-
-}
-
-
-/// Blocks until a worker thread signals completion, then reports it.
-/// *puiArgument0 receives the task id of the first thread found in the finished state (m_status 2).
-/// *puiArgument1 receives that thread's status after it has been reset to idle, i.e. 0.
-/// If no finished thread is found, debug builds assert and both outputs are left untouched.
-void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
-{
-	btAssert(m_activeSpuStatus.size());
-
-	// wait for any of the threads to finish
-	checkPThreadFunction(sem_wait(mainSemaphore));
-
-	// look for a thread which has finished; the index is signed so that "none found" stays detectable
-	int last = -1;
-	const int numThreads = int(m_activeSpuStatus.size());
-	for(int t=0; t < numThreads; ++t) {
-		if(2 == m_activeSpuStatus[t].m_status) {
-			last = t;
-			break;
-		}
-	}
-
-	// must hold before the array is indexed
-	btAssert(last >= 0);
-	if (last < 0)
-	{
-		return;
-	}
-
-	btSpuStatus& spuStatus = m_activeSpuStatus[last];
-
-	btAssert(spuStatus.m_status > 1);
-	spuStatus.m_status = 0;
-
-	*puiArgument0 = spuStatus.m_taskId;
-	*puiArgument1 = spuStatus.m_status;
-}
-
-
-
-/// Creates the main semaphore and m_numThreads worker threads.
-/// Every thread gets its own status record (task id = index, idle status, local store from
-/// m_lsMemoryFunc, the user thread function) and its own start semaphore.
-void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstructionInfo)
-{
-	printf("%s creating %i threads.\n", __FUNCTION__, threadConstructionInfo.m_numThreads);
-	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
-
-	mainSemaphore = createSem("main");
-
-	for (int i=0;i < threadConstructionInfo.m_numThreads;i++)
-	{
-		printf("starting thread %d\n",i);
-
-		btSpuStatus&	spuStatus = m_activeSpuStatus[i];
-
-		// fill in the complete record first: the new thread receives a pointer to it and
-		// must never see a partially initialised state
-		spuStatus.m_userPtr=0;
-		spuStatus.m_taskId = i;
-		spuStatus.m_commandId = 0;
-		spuStatus.m_status = 0;
-		spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
-		spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
-		spuStatus.threadUsed = 0;
-		spuStatus.startSemaphore = createSem("threadLocal");
-
-		checkPThreadFunction(pthread_create(&spuStatus.thread, NULL, &threadFunction, (void*)&spuStatus));
-
-		printf("started thread %d \n",i);
-	}
-
-}
-
-/// Intentionally empty: the worker threads are already created by the constructor through startThreads().
-void PosixThreadSupport::startSPU()
-{
-}
-
-
-///tell the task scheduler we are done with the SPU tasks
-/// Cancels and joins every worker thread, then releases the semaphores and clears the status list.
-/// Calling it more than once is safe: later calls find no threads and no main semaphore.
-void PosixThreadSupport::stopSPU()
-{
-	for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) {
-		btSpuStatus&	spuStatus = m_activeSpuStatus[t];
-		printf("%s: Thread %i used: %ld\n", __FUNCTION__, int(t), spuStatus.threadUsed);
-
-		// stop and reap the worker first; its semaphore may only go away once the thread
-		// can no longer be blocked on it
-		checkPThreadFunction(pthread_cancel(spuStatus.thread));
-		checkPThreadFunction(pthread_join(spuStatus.thread, NULL));
-		destroySem(spuStatus.startSemaphore);
-	}
-
-	// the semaphore is a file-level static, so guard against releasing it twice
-	if (mainSemaphore)
-	{
-		destroySem(mainSemaphore);
-		mainSemaphore = 0;
-	}
-
-	m_activeSpuStatus.clear();
-}
-
-#endif // USE_PTHREADS
-
diff --git a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h b/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h
deleted file mode 100644
index 7cc49115b4b..00000000000
--- a/extern/bullet2/BulletMultiThreaded/PosixThreadSupport.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-#include "LinearMath/btScalar.h"
-#include "PlatformDefinitions.h"
-
-#ifdef USE_PTHREADS  //platform specific defines are defined in PlatformDefinitions.h
-#include <pthread.h>
-#include <semaphore.h>
-
-#ifndef POSIX_THREAD_SUPPORT_H
-#define POSIX_THREAD_SUPPORT_H
-
-#include "LinearMath/btAlignedObjectArray.h"
-
-#include "btThreadSupportInterface.h"
-
-
-typedef void (*PosixThreadFunc)(void* userPtr,void* lsMemory);
-typedef void* (*PosixlsMemorySetupFunc)();
-
-// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
-class PosixThreadSupport : public btThreadSupportInterface 
-{
-public:
-    typedef enum sStatus {
-        STATUS_BUSY,
-        STATUS_READY,
-        STATUS_FINISHED
-    } Status;
-
-	// placeholder, until libspe2 support is there
-	struct	btSpuStatus
-	{
-		uint32_t	m_taskId;
-		uint32_t	m_commandId;
-		uint32_t	m_status;
-
-		PosixThreadFunc	m_userThreadFunc;
-		void*	m_userPtr; //for taskDesc etc
-		void*	m_lsMemory; //initialized using PosixLocalStoreMemorySetupFunc
-
-                pthread_t thread;
-                sem_t* startSemaphore;
-
-        unsigned long threadUsed;
-	};
-private:
-
-	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus;
-public:
-	///Setup and initialize SPU/CELL/Libspe2
-
-	
-
-	struct	ThreadConstructionInfo
-	{
-		ThreadConstructionInfo(char* uniqueName,
-									PosixThreadFunc userThreadFunc,
-									PosixlsMemorySetupFunc	lsMemoryFunc,
-									int numThreads=1,
-									int threadStackSize=65535
-									)
-									:m_uniqueName(uniqueName),
-									m_userThreadFunc(userThreadFunc),
-									m_lsMemoryFunc(lsMemoryFunc),
-									m_numThreads(numThreads),
-									m_threadStackSize(threadStackSize)
-		{
-
-		}
-
-		char*					m_uniqueName;
-		PosixThreadFunc			m_userThreadFunc;
-		PosixlsMemorySetupFunc	m_lsMemoryFunc;
-		int						m_numThreads;
-		int						m_threadStackSize;
-
-	};
-
-	PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo);
-
-///cleanup/shutdown Libspe2
-	virtual	~PosixThreadSupport();
-
-	void	startThreads(ThreadConstructionInfo&	threadInfo);
-
-
-///send messages to SPUs
-	virtual	void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
-
-///check for messages from SPUs
-	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
-
-///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-	virtual	void startSPU();
-
-///tell the task scheduler we are done with the SPU tasks
-	virtual	void stopSPU();
-
-	virtual void setNumTasks(int numTasks) {}
-
-	virtual int getNumTasks() const
-	{
-		return m_activeSpuStatus.size();
-	}
-};
-
-#endif // POSIX_THREAD_SUPPORT_H
-
-#endif // USE_PTHREADS
diff --git a/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h b/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h
deleted file mode 100644
index f36fdfb3cd7..00000000000
--- a/extern/bullet2/BulletMultiThreaded/PpuAddressSpace.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __PPU_ADDRESS_SPACE_H
-#define __PPU_ADDRESS_SPACE_H
-
-
-#ifdef _WIN32
-//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
-#pragma warning (disable: 4311)
-#pragma warning (disable: 4312)
-#endif //_WIN32
-
-#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(USE_ADDR64)
-typedef uint64_t ppu_address_t;
-#else
-
-typedef uint32_t ppu_address_t;
-
-#endif
-
-#endif
-
diff --git a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp
deleted file mode 100644
index 4e9c822bbc0..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SequentialThreadSupport.h"
-
-
-#include "SpuCollisionTaskProcess.h"
-#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
-
-SequentialThreadSupport::SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo)
-{
-	startThreads(threadConstructionInfo);
-}
-
-///cleanup/shutdown Libspe2
-SequentialThreadSupport::~SequentialThreadSupport()
-{
-	stopSPU();
-}
-
-#include <stdio.h>
-
-///send messages to SPUs
-void SequentialThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
-{
-	switch (uiCommand)
-	{
-	case 	CMD_GATHER_AND_PROCESS_PAIRLIST:
-		{
-			btSpuStatus&	spuStatus = m_activeSpuStatus[0];
-			spuStatus.m_userPtr=(void*)uiArgument0;
-			spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory);
-		}
-	break;
-	default:
-		{
-			///not implemented
-			btAssert(0 && "Not implemented");
-		}
-
-	};
-
-
-}
-
-///check for messages from SPUs
-void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
-{
-	btAssert(m_activeSpuStatus.size());
-	btSpuStatus& spuStatus = m_activeSpuStatus[0];
-	*puiArgument0 = spuStatus.m_taskId;
-	*puiArgument1 = spuStatus.m_status;
-}
-
-void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& threadConstructionInfo)
-{
-	m_activeSpuStatus.resize(1);
-	printf("STS: Not starting any threads\n");
-	btSpuStatus& spuStatus = m_activeSpuStatus[0];
-	spuStatus.m_userPtr = 0;
-	spuStatus.m_taskId = 0;
-	spuStatus.m_commandId = 0;
-	spuStatus.m_status = 0;
-	spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
-	spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
-	printf("STS: Created local store at %p for task %s\n", spuStatus.m_lsMemory, threadConstructionInfo.m_uniqueName);
-}
-
-void SequentialThreadSupport::startSPU()
-{
-}
-
-void SequentialThreadSupport::stopSPU()
-{
-	m_activeSpuStatus.clear();
-}
-
-void SequentialThreadSupport::setNumTasks(int numTasks)
-{
-	printf("SequentialThreadSupport::setNumTasks(%d) is not implemented and has no effect\n",numTasks);
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h b/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h
deleted file mode 100644
index 4256ebd2aa9..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SequentialThreadSupport.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "LinearMath/btScalar.h"
-#include "PlatformDefinitions.h"
-
-
-#ifndef SEQUENTIAL_THREAD_SUPPORT_H
-#define SEQUENTIAL_THREAD_SUPPORT_H
-
-#include "LinearMath/btAlignedObjectArray.h"
-
-#include "btThreadSupportInterface.h"
-
-typedef void (*SequentialThreadFunc)(void* userPtr,void* lsMemory);
-typedef void* (*SequentiallsMemorySetupFunc)();
-
-
-
-///The SequentialThreadSupport is a portable non-parallel implementation of the btThreadSupportInterface
-///This is useful for debugging and porting SPU Tasks to other platforms.
-class SequentialThreadSupport : public btThreadSupportInterface 
-{
-public:
-	struct	btSpuStatus
-	{
-		uint32_t	m_taskId;
-		uint32_t	m_commandId;
-		uint32_t	m_status;
-
-		SequentialThreadFunc	m_userThreadFunc;
-
-		void*	m_userPtr; //for taskDesc etc
-		void*	m_lsMemory; //initialized using SequentiallsMemorySetupFunc
-	};
-private:
-	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus;
-	btAlignedObjectArray<void*>			m_completeHandles;	
-public:
-	struct	SequentialThreadConstructionInfo
-	{
-		SequentialThreadConstructionInfo (char* uniqueName,
-									SequentialThreadFunc userThreadFunc,
-									SequentiallsMemorySetupFunc	lsMemoryFunc
-									)
-									:m_uniqueName(uniqueName),
-									m_userThreadFunc(userThreadFunc),
-									m_lsMemoryFunc(lsMemoryFunc)
-		{
-
-		}
-
-		char*						m_uniqueName;
-		SequentialThreadFunc		m_userThreadFunc;
-		SequentiallsMemorySetupFunc	m_lsMemoryFunc;
-	};
-
-	SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo);
-	virtual	~SequentialThreadSupport();
-	void	startThreads(SequentialThreadConstructionInfo&	threadInfo);
-///send messages to SPUs
-	virtual	void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
-///check for messages from SPUs
-	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
-///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-	virtual	void startSPU();
-///tell the task scheduler we are done with the SPU tasks
-	virtual	void stopSPU();
-
-	virtual void setNumTasks(int numTasks);
-
-	virtual int getNumTasks() const
-	{
-		return 1;
-	}
-
-};
-
-#endif //SEQUENTIAL_THREAD_SUPPORT_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp
deleted file mode 100644
index 182aa269478..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuCollisionObjectWrapper.h"
-#include "BulletCollision/CollisionShapes/btCollisionShape.h"
-
-SpuCollisionObjectWrapper::SpuCollisionObjectWrapper ()
-{
-}
-
-#ifndef __SPU__
-SpuCollisionObjectWrapper::SpuCollisionObjectWrapper (const btCollisionObject* collisionObject)
-{
-	m_shapeType = collisionObject->getCollisionShape()->getShapeType ();
-	m_collisionObjectPtr = (ppu_address_t)collisionObject;
-	m_margin = collisionObject->getCollisionShape()->getMargin ();
-}
-#endif
-
-int
-SpuCollisionObjectWrapper::getShapeType () const
-{
-	return m_shapeType;
-}
-
-float
-SpuCollisionObjectWrapper::getCollisionMargin () const
-{
-	return m_margin;
-}
-
-ppu_address_t
-SpuCollisionObjectWrapper::getCollisionObjectPtr () const
-{
-	return m_collisionObjectPtr;
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h b/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h
deleted file mode 100644
index 36ea49209e2..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuCollisionObjectWrapper.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef SPU_COLLISION_OBJECT_WRAPPER_H
-#define SPU_COLLISION_OBJECT_WRAPPER_H
-
-#include "PlatformDefinitions.h"
-#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
-
-ATTRIBUTE_ALIGNED16(class) SpuCollisionObjectWrapper
-{
-protected:
-	int m_shapeType;
-	float m_margin;
-	ppu_address_t m_collisionObjectPtr;
-
-public:
-	SpuCollisionObjectWrapper ();
-
-	SpuCollisionObjectWrapper (const btCollisionObject* collisionObject);
-
-	int           getShapeType () const;
-	float         getCollisionMargin () const;
-	ppu_address_t getCollisionObjectPtr () const;
-};
-
-
-#endif //SPU_COLLISION_OBJECT_WRAPPER_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
deleted file mode 100644
index 86eda8697d0..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-//#define DEBUG_SPU_TASK_SCHEDULING 1
-
-
-//class OptimizedBvhNode;
-
-#include "SpuCollisionTaskProcess.h"
-
-
-
-
-void	SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
-{
-	if (int(m_maxNumOutstandingTasks) != maxNumTasks)
-	{
-		m_maxNumOutstandingTasks = maxNumTasks;
-		m_taskBusy.resize(m_maxNumOutstandingTasks);
-		m_spuGatherTaskDesc.resize(m_maxNumOutstandingTasks);
-
-		for (int i = 0; i < m_taskBusy.size(); i++)
-		{
-			m_taskBusy[i] = false;
-		}
-
-		///re-allocate task memory buffers
-		if (m_workUnitTaskBuffers != 0)
-		{
-			btAlignedFree(m_workUnitTaskBuffers);
-		}
-		
-		m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128);
-					m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*6, 128);
-	}
-	
-}
-
-
-
-SpuCollisionTaskProcess::SpuCollisionTaskProcess(class	btThreadSupportInterface*	threadInterface, unsigned int	maxNumOutstandingTasks)
-:m_threadInterface(threadInterface),
-m_maxNumOutstandingTasks(0)
-{
-	m_workUnitTaskBuffers = (unsigned char *)0;
-	setNumTasks(maxNumOutstandingTasks);
-	m_numBusyTasks = 0;
-	m_currentTask = 0;
-	m_currentPage = 0;
-	m_currentPageEntry = 0;
-
-#ifdef DEBUG_SpuCollisionTaskProcess
-	m_initialized = false;
-#endif
-
-	m_threadInterface->startSPU();
-
-	//printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
-	printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", int(sizeof(SpuGatherAndProcessWorkUnitInput)));
-
-}
-
-SpuCollisionTaskProcess::~SpuCollisionTaskProcess()
-{
-	
-	if (m_workUnitTaskBuffers != 0)
-	{
-		btAlignedFree(m_workUnitTaskBuffers);
-		m_workUnitTaskBuffers = 0;
-	}
-	
-
-
-	m_threadInterface->stopSPU();
-	
-}
-
-
-
-void SpuCollisionTaskProcess::initialize2(bool useEpa)
-{
-
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("SpuCollisionTaskProcess::initialize()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-	
-	for (int i = 0; i < int (m_maxNumOutstandingTasks); i++)
-	{
-		m_taskBusy[i] = false;
-	}
-	m_numBusyTasks = 0;
-	m_currentTask = 0;
-	m_currentPage = 0;
-	m_currentPageEntry = 0;
-	m_useEpa = useEpa;
-
-#ifdef DEBUG_SpuCollisionTaskProcess
-	m_initialized = true;
-	btAssert(MIDPHASE_NUM_WORKUNITS_PER_TASK*sizeof(SpuGatherAndProcessWorkUnitInput) <= MIDPHASE_WORKUNIT_TASK_SIZE);
-#endif
-}
-
-
-void SpuCollisionTaskProcess::issueTask2()
-{
-
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("SpuCollisionTaskProcess::issueTask (m_currentTask= %d\n)", m_currentTask);
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	m_taskBusy[m_currentTask] = true;
-	m_numBusyTasks++;
-
-
-	SpuGatherAndProcessPairsTaskDesc& taskDesc = m_spuGatherTaskDesc[m_currentTask];
-	taskDesc.m_useEpa = m_useEpa;
-
-	{
-		// send task description in event message
-		// no error checking here...
-		// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
-	
-		taskDesc.m_inPairPtr = reinterpret_cast<uint64_t>(MIDPHASE_TASK_PTR(m_currentTask));
-	
-		taskDesc.taskId = m_currentTask;
-		taskDesc.numPages = m_currentPage+1;
-		taskDesc.numOnLastPage = m_currentPageEntry;
-	}
-
-
-
-	m_threadInterface->sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc,m_currentTask);
-
-	// if all tasks busy, wait for spu event to clear the task.
-	
-
-	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
-	{
-		unsigned int taskId;
-		unsigned int outputSize;
-
-		
-		for (int i=0;i<int (m_maxNumOutstandingTasks);i++)
-		  {
-			  if (m_taskBusy[i])
-			  {
-				  taskId = i;
-				  break;
-			  }
-		  }
-
-	  btAssert(taskId>=0);
-
-	  
-		m_threadInterface->waitForResponse(&taskId, &outputSize);
-
-//		printf("issueTask taskId %d completed, numBusy=%d\n",taskId,m_numBusyTasks);
-
-		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
-
-		//postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-	
-}
-
-void SpuCollisionTaskProcess::addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex)
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("#");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-	
-#ifdef DEBUG_SpuCollisionTaskProcess
-	btAssert(m_initialized);
-	btAssert(m_workUnitTaskBuffers);
-
-#endif
-
-	bool batch = true;
-
-	if (batch)
-	{
-		if (m_currentPageEntry == MIDPHASE_NUM_WORKUNITS_PER_PAGE)
-		{
-			if (m_currentPage == MIDPHASE_NUM_WORKUNIT_PAGES-1)
-			{
-				// task buffer is full, issue current task.
-				// if all task buffers busy, this waits until SPU is done.
-				issueTask2();
-
-				// find new task buffer
-				for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
-				{
-					if (!m_taskBusy[i])
-					{
-						m_currentTask = i;
-						//init the task data
-
-						break;
-					}
-				}
-
-				m_currentPage = 0;
-			}
-			else
-			{
-				m_currentPage++;
-			}
-
-			m_currentPageEntry = 0;
-		}
-	}
-
-	{
-
-
-
-		SpuGatherAndProcessWorkUnitInput &wuInput = 
-			*(reinterpret_cast<SpuGatherAndProcessWorkUnitInput*>
-			(MIDPHASE_ENTRY_PTR(m_currentTask, m_currentPage, m_currentPageEntry)));
-		
-		wuInput.m_pairArrayPtr = reinterpret_cast<uint64_t>(pairArrayPtr);
-		wuInput.m_startIndex = startIndex;
-		wuInput.m_endIndex = endIndex;
-
-		
-	
-		m_currentPageEntry++;
-
-		if (!batch)
-		{
-			issueTask2();
-
-			// find new task buffer
-			for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
-			{
-				if (!m_taskBusy[i])
-				{
-					m_currentTask = i;
-					//init the task data
-
-					break;
-				}
-			}
-
-			m_currentPage = 0;
-			m_currentPageEntry =0;
-		}
-	}
-}
-
-
-void 
-SpuCollisionTaskProcess::flush2()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("\nSpuCollisionTaskProcess::flush()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-	
-	// if there's a partially filled task buffer, submit that task
-	if (m_currentPage > 0 || m_currentPageEntry > 0)
-	{
-		issueTask2();
-	}
-
-
-	// all tasks are issued, wait for all tasks to be complete
-	while(m_numBusyTasks > 0)
-	{
-	  // Consolidating SPU code
-	  unsigned int taskId=-1;
-	  unsigned int outputSize;
-	  
-	  for (int i=0;i<int (m_maxNumOutstandingTasks);i++)
-	  {
-		  if (m_taskBusy[i])
-		  {
-			  taskId = i;
-			  break;
-		  }
-	  }
-
-	  btAssert(taskId>=0);
-
-	
-	  {
-			
-		// SPURS support.
-		  m_threadInterface->waitForResponse(&taskId, &outputSize);
-	  }
-//		 printf("flush2 taskId %d completed, numBusy =%d \n",taskId,m_numBusyTasks);
-		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
-
-		//postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-
-
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h b/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h
deleted file mode 100644
index 2614be6c479..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuCollisionTaskProcess.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef SPU_COLLISION_TASK_PROCESS_H
-#define SPU_COLLISION_TASK_PROCESS_H
-
-#include <assert.h>
-
-#include "LinearMath/btScalar.h"
-
-#include "PlatformDefinitions.h"
-#include "LinearMath/btAlignedObjectArray.h"
-#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" // for definitions processCollisionTask and createCollisionLocalStoreMemory
-
-#include "btThreadSupportInterface.h"
-
-
-//#include "SPUAssert.h"
-#include <string.h>
-
-
-#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
-#include "BulletCollision/CollisionShapes/btCollisionShape.h"
-#include "BulletCollision/CollisionShapes/btConvexShape.h"
-
-#include "LinearMath/btAlignedAllocator.h"
-
-#include <stdio.h>
-
-
-#define DEBUG_SpuCollisionTaskProcess 1
-
-
-#define CMD_GATHER_AND_PROCESS_PAIRLIST	1
-
-class btCollisionObject;
-class btPersistentManifold;
-class btDispatcher;
-
-
-/////Task Description for SPU collision detection
-//struct SpuGatherAndProcessPairsTaskDesc
-//{
-//	uint64_t	inPtr;//m_pairArrayPtr;
-//	//mutex variable
-//	uint32_t	m_someMutexVariableInMainMemory;
-//
-//	uint64_t	m_dispatcher;
-//
-//	uint32_t	numOnLastPage;
-//
-//	uint16_t numPages;
-//	uint16_t taskId;
-//
-//	struct	CollisionTask_LocalStoreMemory*	m_lsMemory; 
-//}
-//
-//#if  defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
-//__attribute__ ((aligned (16)))
-//#endif
-//;
-
-
-///MidphaseWorkUnitInput stores individual primitive versus mesh collision detection input, to be processed by the SPU.
-ATTRIBUTE_ALIGNED16(struct) SpuGatherAndProcessWorkUnitInput
-{
-	uint64_t m_pairArrayPtr;
-	int		m_startIndex;
-	int		m_endIndex;
-};
-
-
-
-
-/// SpuCollisionTaskProcess handles SPU processing of collision pairs.
-/// Maintains a set of task buffers.
-/// When the task is full, the task is issued for SPUs to process.  Contact output goes into btPersistentManifold
-/// associated with each task.
-/// When PPU issues a task, it will look for completed task buffers
-/// PPU will do postprocessing, dependent on workunit output (not likely)
-class SpuCollisionTaskProcess
-{
-
-  unsigned char  *m_workUnitTaskBuffers;
-
-
-	// track task buffers that are being used, and total busy tasks
-	btAlignedObjectArray<bool>	m_taskBusy;
-	btAlignedObjectArray<SpuGatherAndProcessPairsTaskDesc>	m_spuGatherTaskDesc;
-
-	class	btThreadSupportInterface*	m_threadInterface;
-
-	unsigned int	m_maxNumOutstandingTasks;
-
-	unsigned int   m_numBusyTasks;
-
-	// the current task and the current entry to insert a new work unit
-	unsigned int   m_currentTask;
-	unsigned int   m_currentPage;
-	unsigned int   m_currentPageEntry;
-
-	bool m_useEpa;
-
-#ifdef DEBUG_SpuCollisionTaskProcess
-	bool m_initialized;
-#endif
-	void issueTask2();
-	//void postProcess(unsigned int taskId, int outputSize);
-
-public:
-	SpuCollisionTaskProcess(btThreadSupportInterface*	threadInterface, unsigned int maxNumOutstandingTasks);
-	
-	~SpuCollisionTaskProcess();
-	
-	///call initialize in the beginning of the frame, before addCollisionPairToTask
-	void initialize2(bool useEpa = false);
-
-	///batch up additional work to a current task for SPU processing. When batch is full, it issues the task.
-	void addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex);
-
-	///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
-	void flush2();
-
-	/// set the maximum number of SPU tasks allocated
-	void	setNumTasks(int maxNumTasks);
-
-	int		getNumTasks() const
-	{
-		return m_maxNumOutstandingTasks;
-	}
-};
-
-
-
-#define MIDPHASE_TASK_PTR(task) (&m_workUnitTaskBuffers[0] + MIDPHASE_WORKUNIT_TASK_SIZE*task)
-#define MIDPHASE_ENTRY_PTR(task,page,entry) (MIDPHASE_TASK_PTR(task) + MIDPHASE_WORKUNIT_PAGE_SIZE*page + sizeof(SpuGatherAndProcessWorkUnitInput)*entry)
-#define MIDPHASE_OUTPUT_PTR(task) (&m_contactOutputBuffers[0] + MIDPHASE_MAX_CONTACT_BUFFER_SIZE*task)
-#define MIDPHASE_TREENODES_PTR(task) (&m_complexShapeBuffers[0] + MIDPHASE_COMPLEX_SHAPE_BUFFER_SIZE*task)
-
-
-#define MIDPHASE_WORKUNIT_PAGE_SIZE (16)
-//#define MIDPHASE_WORKUNIT_PAGE_SIZE (128)
-
-#define MIDPHASE_NUM_WORKUNIT_PAGES 1
-#define MIDPHASE_WORKUNIT_TASK_SIZE (MIDPHASE_WORKUNIT_PAGE_SIZE*MIDPHASE_NUM_WORKUNIT_PAGES)
-#define MIDPHASE_NUM_WORKUNITS_PER_PAGE (MIDPHASE_WORKUNIT_PAGE_SIZE / sizeof(SpuGatherAndProcessWorkUnitInput))
-#define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES)
-
-
-#endif // SPU_COLLISION_TASK_PROCESS_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
deleted file mode 100644
index 286b63191ee..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuContactManifoldCollisionAlgorithm.h"
-#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
-#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
-#include "BulletCollision/CollisionShapes/btCollisionShape.h"
-#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
-
-
-
-
-void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
-{
-	btAssert(0);
-}
-
-btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
-{
-	btAssert(0);
-	return 1.f;
-}
-
-#ifndef __SPU__
-SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
-:btCollisionAlgorithm(ci)
-#ifdef USE_SEPDISTANCE_UTIL
-,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc())
-#endif //USE_SEPDISTANCE_UTIL
-{
-	m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
-	m_shapeType0 = body0->getCollisionShape()->getShapeType();
-	m_shapeType1 = body1->getCollisionShape()->getShapeType();
-	m_collisionMargin0 = body0->getCollisionShape()->getMargin();
-	m_collisionMargin1 = body1->getCollisionShape()->getMargin();
-	m_collisionObject0 = body0;
-	m_collisionObject1 = body1;
-
-	if (body0->getCollisionShape()->isPolyhedral())
-	{
-		btPolyhedralConvexShape* convex0 = (btPolyhedralConvexShape*)body0->getCollisionShape();
-		m_shapeDimensions0 = convex0->getImplicitShapeDimensions();
-	}
-	if (body1->getCollisionShape()->isPolyhedral())
-	{
-		btPolyhedralConvexShape* convex1 = (btPolyhedralConvexShape*)body1->getCollisionShape();
-		m_shapeDimensions1 = convex1->getImplicitShapeDimensions();
-	}
-}
-#endif //__SPU__
-
-
-SpuContactManifoldCollisionAlgorithm::~SpuContactManifoldCollisionAlgorithm()
-{
-	if (m_manifoldPtr)
-			m_dispatcher->releaseManifold(m_manifoldPtr);
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h b/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
deleted file mode 100644
index 151cb2c7966..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
-#define SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
-
-#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
-#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
-#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
-#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
-#include "LinearMath/btTransformUtil.h"
-
-class btPersistentManifold;
-
-//#define USE_SEPDISTANCE_UTIL 1
-
-/// SpuContactManifoldCollisionAlgorithm  provides contact manifold and should be processed on SPU.
-ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm
-{
-	btVector3	m_shapeDimensions0;
-	btVector3	m_shapeDimensions1;
-	btPersistentManifold*	m_manifoldPtr;
-	int		m_shapeType0;
-	int		m_shapeType1;
-	float	m_collisionMargin0;
-	float	m_collisionMargin1;
-
-	btCollisionObject*	m_collisionObject0;
-	btCollisionObject*	m_collisionObject1;
-	
-	
-
-	
-public:
-	
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
-
-	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
-
-	
-	SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
-#ifdef USE_SEPDISTANCE_UTIL
-	btConvexSeparatingDistanceUtil	m_sepDistance;
-#endif //USE_SEPDISTANCE_UTIL
-
-	virtual ~SpuContactManifoldCollisionAlgorithm();
-
-	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
-	{
-		if (m_manifoldPtr)
-			manifoldArray.push_back(m_manifoldPtr);
-	}
-
-	btPersistentManifold*	getContactManifoldPtr()
-	{
-		return m_manifoldPtr;
-	}
-
-	btCollisionObject*	getCollisionObject0()
-	{
-		return m_collisionObject0;
-	}
-	
-	btCollisionObject*	getCollisionObject1()
-	{
-		return m_collisionObject1;
-	}
-
-	int		getShapeType0() const
-	{
-		return m_shapeType0;
-	}
-
-	int		getShapeType1() const
-	{
-		return m_shapeType1;
-	}
-	float	getCollisionMargin0() const
-	{
-		return m_collisionMargin0;
-	}
-	float	getCollisionMargin1() const
-	{
-		return m_collisionMargin1;
-	}
-
-	const btVector3&	getShapeDimensions0() const
-	{
-		return m_shapeDimensions0;
-	}
-
-	const btVector3&	getShapeDimensions1() const
-	{
-		return m_shapeDimensions1;
-	}
-
-	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
-	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
-		{
-			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm));
-			return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1);
-		}
-	};
-
-};
-
-#endif //SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h b/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h
deleted file mode 100644
index a0695744bd5..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuDoubleBuffer.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef DOUBLE_BUFFER_H
-#define DOUBLE_BUFFER_H
-
-#include "SpuFakeDma.h"
-#include "LinearMath/btScalar.h"
-
-
-///DoubleBuffer
-template<class T, int size>
-class DoubleBuffer
-{
-#if defined(__SPU__) || defined(USE_LIBSPE2)
-	ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ;
-	ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ;
-#else
-	T m_buffer0[size];
-	T m_buffer1[size];
-#endif
-	
-	T *m_frontBuffer;
-	T *m_backBuffer;
-
-	unsigned int m_dmaTag;
-	bool m_dmaPending;
-public:
-	bool	isPending() const { return m_dmaPending;}
-	DoubleBuffer();
-
-	void init ();
-
-	// dma get and put commands
-	void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag);
-	void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag);
-
-	// gets pointer to a buffer
-	T *getFront();
-	T *getBack();
-
-	// if back buffer dma was started, wait for it to complete
-	// then move back to front and vice versa
-	T *swapBuffers();
-};
-
-template<class T, int size>
-DoubleBuffer<T,size>::DoubleBuffer()
-{
-	init ();
-}
-
-template<class T, int size>
-void DoubleBuffer<T,size>::init()
-{
-	this->m_dmaPending = false;
-	this->m_frontBuffer = &this->m_buffer0[0];
-	this->m_backBuffer = &this->m_buffer1[0];
-}
-
-template<class T, int size>
-void
-DoubleBuffer<T,size>::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag)
-{
-	m_dmaPending = true;
-	m_dmaTag = tag;
-	if (numBytes)
-	{
-		m_backBuffer = (T*)cellDmaLargeGetReadOnly(m_backBuffer, ea, numBytes, tag, 0, 0);
-	}
-}
-
-template<class T, int size>
-void
-DoubleBuffer<T,size>::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag)
-{
-	m_dmaPending = true;
-	m_dmaTag = tag;
-	cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0);
-}
-
-template<class T, int size>
-T *
-DoubleBuffer<T,size>::getFront()
-{
-	return m_frontBuffer;
-}
-
-template<class T, int size>
-T *
-DoubleBuffer<T,size>::getBack()
-{
-	return m_backBuffer;
-}
-
-template<class T, int size>
-T *
-DoubleBuffer<T,size>::swapBuffers()
-{
-	if (m_dmaPending)
-	{
-		cellDmaWaitTagStatusAll(1<<m_dmaTag);
-		m_dmaPending = false;
-	}
-
-	T *tmp = m_backBuffer;
-	m_backBuffer = m_frontBuffer;
-	m_frontBuffer = tmp;
-
-	return m_frontBuffer;
-}
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp b/extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp
deleted file mode 100644
index 62cef39612d..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuFakeDma.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuFakeDma.h"
-#include <LinearMath/btScalar.h> //for btAssert
-//Disabling memcpy sometimes helps debugging DMA
-
-#define USE_MEMCPY 1
-#ifdef USE_MEMCPY
-
-#endif
-
-
-void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
-{
-
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-	cellDmaLargeGet(ls,ea,size,tag,tid,rid);
-	return ls;
-#else
-	return (void*)(uint32_t)ea;
-#endif
-}
-
-void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
-{
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-	mfc_get(ls,ea,size,tag,0,0);
-	return ls;
-#else
-	return (void*)(uint32_t)ea;
-#endif
-}
-
-
-
-
-void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
-{
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-	cellDmaGet(ls,ea,size,tag,tid,rid);
-	return ls;
-#else
-	return (void*)(uint32_t)ea;
-#endif
-}
-
-
-///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
-int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
-{
-	
-	btAssert(size<32);
-	
-	ATTRIBUTE_ALIGNED16(char	tmpBuffer[32]);
-
-
-	char* localStore = (char*)ls;
-	uint32_t i;
-	
-
-	///make sure last 4 bits are the same, for cellDmaSmallGet
-	uint32_t last4BitsOffset = ea & 0x0f;
-	char* tmpTarget = tmpBuffer + last4BitsOffset;
-	
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-	
-	int remainingSize = size;
-
-//#define FORCE_cellDmaUnalignedGet 1
-#ifdef FORCE_cellDmaUnalignedGet
-	cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
-#else
-	char* remainingTmpTarget = tmpTarget;
-	uint64_t remainingEa = ea;
-
-	while (remainingSize)
-	{
-		switch (remainingSize)
-		{
-		case 1:
-		case 2:
-		case 4:
-		case 8:
-		case 16:
-			{
-				mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0);
-				remainingSize=0;
-				break;
-			}
-		default:
-			{
-				//spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize);
-				int actualSize = 0;
-
-				if (remainingSize > 16)
-					actualSize = 16;
-				else
-					if (remainingSize >8)
-						actualSize=8;
-					else
-						if (remainingSize >4)
-							actualSize=4;
-						else
-							if (remainingSize >2)
-								actualSize=2;
-				mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0);
-				remainingSize-=actualSize;
-				remainingTmpTarget+=actualSize;
-				remainingEa += actualSize;
-			}
-		}
-	}
-#endif//FORCE_cellDmaUnalignedGet
-
-#else
-	char* mainMem = (char*)ea;
-	//copy into final destination
-#ifdef USE_MEMCPY
-		
-		memcpy(tmpTarget,mainMem,size);
-#else
-		for ( i=0;i<size;i++)
-		{
-			tmpTarget[i] = mainMem[i];
-		}
-#endif //USE_MEMCPY
-
-#endif
-
-	cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-	//this is slowish, perhaps memcpy on SPU is smarter?
-	for (i=0; btLikely( i<size );i++)
-	{
-		localStore[i] = tmpTarget[i];
-	}
-
-	return 0;
-}
-
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-#else
-
-int	cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
-{
-	char* mainMem = (char*)ea;
-	char* localStore = (char*)ls;
-
-#ifdef USE_MEMCPY
-	memcpy(localStore,mainMem,size);
-#else
-	for (uint32_t i=0;i<size;i++)
-	{
-		localStore[i] = mainMem[i];
-	}
-#endif
-	return 0;
-}
-
-int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
-{
-	char* mainMem = (char*)ea;
-	char* localStore = (char*)ls;
-#ifdef USE_MEMCPY
-	memcpy(localStore,mainMem,size);
-#else
-	for (uint32_t i=0;i<size;i++)
-	{
-		localStore[i] = mainMem[i];
-	}	
-#endif //#ifdef USE_MEMCPY
-	return 0;
-}
-
-int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
-{
-	char* mainMem = (char*)ea;
-	const char* localStore = (const char*)ls;
-#ifdef USE_MEMCPY
-	memcpy(mainMem,localStore,size);
-#else
-	for (uint32_t i=0;i<size;i++)
-	{
-		mainMem[i] = localStore[i];
-	}	
-#endif //#ifdef USE_MEMCPY
-
-	return 0;
-}
-
-
-
-void	cellDmaWaitTagStatusAll(int ignore)
-{
-
-}
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuFakeDma.h b/extern/bullet2/BulletMultiThreaded/SpuFakeDma.h
deleted file mode 100644
index f5e49b7be14..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuFakeDma.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef FAKE_DMA_H
-#define FAKE_DMA_H
-
-
-#include "PlatformDefinitions.h"
-#include "LinearMath/btScalar.h"
-
-
-#ifdef __SPU__
-
-#ifndef USE_LIBSPE2
-
-#include <cell/dma.h>
-#include <stdint.h>
-
-#define DMA_TAG(xfer) (xfer + 1)
-#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
-
-#else // !USE_LIBSPE2
-
-#define DMA_TAG(xfer) (xfer + 1)
-#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
-		
-#include <spu_mfcio.h>		
-		
-#define DEBUG_DMA		
-#ifdef DEBUG_DMA
-#define dUASSERT(a,b) if (!(a)) { printf(b);}
-#define uintsize ppu_address_t
-		
-#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-															dUASSERT(size < 16384, "size too big: "); \
-															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
-															} \
-															mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-														dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-														dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-														dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-														dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-    													dUASSERT(size < 16384, "size too big: "); \
-														dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-    													printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
-														} \
-														mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-        													dUASSERT(size < 16384, "size too big: "); \
-															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-        													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-    														printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
-															} \
-															mfc_put(ls, ea, size, tag, tid, rid)
-#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-																dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-																dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-																dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-    															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-    															dUASSERT(size < 16384, "size too big: "); \
-    															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-    	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-    															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
-																} \
-																mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
-
-#else
-#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
-#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
-#endif // DEBUG_DMA
-
-		
-		
-		
-		
-		
-		
-		
-#endif // USE_LIBSPE2
-#else // !__SPU__
-//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc)
-//Potential to add networked simulation using this interface
-
-#define DMA_TAG(a) (a)
-#define DMA_MASK(a) (a)
-
-		/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
-		int	cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-		int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-		/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
-		int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-		/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
-		void	cellDmaWaitTagStatusAll(int ignore);
-
-
-#endif //__CELLOS_LV2__
-
-///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
-int	stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
-
-
-void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-
-
-#endif //FAKE_DMA_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
deleted file mode 100644
index ee0832f12e2..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuGatheringCollisionDispatcher.h"
-#include "SpuCollisionTaskProcess.h"
-
-
-#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
-#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h"
-#include "SpuContactManifoldCollisionAlgorithm.h"
-#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
-#include "BulletCollision/CollisionShapes/btCollisionShape.h"
-#include "LinearMath/btQuickprof.h"
-
-
-
-
-SpuGatheringCollisionDispatcher::SpuGatheringCollisionDispatcher(class	btThreadSupportInterface*	threadInterface, unsigned int	maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration)
-:btCollisionDispatcher(collisionConfiguration),
-m_spuCollisionTaskProcess(0),
-m_threadInterface(threadInterface),
-m_maxNumOutstandingTasks(maxNumOutstandingTasks)
-{
-	
-}
-
-
-bool	SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,int proxyType1)
-{
-	bool supported0 = (
-		(proxyType0 == BOX_SHAPE_PROXYTYPE) ||
-		(proxyType0 == TRIANGLE_SHAPE_PROXYTYPE) ||
-		(proxyType0 == SPHERE_SHAPE_PROXYTYPE) ||
-		(proxyType0 == CAPSULE_SHAPE_PROXYTYPE) ||
-		(proxyType0 == CYLINDER_SHAPE_PROXYTYPE) ||
-//		(proxyType0 == CONE_SHAPE_PROXYTYPE) ||
-		(proxyType0 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
-		(proxyType0 == CONVEX_HULL_SHAPE_PROXYTYPE)||
-		(proxyType0 == STATIC_PLANE_PROXYTYPE)||
-		(proxyType0 == COMPOUND_SHAPE_PROXYTYPE)
-		);
-
-	bool supported1 = (
-		(proxyType1 == BOX_SHAPE_PROXYTYPE) ||
-		(proxyType1 == TRIANGLE_SHAPE_PROXYTYPE) ||
-		(proxyType1 == SPHERE_SHAPE_PROXYTYPE) ||
-		(proxyType1 == CAPSULE_SHAPE_PROXYTYPE) ||
-		(proxyType1 == CYLINDER_SHAPE_PROXYTYPE) ||
-//		(proxyType1 == CONE_SHAPE_PROXYTYPE) ||
-		(proxyType1 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
-		(proxyType1 == CONVEX_HULL_SHAPE_PROXYTYPE) ||
-		(proxyType1 == STATIC_PLANE_PROXYTYPE) ||
-		(proxyType1 == COMPOUND_SHAPE_PROXYTYPE)
-		);
-
-	
-	return supported0 && supported1;
-}
-
-
-
-SpuGatheringCollisionDispatcher::~SpuGatheringCollisionDispatcher()
-{
-	if (m_spuCollisionTaskProcess)
-		delete m_spuCollisionTaskProcess;
-	
-}
-
-#include "stdio.h"
-
-
-
-///interface for iterating all overlapping collision pairs, no matter how those pairs are stored (array, set, map etc)
-///this is useful for the collision dispatcher.
-class btSpuCollisionPairCallback : public btOverlapCallback
-{
-	const btDispatcherInfo& m_dispatchInfo;
-	SpuGatheringCollisionDispatcher*	m_dispatcher;
-
-public:
-
-	btSpuCollisionPairCallback(const btDispatcherInfo& dispatchInfo, SpuGatheringCollisionDispatcher*	dispatcher)
-	:m_dispatchInfo(dispatchInfo),
-	m_dispatcher(dispatcher)
-	{
-	}
-
-	virtual bool	processOverlap(btBroadphasePair& collisionPair)
-	{
-
-
-		//PPU version
-		//(*m_dispatcher->getNearCallback())(collisionPair,*m_dispatcher,m_dispatchInfo);
-
-		//only support discrete collision detection for now, we could fallback on PPU/unoptimized version for TOI/CCD
-		btAssert(m_dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE);
-
-		//by default, Bullet will use this near callback
-		{
-			///userInfo is used to determine if the SPU has to handle this case or not (skip PPU tasks)
-			if (!collisionPair.m_internalTmpValue)
-			{
-				collisionPair.m_internalTmpValue = 1;
-			}
-			if (!collisionPair.m_algorithm)
-			{
-				btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
-				btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
-
-				btCollisionAlgorithmConstructionInfo ci;
-				ci.m_dispatcher1 = m_dispatcher;
-				ci.m_manifold = 0;
-
-				if (m_dispatcher->needsCollision(colObj0,colObj1))
-				{
-					int	proxyType0 = colObj0->getCollisionShape()->getShapeType();
-					int	proxyType1 = colObj1->getCollisionShape()->getShapeType();
-					if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1) 
-						&& (colObj0->getCollisionFlags() != btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) 
-						&& (colObj1->getCollisionFlags() != btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) 
-						)
-					{
-						int so = sizeof(SpuContactManifoldCollisionAlgorithm);
-#ifdef ALLOCATE_SEPARATELY
-						void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so);
-#else
-						void* mem = m_dispatcher->allocateCollisionAlgorithm(so);
-#endif
-						collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1);
-						collisionPair.m_internalTmpValue =  2;
-					} else
-					{
-						collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1);
-						collisionPair.m_internalTmpValue = 3;
-					}
-				} 
-			}
-		}
-		return false;
-	}
-};
-
-void	SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo, btDispatcher* dispatcher) 
-{
-
-	if (dispatchInfo.m_enableSPU)
-	{
-		m_maxNumOutstandingTasks = m_threadInterface->getNumTasks();
-
-		{
-			BT_PROFILE("processAllOverlappingPairs");
-
-			if (!m_spuCollisionTaskProcess)
-				m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
-		
-			m_spuCollisionTaskProcess->setNumTasks(m_maxNumOutstandingTasks);
-	//		printf("m_maxNumOutstandingTasks =%d\n",m_maxNumOutstandingTasks);
-
-			m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
-			
-		
-			///modified version of btCollisionDispatcher::dispatchAllCollisionPairs:
-			{
-				btSpuCollisionPairCallback	collisionCallback(dispatchInfo,this);
-
-				pairCache->processAllOverlappingPairs(&collisionCallback,dispatcher);
-			}
-		}
-
-		//send one big batch
-		int numTotalPairs = pairCache->getNumOverlappingPairs();
-		
-		btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
-		int i;
-		{
-			int pairRange =	SPU_BATCHSIZE_BROADPHASE_PAIRS;
-			if (numTotalPairs < (m_spuCollisionTaskProcess->getNumTasks()*SPU_BATCHSIZE_BROADPHASE_PAIRS))
-			{
-				pairRange = (numTotalPairs/m_spuCollisionTaskProcess->getNumTasks())+1;
-			}
-
-			BT_PROFILE("addWorkToTask");
-			for (i=0;i<numTotalPairs;)
-			{
-				//Performance Hint: tweak this number during benchmarking
-				
-				int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;
-				m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
-				i = endIndex;
-			}
-		}
-
-		{
-			BT_PROFILE("PPU fallback");
-			//handle PPU fallback pairs
-			for (i=0;i<numTotalPairs;i++)
-			{
-				btBroadphasePair& collisionPair = pairPtr[i];
-				if (collisionPair.m_internalTmpValue == 3)
-				{
-					if (collisionPair.m_algorithm)
-					{
-						btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
-						btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
-
-						if (dispatcher->needsCollision(colObj0,colObj1))
-						{
-							btManifoldResult contactPointResult(colObj0,colObj1);
-							
-							if (dispatchInfo.m_dispatchFunc == 		btDispatcherInfo::DISPATCH_DISCRETE)
-							{
-								//discrete collision detection query
-								collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
-							} else
-							{
-								//continuous collision detection query, time of impact (toi)
-								btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
-								if (dispatchInfo.m_timeOfImpact > toi)
-									dispatchInfo.m_timeOfImpact = toi;
-
-							}
-						}
-					}
-				}
-			}
-		}
-		{
-			BT_PROFILE("flush2");
-			//make sure all SPU work is done
-			m_spuCollisionTaskProcess->flush2();
-		}
-
-	} else
-	{
-		///PPU fallback
-		///!Need to make sure to clear all 'algorithms' when switching between SPU and PPU
-		btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher);
-	}
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h b/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
deleted file mode 100644
index 7d5be88d71d..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-#ifndef SPU_GATHERING_COLLISION__DISPATCHER_H
-#define SPU_GATHERING_COLLISION__DISPATCHER_H
-
-#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
-
-
-///Tuning value to optimized SPU utilization 
-///Too small value means Task overhead is large compared to computation (too fine granularity)
-///Too big value might render some SPUs are idle, while a few other SPUs are doing all work.
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
-#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 512
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024
-
-
-
-class SpuCollisionTaskProcess;
-
-///SpuGatheringCollisionDispatcher can use SPU to gather and calculate collision detection
-///Time of Impact, Closest Points and Penetration Depth.
-class SpuGatheringCollisionDispatcher : public btCollisionDispatcher
-{
-	
-	SpuCollisionTaskProcess*	m_spuCollisionTaskProcess;
-	
-protected:
-
-	class	btThreadSupportInterface*	m_threadInterface;
-
-	unsigned int	m_maxNumOutstandingTasks;
-	
-
-public:
-
-	//can be used by SPU collision algorithms	
-	SpuCollisionTaskProcess*	getSpuCollisionTaskProcess()
-	{
-			return m_spuCollisionTaskProcess;
-	}
-	
-	SpuGatheringCollisionDispatcher (class	btThreadSupportInterface*	threadInterface, unsigned int	maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration);
-	
-	virtual ~SpuGatheringCollisionDispatcher();
-
-	bool	supportsDispatchPairOnSpu(int proxyType0,int proxyType1);
-
-	virtual void	dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ;
-
-};
-
-
-
-#endif //SPU_GATHERING_COLLISION__DISPATCHER_H
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp
deleted file mode 100644
index a312450ed72..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.cpp
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifdef USE_LIBSPE2
-
-#include "SpuLibspe2Support.h"
-
-
-
-
-//SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
-///Setup and initialize SPU/CELL/Libspe2
-SpuLibspe2Support::SpuLibspe2Support(spe_program_handle_t *speprog, int numThreads)
-{
-	this->program = speprog;
-	this->numThreads =  ((numThreads <= spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1)) ? numThreads : spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1));
-}
-
-///cleanup/shutdown Libspe2
-SpuLibspe2Support::~SpuLibspe2Support()
-{
-	
-	stopSPU();
-}
-
-
-
-///send messages to SPUs
-void SpuLibspe2Support::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1)
-{
-	spe_context_ptr_t context;
-	
-	switch (uiCommand)
-	{
-	case CMD_SAMPLE_TASK_COMMAND:
-	{
-		//get taskdescription
-		SpuSampleTaskDesc* taskDesc = (SpuSampleTaskDesc*) uiArgument0;
-
-		btAssert(taskDesc->m_taskId<m_activeSpuStatus.size());
-
-		//get status of SPU on which task should run
-		btSpuStatus&	spuStatus = m_activeSpuStatus[taskDesc->m_taskId];
-
-		//set data for spuStatus
-		spuStatus.m_commandId = uiCommand;
-		spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
-		spuStatus.m_taskDesc.p = taskDesc; 
-		
-		//get context
-		context = data[taskDesc->m_taskId].context;
-		
-		
-		taskDesc->m_mainMemoryPtr = reinterpret_cast<uint64_t> (spuStatus.m_lsMemory.p);
-		
-
-		break;
-	}
-	case CMD_GATHER_AND_PROCESS_PAIRLIST:
-		{
-			//get taskdescription
-			SpuGatherAndProcessPairsTaskDesc* taskDesc = (SpuGatherAndProcessPairsTaskDesc*) uiArgument0;
-
-			btAssert(taskDesc->taskId<m_activeSpuStatus.size());
-
-			//get status of SPU on which task should run
-			btSpuStatus&	spuStatus = m_activeSpuStatus[taskDesc->taskId];
-
-			//set data for spuStatus
-			spuStatus.m_commandId = uiCommand;
-			spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
-			spuStatus.m_taskDesc.p = taskDesc; 
-			
-			//get context
-			context = data[taskDesc->taskId].context;
-			
-			
-			taskDesc->m_lsMemory = (CollisionTask_LocalStoreMemory*)spuStatus.m_lsMemory.p;
-			
-			break;
-		}
-	default:
-		{
-			///not implemented
-			btAssert(0);
-		}
-
-	};
-
-	
-	//write taskdescription in mailbox
-	unsigned int event = Spu_Mailbox_Event_Task;
-	spe_in_mbox_write(context, &event, 1, SPE_MBOX_ANY_NONBLOCKING);
-
-}
-
-///check for messages from SPUs
-void SpuLibspe2Support::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
-{
-	///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
-	
-	///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
-	
-	btAssert(m_activeSpuStatus.size());
-
-	
-	int last = -1;
-	
-	//find an active spu/thread
-	while(last < 0)
-	{
-		for (int i=0;i<m_activeSpuStatus.size();i++)
-		{
-			if ( m_activeSpuStatus[i].m_status == Spu_Status_Free)
-			{
-				last = i;
-				break;
-			}
-		}
-		if(last < 0)
-			sched_yield();
-	}
-
-
-
-	btSpuStatus& spuStatus = m_activeSpuStatus[last];
-
-	///need to find an active spu
-	btAssert(last>=0);
-
-	
-
-	*puiArgument0 = spuStatus.m_taskId;
-	*puiArgument1 = spuStatus.m_status;
-
-
-}
-
-
-void SpuLibspe2Support::startSPU()
-{
-	this->internal_startSPU();
-}
-
-
-
-///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-void SpuLibspe2Support::internal_startSPU()
-{
-	m_activeSpuStatus.resize(numThreads);
-	
-	
-	for (int i=0; i < numThreads; i++)
-	{
-		
-		if(data[i].context == NULL) 
-		{
-					
-			 /* Create context */
-			if ((data[i].context = spe_context_create(0, NULL)) == NULL)
-			{
-			      perror ("Failed creating context");
-		          exit(1);
-			}
-	
-			/* Load program into context */
-			if(spe_program_load(data[i].context, this->program))
-			{
-			      perror ("Failed loading program");
-		          exit(1);
-			}
-			
-			m_activeSpuStatus[i].m_status = Spu_Status_Startup; 
-			m_activeSpuStatus[i].m_taskId = i; 
-			m_activeSpuStatus[i].m_commandId = 0; 
-			m_activeSpuStatus[i].m_lsMemory.p = NULL; 
-			
-			
-			data[i].entry = SPE_DEFAULT_ENTRY;
-			data[i].flags = 0;
-			data[i].argp.p = &m_activeSpuStatus[i];
-			data[i].envp.p = NULL;
-			
-		    /* Create thread for each SPE context */
-			if (pthread_create(&data[i].pthread, NULL, &ppu_pthread_function, &(data[i]) ))
-			{
-			      perror ("Failed creating thread");
-		          exit(1);
-			}
-			/*
-			else
-			{
-				printf("started thread %d\n",i);
-			}*/
-		}		
-	}
-	
-	
-	for (int i=0; i < numThreads; i++)
-	{
-		if(data[i].context != NULL) 
-		{
-			while( m_activeSpuStatus[i].m_status == Spu_Status_Startup)
-			{
-				// wait for spu to set up
-				sched_yield();
-			}
-			printf("Spu %d is ready\n", i);
-		}
-	}
-}
-
-///tell the task scheduler we are done with the SPU tasks
-void SpuLibspe2Support::stopSPU()
-{
-	// wait for all threads to finish 
-	int i;
-	for ( i = 0; i < this->numThreads; i++ ) 
-	{ 
-		
-		unsigned int event = Spu_Mailbox_Event_Shutdown;
-		spe_context_ptr_t context = data[i].context;
-		spe_in_mbox_write(context, &event, 1, SPE_MBOX_ALL_BLOCKING);
-		pthread_join (data[i].pthread, NULL); 
-		
-	} 
-	// close SPE program 
-	spe_image_close(program); 
-	// destroy SPE contexts 
-	for ( i = 0; i < this->numThreads; i++ ) 
-	{ 
-		if(data[i].context != NULL)
-		{
-			spe_context_destroy (data[i].context);
-		}
-	} 
-	
-	m_activeSpuStatus.clear();
-	
-}
-
-
-
-#endif //USE_LIBSPE2
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h b/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h
deleted file mode 100644
index a6d6baca47b..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuLibspe2Support.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-#ifndef SPU_LIBSPE2_SUPPORT_H
-#define SPU_LIBSPE2_SUPPORT_H
-
-#include <LinearMath/btScalar.h> //for uint32_t etc.
-
-#ifdef USE_LIBSPE2
-
-#include <stdlib.h>
-#include <stdio.h>
-//#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
-#include "PlatformDefinitions.h"
-
-
-//extern struct SpuGatherAndProcessPairsTaskDesc;
-
-enum
-{
-	Spu_Mailbox_Event_Nothing = 0,
-	Spu_Mailbox_Event_Task = 1,
-	Spu_Mailbox_Event_Shutdown = 2,
-	
-	Spu_Mailbox_Event_ForceDword = 0xFFFFFFFF
-	
-};
-
-enum
-{
-	Spu_Status_Free = 0,
-	Spu_Status_Occupied = 1,
-	Spu_Status_Startup = 2,
-	
-	Spu_Status_ForceDword = 0xFFFFFFFF
-	
-};
-
-
-struct btSpuStatus
-{
-	uint32_t	m_taskId;
-	uint32_t	m_commandId;
-	uint32_t	m_status;
-
-	addr64 m_taskDesc;
-	addr64 m_lsMemory;
-	
-}
-__attribute__ ((aligned (128)))
-;
-
-
-
-#ifndef __SPU__
-
-#include "LinearMath/btAlignedObjectArray.h"
-#include "SpuCollisionTaskProcess.h"
-#include "SpuSampleTaskProcess.h"
-#include "btThreadSupportInterface.h"
-#include <libspe2.h>
-#include <pthread.h>
-#include <sched.h>
-
-#define MAX_SPUS 4 
-
-typedef struct ppu_pthread_data 
-{
-	spe_context_ptr_t context;
-	pthread_t pthread;
-	unsigned int entry;
-	unsigned int flags;
-	addr64 argp;
-	addr64 envp;
-	spe_stop_info_t stopinfo;
-} ppu_pthread_data_t;
-
-
-static void *ppu_pthread_function(void *arg)
-{
-    ppu_pthread_data_t * datap = (ppu_pthread_data_t *)arg;
-    /*
-    int rc;
-    do 
-    {*/
-        spe_context_run(datap->context, &datap->entry, datap->flags, datap->argp.p, datap->envp.p, &datap->stopinfo);
-        if (datap->stopinfo.stop_reason == SPE_EXIT) 
-        {
-           if (datap->stopinfo.result.spe_exit_code != 0) 
-           {
-             perror("FAILED: SPE returned a non-zero exit status: \n");
-             exit(1);
-           }
-         } 
-        else 
-         {
-           perror("FAILED: SPE abnormally terminated\n");
-           exit(1);
-         }
-        
-        
-    //} while (rc > 0); // loop until exit or error, and while any stop & signal
-    pthread_exit(NULL);
-}
-
-
-
-
-
-
-///SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
-class SpuLibspe2Support : public btThreadSupportInterface
-{
-
-	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus;
-	
-public:
-	//Setup and initialize SPU/CELL/Libspe2
-	SpuLibspe2Support(spe_program_handle_t *speprog,int numThreads);
-	
-	// SPE program handle ptr.
-	spe_program_handle_t *program;
-	
-	// SPE program data
-	ppu_pthread_data_t data[MAX_SPUS];
-	
-	//cleanup/shutdown Libspe2
-	~SpuLibspe2Support();
-
-	///send messages to SPUs
-	void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1=0);
-
-	//check for messages from SPUs
-	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
-
-	//start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-	virtual void startSPU();
-
-	//tell the task scheduler we are done with the SPU tasks
-	virtual void stopSPU();
-
-	virtual void setNumTasks(int numTasks)
-	{
-		//changing the number of tasks after initialization is not implemented (yet)
-	}
-
-private:
-	
-	///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-	void internal_startSPU();
-
-
-	
-	
-	int numThreads;
-
-};
-
-#endif // NOT __SPU__
-
-#endif //USE_LIBSPE2
-
-#endif //SPU_LIBSPE2_SUPPORT_H
-
-
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
deleted file mode 100644
index 9bc2ebf51ec..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-   Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef __BOX_H__
-#define __BOX_H__
-
-
-#ifndef PE_REF
-#define PE_REF(a) a&
-#endif
-
-#include <math.h>
-
-///only use a system-wide vectormath_aos.h on CELLOS_LV2 or if USE_SYSTEM_VECTORMATH
-#if defined(__CELLOS_LV2__) || defined (USE_SYSTEM_VECTORMATH)
-#include <vectormath_aos.h>
-#else
-#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
-#endif
-
-
-
-using namespace Vectormath::Aos;
-
-enum FeatureType { F, E, V };
-
-//----------------------------------------------------------------------------
-// Box
-//----------------------------------------------------------------------------
-///The Box is an internal class used by the boxBoxDistance calculation.
-class Box
-{
-public:
-	Vector3 half;
-
-	inline Box()
-	{}
-	inline Box(PE_REF(Vector3) half_);
-	inline Box(float hx, float hy, float hz);
-
-	inline void Set(PE_REF(Vector3) half_);
-	inline void Set(float hx, float hy, float hz);
-
-	inline Vector3 GetAABB(const Matrix3& rotation) const;
-};
-
-inline
-Box::Box(PE_REF(Vector3) half_)
-{
-	Set(half_);
-}
-
-inline
-Box::Box(float hx, float hy, float hz)
-{
-	Set(hx, hy, hz);
-}
-
-inline
-void
-Box::Set(PE_REF(Vector3) half_)
-{
-	half = half_;
-}
-
-inline
-void
-Box::Set(float hx, float hy, float hz)
-{
-	half = Vector3(hx, hy, hz);
-}
-
-inline
-Vector3
-Box::GetAABB(const Matrix3& rotation) const
-{
-	return absPerElem(rotation) * half;
-}
-
-//-------------------------------------------------------------------------------------------------
-// BoxPoint
-//-------------------------------------------------------------------------------------------------
-
-///The BoxPoint class is an internally used class to contain feature information for boxBoxDistance calculation.
-class BoxPoint
-{
-public:
-	BoxPoint() : localPoint(0.0f) {}
-
-	Point3      localPoint;
-	FeatureType featureType;
-	int         featureIdx;
-
-	inline void setVertexFeature(int plusX, int plusY, int plusZ);
-	inline void setEdgeFeature(int dim0, int plus0, int dim1, int plus1);
-	inline void setFaceFeature(int dim, int plus);
-
-	inline void getVertexFeature(int & plusX, int & plusY, int & plusZ) const;
-	inline void getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const;
-	inline void getFaceFeature(int & dim, int & plus) const;
-};
-
-inline
-void
-BoxPoint::setVertexFeature(int plusX, int plusY, int plusZ)
-{
-	featureType = V;
-	featureIdx = plusX << 2 | plusY << 1 | plusZ;
-}
-
-inline
-void
-BoxPoint::setEdgeFeature(int dim0, int plus0, int dim1, int plus1)
-{
-	featureType = E;
-
-	if (dim0 > dim1) {
-		featureIdx = plus1 << 5 | dim1 << 3 | plus0 << 2 | dim0;
-	} else {
-		featureIdx = plus0 << 5 | dim0 << 3 | plus1 << 2 | dim1;
-	}
-}
-
-inline
-void
-BoxPoint::setFaceFeature(int dim, int plus)
-{
-	featureType = F;
-	featureIdx = plus << 2 | dim;
-}
-
-inline
-void
-BoxPoint::getVertexFeature(int & plusX, int & plusY, int & plusZ) const
-{
-	plusX = featureIdx >> 2;
-	plusY = featureIdx >> 1 & 1;
-	plusZ = featureIdx & 1;
-}
-
-inline
-void
-BoxPoint::getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const
-{
-	plus0 = featureIdx >> 5;
-	dim0 = featureIdx >> 3 & 3;
-	plus1 = featureIdx >> 2 & 1;
-	dim1 = featureIdx & 3;
-}
-
-inline
-void
-BoxPoint::getFaceFeature(int & dim, int & plus) const
-{
-	plus = featureIdx >> 2;
-	dim = featureIdx & 3;
-}
-
-#endif /* __BOX_H__ */
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
deleted file mode 100644
index dfcd8426695..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-#include "SpuCollisionShapes.h"
-
-///not supported on IBM SDK, until we fix the alignment of btVector3
-#if defined (__CELLOS_LV2__) && defined (__SPU__)
-#include <spu_intrinsics.h>
-static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
-}
-#endif //__SPU__
-
-
-void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform)
-{
-	//calculate the aabb, given the types...
-	switch (shapeType)
-	{
-	case CYLINDER_SHAPE_PROXYTYPE:
-		/* fall through */
-	case BOX_SHAPE_PROXYTYPE:
-	{
-		btScalar margin=convexShape->getMarginNV();
-		btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
-		halfExtents += btVector3(margin,margin,margin);
-		const btTransform& t = xform;
-		btMatrix3x3 abs_b = t.getBasis().absolute();  
-		btVector3 center = t.getOrigin();
-		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
-		
-		aabbMin = center - extent;
-		aabbMax = center + extent;
-		break;
-	}
-	case CAPSULE_SHAPE_PROXYTYPE:
-	{
-		btScalar margin=convexShape->getMarginNV();
-		btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
-		//add the radius to y-axis to get full height
-		btScalar radius = halfExtents[0];
-		halfExtents[1] += radius;
-		halfExtents += btVector3(margin,margin,margin);
-#if 0
-		int capsuleUpAxis = convexShape->getUpAxis();
-		btScalar halfHeight = convexShape->getHalfHeight();
-		btScalar radius = convexShape->getRadius();
-		halfExtents[capsuleUpAxis] = radius + halfHeight;
-#endif
-		const btTransform& t = xform;
-		btMatrix3x3 abs_b = t.getBasis().absolute();  
-		btVector3 center = t.getOrigin();
-		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
-		
-		aabbMin = center - extent;
-		aabbMax = center + extent;
-		break;
-	}
-	case SPHERE_SHAPE_PROXYTYPE:
-	{
-		btScalar radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX();
-		btScalar margin = radius + convexShape->getMarginNV();
-		const btTransform& t = xform;
-		const btVector3& center = t.getOrigin();
-		btVector3 extent(margin,margin,margin);
-		aabbMin = center - extent;
-		aabbMax = center + extent;
-		break;
-	}
-	case CONVEX_HULL_SHAPE_PROXYTYPE:
-	{
-		ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
-		cellDmaGet(&convexHullShape0, convexShapePtr  , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-		btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0;
-		const btTransform& t = xform;
-		btScalar margin = convexShape->getMarginNV();
-		localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin);
-		//spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ());
-		//spu_printf("SPU convex aabbMax=%f,%f,%f=\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ());
-		break;
-	}
-	default:
-		{
-	//	spu_printf("SPU: unsupported shapetype %d in AABB calculation\n");
-		}
-	};
-}
-
-void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape)
-{
-	register int dmaSize;
-	register ppu_address_t	dmaPpuAddress2;
-
-	dmaSize = sizeof(btTriangleIndexVertexArray);
-	dmaPpuAddress2 = reinterpret_cast<ppu_address_t>(triMeshShape->getMeshInterface());
-	//	spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2);
-#ifdef __SPU__
-	cellDmaGet(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-	bvhMeshShape->gTriangleMeshInterfacePtr = &bvhMeshShape->gTriangleMeshInterfaceStorage;
-#else
-	bvhMeshShape->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-#endif
-
-	//cellDmaWaitTagStatusAll(DMA_MASK(1));
-	
-	///now DMA over the BVH
-	
-	dmaSize = sizeof(btOptimizedBvh);
-	dmaPpuAddress2 = reinterpret_cast<ppu_address_t>(triMeshShape->getOptimizedBvh());
-	//spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",dmaPpuAddress2);
-	cellDmaGet(&bvhMeshShape->gOptimizedBvh, dmaPpuAddress2  , dmaSize, DMA_TAG(2), 0, 0);
-	//cellDmaWaitTagStatusAll(DMA_MASK(2));
-	cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-}
-
-void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag)
-{		
-	cellDmaGet(IndexMesh, (ppu_address_t)&indexArray[index]  , sizeof(btIndexedMesh), DMA_TAG(dmaTag), 0, 0);
-	
-}
-
-void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag)
-{
-	cellDmaGet(subTreeHeaders, subTreePtr, batchSize * sizeof(btBvhSubtreeInfo), DMA_TAG(dmaTag), 0, 0);
-}
-
-void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray&	nodeArray, int dmaTag)
-{
-	cellDmaGet(nodes, reinterpret_cast<ppu_address_t>(&nodeArray[subtree.m_rootNodeIndex]) , subtree.m_subtreeSize* sizeof(btQuantizedBvhNode), DMA_TAG(2), 0, 0);
-}
-
-///getShapeTypeSize could easily be optimized, but it is not likely a bottleneck
-int		getShapeTypeSize(int shapeType)
-{
-
-
-	switch (shapeType)
-	{
-	case CYLINDER_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btCylinderShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-	case BOX_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btBoxShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-	case SPHERE_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btSphereShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-	case TRIANGLE_MESH_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btBvhTriangleMeshShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-	case CAPSULE_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btCapsuleShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-
-	case CONVEX_HULL_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btConvexHullShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-
-	case COMPOUND_SHAPE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btCompoundShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-	case STATIC_PLANE_PROXYTYPE:
-		{
-			int shapeSize = sizeof(btStaticPlaneShape);
-			btAssert(shapeSize < MAX_SHAPE_SIZE);
-			return shapeSize;
-		}
-
-	default:
-		btAssert(0);
-		//unsupported shapetype, please add here
-		return 0;
-	}
-}
-
-void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU)
-{
-	convexVertexData->gNumConvexPoints = convexShapeSPU->getNumPoints();
-	if (convexVertexData->gNumConvexPoints>MAX_NUM_SPU_CONVEX_POINTS)
-	{
-		btAssert(0);
-	//	spu_printf("SPU: Error: MAX_NUM_SPU_CONVEX_POINTS(%d) exceeded: %d\n",MAX_NUM_SPU_CONVEX_POINTS,convexVertexData->gNumConvexPoints);
-		return;
-	}
-			
-	register int dmaSize = convexVertexData->gNumConvexPoints*sizeof(btVector3);
-	ppu_address_t pointsPPU = (ppu_address_t) convexShapeSPU->getUnscaledPoints();
-	cellDmaGet(&convexVertexData->g_convexPointBuffer[0], pointsPPU  , dmaSize, DMA_TAG(2), 0, 0);
-}
-
-void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType)
-{
-	register int dmaSize = getShapeTypeSize(shapeType);
-	cellDmaGet(collisionShapeLocation, collisionShapePtr  , dmaSize, DMA_TAG(dmaTag), 0, 0);
-	//cellDmaGetReadOnly(collisionShapeLocation, collisionShapePtr  , dmaSize, DMA_TAG(dmaTag), 0, 0);
-	//cellDmaWaitTagStatusAll(DMA_MASK(dmaTag));
-}
-
-void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
-{
-	register int dmaSize;
-	register	ppu_address_t	dmaPpuAddress2;
-	int childShapeCount = spuCompoundShape->getNumChildShapes();
-	dmaSize = childShapeCount * sizeof(btCompoundShapeChild);
-	dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList();
-	cellDmaGet(&compoundShapeLocation->gSubshapes[0], dmaPpuAddress2, dmaSize, DMA_TAG(dmaTag), 0, 0);
-}
-
-void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
-{
-	int childShapeCount = spuCompoundShape->getNumChildShapes();
-	int i;
-	// DMA all the subshapes 
-	for ( i = 0; i < childShapeCount; ++i)
-	{
-		btCompoundShapeChild& childShape = compoundShapeLocation->gSubshapes[i];
-		dmaCollisionShape (&compoundShapeLocation->gSubshapeShape[i],(ppu_address_t)childShape.m_childShape, dmaTag, childShape.m_childShapeType);
-	}
-}
-
-
-void	spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex)
-{
-
-	int curIndex = startNodeIndex;
-	int walkIterations = 0;
-#ifdef BT_DEBUG
-	int subTreeSize = endNodeIndex - startNodeIndex;
-#endif
-
-	int escapeIndex;
-
-	unsigned int aabbOverlap, isLeafNode;
-
-	while (curIndex < endNodeIndex)
-	{
-		//catch bugs in tree data
-		btAssert (walkIterations < subTreeSize);
-
-		walkIterations++;
-		aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
-		isLeafNode = rootNode->isLeafNode();
-
-		if (isLeafNode && aabbOverlap)
-		{
-			//printf("overlap with node %d\n",rootNode->getTriangleIndex());
-			nodeCallback->processNode(0,rootNode->getTriangleIndex());
-			//			spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex());
-		} 
-
-		if (aabbOverlap || isLeafNode)
-		{
-			rootNode++;
-			curIndex++;
-		} else
-		{
-			escapeIndex = rootNode->getEscapeIndex();
-			rootNode += escapeIndex;
-			curIndex += escapeIndex;
-		}
-	}
-
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
deleted file mode 100644
index d369395e160..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-#ifndef __SPU_COLLISION_SHAPES_H
-#define __SPU_COLLISION_SHAPES_H
-
-#include "../SpuDoubleBuffer.h"
-
-#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
-#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
-#include "BulletCollision/CollisionShapes/btCylinderShape.h"
-#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
-
-#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
-#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
-#include "BulletCollision/CollisionShapes/btSphereShape.h"
-
-#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
-
-#include "BulletCollision/CollisionShapes/btConvexShape.h"
-#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
-#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
-#include "BulletCollision/CollisionShapes/btCompoundShape.h"
-
-#define MAX_NUM_SPU_CONVEX_POINTS 128
-
-///16-byte aligned record describing the vertex set of one convex shape.
-///g_convexPointBuffer provides in-place storage for up to MAX_NUM_SPU_CONVEX_POINTS vertices.
-///NOTE(review): gConvexPoints presumably points either at g_convexPointBuffer or at the
-///original vertex array - confirm against dmaConvexVertexData.
-ATTRIBUTE_ALIGNED16(struct)	SpuConvexPolyhedronVertexData
-{
-	void*	gSpuConvexShapePtr;
-	btVector3* gConvexPoints;
-	int gNumConvexPoints;
-	int unused;
-	ATTRIBUTE_ALIGNED16(btVector3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]);
-};
-
-#define MAX_SHAPE_SIZE 256
-
-///16-byte aligned raw byte storage (MAX_SHAPE_SIZE bytes) for one collision shape.
-///It is a plain char array, so no shape constructor or destructor runs for it.
-ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
-{
-	ATTRIBUTE_ALIGNED16(char collisionShape[MAX_SHAPE_SIZE]);
-};
-
-///Local storage for the children of one compound shape.
-///gSubshapes holds the child records and gSubshapeShape holds MAX_SHAPE_SIZE raw bytes per child;
-///dmaCompoundSubShapes fills gSubshapeShape[i] from the shape referenced by gSubshapes[i].
-///Both arrays have room for MAX_SPU_COMPOUND_SUBSHAPES entries only.
-ATTRIBUTE_ALIGNED16(struct) CompoundShape_LocalStoreMemory
-{
-	// Compound data
-#define MAX_SPU_COMPOUND_SUBSHAPES 16
-	ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]);
-	ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]);
-};
-
-///Local storage used while processing one BVH triangle mesh shape: the optimized BVH,
-///the mesh interface, a single indexed mesh part, and buffers for subtree headers and nodes.
-ATTRIBUTE_ALIGNED16(struct) bvhMeshShape_LocalStoreMemory
-{
-	//ATTRIBUTE_ALIGNED16(btOptimizedBvh	gOptimizedBvh);
-	///raw bytes (sizeof(btOptimizedBvh) plus 16 spare bytes) instead of a btOptimizedBvh member
-	ATTRIBUTE_ALIGNED16(char gOptimizedBvh[sizeof(btOptimizedBvh)+16]);
-	///returns the raw gOptimizedBvh storage reinterpreted as a btOptimizedBvh
-	btOptimizedBvh*	getOptimizedBvh()
-	{
-		return (btOptimizedBvh*) gOptimizedBvh;
-	}
-
-	ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray	gTriangleMeshInterfaceStorage);
-	///NOTE(review): presumably points at gTriangleMeshInterfaceStorage or at the original object - confirm against dmaBvhShapeData
-	btTriangleIndexVertexArray*	gTriangleMeshInterfacePtr;
-	///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment 
-	ATTRIBUTE_ALIGNED16(btIndexedMesh	gIndexMesh);
-	///capacity of gSubtreeHeaders
-	#define MAX_SPU_SUBTREE_HEADERS 32
-	//1024
-	ATTRIBUTE_ALIGNED16(btBvhSubtreeInfo	gSubtreeHeaders[MAX_SPU_SUBTREE_HEADERS]);
-	///node buffer sized to hold MAX_SUBTREE_SIZE_IN_BYTES worth of quantized nodes
-	ATTRIBUTE_ALIGNED16(btQuantizedBvhNode	gSubtreeNodes[MAX_SUBTREE_SIZE_IN_BYTES/sizeof(btQuantizedBvhNode)]);
-};
-
-
-void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform);
-void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape);
-void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag);
-void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag);
-void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray&	nodeArray, int dmaTag);
-
-int  getShapeTypeSize(int shapeType);
-void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU);
-void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType);
-void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
-void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
-
-
-#define USE_BRANCHFREE_TEST 1
-#ifdef USE_BRANCHFREE_TEST
-///Tests two quantized AABBs (three unsigned short components each) for overlap.
-///Returns non-zero when the boxes overlap on all three axes; boxes that merely touch count as overlapping.
-///Branch-free variant. None of the four arrays is modified, so all pointers are const,
-///which keeps the signature identical to the branching variant below.
-SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
-{
-#if defined(__CELLOS_LV2__) && defined (__SPU__)
-	//pair every min with the opposing max; any lane where min > max means the boxes are separated on that axis
-	vec_ushort8 vecMin = {aabbMin1[0],aabbMin2[0],aabbMin1[2],aabbMin2[2],aabbMin1[1],aabbMin2[1],0,0};
-	vec_ushort8 vecMax = {aabbMax2[0],aabbMax1[0],aabbMax2[2],aabbMax1[2],aabbMax2[1],aabbMax1[1],0,0};
-	vec_ushort8 isGt = spu_cmpgt(vecMin,vecMax);
-	return spu_extract(spu_gather(isGt),0)==0;
-
-#else
-	//bitwise & (not &&) so that all six comparisons are evaluated without branching
-	return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
-		& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
-		& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
-		1, 0);
-#endif
-}
-#else
-
-///Branching variant of the same overlap test: starts from "overlap" and clears the result
-///as soon as one axis is found to be separated.
-SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int*  aabbMax2)
-{
-	unsigned int overlap = 1;
-	overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap;
-	overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? 0 : overlap;
-	overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? 0 : overlap;
-	return overlap;
-}
-#endif
-
-void	spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex);
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
deleted file mode 100644
index 8e540d9297b..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuContactResult.h"
-
-//#define DEBUG_SPU_COLLISION_DETECTION 1
-
-//When collision debugging is enabled on a non-SPU build, route spu_printf to the C library printf.
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-#ifndef __SPU__
-#include <stdio.h>
-#define spu_printf printf
-#endif
-#endif //DEBUG_SPU_COLLISION_DETECTION
-
-///Creates a result object that is not yet bound to a manifold; call setContactInfo() before adding contact points.
-///All scalar, pointer and flag members start from a defined value so none is read uninitialized.
-SpuContactResult::SpuContactResult()
-	:	m_manifoldAddress(0),
-	m_spuManifold(NULL),
-	m_RequiresWriteBack(false),
-	m_combinedFriction(btScalar(0.)),
-	m_combinedRestitution(btScalar(0.)),
-	m_isSwapped(false)
-{
-	// m_rootWorldTransform0/1 keep their default-constructed state; setContactInfo() assigns them.
-}
-
- ///Swaps the manifold export double buffer one last time.
- ///NOTE(review): per the remark in writeDoubleBufferedManifold, swapBuffers performs the DMA wait;
- ///presumably this guarantees no transfer is still in flight at destruction - confirm against DoubleBuffer.
- SpuContactResult::~SpuContactResult()
-{
-	g_manifoldDmaExport.swapBuffers();
-}
-
- 	///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback;
-inline btScalar	calculateCombinedFriction(btScalar friction0,btScalar friction1)
-{
-	// The combined friction is the product of both coefficients, limited to [-10, 10].
-	const btScalar frictionLimit = btScalar(10.);
-	const btScalar product = friction0*friction1;
-
-	if (product < -frictionLimit)
-	{
-		return -frictionLimit;
-	}
-	if (product > frictionLimit)
-	{
-		return frictionLimit;
-	}
-	return product;
-}
-
-inline btScalar	calculateCombinedRestitution(btScalar restitution0,btScalar restitution1)
-{
-	// The combined restitution is the plain product of both coefficients.
-	const btScalar combined = restitution0*restitution1;
-	return combined;
-}
-
-
-
- void	SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, ppu_address_t	manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped)
- {
-	// Bind this result to one manifold: remember the local manifold and its original address ...
-	m_spuManifold = spuManifold;
-	m_manifoldAddress = manifoldAddress;
-
-	// ... the world transforms of both objects and the swap flag ...
-	m_rootWorldTransform0 = worldTrans0;
-	m_rootWorldTransform1 = worldTrans1;
-	m_isSwapped = isSwapped;
-
-	// ... and pre-compute the material values that every added contact point receives.
-	m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1);
-	m_combinedFriction = calculateCombinedFriction(friction0,friction1);
- }
-
- void SpuContactResult::setShapeIdentifiersA(int partId0,int index0)
- {
-	// No-op: partId0 and index0 are ignored by this implementation.
- }
-
- void SpuContactResult::setShapeIdentifiersB(int partId1,int index1)
- {
-	// No-op: partId1 and index1 are ignored by this implementation.
- }
-
-
-
- ///return true if it requires a dma transfer back
-bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
-								   const btVector3& pointInWorld,
-								   float depth,
-								   btPersistentManifold* manifoldPtr,
-								   btTransform& transA,
-								   btTransform& transB,
-									btScalar	combinedFriction,
-									btScalar	combinedRestitution,
-								   bool isSwapped)
-{
-	// Adds one contact point to manifoldPtr, or replaces the cached entry that matches it.
-	//  normalOnBInWorld / pointInWorld : contact normal and contact point on B, in world space
-	//  depth                           : contact distance; points beyond the manifold's
-	//                                    contact breaking threshold are rejected
-	//  transA / transB                 : transforms used to express the point in local space
-	//  combinedFriction / Restitution  : material values stored on the new point
-	//  isSwapped                       : when true the normal is negated before use
-	// Returns false when the point was rejected, true when the manifold was modified.
-	const btScalar contactThreshold = manifoldPtr->getContactBreakingThreshold();
-
-	//spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactTreshold,manifoldPtr);
-
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-	spu_printf("SPU: contactTreshold %f\n",contactThreshold);
-#endif //DEBUG_SPU_COLLISION_DETECTION
-	if (depth > contactThreshold)
-		return false;
-
-	// Only the sign of the normal depends on isSwapped; everything else is shared.
-	btVector3 normal = normalOnBInWorld;
-	if (isSwapped)
-	{
-		normal = normalOnBInWorld * -1;
-	}
-
-	const btVector3 pointA = pointInWorld + normal * depth;
-	const btVector3 localA = transA.invXform(pointA);
-	const btVector3 localB = transB.invXform(pointInWorld);
-
-	btManifoldPoint newPt(localA,localB,normal,depth);
-	newPt.m_positionWorldOnA = pointA;
-	newPt.m_positionWorldOnB = pointInWorld;
-
-	newPt.m_combinedFriction = combinedFriction;
-	newPt.m_combinedRestitution = combinedRestitution;
-
-	const int insertIndex = manifoldPtr->getCacheEntry(newPt);
-	if (insertIndex >= 0)
-	{
-		// we need to replace the current contact point, otherwise small errors will accumulate (spheres start rolling etc)
-		manifoldPtr->replaceContactPoint(newPt,insertIndex);
-	}
-	else
-	{
-		///@todo: SPU callbacks (gContactAddedCallback for custom materials), either immediate (local on the SPU), or deferred
-		manifoldPtr->addManifoldPoint(newPt);
-	}
-
-	// Both paths changed the manifold, so it has to be transferred back.
-	return true;
-}
-
-
-///Exports the local manifold lsManifold to the address mmManifold.
-///On SPU / libspe2 builds the manifold is copied into the front export buffer, the buffers are swapped,
-///and a DMA put of the back buffer to mmManifold is started with DMA_TAG(9).
-///On all other builds this function does nothing.
-void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold)
-{
-	///only write back the contact information on SPU. Other platforms avoid copying, and use the data in-place
-	///see SpuFakeDma.cpp 'cellDmaLargeGetReadOnly'
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-    memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold));
-
-    g_manifoldDmaExport.swapBuffers();
-    ppu_address_t mmAddr = (ppu_address_t)mmManifold;
-    g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9));
-	// Should there be any kind of wait here?  What if somebody tries to use this tag again?  What if we call this function again really soon?
-	//no, the swapBuffers does the wait
-#endif
-}
-
-void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
-{
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-	spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
-	spu_printf("*** normal = %f,%f,%f\n",normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
-	spu_printf("*** position = %f,%f,%f\n",pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
-#endif //DEBUG_SPU_COLLISION_DETECTION
-
-	// Rebuild both inputs from their x/y/z components before handing them on.
-	btVector3	worldNormal(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
-	btVector3	worldPoint(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
-
-	// Feed the point into the bound manifold using the state stored by setContactInfo().
-	const bool manifoldChanged = ManifoldResultAddContactPoint(worldNormal,
-		worldPoint,
-		depth,
-		m_spuManifold,
-		m_rootWorldTransform0,
-		m_rootWorldTransform1,
-		m_combinedFriction,
-		m_combinedRestitution,
-		m_isSwapped);
-
-	// The write-back request is sticky: once raised it stays raised until flush().
-	if (manifoldChanged)
-	{
-		m_RequiresWriteBack = true;
-	}
-}
-
-///Finishes work on the currently bound manifold.
-///If the manifold holds any contacts they are refreshed with the stored world transforms and a
-///write-back is forced; a pending write-back is then performed via writeDoubleBufferedManifold.
-///Afterwards the object is unbound (m_spuManifold = NULL) and the write-back flag is cleared.
-void SpuContactResult::flush()
-{
-
-	if (m_spuManifold && m_spuManifold->getNumContacts())
-	{
-		m_spuManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1);
-		m_RequiresWriteBack = true;
-	}
-
-
-	if (m_RequiresWriteBack)
-	{
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-		spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n");
-		spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts());
-		spu_printf("Manifold address: %llu\n", m_manifoldAddress);
-#endif //DEBUG_SPU_COLLISION_DETECTION
-	//	spu_printf("writeDoubleBufferedManifold\n");
-		writeDoubleBufferedManifold(m_spuManifold, (btPersistentManifold*)m_manifoldAddress);
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-		spu_printf("SPU: Finished (Put) DMA\n");
-#endif //DEBUG_SPU_COLLISION_DETECTION
-	}
-	m_spuManifold = NULL;
-	m_RequiresWriteBack = false;
-}
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
deleted file mode 100644
index 394f56dcbd1..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef SPU_CONTACT_RESULT2_H
-#define SPU_CONTACT_RESULT2_H
-
-
-#ifndef _WIN32
-#include <stdint.h>
-#endif
-
-
-
-#include "../SpuDoubleBuffer.h"
-
-
-#include "LinearMath/btTransform.h"
-
-
-#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
-#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
-
-class btCollisionShape;
-
-
-///Input record for processing one collision pair.
-///Members come in pairs: array index 0/1 or the suffix 0/1 selects the object of the pair.
-///NOTE(review): m_collisionShapes presumably holds the original (main memory) shape addresses and
-///m_spuCollisionShapes the locally usable shape pointers - confirm against the task code.
-struct SpuCollisionPairInput
-{
-	ppu_address_t m_collisionShapes[2];
-	btCollisionShape*	m_spuCollisionShapes[2];
-
-	ppu_address_t m_persistentManifoldPtr;
-	btVector3	m_primitiveDimensions0;
-	btVector3	m_primitiveDimensions1;
-	int		m_shapeType0;
-	int		m_shapeType1;	
-	float	m_collisionMargin0;
-	float	m_collisionMargin1;
-
-	btTransform	m_worldTransform0;
-	btTransform m_worldTransform1;
-	
-	bool	m_isSwapped;
-	bool    m_useEpa;
-};
-
-
-///ClosestPointInput extended with one SpuConvexPolyhedronVertexData pointer per shape of the pair.
-struct SpuClosestPointInput : public btDiscreteCollisionDetectorInterface::ClosestPointInput
-{
-	struct SpuConvexPolyhedronVertexData* m_convexVertexData[2];
-};
-
-///SpuContactResult collects contact points into a local btPersistentManifold and exports that
-///manifold through a double-buffered DMA transfer.
-///Usage: setContactInfo() binds a manifold, addContactPoint() feeds points into it, and flush()
-///writes the manifold back when a point was added/replaced or the manifold holds contacts.
-class SpuContactResult : public btDiscreteCollisionDetectorInterface::Result
-{
-	///world transforms of the two objects, as passed to setContactInfo()
-	btTransform		m_rootWorldTransform0;
-	btTransform		m_rootWorldTransform1;
-	///address the manifold is written back to in flush()
-	ppu_address_t	m_manifoldAddress;
-
-	///locally accessible manifold that receives the contact points; NULL while unbound
-	btPersistentManifold* m_spuManifold;
-	///set once the manifold was modified; cleared by flush()
-	bool m_RequiresWriteBack;
-	///material values stored on every added contact point
-	btScalar	m_combinedFriction;
-	btScalar	m_combinedRestitution;
-
-	///when true, the contact normal is negated before the point is stored
-	bool m_isSwapped;
-
-	///staging buffers used by writeDoubleBufferedManifold()
-	DoubleBuffer<btPersistentManifold, 1> g_manifoldDmaExport;
-
-	public:
-		SpuContactResult();
-		virtual ~SpuContactResult();
-
-		///returns the currently bound local manifold (NULL when unbound)
-		btPersistentManifold*	GetSpuManifold() const
-		{
-			return m_spuManifold;
-		}
-
-		///both shape identifier setters are implemented as no-ops
-		virtual void setShapeIdentifiersA(int partId0,int index0);
-		virtual void setShapeIdentifiersB(int partId1,int index1);
-
-		///binds a manifold and stores transforms, swap flag and the combined friction/restitution
-		void	setContactInfo(btPersistentManifold* spuManifold, ppu_address_t	manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped);
-
-
-		///copies lsManifold to the address mmManifold (SPU / libspe2 builds only)
-		void writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold);
-
-		///adds one world-space contact point to the bound manifold
-		virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth);
-
-		///refreshes and writes back the bound manifold if needed, then unbinds it
-		void flush();
-};
-
-
-
-#endif //SPU_CONTACT_RESULT2_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
deleted file mode 100644
index 449f19288c4..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
+++ /dev/null
@@ -1,51 +0,0 @@
-
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-#ifndef SPU_CONVEX_PENETRATION_DEPTH_H
-#define SPU_CONVEX_PENETRATION_DEPTH_H
-
-
-
-class btStackAlloc;
-class btIDebugDraw;
-//directory name is case-sensitive on some platforms: it is "NarrowPhaseCollision"
-#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
-
-#include "LinearMath/btTransform.h"
-
-
-///SpuConvexPenetrationDepthSolver provides an interface for penetration depth calculation
-///that works on raw shape pointers, shape types and margins instead of btConvexShape objects.
-///NOTE(review): SpuVoronoiSimplexSolver is not declared in this header; the including file
-///apparently has to make it visible first - confirm.
-class SpuConvexPenetrationDepthSolver : public btConvexPenetrationDepthSolver
-{
-public:
-
-	virtual ~SpuConvexPenetrationDepthSolver() {}
-
-	///Pure virtual penetration query for the shape pair (convexA, convexB).
-	///v, pa and pb are non-const references, i.e. outputs of the implementation.
-	virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
-	        void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
-            btTransform& transA,const btTransform& transB,
-			btVector3& v, btVector3& pa, btVector3& pb,
-			class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
-			struct SpuConvexPolyhedronVertexData* convexVertexDataA,
-			struct SpuConvexPolyhedronVertexData* convexVertexDataB
-			) const = 0;
-
-
-};
-
-
-
-#endif //SPU_CONVEX_PENETRATION_DEPTH_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
deleted file mode 100644
index c3dfaa793e3..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
+++ /dev/null
@@ -1,1381 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuGatheringCollisionTask.h"
-
-//#define DEBUG_SPU_COLLISION_DETECTION 1
-#include "../SpuDoubleBuffer.h"
-
-#include "../SpuCollisionTaskProcess.h"
-#include "../SpuGatheringCollisionDispatcher.h" //for SPU_BATCHSIZE_BROADPHASE_PAIRS
-
-#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
-#include "../SpuContactManifoldCollisionAlgorithm.h"
-#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
-#include "SpuContactResult.h"
-#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
-#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
-#include "BulletCollision/CollisionShapes/btSphereShape.h"
-#include "BulletCollision/CollisionShapes/btConvexPointCloudShape.h"
-
-#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
-
-#include "BulletCollision/CollisionShapes/btConvexShape.h"
-#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
-#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
-#include "BulletCollision/CollisionShapes/btCompoundShape.h"
-
-#include "SpuMinkowskiPenetrationDepthSolver.h"
-//#include "SpuEpaPenetrationDepthSolver.h"
-#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
-
-
-#include "boxBoxDistance.h"
-#include "BulletMultiThreaded/vectormath2bullet.h"
-#include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData
-#include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h"
-#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
-#include "BulletCollision/CollisionShapes/btTriangleShape.h"
-
-#ifdef __SPU__
-///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases
-#ifndef USE_LIBSPE2
-#define USE_SOFTWARE_CACHE 1
-#endif
-#endif //__SPU__
-
-int gSkippedCol = 0;
-int gProcessedCol = 0;
-
-////////////////////////////////////////////////
-/// software caching
-#if USE_SOFTWARE_CACHE
-#include <spu_intrinsics.h>
-#include <sys/spu_thread.h>
-#include <sys/spu_event.h>
-#include <stdint.h>
-#define SPE_CACHE_NWAY   		4
-//#define SPE_CACHE_NSETS 		32, 16
-#define SPE_CACHE_NSETS 		8
-//#define SPE_CACHELINE_SIZE 		512
-#define SPE_CACHELINE_SIZE 		128
-#define SPE_CACHE_SET_TAGID(set) 	15
-///make sure that spe_cache.h is below those defines!
-#include "../Extras/software_cache/cache/include/spe_cache.h"
-
-
-int g_CacheMisses=0;
-int g_CacheHits=0;
-
-#if 0 // Added to allow cache misses and hits to be tracked, change this to 1 to restore unmodified version
-#define spe_cache_read(ea)		_spe_cache_lookup_xfer_wait_(ea, 0, 1)
-#else
-#define spe_cache_read(ea)		\
-({								\
-    int set, idx, line, byte;					\
-    _spe_cache_nway_lookup_(ea, set, idx);			\
-								\
-    if (btUnlikely(idx < 0)) {					\
-        ++g_CacheMisses;                        \
-	    idx = _spe_cache_miss_(ea, set, -1);			\
-        spu_writech(22, SPE_CACHE_SET_TAGMASK(set));		\
-        spu_mfcstat(MFC_TAG_UPDATE_ALL);			\
-    } 								\
-    else                            \
-    {                               \
-        ++g_CacheHits;              \
-    }                               \
-    line = _spe_cacheline_num_(set, idx);			\
-    byte = _spe_cacheline_byte_offset_(ea);			\
-    (void *) &spe_cache_mem[line + byte];			\
-})
-
-#endif
-
-#endif // USE_SOFTWARE_CACHE
-
-bool gUseEpa = false;
-
-#ifdef USE_SN_TUNER
-#include <LibSN_SPU.h>
-#endif //USE_SN_TUNER
-
-#if defined (__SPU__) && !defined (USE_LIBSPE2)
-#include <spu_printf.h>
-#elif defined (USE_LIBSPE2)
-#define spu_printf(a)
-#else
-#define IGNORE_ALIGNMENT 1
-#include <stdio.h>
-#include <stdlib.h>
-#define spu_printf printf
-
-#endif
-
-//int gNumConvexPoints0=0;
-
-///Make sure no destructors are called on this memory
-///Working memory of one collision task: buffers for broadphase pairs, work units, the two collision
-///objects, the contact manifold, shapes, BVH data and convex vertex data, plus the pointers through
-///which the task accesses the current pair's data.
-///The struct declares no constructor, so the pointer members and the flag start out unset.
-struct	CollisionTask_LocalStoreMemory
-{
-	///This CollisionTask_LocalStoreMemory is mainly used for the SPU version, using explicit DMA
-	///Other platforms can use other memory programming models.
-
-	ATTRIBUTE_ALIGNED16(btBroadphasePair	gBroadphasePairsBuffer[SPU_BATCHSIZE_BROADPHASE_PAIRS]);
-	DoubleBuffer<unsigned char, MIDPHASE_WORKUNIT_PAGE_SIZE> g_workUnitTaskBuffers;
-	///raw byte buffers (object size plus 16 spare bytes) rather than typed members
-	ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgoBuffer [sizeof(SpuContactManifoldCollisionAlgorithm)+16]);
-	ATTRIBUTE_ALIGNED16(char gColObj0Buffer [sizeof(btCollisionObject)+16]);
-	ATTRIBUTE_ALIGNED16(char gColObj1Buffer [sizeof(btCollisionObject)+16]);
-	///we reserve 32bit integer indices, even though they might be 16bit
-	ATTRIBUTE_ALIGNED16(int	spuIndices[16]);
-	btPersistentManifold	gPersistentManifoldBuffer;
-	///one entry per object of the pair
-	CollisionShape_LocalStoreMemory gCollisionShapes[2];
-	bvhMeshShape_LocalStoreMemory bvhShapeData;
-	SpuConvexPolyhedronVertexData convexVertexData[2];
-	CompoundShape_LocalStoreMemory compoundShapeData[2];
-		
-	///The following pointers might either point into this local store memory, or to the original/other memory locations.
-	///See SpuFakeDma for implementation of cellDmaSmallGetReadOnly.
-	btCollisionObject*	m_lsColObj0Ptr;
-	btCollisionObject*	m_lsColObj1Ptr;
-	btBroadphasePair* m_pairsPointer;
-	btPersistentManifold*	m_lsManifoldPtr;
-	SpuContactManifoldCollisionAlgorithm*	m_lsCollisionAlgorithmPtr;
-
-	bool	needsDmaPutContactManifoldAlgo;
-
-	///accessors below simply return the corresponding pointer member
-	btCollisionObject* getColObj0()
-	{
-		return m_lsColObj0Ptr;
-	}
-	btCollisionObject* getColObj1()
-	{
-		return m_lsColObj1Ptr;
-	}
-
-
-	btBroadphasePair* getBroadphasePairPtr()
-	{
-		return m_pairsPointer;
-	}
-
-	SpuContactManifoldCollisionAlgorithm*	getlocalCollisionAlgorithm()
-	{
-		return m_lsCollisionAlgorithmPtr;
-	}
-	
-	btPersistentManifold*	getContactManifoldPtr()
-	{
-		return m_lsManifoldPtr;
-	}
-};
-
-
-///Returns the CollisionTask_LocalStoreMemory a collision task works with.
-///On __CELLOS_LV2__ / USE_LIBSPE2 builds this is the address of the single static, 16-byte aligned
-///gLocalStoreMemory instance; on all other builds every call allocates a new instance with `new`.
-///NOTE(review): no matching delete is visible in this part of the file - ownership presumably
-///lies with the caller; confirm.
-#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) 
-
-ATTRIBUTE_ALIGNED16(CollisionTask_LocalStoreMemory	gLocalStoreMemory);
-
-void* createCollisionLocalStoreMemory()
-{
-	return &gLocalStoreMemory;
-}
-#else
-void* createCollisionLocalStoreMemory()
-{
-        return new CollisionTask_LocalStoreMemory;
-}
-
-#endif
-
-void	ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts);
-
-
-///Copies `size` bytes from the address `ea` into `buffer`.
-///With USE_SOFTWARE_CACHE the data is fetched through spe_cache_read and then memcpy'd; the request
-///must lie within a single cache line (asserted). Otherwise stallingUnalignedDmaSmallGet is used.
-SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
-{
-#if USE_SOFTWARE_CACHE
-	// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
-	// so the first and last bytes should fall on the same cache line
-	btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));
-
-	void* ls = spe_cache_read(ea);
-	memcpy(buffer, ls, size);
-#else
-	stallingUnalignedDmaSmallGet(buffer,ea,size);
-#endif
-}
-
-///Reads `size` bytes from each of the three addresses ea0, ea1, ea2 into ls0, ls1, ls2.
-///`size` must be smaller than 16 (asserted). Each transfer targets a 16-byte aligned, 32-byte
-///scratch buffer at an offset equal to the low 4 bits of its source address, so source and
-///destination share those bits. All three transfers use DMA_TAG(1) and are awaited together
-///before the bytes are copied to their destinations.
-SIMD_FORCE_INLINE void small_cache_read_triple(	void* ls0, ppu_address_t ea0,
-												void* ls1, ppu_address_t ea1,
-												void* ls2, ppu_address_t ea2,
-												size_t size)
-{
-		btAssert(size<16);
-		ATTRIBUTE_ALIGNED16(char	tmpBuffer0[32]);
-		ATTRIBUTE_ALIGNED16(char	tmpBuffer1[32]);
-		ATTRIBUTE_ALIGNED16(char	tmpBuffer2[32]);
-
-		uint32_t i;
-		
-
-		///make sure last 4 bits are the same, for cellDmaSmallGet
-		char* localStore0 = (char*)ls0;
-		uint32_t last4BitsOffset = ea0 & 0x0f;
-		char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
-#ifdef __SPU__
-		cellDmaSmallGet(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
-#else
-		//off the SPU the read-only getter returns the pointer to read from, which replaces the scratch target
-		tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
-#endif
-
-
-		//second transfer: same scheme as the first
-		char* localStore1 = (char*)ls1;
-		last4BitsOffset = ea1 & 0x0f;
-		char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
-#ifdef __SPU__
-		cellDmaSmallGet(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
-#else
-		tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
-#endif
-		
-		//third transfer: same scheme as the first
-		char* localStore2 = (char*)ls2;
-		last4BitsOffset = ea2 & 0x0f;
-		char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
-#ifdef __SPU__
-		cellDmaSmallGet(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
-#else
-		tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
-#endif
-		
-		
-		//a single wait covers all three transfers because they share DMA_TAG(1)
-		cellDmaWaitTagStatusAll( DMA_MASK(1) );
-
-		//this is slowish, perhaps memcpy on SPU is smarter?
-		for (i=0; btLikely( i<size );i++)
-		{
-			localStore0[i] = tmpTarget0[i];
-			localStore1[i] = tmpTarget1[i];
-			localStore2[i] = tmpTarget2[i];
-		}
-
-		
-}
-
-
-
-
-class spuNodeCallback : public btNodeOverlapCallback
-{
-	SpuCollisionPairInput* m_wuInput;
-	SpuContactResult&		m_spuContacts;
-	CollisionTask_LocalStoreMemory*	m_lsMemPtr;
-	ATTRIBUTE_ALIGNED16(btTriangleShape)	m_tmpTriangleShape;
-
-	ATTRIBUTE_ALIGNED16(btVector3	spuTriangleVertices[3]);
-	ATTRIBUTE_ALIGNED16(btScalar	spuUnscaledVertex[4]);
-	
-
-
-public:
-	spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory*	lsMemPtr,SpuContactResult& spuContacts)
-		:	m_wuInput(wuInput),
-		m_spuContacts(spuContacts),
-		m_lsMemPtr(lsMemPtr)
-	{
-	}
-
-	virtual void processNode(int subPart, int triangleIndex)
-	{
-		///Create a triangle on the stack, call process collision, with GJK
-		///DMA the vertices, can benefit from software caching
-
-		//		spu_printf("processNode with triangleIndex %d\n",triangleIndex);
-
-		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
-		{
-			unsigned short int* indexBasePtr = (unsigned short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
-			ATTRIBUTE_ALIGNED16(unsigned short int tmpIndices[3]);
-
-			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
-									&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
-									&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
-									sizeof(unsigned short int));
-
-			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
-			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
-			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
-		} else
-		{
-			unsigned int* indexBasePtr = (unsigned int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
-
-			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
-								&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
-								&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
-								sizeof(int));
-		}
-		
-		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
-		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
-		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
-		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
-
-		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
-		for (int j=2;btLikely( j>=0 );j--)
-		{
-			int graphicsindex = m_lsMemPtr->spuIndices[j];
-
-			//			spu_printf("SPU index=%d ,",graphicsindex);
-			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
-			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
-
-
-			///handle un-aligned vertices...
-
-			//another DMA for each vertex
-			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
-									&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
-									&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
-									sizeof(btScalar));
-			
-			m_tmpTriangleShape.getVertexPtr(j).setValue(spuUnscaledVertex[0]*meshScaling.getX(),
-				spuUnscaledVertex[1]*meshScaling.getY(),
-				spuUnscaledVertex[2]*meshScaling.getZ());
-
-			//			spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
-		}
-
-
-		SpuCollisionPairInput triangleConcaveInput(*m_wuInput);
-//		triangleConcaveInput.m_spuCollisionShapes[1] = &spuTriangleVertices[0];
-		triangleConcaveInput.m_spuCollisionShapes[1] = &m_tmpTriangleShape;
-		triangleConcaveInput.m_shapeType1 = TRIANGLE_SHAPE_PROXYTYPE;
-
-		m_spuContacts.setShapeIdentifiersB(subPart,triangleIndex);
-
-		//		m_spuContacts.flush();
-
-		ProcessSpuConvexConvexCollision(&triangleConcaveInput, m_lsMemPtr,m_spuContacts);
-		///this flush should be automatic
-		//	m_spuContacts.flush();
-	}
-
-};
-
-
-
-void btConvexPlaneCollideSingleContact (SpuCollisionPairInput* wuInput,CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult&  spuContacts)
-{
-	
-	btConvexShape* convexShape = (btConvexShape*) wuInput->m_spuCollisionShapes[0];
-	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) wuInput->m_spuCollisionShapes[1];
-
-    bool hasCollision = false;
-	const btVector3& planeNormal = planeShape->getPlaneNormal();
-	const btScalar& planeConstant = planeShape->getPlaneConstant();
-	
-	
-	btTransform convexWorldTransform = wuInput->m_worldTransform0;
-	btTransform convexInPlaneTrans;
-	convexInPlaneTrans= wuInput->m_worldTransform1.inverse() * convexWorldTransform;
-	btTransform planeInConvex;
-	planeInConvex= convexWorldTransform.inverse() * wuInput->m_worldTransform1;
-	
-	//btVector3 vtx = convexShape->localGetSupportVertexWithoutMarginNonVirtual(planeInConvex.getBasis()*-planeNormal);
-	btVector3 vtx = convexShape->localGetSupportVertexNonVirtual(planeInConvex.getBasis()*-planeNormal);
-
-	btVector3 vtxInPlane = convexInPlaneTrans(vtx);
-	btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant);
-
-	btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal;
-	btVector3 vtxInPlaneWorld = wuInput->m_worldTransform1 * vtxInPlaneProjected;
-
-	hasCollision = distance < lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold();
-	//resultOut->setPersistentManifold(m_manifoldPtr);
-	if (hasCollision)
-	{
-		/// report a contact. internally this will be kept persistent, and contact reduction is done
-		btVector3 normalOnSurfaceB =wuInput->m_worldTransform1.getBasis() * planeNormal;
-		btVector3 pOnB = vtxInPlaneWorld;
-		spuContacts.addContactPoint(normalOnSurfaceB,pOnB,distance);
-	}
-}
-
-void	ProcessConvexPlaneSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
-{
-
-		register	int dmaSize = 0;
-		register ppu_address_t	dmaPpuAddress2;
-		btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
-
-		///DMA in the vertices for convex shapes
-		ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
-		ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
-
-		if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			//	spu_printf("SPU: DMA btConvexHullShape\n");
-			
-			dmaSize = sizeof(btConvexHullShape);
-			dmaPpuAddress2 = wuInput->m_collisionShapes[0];
-
-			cellDmaGet(&convexHullShape0, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-			//cellDmaWaitTagStatusAll(DMA_MASK(1));
-		}
-
-		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			//	spu_printf("SPU: DMA btConvexHullShape\n");
-			dmaSize = sizeof(btConvexHullShape);
-			dmaPpuAddress2 = wuInput->m_collisionShapes[1];
-			cellDmaGet(&convexHullShape1, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-			//cellDmaWaitTagStatusAll(DMA_MASK(1));
-		}
-		
-		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{		
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-			dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
-			lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
-		}
-
-			
-		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-			dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
-			lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
-		}
-
-		
-		btConvexPointCloudShape cpc0,cpc1;
-
-		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			cellDmaWaitTagStatusAll(DMA_MASK(2));
-			lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
-			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
-			const btVector3& localScaling = ch->getLocalScalingNV();
-			cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
-			wuInput->m_spuCollisionShapes[0] = &cpc0;
-		}
-
-		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			cellDmaWaitTagStatusAll(DMA_MASK(2));		
-			lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
-			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
-			const btVector3& localScaling = ch->getLocalScalingNV();
-			cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
-			wuInput->m_spuCollisionShapes[1] = &cpc1;
-
-		}
-
-
-//		const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
-//		const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
-//		int shapeType0 = wuInput->m_shapeType0;
-//		int shapeType1 = wuInput->m_shapeType1;
-		float marginA = wuInput->m_collisionMargin0;
-		float marginB = wuInput->m_collisionMargin1;
-
-		SpuClosestPointInput	cpInput;
-		cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
-		cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
-		cpInput.m_transformA = wuInput->m_worldTransform0;
-		cpInput.m_transformB = wuInput->m_worldTransform1;
-		float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
-		cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
-
-		ppu_address_t manifoldAddress = (ppu_address_t)manifold;
-
-		btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
-		//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
-		spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
-			lsMemPtr->getColObj1()->getWorldTransform(),
-			lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
-			lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
-			wuInput->m_isSwapped);
-
-
-		btConvexPlaneCollideSingleContact(wuInput,lsMemPtr,spuContacts);
-
-
-		
-	
-}
-
-
-
-
-////////////////////////
-/// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc)
-///////////////////
-void	ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
-{
-	//order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
-	
-	btBvhTriangleMeshShape*	trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1];
-	//need the mesh interface, for access to triangle vertices
-	dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape);
-
-	btVector3 aabbMin(-1,-400,-1);
-	btVector3 aabbMax(1,400,1);
-
-
-	//recalc aabbs
-	btTransform convexInTriangleSpace;
-	convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0;
-	btConvexInternalShape* convexShape = (btConvexInternalShape*)wuInput->m_spuCollisionShapes[0];
-
-	computeAabb (aabbMin, aabbMax, convexShape, wuInput->m_collisionShapes[0], wuInput->m_shapeType0, convexInTriangleSpace);
-
-
-	//CollisionShape* triangleShape = static_cast<btCollisionShape*>(triBody->m_collisionShape);
-	//convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax);
-
-	//	btScalar extraMargin = collisionMarginTriangle;
-	//	btVector3 extra(extraMargin,extraMargin,extraMargin);
-	//	aabbMax += extra;
-	//	aabbMin -= extra;
-
-	///quantize query AABB
-	unsigned short int quantizedQueryAabbMin[3];
-	unsigned short int quantizedQueryAabbMax[3];
-	lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0);
-	lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1);
-
-	QuantizedNodeArray&	nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
-	//spu_printf("SPU: numNodes = %d\n",nodeArray.size());
-
-	BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();
-
-
-	spuNodeCallback	nodeCallback(wuInput,lsMemPtr,spuContacts);
-	IndexedMeshArray&	indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();
-	//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
-
-	//	spu_printf("SPU: numSubTrees = %d\n",subTrees.size());
-	//not likely to happen
-	if (subTrees.size() && indexArray.size() == 1)
-	{
-		///DMA in the index info
-		dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-		
-		//display the headers
-		int numBatch = subTrees.size();
-		for (int i=0;i<numBatch;)
-		{
-			//@todo- can reorder DMA transfers for less stall
-			int remaining = subTrees.size() - i;
-			int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;
-			
-			dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1);
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-			
-
-			//			spu_printf("nextBatch = %d\n",nextBatch);
-
-			for (int j=0;j<nextBatch;j++)
-			{
-				const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j];
-
-				unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
-				if (overlap)
-				{
-					btAssert(subtree.m_subtreeSize);
-
-					//dma the actual nodes of this subtree
-					dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
-					cellDmaWaitTagStatusAll(DMA_MASK(2));
-
-					/* Walk this subtree */
-					spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,
-						&lsMemPtr->bvhShapeData.gSubtreeNodes[0],
-						0,
-						subtree.m_subtreeSize);
-				}
-				//				spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize);
-			}
-
-			//	unsigned short int	m_quantizedAabbMin[3];
-			//	unsigned short int	m_quantizedAabbMax[3];
-			//	int			m_rootNodeIndex;
-			//	int			m_subtreeSize;
-			i+=nextBatch;
-		}
-
-		//pre-fetch first tree, then loop and double buffer
-	}
-
-}
-
-
-int stats[11]={0,0,0,0,0,0,0,0,0,0,0};
-int degenerateStats[11]={0,0,0,0,0,0,0,0,0,0,0};
-
-
-////////////////////////
-/// Convex versus Convex collision detection (handles collision between sphere, box, cylinder, triangle, cone, convex polyhedron etc)
-///////////////////
-void	ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
-{
-	register int dmaSize;
-	register ppu_address_t	dmaPpuAddress2;
-	
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-	//spu_printf("SPU: ProcessSpuConvexConvexCollision\n");
-#endif //DEBUG_SPU_COLLISION_DETECTION
-	//CollisionShape* shape0 = (CollisionShape*)wuInput->m_collisionShapes[0];
-	//CollisionShape* shape1 = (CollisionShape*)wuInput->m_collisionShapes[1];
-	btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
-
-	bool genericGjk = true;
-
-	if (genericGjk)
-	{
-		//try generic GJK
-
-		
-		
-		//SpuConvexPenetrationDepthSolver* penetrationSolver=0;
-		btVoronoiSimplexSolver simplexSolver;
-		btGjkEpaPenetrationDepthSolver	epaPenetrationSolver2;
-		
-		btConvexPenetrationDepthSolver* penetrationSolver = &epaPenetrationSolver2;
-		
-		//SpuMinkowskiPenetrationDepthSolver	minkowskiPenetrationSolver;
-#ifdef ENABLE_EPA
-		if (gUseEpa)
-		{
-			penetrationSolver = &epaPenetrationSolver2;
-		} else
-#endif
-		{
-			//penetrationSolver = &minkowskiPenetrationSolver;
-		}
-
-
-		///DMA in the vertices for convex shapes
-		ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
-		ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
-
-		if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			//	spu_printf("SPU: DMA btConvexHullShape\n");
-			
-			dmaSize = sizeof(btConvexHullShape);
-			dmaPpuAddress2 = wuInput->m_collisionShapes[0];
-
-			cellDmaGet(&convexHullShape0, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-			//cellDmaWaitTagStatusAll(DMA_MASK(1));
-		}
-
-		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			//	spu_printf("SPU: DMA btConvexHullShape\n");
-			dmaSize = sizeof(btConvexHullShape);
-			dmaPpuAddress2 = wuInput->m_collisionShapes[1];
-			cellDmaGet(&convexHullShape1, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-			//cellDmaWaitTagStatusAll(DMA_MASK(1));
-		}
-		
-		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{		
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-			dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
-			lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
-		}
-
-			
-		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-			dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
-			lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
-		}
-
-		
-		btConvexPointCloudShape cpc0,cpc1;
-
-		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			cellDmaWaitTagStatusAll(DMA_MASK(2));
-			lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
-			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
-			const btVector3& localScaling = ch->getLocalScalingNV();
-			cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
-			wuInput->m_spuCollisionShapes[0] = &cpc0;
-		}
-
-		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
-		{
-			cellDmaWaitTagStatusAll(DMA_MASK(2));		
-			lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
-			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
-			const btVector3& localScaling = ch->getLocalScalingNV();
-			cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
-			wuInput->m_spuCollisionShapes[1] = &cpc1;
-
-		}
-
-
-		const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
-		const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
-		int shapeType0 = wuInput->m_shapeType0;
-		int shapeType1 = wuInput->m_shapeType1;
-		float marginA = wuInput->m_collisionMargin0;
-		float marginB = wuInput->m_collisionMargin1;
-
-		SpuClosestPointInput	cpInput;
-		cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
-		cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
-		cpInput.m_transformA = wuInput->m_worldTransform0;
-		cpInput.m_transformB = wuInput->m_worldTransform1;
-		float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
-		cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
-
-		ppu_address_t manifoldAddress = (ppu_address_t)manifold;
-
-		btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
-		//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
-		spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
-			lsMemPtr->getColObj1()->getWorldTransform(),
-			lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
-			lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
-			wuInput->m_isSwapped);
-
-		{
-			btGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&simplexSolver,penetrationSolver);//&vsSolver,penetrationSolver);
-			gjk.getClosestPoints(cpInput,spuContacts,0);//,debugDraw);
-			
-			stats[gjk.m_lastUsedMethod]++;
-			degenerateStats[gjk.m_degenerateSimplex]++;
-
-#ifdef USE_SEPDISTANCE_UTIL			
-			btScalar sepDist = gjk.getCachedSeparatingDistance()+spuManifold->getContactBreakingThreshold();
-			lsMemPtr->getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(gjk.getCachedSeparatingAxis(),sepDist,wuInput->m_worldTransform0,wuInput->m_worldTransform1);
-			lsMemPtr->needsDmaPutContactManifoldAlgo = true;
-#endif //USE_SEPDISTANCE_UTIL
-
-		}
-
-	}
-
-
-}
-
-
-template<typename T> void DoSwap(T& a, T& b)
-{
-	char tmp[sizeof(T)];
-	memcpy(tmp, &a, sizeof(T));
-	memcpy(&a, &b, sizeof(T));
-	memcpy(&b, tmp, sizeof(T));
-}
-
-SIMD_FORCE_INLINE void	dmaAndSetupCollisionObjects(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem)
-{
-	register int dmaSize;
-	register ppu_address_t	dmaPpuAddress2;
-		
-	dmaSize = sizeof(btCollisionObject);//btTransform);
-	dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0();
-	lsMem.m_lsColObj0Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj0Buffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);		
-
-	dmaSize = sizeof(btCollisionObject);//btTransform);
-	dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1();
-	lsMem.m_lsColObj1Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj1Buffer, dmaPpuAddress2  , dmaSize, DMA_TAG(2), 0, 0);		
-	
-	cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-
-	btCollisionObject* ob0 = lsMem.getColObj0();
-	btCollisionObject* ob1 = lsMem.getColObj1();
-
-	collisionPairInput.m_worldTransform0 = ob0->getWorldTransform();
-	collisionPairInput.m_worldTransform1 = ob1->getWorldTransform();
-}
-
-
-
-void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem,
-							SpuContactResult &spuContacts,
-							ppu_address_t collisionShape0Ptr, void* collisionShape0Loc,
-							ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true)
-{
-	
-	if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) 
-		&& btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
-	{
-		if (dmaShapes)
-		{
-			dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
-			dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
-			cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-		}
-
-		btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
-		btConvexInternalShape* spuConvexShape1 = (btConvexInternalShape*)collisionShape1Loc;
-
-		btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
-		btVector3 dim1 = spuConvexShape1->getImplicitShapeDimensions();
-
-		collisionPairInput.m_primitiveDimensions0 = dim0;
-		collisionPairInput.m_primitiveDimensions1 = dim1;
-		collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
-		collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
-		collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
-		collisionPairInput.m_spuCollisionShapes[1] = spuConvexShape1;
-		ProcessSpuConvexConvexCollision(&collisionPairInput,&lsMem,spuContacts);
-	} 
-	else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) && 
-			btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1))
-	{
-		//snPause();
-
-		dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
-		dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-
-		// Both are compounds, do N^2 CD for now
-		///@todo: add some AABB-based pruning (probably not -> slower)
-	
-		btCompoundShape* spuCompoundShape0 = (btCompoundShape*)collisionShape0Loc;
-		btCompoundShape* spuCompoundShape1 = (btCompoundShape*)collisionShape1Loc;
-
-		dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape0, 1);
-		dmaCompoundShapeInfo (&lsMem.compoundShapeData[1], spuCompoundShape1, 2);
-		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-		
-
-		dmaCompoundSubShapes (&lsMem.compoundShapeData[0], spuCompoundShape0, 1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-		dmaCompoundSubShapes (&lsMem.compoundShapeData[1], spuCompoundShape1, 1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-		int childShapeCount0 = spuCompoundShape0->getNumChildShapes();
-		int childShapeCount1 = spuCompoundShape1->getNumChildShapes();
-
-		// Start the N^2
-		for (int i = 0; i < childShapeCount0; ++i)
-		{
-			btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i];
-			btAssert(!btBroadphaseProxy::isCompound(childShape0.m_childShapeType));
-
-			for (int j = 0; j < childShapeCount1; ++j)
-			{
-				btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j];
-				btAssert(!btBroadphaseProxy::isCompound(childShape1.m_childShapeType));
-
-
-				/* Create a new collision pair input struct using the two child shapes */
-				SpuCollisionPairInput cinput (collisionPairInput);
-
-				cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape0.m_transform;
-				cinput.m_shapeType0 = childShape0.m_childShapeType;
-				cinput.m_collisionMargin0 = childShape0.m_childMargin;
-
-				cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape1.m_transform;
-				cinput.m_shapeType1 = childShape1.m_childShapeType;
-				cinput.m_collisionMargin1 = childShape1.m_childMargin;
-				/* Recursively call handleCollisionPair () with new collision pair input */
-				
-				handleCollisionPair(cinput, lsMem, spuContacts,			
-					(ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], 
-					(ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false);
-			}
-		}
-	}
-	else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) )
-	{
-		//snPause();
-		
-		dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
-		dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-
-		// object 0 compound, object 1 non-compound
-		btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape0Loc;
-		dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-		int childShapeCount = spuCompoundShape->getNumChildShapes();
-
-		for (int i = 0; i < childShapeCount; ++i)
-		{
-			btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
-			btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
-			// Dma the child shape
-			dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-			
-			SpuCollisionPairInput cinput (collisionPairInput);
-			cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape.m_transform;
-			cinput.m_shapeType0 = childShape.m_childShapeType;
-			cinput.m_collisionMargin0 = childShape.m_childMargin;
-
-			handleCollisionPair(cinput, lsMem, spuContacts,			
-				(ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], 
-				collisionShape1Ptr, collisionShape1Loc, false);
-		}
-	}
-	else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1) )
-	{
-		//snPause();
-		
-		dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
-		dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-		// object 0 non-compound, object 1 compound
-		btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape1Loc;
-		dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-		
-		int childShapeCount = spuCompoundShape->getNumChildShapes();
-
-		for (int i = 0; i < childShapeCount; ++i)
-		{
-			btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
-			btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
-			// Dma the child shape
-			dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
-			cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-			SpuCollisionPairInput cinput (collisionPairInput);
-			cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape.m_transform;
-			cinput.m_shapeType1 = childShape.m_childShapeType;
-			cinput.m_collisionMargin1 = childShape.m_childMargin;
-			handleCollisionPair(cinput, lsMem, spuContacts,
-				collisionShape0Ptr, collisionShape0Loc, 
-				(ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], false);
-		}
-		
-	}
-	else
-	{
-		//a non-convex shape is involved									
-		bool handleConvexConcave = false;
-
-		//snPause();
-
-		if (btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType0) &&
-			btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
-		{
-			// Swap stuff
-			DoSwap(collisionShape0Ptr, collisionShape1Ptr);
-			DoSwap(collisionShape0Loc, collisionShape1Loc);
-			DoSwap(collisionPairInput.m_shapeType0, collisionPairInput.m_shapeType1);
-			DoSwap(collisionPairInput.m_worldTransform0, collisionPairInput.m_worldTransform1);
-			DoSwap(collisionPairInput.m_collisionMargin0, collisionPairInput.m_collisionMargin1);
-			
-			collisionPairInput.m_isSwapped = true;
-		}
-		
-		if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)&&
-			btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType1))
-		{
-			handleConvexConcave = true;
-		}
-		if (handleConvexConcave)
-		{
-			if (dmaShapes)
-			{
-				dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
-				dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
-				cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
-			}
-			
-			if (collisionPairInput.m_shapeType1 == STATIC_PLANE_PROXYTYPE)
-			{
-				btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
-				btStaticPlaneShape* planeShape= (btStaticPlaneShape*)collisionShape1Loc;
-
-				btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
-				collisionPairInput.m_primitiveDimensions0 = dim0;
-				collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
-				collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
-				collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
-				collisionPairInput.m_spuCollisionShapes[1] = planeShape;
-
-				ProcessConvexPlaneSpuCollision(&collisionPairInput,&lsMem,spuContacts);
-			} else
-			{
-				btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
-				btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
-
-				btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
-				collisionPairInput.m_primitiveDimensions0 = dim0;
-				collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
-				collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
-				collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
-				collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
-
-				ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
-			}
-		}
-
-	}
-	
-	spuContacts.flush();
-
-}
-
-
-void	processCollisionTask(void* userPtr, void* lsMemPtr)
-{
-
-	SpuGatherAndProcessPairsTaskDesc* taskDescPtr = (SpuGatherAndProcessPairsTaskDesc*)userPtr;
-	SpuGatherAndProcessPairsTaskDesc& taskDesc = *taskDescPtr;
-	CollisionTask_LocalStoreMemory*	colMemPtr = (CollisionTask_LocalStoreMemory*)lsMemPtr;
-	CollisionTask_LocalStoreMemory& lsMem = *(colMemPtr);
-
-	gUseEpa = taskDesc.m_useEpa;
-
-	//	spu_printf("taskDescPtr=%llx\n",taskDescPtr);
-
-	SpuContactResult spuContacts;
-
-	////////////////////
-
-	ppu_address_t dmaInPtr = taskDesc.m_inPairPtr;
-	unsigned int numPages = taskDesc.numPages;
-	unsigned int numOnLastPage = taskDesc.numOnLastPage;
-
-	// prefetch first set of inputs and wait
-	lsMem.g_workUnitTaskBuffers.init();
-
-	unsigned int nextNumOnPage = (numPages > 1)? MIDPHASE_NUM_WORKUNITS_PER_PAGE : numOnLastPage;
-	lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3));
-	dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE;
-
-	
-	register unsigned char *inputPtr;
-	register unsigned int numOnPage;
-	register unsigned int j;
-	SpuGatherAndProcessWorkUnitInput* wuInputs;	
-	register int dmaSize;
-	register ppu_address_t	dmaPpuAddress;
-	register ppu_address_t	dmaPpuAddress2;
-
-	int numPairs;
-	register int p;
-	SpuCollisionPairInput collisionPairInput;
-	
-	for (unsigned int i = 0; btLikely(i < numPages); i++)
-	{
-
-		// wait for back buffer dma and swap buffers
-		inputPtr = lsMem.g_workUnitTaskBuffers.swapBuffers();
-
-		// number on current page is number prefetched last iteration
-		numOnPage = nextNumOnPage;
-
-
-		// prefetch next set of inputs
-#if MIDPHASE_NUM_WORKUNIT_PAGES > 2
-		if ( btLikely( i < numPages-1 ) )
-#else
-		if ( btUnlikely( i < numPages-1 ) )
-#endif
-		{
-			nextNumOnPage = (i == numPages-2)? numOnLastPage : MIDPHASE_NUM_WORKUNITS_PER_PAGE;
-			lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3));
-			dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE;
-		}
-
-		wuInputs = reinterpret_cast<SpuGatherAndProcessWorkUnitInput *>(inputPtr);
-		
-		
-		for (j = 0; btLikely( j < numOnPage ); j++)
-		{
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-		//	printMidphaseInput(&wuInputs[j]);
-#endif //DEBUG_SPU_COLLISION_DETECTION
-
-
-			numPairs = wuInputs[j].m_endIndex - wuInputs[j].m_startIndex;
-			
-			if ( btLikely( numPairs ) )
-			{
-					dmaSize = numPairs*sizeof(btBroadphasePair);
-					dmaPpuAddress = wuInputs[j].m_pairArrayPtr+wuInputs[j].m_startIndex * sizeof(btBroadphasePair);
-					lsMem.m_pairsPointer = (btBroadphasePair*)cellDmaGetReadOnly(&lsMem.gBroadphasePairsBuffer, dmaPpuAddress  , dmaSize, DMA_TAG(1), 0, 0);
-					cellDmaWaitTagStatusAll(DMA_MASK(1));
-				
-
-				for (p=0;p<numPairs;p++)
-				{
-
-					//for each broadphase pair, do something
-
-					btBroadphasePair& pair = lsMem.getBroadphasePairPtr()[p];
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-					spu_printf("pair->m_userInfo = %d\n",pair.m_userInfo);
-					spu_printf("pair->m_algorithm = %d\n",pair.m_algorithm);
-					spu_printf("pair->m_pProxy0 = %d\n",pair.m_pProxy0);
-					spu_printf("pair->m_pProxy1 = %d\n",pair.m_pProxy1);
-#endif //DEBUG_SPU_COLLISION_DETECTION
-
-					if (pair.m_internalTmpValue == 2 && pair.m_algorithm && pair.m_pProxy0 && pair.m_pProxy1)
-					{
-						dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
-						dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
-						lsMem.m_lsCollisionAlgorithmPtr = (SpuContactManifoldCollisionAlgorithm*)cellDmaGetReadOnly(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-
-						cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-						lsMem.needsDmaPutContactManifoldAlgo = false;
-
-						collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.getlocalCollisionAlgorithm()->getContactManifoldPtr();
-						collisionPairInput.m_isSwapped = false;
-
-						if (1)
-						{
-
-							///can wait on the combined DMA_MASK, or dma on the same tag
-
-
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-					//		spu_printf("SPU collisionPairInput->m_shapeType0 = %d\n",collisionPairInput->m_shapeType0);
-					//		spu_printf("SPU collisionPairInput->m_shapeType1 = %d\n",collisionPairInput->m_shapeType1);
-#endif //DEBUG_SPU_COLLISION_DETECTION
-
-							
-							dmaSize = sizeof(btPersistentManifold);
-
-							dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr;
-							lsMem.m_lsManifoldPtr = (btPersistentManifold*)cellDmaGetReadOnly(&lsMem.gPersistentManifoldBuffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-
-							collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0();
-							collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1();
-							collisionPairInput.m_collisionMargin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
-							collisionPairInput.m_collisionMargin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
-							
-							
-							
-							//??cellDmaWaitTagStatusAll(DMA_MASK(1));
-							
-
-							if (1)
-							{
-								//snPause();
-
-								// Get the collision objects
-								dmaAndSetupCollisionObjects(collisionPairInput, lsMem);
-
-								if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive())
-								{
-
-									lsMem.needsDmaPutContactManifoldAlgo = true;
-#ifdef USE_SEPDISTANCE_UTIL
-									lsMem.getlocalCollisionAlgorithm()->m_sepDistance.updateSeparatingDistance(collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
-#endif //USE_SEPDISTANCE_UTIL
-							
-#define USE_DEDICATED_BOX_BOX 1
-#ifdef USE_DEDICATED_BOX_BOX
-									bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&&
-										(lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE));
-									if (boxbox)
-									{
-										//spu_printf("boxbox dist = %f\n",distance);
-										btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr();
-										btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
-										ppu_address_t manifoldAddress = (ppu_address_t)manifold;
-
-										spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
-											lsMem.getColObj1()->getWorldTransform(),
-											lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
-											lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
-											collisionPairInput.m_isSwapped);
-
-						
-									//float distance=0.f;
-									btVector3 normalInB;
-
-
-									if (//!gUseEpa &&
-#ifdef USE_SEPDISTANCE_UTIL
-										lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
-#else
-										1
-#endif											
-										)
-										{
-//#define USE_PE_BOX_BOX 1
-#ifdef USE_PE_BOX_BOX
-											{
-
-												//getCollisionMargin0
-												btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
-												btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
-												btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
-												btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
-
-												Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
-												Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
-												Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
-												Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
-												Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
-
-												Transform3 transformA(vmMatrix0,vmPos0);
-												Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
-												Transform3 transformB(vmMatrix1,vmPos1);
-												BoxPoint resultClosestBoxPointA;
-												BoxPoint resultClosestBoxPointB;
-												Vector3 resultNormal;
-#ifdef USE_SEPDISTANCE_UTIL
-												float distanceThreshold = FLT_MAX
-#else
-												float distanceThreshold = 0.f;
-#endif
-
-
-												distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB,  boxA, transformA, boxB,transformB,distanceThreshold);
-												
-												normalInB = -getBtVector3(resultNormal);
-
-												if(distance < spuManifold->getContactBreakingThreshold())
-												{
-													btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint));
-
-													spuContacts.addContactPoint(
-														normalInB,
-														pointOnB,
-														distance);
-												}
-											} 
-#else									
-											{
-
-												btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
-												btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
-												btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
-												btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
-
-
-												btBoxShape box0(shapeDim0);
-												btBoxShape box1(shapeDim1);
-
-												struct SpuBridgeContactCollector : public btDiscreteCollisionDetectorInterface::Result
-												{
-													SpuContactResult&	m_spuContacts;
-
-													virtual void setShapeIdentifiersA(int partId0,int index0)
-													{
-														m_spuContacts.setShapeIdentifiersA(partId0,index0);
-													}
-													virtual void setShapeIdentifiersB(int partId1,int index1)
-													{
-														m_spuContacts.setShapeIdentifiersB(partId1,index1);
-													}
-													virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
-													{
-														m_spuContacts.addContactPoint(normalOnBInWorld,pointInWorld,depth);
-													}
-
-													SpuBridgeContactCollector(SpuContactResult& spuContacts)
-														:m_spuContacts(spuContacts)
-													{
-
-													}
-												};
-												
-												SpuBridgeContactCollector  bridgeOutput(spuContacts);
-
-												btDiscreteCollisionDetectorInterface::ClosestPointInput input;
-												input.m_maximumDistanceSquared = BT_LARGE_FLOAT;
-												input.m_transformA = collisionPairInput.m_worldTransform0;
-												input.m_transformB = collisionPairInput.m_worldTransform1;
-
-												btBoxBoxDetector detector(&box0,&box1);
-												
-												detector.getClosestPoints(input,bridgeOutput,0);
-
-											}
-#endif //USE_PE_BOX_BOX
-											
-											lsMem.needsDmaPutContactManifoldAlgo = true;
-#ifdef USE_SEPDISTANCE_UTIL
-											btScalar sepDist2 = distance+spuManifold->getContactBreakingThreshold();
-											lsMem.getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(normalInB,sepDist2,collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
-#endif //USE_SEPDISTANCE_UTIL
-											gProcessedCol++;
-										} else
-										{
-											gSkippedCol++;
-										}
-
-										spuContacts.flush();
-											
-
-									} else
-#endif //USE_DEDICATED_BOX_BOX
-									{
-										if (
-#ifdef USE_SEPDISTANCE_UTIL
-											lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
-#else
-											1
-#endif //USE_SEPDISTANCE_UTIL
-											)
-										{
-											handleCollisionPair(collisionPairInput, lsMem, spuContacts,
-												(ppu_address_t)lsMem.getColObj0()->getRootCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,
-												(ppu_address_t)lsMem.getColObj1()->getRootCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
-										} else
-										{
-												//spu_printf("boxbox dist = %f\n",distance);
-											btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr();
-											btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
-											ppu_address_t manifoldAddress = (ppu_address_t)manifold;
-
-											spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
-												lsMem.getColObj1()->getWorldTransform(),
-												lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
-												lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
-												collisionPairInput.m_isSwapped);
-
-											spuContacts.flush();
-										}
-									}
-								
-								}
-
-							}
-						}
-
-#ifdef USE_SEPDISTANCE_UTIL
-#if defined (__SPU__) || defined (USE_LIBSPE2)
-						if (lsMem.needsDmaPutContactManifoldAlgo)
-						{
-							dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
-							dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
-							cellDmaLargePut(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
-							cellDmaWaitTagStatusAll(DMA_MASK(1));
-						}
-#endif
-#endif //#ifdef USE_SEPDISTANCE_UTIL
-
-					}
-				}
-			}
-		} //end for (j = 0; j < numOnPage; j++)
-
-	}//	for 
-
-
-
-	return;
-}
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
deleted file mode 100644
index bbaa555ee1b..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef SPU_GATHERING_COLLISION_TASK_H
-#define SPU_GATHERING_COLLISION_TASK_H
-
-#include "../PlatformDefinitions.h"
-//#define DEBUG_SPU_COLLISION_DETECTION 1
-
-
-///Task Description for SPU collision detection
-struct SpuGatherAndProcessPairsTaskDesc 
-{
-	ppu_address_t	m_inPairPtr;//m_pairArrayPtr;
-	//mutex variable
-	uint32_t	m_someMutexVariableInMainMemory;
-
-	ppu_address_t	m_dispatcher;
-
-	uint32_t	numOnLastPage;
-
-	uint16_t numPages;
-	uint16_t taskId;
-	bool m_useEpa;
-
-	struct	CollisionTask_LocalStoreMemory*	m_lsMemory; 
-}
-
-#if  defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
-__attribute__ ((aligned (128)))
-#endif
-;
-
-
-void	processCollisionTask(void* userPtr, void* lsMemory);
-
-void*	createCollisionLocalStoreMemory();
-
-
-#if defined(USE_LIBSPE2) && defined(__SPU__)
-#include "../SpuLibspe2Support.h"
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-#include <SpuFakeDma.h>
-
-//#define DEBUG_LIBSPE2_SPU_TASK
-
-
-
-int main(unsigned long long speid, addr64 argp, addr64 envp)
-{
-	printf("SPU: hello \n");
-	
-	ATTRIBUTE_ALIGNED128(btSpuStatus status);
-	ATTRIBUTE_ALIGNED16( SpuGatherAndProcessPairsTaskDesc taskDesc ) ;
-	unsigned int received_message = Spu_Mailbox_Event_Nothing;
-    bool shutdown = false;
-
-	cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-	cellDmaWaitTagStatusAll(DMA_MASK(3));
-
-	status.m_status = Spu_Status_Free;
-	status.m_lsMemory.p = createCollisionLocalStoreMemory();
-
-	cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-	cellDmaWaitTagStatusAll(DMA_MASK(3));
-	
-	
-	while ( btLikely( !shutdown ) )
-	{
-		
-		received_message = spu_read_in_mbox();
-		
-		if( btLikely( received_message == Spu_Mailbox_Event_Task ))
-		{
-#ifdef DEBUG_LIBSPE2_SPU_TASK
-			printf("SPU: received Spu_Mailbox_Event_Task\n");
-#endif //DEBUG_LIBSPE2_SPU_TASK
-
-			// refresh the status
-			cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-			cellDmaWaitTagStatusAll(DMA_MASK(3));
-		
-			btAssert(status.m_status==Spu_Status_Occupied);
-			
-			cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuGatherAndProcessPairsTaskDesc), DMA_TAG(3), 0, 0);
-			cellDmaWaitTagStatusAll(DMA_MASK(3));
-#ifdef DEBUG_LIBSPE2_SPU_TASK		
-			printf("SPU:processCollisionTask\n");	
-#endif //DEBUG_LIBSPE2_SPU_TASK
-			processCollisionTask((void*)&taskDesc, taskDesc.m_lsMemory);
-			
-#ifdef DEBUG_LIBSPE2_SPU_TASK
-			printf("SPU:finished processCollisionTask\n");
-#endif //DEBUG_LIBSPE2_SPU_TASK
-		}
-		else
-		{
-#ifdef DEBUG_LIBSPE2_SPU_TASK
-			printf("SPU: received ShutDown\n");
-#endif //DEBUG_LIBSPE2_SPU_TASK
-			if( btLikely( received_message == Spu_Mailbox_Event_Shutdown ) )
-			{
-				shutdown = true;
-			}
-			else
-			{
-				//printf("SPU - Sth. recieved\n");
-			}
-		}
-
-		// set to status free and wait for next task
-		status.m_status = Spu_Status_Free;
-		cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-		cellDmaWaitTagStatusAll(DMA_MASK(3));		
-				
-		
-  	}
-
-	printf("SPU: shutdown\n");
-  	return 0;
-}
-#endif // USE_LIBSPE2
-
-
-#endif //SPU_GATHERING_COLLISION_TASK_H
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h
deleted file mode 100644
index 8b89de03f59..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
deleted file mode 100644
index 9f7e64dd1b3..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "SpuMinkowskiPenetrationDepthSolver.h"
-#include "SpuContactResult.h"
-#include "SpuPreferredPenetrationDirections.h"
-#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
-#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
-#include "SpuCollisionShapes.h"
-
-#define NUM_UNITSPHERE_POINTS 42
-static btVector3	sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = 
-{
-btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
-btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
-btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
-btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
-btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
-btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
-btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
-btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
-btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
-btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
-btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
-btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
-btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
-btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
-btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
-btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
-btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
-btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
-btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
-btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
-btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
-btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
-btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
-btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
-btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
-btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
-btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
-btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
-btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
-btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
-btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
-btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
-btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
-btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
-btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
-btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
-btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
-btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
-btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
-btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
-btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
-btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
-};
-
-
-bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( btSimplexSolverInterface& simplexSolver,
-		const btConvexShape* convexA,const btConvexShape* convexB,
-					const btTransform& transA,const btTransform& transB,
-				btVector3& v, btVector3& pa, btVector3& pb,
-				class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc)
-{
-#if 0
-	(void)stackAlloc;
-	(void)v;
-	
-
-	struct btIntermediateResult : public SpuContactResult
-	{
-
-		btIntermediateResult():m_hasResult(false)
-		{
-		}
-		
-		btVector3 m_normalOnBInWorld;
-		btVector3 m_pointInWorld;
-		btScalar m_depth;
-		bool	m_hasResult;
-
-		virtual void setShapeIdentifiersA(int partId0,int index0)
-		{
-			(void)partId0;
-			(void)index0;
-		}
-
-		virtual void setShapeIdentifiersB(int partId1,int index1)
-		{
-			(void)partId1;
-			(void)index1;
-		}
-		void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
-		{
-			m_normalOnBInWorld = normalOnBInWorld;
-			m_pointInWorld = pointInWorld;
-			m_depth = depth;
-			m_hasResult = true;
-		}
-	};
-
-	//just take fixed number of orientation, and sample the penetration depth in that direction
-	btScalar minProj = btScalar(BT_LARGE_FLOAT);
-	btVector3 minNorm(0.f,0.f,0.f);
-	btVector3 minVertex;
-	btVector3 minA,minB;
-	btVector3 seperatingAxisInA,seperatingAxisInB;
-	btVector3 pInA,qInB,pWorld,qWorld,w;
-
-//#define USE_BATCHED_SUPPORT 1
-#ifdef USE_BATCHED_SUPPORT
-
-	btVector3	supportVerticesABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
-	btVector3	supportVerticesBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
-	btVector3	seperatingAxisInABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
-	btVector3	seperatingAxisInBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
-	int i;
-
-	int numSampleDirections = NUM_UNITSPHERE_POINTS;
-
-	for (i=0;i<numSampleDirections;i++)
-	{
-		const btVector3& norm = sPenetrationDirections[i];
-		seperatingAxisInABatch[i] =  (-norm) * transA.getBasis() ;
-		seperatingAxisInBBatch[i] =  norm   * transB.getBasis() ;
-	}
-
-	{
-		int numPDA = convexA->getNumPreferredPenetrationDirections();
-		if (numPDA)
-		{
-			for (int i=0;i<numPDA;i++)
-			{
-				btVector3 norm;
-				convexA->getPreferredPenetrationDirection(i,norm);
-				norm  = transA.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
-				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
-				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
-				numSampleDirections++;
-			}
-		}
-	}
-
-	{
-		int numPDB = convexB->getNumPreferredPenetrationDirections();
-		if (numPDB)
-		{
-			for (int i=0;i<numPDB;i++)
-			{
-				btVector3 norm;
-				convexB->getPreferredPenetrationDirection(i,norm);
-				norm  = transB.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
-				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
-				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
-				numSampleDirections++;
-			}
-		}
-	}
-
-
-
-	convexA->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInABatch,supportVerticesABatch,numSampleDirections);
-	convexB->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInBBatch,supportVerticesBBatch,numSampleDirections);
-
-	for (i=0;i<numSampleDirections;i++)
-	{
-		const btVector3& norm = sPenetrationDirections[i];
-		seperatingAxisInA = seperatingAxisInABatch[i];
-		seperatingAxisInB = seperatingAxisInBBatch[i];
-
-		pInA = supportVerticesABatch[i];
-		qInB = supportVerticesBBatch[i];
-
-		pWorld = transA(pInA);	
-		qWorld = transB(qInB);
-		w	= qWorld - pWorld;
-		btScalar delta = norm.dot(w);
-		//find smallest delta
-		if (delta < minProj)
-		{
-			minProj = delta;
-			minNorm = norm;
-			minA = pWorld;
-			minB = qWorld;
-		}
-	}	
-#else
-
-	int numSampleDirections = NUM_UNITSPHERE_POINTS;
-
-///this is necessary, otherwise the normal is not correct, and sphere will rotate forever on a sloped triangle mesh
-#define DO_PREFERRED_DIRECTIONS 1
-#ifdef DO_PREFERRED_DIRECTIONS
-	{
-		int numPDA = spuGetNumPreferredPenetrationDirections(shapeTypeA,convexA);
-		if (numPDA)
-		{
-			for (int i=0;i<numPDA;i++)
-			{
-				btVector3 norm;
-				spuGetPreferredPenetrationDirection(shapeTypeA,convexA,i,norm);
-				norm  = transA.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
-				numSampleDirections++;
-			}
-		}
-	}
-
-	{
-		int numPDB = spuGetNumPreferredPenetrationDirections(shapeTypeB,convexB);
-		if (numPDB)
-		{
-			for (int i=0;i<numPDB;i++)
-			{
-				btVector3 norm;
-				spuGetPreferredPenetrationDirection(shapeTypeB,convexB,i,norm);
-				norm  = transB.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
-				numSampleDirections++;
-			}
-		}
-	}
-#endif //DO_PREFERRED_DIRECTIONS
-
-	for (int i=0;i<numSampleDirections;i++)
-	{
-		const btVector3& norm = sPenetrationDirections[i];
-		seperatingAxisInA = (-norm)* transA.getBasis();
-		seperatingAxisInB = norm* transB.getBasis();
-
-		pInA = convexA->localGetSupportVertexWithoutMarginNonVirtual( seperatingAxisInA);//, NULL);
-		qInB = convexB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);//, NULL);
-
-	//	pInA = convexA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
-	//	qInB = convexB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
-
-		pWorld = transA(pInA);	
-		qWorld = transB(qInB);
-		w	= qWorld - pWorld;
-		btScalar delta = norm.dot(w);
-		//find smallest delta
-		if (delta < minProj)
-		{
-			minProj = delta;
-			minNorm = norm;
-			minA = pWorld;
-			minB = qWorld;
-		}
-	}
-#endif //USE_BATCHED_SUPPORT
-
-	//add the margins
-
-	minA += minNorm*marginA;
-	minB -= minNorm*marginB;
-	//no penetration
-	if (minProj < btScalar(0.))
-		return false;
-
-	minProj += (marginA + marginB) + btScalar(1.00);
-
-
-
-
-
-//#define DEBUG_DRAW 1
-#ifdef DEBUG_DRAW
-	if (debugDraw)
-	{
-		btVector3 color(0,1,0);
-		debugDraw->drawLine(minA,minB,color);
-		color = btVector3 (1,1,1);
-		btVector3 vec = minB-minA;
-		btScalar prj2 = minNorm.dot(vec);
-		debugDraw->drawLine(minA,minA+(minNorm*minProj),color);
-
-	}
-#endif //DEBUG_DRAW
-
-	
-	btGjkPairDetector gjkdet(convexA,convexB,&simplexSolver,0);
-
-	btScalar offsetDist = minProj;
-	btVector3 offset = minNorm * offsetDist;
-	
-
-	SpuClosestPointInput input;
-	input.m_convexVertexData[0] = convexVertexDataA;
-	input.m_convexVertexData[1] = convexVertexDataB;
-	btVector3 newOrg = transA.getOrigin() + offset;
-
-	btTransform displacedTrans = transA;
-	displacedTrans.setOrigin(newOrg);
-
-	input.m_transformA = displacedTrans;
-	input.m_transformB = transB;
-	input.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);//minProj;
-	
-	btIntermediateResult res;
-	gjkdet.getClosestPoints(input,res,0);
-
-	btScalar correctedMinNorm = minProj - res.m_depth;
-
-
-	//the penetration depth is over-estimated, relax it
-	btScalar penetration_relaxation= btScalar(1.);
-	minNorm*=penetration_relaxation;
-
-	if (res.m_hasResult)
-	{
-
-		pa = res.m_pointInWorld - minNorm * correctedMinNorm;
-		pb = res.m_pointInWorld;
-		
-#ifdef DEBUG_DRAW
-		if (debugDraw)
-		{
-			btVector3 color(1,0,0);
-			debugDraw->drawLine(pa,pb,color);
-		}
-#endif//DEBUG_DRAW
-
-
-	} else {
-		// could not seperate shapes
-		//btAssert (false);
-	}
-	return res.m_hasResult;
-#endif
-	return false;
-}
-
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
deleted file mode 100644
index 18ad223ed36..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
+++ /dev/null
@@ -1,48 +0,0 @@
-
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
-#define MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
-
-
-#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
-
-class btStackAlloc;
-class btIDebugDraw;
-class btVoronoiSimplexSolver;
-class btConvexShape;
-
-///MinkowskiPenetrationDepthSolver implements bruteforce penetration depth estimation.
-///Implementation is based on sampling the depth using support mapping, and using GJK step to get the witness points.
-class SpuMinkowskiPenetrationDepthSolver : public btConvexPenetrationDepthSolver
-{
-public:
-	SpuMinkowskiPenetrationDepthSolver() {}
-	virtual ~SpuMinkowskiPenetrationDepthSolver() {};
-
-		virtual bool calcPenDepth( btSimplexSolverInterface& simplexSolver,
-		const btConvexShape* convexA,const btConvexShape* convexB,
-					const btTransform& transA,const btTransform& transB,
-				btVector3& v, btVector3& pa, btVector3& pb,
-				class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc
-				);
-
-
-};
-
-
-#endif //MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
deleted file mode 100644
index 774a0cb2eb1..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
-#define _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
-
-
-#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
-
-/// Returns the number of preferred penetration directions available for a shape of the given proxy type.
-/// Only TRIANGLE_SHAPE_PROXYTYPE is supported and yields 2 (the two directions served by
-/// spuGetPreferredPenetrationDirection for index 0 and index != 0); every other type yields 0.
-/// 'shape' is not inspected. Declared inline because this definition lives in a header: without it,
-/// including the header from more than one translation unit produces duplicate symbols at link time.
-inline int		spuGetNumPreferredPenetrationDirections(int shapeType, void* shape)
-{
-	(void)shape; // unused, kept for interface compatibility
-
-	switch (shapeType)
-	{
-		case TRIANGLE_SHAPE_PROXYTYPE:
-			return 2;
-
-		default:
-		{
-			// unsupported types are only reported when __ASSERT is enabled
-#if __ASSERT
-			spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType);
-#endif // __ASSERT
-			break;
-		}
-	}
-
-	return 0;
-}
-
-/// Writes the preferred penetration direction number 'index' for the given shape into penetrationVector.
-/// For TRIANGLE_SHAPE_PROXYTYPE, 'shape' is read as an array of three btVector3 vertices; the result is the
-/// normalized cross product of the two edges leaving vertex 0, negated when index is non-zero.
-/// For any other shape type penetrationVector is left untouched (a diagnostic is printed when __ASSERT is enabled).
-/// Declared inline because this definition lives in a header: without it, including the header from more
-/// than one translation unit produces duplicate symbols at link time.
-inline void	spuGetPreferredPenetrationDirection(int shapeType, void* shape, int index, btVector3& penetrationVector)
-{
-	switch (shapeType)
-	{
-		case TRIANGLE_SHAPE_PROXYTYPE:
-		{
-			btVector3* vertices = (btVector3*)shape;
-			// triangle normal from the two edges that start at vertex 0
-			penetrationVector = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]);
-			penetrationVector.normalize();
-			// the second direction is the opposite side of the triangle
-			if (index)
-				penetrationVector *= btScalar(-1.);
-			break;
-		}
-		default:
-		{
-#if __ASSERT
-			// report under this function's own name (the message used to name the sibling function)
-			spu_printf("spuGetPreferredPenetrationDirection() - Unsupported bound type: %d.\n", shapeType);
-#endif // __ASSERT
-			break;
-		}
-	}
-}
-
-#endif //_SPU_PREFERRED_PENETRATION_DIRECTIONS_H
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
deleted file mode 100644
index 30642a39294..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
+++ /dev/null
@@ -1,1155 +0,0 @@
-/*
-   Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-
-#include "Box.h"
-
-// Returns a multiplied by itself.
-static inline float sqr( float a )
-{
-	const float squared = a * a;
-	return squared;
-}
-
-// Kind of separating axis that currently holds the largest gap between the two boxes.
-enum BoxSepAxisType
-{
-	A_AXIS,		// recorded by AaxisTest
-	B_AXIS,		// recorded by BaxisTest
-	CROSS_AXIS	// recorded by CrossAxisTest
-};
-
-//-------------------------------------------------------------------------------------------------
-// voronoiTol: bevels Voronoi planes slightly which helps when features are parallel.
-//-------------------------------------------------------------------------------------------------
-// Small negative factor; the inVoronoi checks in this file have the form ( p[i] >= voronoiTol * p[j] ).
-static const float voronoiTol = -1.0e-5f;
-
-//-------------------------------------------------------------------------------------------------
-// separating axis tests: gaps along each axis are computed, and the axis with the maximum
-// gap is stored.  cross product axes are normalized.
-//-------------------------------------------------------------------------------------------------
-// AaxisTest( dim, letter, first ): reads gapsA.get<letter>() into gap; makes the enclosing function return gap when it exceeds distanceThreshold; otherwise keeps the largest gap seen so far by updating maxGap, axisType (A_AXIS), faceDimA (dim) and axisA (identity column dim). When 'first' is true maxGap is seeded from gap instead of being compared. Must expand in a scope that declares gap, maxGap, gapsA, distanceThreshold, axisType, faceDimA, axisA and identity.
-#define AaxisTest( dim, letter, first )                                                         \
-{                                                                                               \
-   if ( first )                                                                                 \
-   {                                                                                            \
-      maxGap = gap = gapsA.get##letter();                                                      \
-      if ( gap > distanceThreshold ) return gap;                                                \
-      axisType = A_AXIS;                                                                        \
-      faceDimA = dim;                                                                           \
-      axisA = identity.getCol##dim();                                                          \
-   }                                                                                            \
-   else                                                                                         \
-   {                                                                                            \
-      gap = gapsA.get##letter();                                                               \
-      if ( gap > distanceThreshold ) return gap;                                                \
-      else if ( gap > maxGap )                                                                  \
-      {                                                                                         \
-         maxGap = gap;                                                                          \
-         axisType = A_AXIS;                                                                     \
-         faceDimA = dim;                                                                        \
-         axisA = identity.getCol##dim();                                                       \
-      }                                                                                         \
-   }                                                                                            \
-}
-
-// BaxisTest( dim, letter ): reads gapsB.get<letter>() into gap; makes the enclosing function return gap when it exceeds distanceThreshold; when gap beats maxGap it records maxGap, axisType (B_AXIS), faceDimB (dim) and axisB (identity column dim). Must expand in a scope that declares gap, maxGap, gapsB, distanceThreshold, axisType, faceDimB, axisB and identity, with maxGap already initialized.
-#define BaxisTest( dim, letter )                                                                \
-{                                                                                               \
-   gap = gapsB.get##letter();                                                                  \
-   if ( gap > distanceThreshold ) return gap;                                                   \
-   else if ( gap > maxGap )                                                                     \
-   {                                                                                            \
-      maxGap = gap;                                                                             \
-      axisType = B_AXIS;                                                                        \
-      faceDimB = dim;                                                                           \
-      axisB = identity.getCol##dim();                                                          \
-   }                                                                                            \
-}
-// CrossAxisTest( dima, dimb, letterb ): takes lsqr from lsqrs column dima, component letterb, and skips the axis when lsqr is not above 1.0e-30f; otherwise scales the matching gapsAxB entry by 1/sqrtf(lsqr), makes the enclosing function return gap when it exceeds distanceThreshold, and when gap beats maxGap records maxGap, axisType (CROSS_AXIS), edgeDimA, edgeDimB and axisA = cross( identity column dima, matrixAB column dimb ) * l_recip. Must expand in a scope that declares those names.
-#define CrossAxisTest( dima, dimb, letterb )                                                    \
-{                                                                                               \
-   const float lsqr_tolerance = 1.0e-30f;                                                       \
-   float lsqr;                                                                                  \
-                                                                                                \
-   lsqr = lsqrs.getCol##dima().get##letterb();                                                \
-                                                                                                \
-   if ( lsqr > lsqr_tolerance )                                                                 \
-   {                                                                                            \
-      float l_recip = 1.0f / sqrtf( lsqr );                                                     \
-      gap = float(gapsAxB.getCol##dima().get##letterb()) * l_recip;                           \
-                                                                                                \
-      if ( gap > distanceThreshold )                                                            \
-      {                                                                                         \
-         return gap;                                                                            \
-      }                                                                                         \
-                                                                                                \
-      if ( gap > maxGap )                                                                       \
-      {                                                                                         \
-         maxGap = gap;                                                                          \
-         axisType = CROSS_AXIS;                                                                 \
-         edgeDimA = dima;                                                                       \
-         edgeDimB = dimb;                                                                       \
-         axisA = cross(identity.getCol##dima(),matrixAB.getCol##dimb()) * l_recip;            \
-      }                                                                                         \
-   }                                                                                            \
-}
-
-//-------------------------------------------------------------------------------------------------
-// tests whether a vertex of box B and a face of box A are the closest features
-//-------------------------------------------------------------------------------------------------
-
-// Tests one corner of box B against the face of box A.
-// Outputs: t0/t1 receive the corner's first two coordinates clamped to [-hA[i], hA[i]];
-// inVoronoi receives the result of the reverse Voronoi check done in B's frame.
-// Returns the squared distance between the corner and the clamped point.
-inline
-float
-VertexBFaceATest(
-	bool & inVoronoi,
-	float & t0,
-	float & t1,
-	const Vector3 & hA,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesB )
-{
-	// corner of box B expressed in A's coordinate system
-	Vector3 cornerInA =
-		Vector3( faceOffsetAB + matrixAB.getCol0() * scalesB.getX() + matrixAB.getCol1() * scalesB.getY() );
-
-	const float cornerX = cornerInA[0];
-	const float cornerY = cornerInA[1];
-	const float limitX = hA[0];
-	const float limitY = hA[1];
-
-	// parameters of the point on A closest to the corner: clamp to A's half extents
-	t0 = ( cornerX > limitX ) ? limitX : ( ( cornerX < -limitX ) ? -limitX : cornerX );
-	t1 = ( cornerY > limitY ) ? limitY : ( ( cornerY < -limitY ) ? -limitY : cornerY );
-
-	// Voronoi test: the point on B is already known to lie in the Voronoi region of the
-	// point on A, so only the reverse direction is checked here.
-	Vector3 pointInB =
-		Vector3( mulPerElem( faceOffsetBA + matrixBA.getCol0() * t0 + matrixBA.getCol1() * t1 - scalesB, signsB ) );
-
-	const bool insideX = ( pointInB[0] >= voronoiTol * pointInB[2] );
-	const bool insideY = ( pointInB[1] >= voronoiTol * pointInB[0] );
-	const bool insideZ = ( pointInB[2] >= voronoiTol * pointInB[1] );
-	inVoronoi = ( insideX && insideY && insideZ );
-
-	const float deltaX = cornerX - t0;
-	const float deltaY = cornerY - t1;
-	return (sqr( deltaX ) + sqr( deltaY ) + sqr( cornerInA[2] ));
-}
-// VertexBFaceA_SetNewMin(): stores the current candidate as the new minimum: minDistSqr = distSqr, localPointA X/Y = t0/t1, localPointB X/Y = scalesB X/Y, featureA = F, featureB = V. Uses the caller's variables of exactly these names.
-#define VertexBFaceA_SetNewMin()                \
-{                                               \
-   minDistSqr = distSqr;                        \
-   localPointA.setX(t0);                        \
-   localPointA.setY(t1);                        \
-   localPointB.setX( scalesB.getX() );          \
-   localPointB.setY( scalesB.getY() );          \
-   featureA = F;                                \
-   featureB = V;                                \
-}
-
-// Runs VertexBFaceATest for up to four sign combinations of box B's corner (flipping X, then Y,
-// then X again on signsB/scalesB, which are modified in place) and keeps the smallest squared
-// distance through VertexBFaceA_SetNewMin(). When 'first' is true the first candidate is accepted
-// without comparing against minDistSqr. Stops as soon as a test sets 'done'.
-void
-VertexBFaceATests(
-	bool & done,
-	float & minDistSqr,
-	Point3 & localPointA,
-	Point3 & localPointB,
-	FeatureType & featureA,
-	FeatureType & featureB,
-	const Vector3 & hA,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesB,
-	bool first )
-{
-	// t0, t1 and distSqr keep these exact names: VertexBFaceA_SetNewMin() refers to them
-	float t0, t1;
-	float distSqr;
-
-	for ( int attempt = 0; attempt < 4; ++attempt )
-	{
-		// move to the next corner: attempts 1 and 3 flip X, attempt 2 flips Y
-		if ( attempt == 2 )
-		{
-			signsB.setY( -signsB.getY() );
-			scalesB.setY( -scalesB.getY() );
-		}
-		else if ( attempt != 0 )
-		{
-			signsB.setX( -signsB.getX() );
-			scalesB.setX( -scalesB.getX() );
-		}
-
-		distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA,
-									matrixAB, matrixBA, signsB, scalesB );
-
-		// minDistSqr is not read for the first candidate of a fresh search
-		if ( ( first && attempt == 0 ) || ( distSqr < minDistSqr ) )
-		{
-			VertexBFaceA_SetNewMin();
-		}
-
-		if ( done )
-			return;
-	}
-}
-
-//-------------------------------------------------------------------------------------------------
-// VertexAFaceBTest: tests whether a vertex of box A and a face of box B are the closest features
-//-------------------------------------------------------------------------------------------------
-
-// Tests one corner of box A against the face of box B.
-// Outputs: t0/t1 receive the corner's first two coordinates clamped to [-hB[i], hB[i]];
-// inVoronoi receives the result of the reverse Voronoi check done in A's frame.
-// Returns the squared distance between the corner and the clamped point.
-inline
-float
-VertexAFaceBTest(
-	bool & inVoronoi,
-	float & t0,
-	float & t1,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) scalesA )
-{
-	// corner of box A expressed in B's coordinate system
-	Vector3 cornerInB =
-		Vector3( faceOffsetBA + matrixBA.getCol0() * scalesA.getX() + matrixBA.getCol1() * scalesA.getY() );
-
-	const float cornerX = cornerInB[0];
-	const float cornerY = cornerInB[1];
-	const float limitX = hB[0];
-	const float limitY = hB[1];
-
-	// parameters of the point on B closest to the corner: clamp to B's half extents
-	t0 = ( cornerX > limitX ) ? limitX : ( ( cornerX < -limitX ) ? -limitX : cornerX );
-	t1 = ( cornerY > limitY ) ? limitY : ( ( cornerY < -limitY ) ? -limitY : cornerY );
-
-	// reverse Voronoi check, evaluated in A's frame
-	Vector3 pointInA =
-		Vector3( mulPerElem( faceOffsetAB + matrixAB.getCol0() * t0 + matrixAB.getCol1() * t1 - scalesA, signsA ) );
-
-	const bool insideX = ( pointInA[0] >= voronoiTol * pointInA[2] );
-	const bool insideY = ( pointInA[1] >= voronoiTol * pointInA[0] );
-	const bool insideZ = ( pointInA[2] >= voronoiTol * pointInA[1] );
-	inVoronoi = ( insideX && insideY && insideZ );
-
-	const float deltaX = cornerX - t0;
-	const float deltaY = cornerY - t1;
-	return (sqr( deltaX ) + sqr( deltaY ) + sqr( cornerInB[2] ));
-}
-// VertexAFaceB_SetNewMin(): stores the current candidate as the new minimum: minDistSqr = distSqr, localPointB X/Y = t0/t1, localPointA X/Y = scalesA X/Y, featureA = V, featureB = F. Uses the caller's variables of exactly these names.
-#define VertexAFaceB_SetNewMin()                \
-{                                               \
-   minDistSqr = distSqr;                        \
-   localPointB.setX(t0);                        \
-   localPointB.setY(t1);                        \
-   localPointA.setX( scalesA.getX() );          \
-   localPointA.setY( scalesA.getY() );          \
-   featureA = V;                                \
-   featureB = F;                                \
-}
-
-// Runs VertexAFaceBTest for up to four sign combinations of box A's corner (flipping X, then Y,
-// then X again on signsA/scalesA, which are modified in place) and keeps the smallest squared
-// distance through VertexAFaceB_SetNewMin(). When 'first' is true the first candidate is accepted
-// without comparing against minDistSqr. Stops as soon as a test sets 'done'.
-void
-VertexAFaceBTests(
-	bool & done,
-	float & minDistSqr,
-	Point3 & localPointA,
-	Point3 & localPointB,
-	FeatureType & featureA,
-	FeatureType & featureB,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) scalesA,
-	bool first )
-{
-	// t0, t1 and distSqr keep these exact names: VertexAFaceB_SetNewMin() refers to them
-	float t0, t1;
-	float distSqr;
-
-	for ( int attempt = 0; attempt < 4; ++attempt )
-	{
-		// move to the next corner: attempts 1 and 3 flip X, attempt 2 flips Y
-		if ( attempt == 2 )
-		{
-			signsA.setY( -signsA.getY() );
-			scalesA.setY( -scalesA.getY() );
-		}
-		else if ( attempt != 0 )
-		{
-			signsA.setX( -signsA.getX() );
-			scalesA.setX( -scalesA.getX() );
-		}
-
-		distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA,
-									matrixAB, matrixBA, signsA, scalesA );
-
-		// minDistSqr is not read for the first candidate of a fresh search
-		if ( ( first && attempt == 0 ) || ( distSqr < minDistSqr ) )
-		{
-			VertexAFaceB_SetNewMin();
-		}
-
-		if ( done )
-			return;
-	}
-}
-
-//-------------------------------------------------------------------------------------------------
-// EdgeEdgeTest:
-//
-// tests whether a pair of edges are the closest features
-//
-// note on the shorthand:
-// 'a' & 'b' refer to the edges.
-// 'c' is the dimension of the axis that points from the face center to the edge Center
-// 'd' is the dimension of the edge Direction
-// the dimension of the face normal is 2
-//-------------------------------------------------------------------------------------------------
-// EdgeEdgeTest expands to the complete body of an EdgeEdgeTest_xxxx function: it reads that function's parameters, writes inVoronoi, tA and tB, and ends in a return of dot(edgeOffAB,edgeOffAB). tA is first solved from the edge directions (0 when denom is exactly 0) and clamped to +/-hA[ad]; tB is derived from it, and when tB has to be clamped to +/-hB[bd], tA is recomputed and clamped again.
-#define EdgeEdgeTest( ac, ac_letter, ad, ad_letter, bc, bc_letter, bd, bd_letter )              \
-{                                                                                               \
-   Vector3 edgeOffsetAB;                                                                          \
-   Vector3 edgeOffsetBA;                                                                          \
-                                                                                                \
-   edgeOffsetAB = faceOffsetAB + matrixAB.getCol##bc() * scalesB.get##bc_letter();            \
-   edgeOffsetAB.set##ac_letter( edgeOffsetAB.get##ac_letter() - scalesA.get##ac_letter() );  \
-                                                                                                \
-   edgeOffsetBA = faceOffsetBA + matrixBA.getCol##ac() * scalesA.get##ac_letter();            \
-   edgeOffsetBA.set##bc_letter( edgeOffsetBA.get##bc_letter() - scalesB.get##bc_letter() );  \
-                                                                                                \
-   float dirDot = matrixAB.getCol##bd().get##ad_letter();                                     \
-   float denom = 1.0f - dirDot*dirDot;                                                          \
-   float edgeOffsetAB_ad = edgeOffsetAB.get##ad_letter();                                      \
-   float edgeOffsetBA_bd = edgeOffsetBA.get##bd_letter();                                      \
-                                                                                                \
-   if ( denom == 0.0f )                                                                         \
-   {                                                                                            \
-      tA = 0.0f;                                                                                \
-   }                                                                                            \
-   else                                                                                         \
-   {                                                                                            \
-      tA = ( edgeOffsetAB_ad + edgeOffsetBA_bd * dirDot ) / denom;                              \
-   }                                                                                            \
-                                                                                                \
-   if ( tA < -hA[ad] ) tA = -hA[ad];                                                            \
-   else if ( tA > hA[ad] ) tA = hA[ad];                                                         \
-                                                                                                \
-   tB = tA * dirDot + edgeOffsetBA_bd;                                                          \
-                                                                                                \
-   if ( tB < -hB[bd] )                                                                          \
-   {                                                                                            \
-      tB = -hB[bd];                                                                             \
-      tA = tB * dirDot + edgeOffsetAB_ad;                                                       \
-                                                                                                \
-      if ( tA < -hA[ad] ) tA = -hA[ad];                                                         \
-      else if ( tA > hA[ad] ) tA = hA[ad];                                                      \
-   }                                                                                            \
-   else if ( tB > hB[bd] )                                                                      \
-   {                                                                                            \
-      tB = hB[bd];                                                                              \
-      tA = tB * dirDot + edgeOffsetAB_ad;                                                       \
-                                                                                                \
-      if ( tA < -hA[ad] ) tA = -hA[ad];                                                         \
-      else if ( tA > hA[ad] ) tA = hA[ad];                                                      \
-   }                                                                                            \
-                                                                                                \
-   Vector3 edgeOffAB = Vector3( mulPerElem( edgeOffsetAB + matrixAB.getCol##bd() * tB, signsA ) );\
-   Vector3 edgeOffBA = Vector3( mulPerElem( edgeOffsetBA + matrixBA.getCol##ad() * tA, signsB ) );\
-                                                                                                \
-   inVoronoi = ( edgeOffAB[ac] >= voronoiTol * edgeOffAB[2] ) &&                                \
-               ( edgeOffAB[2] >= voronoiTol * edgeOffAB[ac] ) &&                                \
-               ( edgeOffBA[bc] >= voronoiTol * edgeOffBA[2] ) &&                                \
-               ( edgeOffBA[2] >= voronoiTol * edgeOffBA[bc] );                                  \
-                                                                                                \
-   edgeOffAB[ad] -= tA;                                                                         \
-   edgeOffBA[bd] -= tB;                                                                         \
-                                                                                                \
-   return dot(edgeOffAB,edgeOffAB);                                                             \
-}
-// EdgeEdgeTest_0101: EdgeEdgeTest with edge A centered along dim 0 / directed along dim 1, and edge B centered along dim 0 / directed along dim 1. Parameter names are fixed because the macro refers to them.
-float
-EdgeEdgeTest_0101(
-	bool & inVoronoi,
-	float & tA,
-	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
-{
-	EdgeEdgeTest( 0, X, 1, Y, 0, X, 1, Y );
-}
-// EdgeEdgeTest_0110: EdgeEdgeTest with edge A centered along dim 0 / directed along dim 1, and edge B centered along dim 1 / directed along dim 0. Parameter names are fixed because the macro refers to them.
-float
-EdgeEdgeTest_0110(
-	bool & inVoronoi,
-	float & tA,
-	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
-{
-	EdgeEdgeTest( 0, X, 1, Y, 1, Y, 0, X );
-}
-// EdgeEdgeTest_1001: EdgeEdgeTest with edge A centered along dim 1 / directed along dim 0, and edge B centered along dim 0 / directed along dim 1. Parameter names are fixed because the macro refers to them.
-float
-EdgeEdgeTest_1001(
-	bool & inVoronoi,
-	float & tA,
-	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
-{
-	EdgeEdgeTest( 1, Y, 0, X, 0, X, 1, Y );
-}
-// EdgeEdgeTest_1010: EdgeEdgeTest with edge A centered along dim 1 / directed along dim 0, and edge B centered along dim 1 / directed along dim 0. Parameter names are fixed because the macro refers to them.
-float
-EdgeEdgeTest_1010(
-	bool & inVoronoi,
-	float & tA,
-	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
-{
-	EdgeEdgeTest( 1, Y, 0, X, 1, Y, 0, X );
-}
-// EdgeEdge_SetNewMin( ac, ad, bc, bd letters ): stores the current edge pair as the new minimum: minDistSqr = distSqr; localPointA gets scalesA on its ac component and tA on its ad component; localPointB gets scalesB on bc and tB on bd; otherFaceDimA/B are copied from testOtherFaceDimA/B; both features become E. Uses the caller's variables of exactly these names.
-#define EdgeEdge_SetNewMin( ac_letter, ad_letter, bc_letter, bd_letter )   \
-{                                                                          \
-   minDistSqr = distSqr;                                                   \
-   localPointA.set##ac_letter(scalesA.get##ac_letter());                 \
-   localPointA.set##ad_letter(tA);                                        \
-   localPointB.set##bc_letter(scalesB.get##bc_letter());                 \
-   localPointB.set##bd_letter(tB);                                        \
-   otherFaceDimA = testOtherFaceDimA;                                      \
-   otherFaceDimB = testOtherFaceDimB;                                      \
-   featureA = E;                                                           \
-   featureB = E;                                                           \
-}
-
-// Runs up to sixteen edge/edge tests (four sign combinations for each of the four
-// EdgeEdgeTest_xxxx variants) and keeps the smallest squared distance through
-// EdgeEdge_SetNewMin(). signsA/scalesA/signsB/scalesB are flipped in place between tests.
-// When 'first' is true the first candidate is accepted without comparing against minDistSqr.
-// Stops as soon as a test sets 'done'.
-void
-EdgeEdgeTests(
-	bool & done,
-	float & minDistSqr,
-	Point3 & localPointA,
-	Point3 & localPointB,
-	int & otherFaceDimA,
-	int & otherFaceDimB,
-	FeatureType & featureA,
-	FeatureType & featureB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB,
-	bool first )
-{
-	// distSqr, tA, tB and the two testOtherFaceDim variables keep these exact names:
-	// EdgeEdge_SetNewMin() refers to them
-	float distSqr;
-	float tA, tB;
-	int testOtherFaceDimA, testOtherFaceDimB;
-
-	// component whose sign is flipped right before each test, in execution order
-	enum { FLIP_NONE, FLIP_AX, FLIP_AY, FLIP_BX, FLIP_BY };
-	static const int flipBeforeTest[16] =
-	{
-		FLIP_NONE, FLIP_AX, FLIP_BX, FLIP_AX,	// tests run by EdgeEdgeTest_0101
-		FLIP_BX,   FLIP_AY, FLIP_BX, FLIP_AY,	// tests run by EdgeEdgeTest_1001
-		FLIP_BX,   FLIP_AX, FLIP_BY, FLIP_AX,	// tests run by EdgeEdgeTest_0110
-		FLIP_BY,   FLIP_AY, FLIP_BY, FLIP_AY	// tests run by EdgeEdgeTest_1010
-	};
-
-	for ( int test = 0; test < 16; ++test )
-	{
-		// variant 0: _0101, 1: _1001, 2: _0110, 3: _1010
-		const int variant = test / 4;
-		// minDistSqr is not read for the first candidate of a fresh search
-		const bool acceptUnseen = first && ( test == 0 );
-
-		testOtherFaceDimA = variant & 1;
-		testOtherFaceDimB = variant >> 1;
-
-		switch ( flipBeforeTest[test] )
-		{
-			case FLIP_AX:
-				signsA.setX( -signsA.getX() );
-				scalesA.setX( -scalesA.getX() );
-				break;
-			case FLIP_AY:
-				signsA.setY( -signsA.getY() );
-				scalesA.setY( -scalesA.getY() );
-				break;
-			case FLIP_BX:
-				signsB.setX( -signsB.getX() );
-				scalesB.setX( -scalesB.getX() );
-				break;
-			case FLIP_BY:
-				signsB.setY( -signsB.getY() );
-				scalesB.setY( -scalesB.getY() );
-				break;
-			default:
-				break;
-		}
-
-		switch ( variant )
-		{
-			case 0:
-				distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
-											 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
-				if ( acceptUnseen || ( distSqr < minDistSqr ) ) {
-					EdgeEdge_SetNewMin( X, Y, X, Y );
-				}
-				break;
-
-			case 1:
-				distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
-											 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
-				if ( distSqr < minDistSqr ) {
-					EdgeEdge_SetNewMin( Y, X, X, Y );
-				}
-				break;
-
-			case 2:
-				distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
-											 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
-				if ( distSqr < minDistSqr ) {
-					EdgeEdge_SetNewMin( X, Y, Y, X );
-				}
-				break;
-
-			default:
-				distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
-											 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
-				if ( distSqr < minDistSqr ) {
-					EdgeEdge_SetNewMin( Y, X, Y, X );
-				}
-				break;
-		}
-
-		if ( done )
-			return;
-	}
-}
-
-float
-boxBoxDistance(	// closest-point / penetration query for two oriented boxes; the caller-facing contract is documented in boxBoxDistance.h
-	Vector3& normal,	// out: assigned transformA * axisA just before returning
-	BoxPoint& boxPointA,	// out: localPoint and feature (face / edge / vertex) chosen on box A
-	BoxPoint& boxPointB,	// out: localPoint and feature (face / edge / vertex) chosen on box B
-	PE_REF(Box) boxA, const Transform3& transformA,	// in: this body reads boxA.half; transformA is box A's transform
-	PE_REF(Box) boxB, const Transform3& transformB,	// in: this body reads boxB.half; transformB is box B's transform
-	float distanceThreshold )	// not referenced directly below; NOTE(review): presumably consumed by the AaxisTest/BaxisTest/CrossAxisTest macros - confirm
-{
-	Matrix3 identity;
-	identity = Matrix3::identity();
-	Vector3 ident[3];
-	ident[0] = identity.getCol0();
-	ident[1] = identity.getCol1();
-	ident[2] = identity.getCol2();
-
-	// get relative transformations
-
-	Transform3 transformAB, transformBA;
-	Matrix3 matrixAB, matrixBA;
-	Vector3 offsetAB, offsetBA;
-
-	transformAB = orthoInverse(transformA) * transformB;
-	transformBA = orthoInverse(transformAB);
-
-	matrixAB = transformAB.getUpper3x3();
-	offsetAB = transformAB.getTranslation();
-	matrixBA = transformBA.getUpper3x3();
-	offsetBA = transformBA.getTranslation();
-
-	Matrix3 absMatrixAB = absPerElem(matrixAB);
-	Matrix3 absMatrixBA = absPerElem(matrixBA);
-
-	// find separating axis with largest gap between projections
-
-	BoxSepAxisType axisType;	// NOTE(review): axisType and maxGap are declared without a value here; presumably the first AaxisTest(0,X,true) seeds them - confirm
-	Vector3 axisA(0.0f), axisB(0.0f);
-	float gap, maxGap;
-	int faceDimA = 0, faceDimB = 0, edgeDimA = 0, edgeDimB = 0;
-
-	// face axes
-
-	Vector3  gapsA   = absPerElem(offsetAB) - boxA.half - absMatrixAB * boxB.half;
-
-	AaxisTest(0,X,true);
-	AaxisTest(1,Y,false);
-	AaxisTest(2,Z,false);
-
-	Vector3  gapsB   = absPerElem(offsetBA) - boxB.half - absMatrixBA * boxA.half;
-
-	BaxisTest(0,X);
-	BaxisTest(1,Y);
-	BaxisTest(2,Z);
-
-	// cross product axes
-
-	// countermeasure for a cross product of zero (translated from a mis-encoded Japanese comment): add a small epsilon to the absolute-value matrices
-	absMatrixAB += Matrix3(1.0e-5f);
-	absMatrixBA += Matrix3(1.0e-5f);
-
-	Matrix3 lsqrs, projOffset, projAhalf, projBhalf;
-
-	lsqrs.setCol0( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
-				   mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) );
-	lsqrs.setCol1( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
-				   mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) );
-	lsqrs.setCol2( mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) +
-				   mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) );
-
-	projOffset.setCol0(matrixBA.getCol1() * offsetAB.getZ() - matrixBA.getCol2() * offsetAB.getY());
-	projOffset.setCol1(matrixBA.getCol2() * offsetAB.getX() - matrixBA.getCol0() * offsetAB.getZ());
-	projOffset.setCol2(matrixBA.getCol0() * offsetAB.getY() - matrixBA.getCol1() * offsetAB.getX());
-
-	projAhalf.setCol0(absMatrixBA.getCol1() * boxA.half.getZ() + absMatrixBA.getCol2() * boxA.half.getY());
-	projAhalf.setCol1(absMatrixBA.getCol2() * boxA.half.getX() + absMatrixBA.getCol0() * boxA.half.getZ());
-	projAhalf.setCol2(absMatrixBA.getCol0() * boxA.half.getY() + absMatrixBA.getCol1() * boxA.half.getX());
-
-	projBhalf.setCol0(absMatrixAB.getCol1() * boxB.half.getZ() + absMatrixAB.getCol2() * boxB.half.getY());
-	projBhalf.setCol1(absMatrixAB.getCol2() * boxB.half.getX() + absMatrixAB.getCol0() * boxB.half.getZ());
-	projBhalf.setCol2(absMatrixAB.getCol0() * boxB.half.getY() + absMatrixAB.getCol1() * boxB.half.getX());
-
-	Matrix3 gapsAxB = absPerElem(projOffset) - projAhalf - transpose(projBhalf);
-
-	CrossAxisTest(0,0,X);
-	CrossAxisTest(0,1,Y);
-	CrossAxisTest(0,2,Z);
-	CrossAxisTest(1,0,X);
-	CrossAxisTest(1,1,Y);
-	CrossAxisTest(1,2,Z);
-	CrossAxisTest(2,0,X);
-	CrossAxisTest(2,1,Y);
-	CrossAxisTest(2,2,Z);
-
-	// need to pick the face on each box whose normal best matches the separating axis.
-	// will transform vectors to be in the coordinate system of this face to simplify things later.
-	// for this, a permutation matrix can be used, which the next section computes.
-
-	int dimA[3], dimB[3];
-
-	if ( axisType == A_AXIS ) {
-		if ( dot(axisA,offsetAB) < 0.0f )
-			axisA = -axisA;
-		axisB = matrixBA * -axisA;
-
-		Vector3 absAxisB = Vector3(absPerElem(axisB));
-
-		if ( ( absAxisB[0] > absAxisB[1] ) && ( absAxisB[0] > absAxisB[2] ) )
-			faceDimB = 0;
-		else if ( absAxisB[1] > absAxisB[2] )
-			faceDimB = 1;
-		else
-			faceDimB = 2;
-	} else if ( axisType == B_AXIS ) {
-		if ( dot(axisB,offsetBA) < 0.0f )
-			axisB = -axisB;
-		axisA = matrixAB * -axisB;
-
-		Vector3 absAxisA = Vector3(absPerElem(axisA));
-
-		if ( ( absAxisA[0] > absAxisA[1] ) && ( absAxisA[0] > absAxisA[2] ) )
-			faceDimA = 0;
-		else if ( absAxisA[1] > absAxisA[2] )
-			faceDimA = 1;
-		else
-			faceDimA = 2;
-	}
-
-	if ( axisType == CROSS_AXIS ) {
-		if ( dot(axisA,offsetAB) < 0.0f )
-			axisA = -axisA;
-		axisB = matrixBA * -axisA;
-
-		Vector3 absAxisA = Vector3(absPerElem(axisA));
-		Vector3 absAxisB = Vector3(absPerElem(axisB));
-
-		dimA[1] = edgeDimA;
-		dimB[1] = edgeDimB;
-
-		if ( edgeDimA == 0 ) {
-			if ( absAxisA[1] > absAxisA[2] ) {
-				dimA[0] = 2;
-				dimA[2] = 1;
-			} else                             {
-				dimA[0] = 1;
-				dimA[2] = 2;
-			}
-		} else if ( edgeDimA == 1 ) {
-			if ( absAxisA[2] > absAxisA[0] ) {
-				dimA[0] = 0;
-				dimA[2] = 2;
-			} else                             {
-				dimA[0] = 2;
-				dimA[2] = 0;
-			}
-		} else {
-			if ( absAxisA[0] > absAxisA[1] ) {
-				dimA[0] = 1;
-				dimA[2] = 0;
-			} else                             {
-				dimA[0] = 0;
-				dimA[2] = 1;
-			}
-		}
-
-		if ( edgeDimB == 0 ) {
-			if ( absAxisB[1] > absAxisB[2] ) {
-				dimB[0] = 2;
-				dimB[2] = 1;
-			} else                             {
-				dimB[0] = 1;
-				dimB[2] = 2;
-			}
-		} else if ( edgeDimB == 1 ) {
-			if ( absAxisB[2] > absAxisB[0] ) {
-				dimB[0] = 0;
-				dimB[2] = 2;
-			} else                             {
-				dimB[0] = 2;
-				dimB[2] = 0;
-			}
-		} else {
-			if ( absAxisB[0] > absAxisB[1] ) {
-				dimB[0] = 1;
-				dimB[2] = 0;
-			} else                             {
-				dimB[0] = 0;
-				dimB[2] = 1;
-			}
-		}
-	} else {
-		dimA[2] = faceDimA;
-		dimA[0] = (faceDimA+1)%3;
-		dimA[1] = (faceDimA+2)%3;
-		dimB[2] = faceDimB;
-		dimB[0] = (faceDimB+1)%3;
-		dimB[1] = (faceDimB+2)%3;
-	}
-
-	Matrix3 aperm_col, bperm_col;
-
-	aperm_col.setCol0(ident[dimA[0]]);
-	aperm_col.setCol1(ident[dimA[1]]);
-	aperm_col.setCol2(ident[dimA[2]]);
-
-	bperm_col.setCol0(ident[dimB[0]]);
-	bperm_col.setCol1(ident[dimB[1]]);
-	bperm_col.setCol2(ident[dimB[2]]);
-
-	Matrix3 aperm_row, bperm_row;
-
-	aperm_row = transpose(aperm_col);
-	bperm_row = transpose(bperm_col);
-
-	// permute all box parameters to be in the face coordinate systems
-
-	Matrix3 matrixAB_perm = aperm_row * matrixAB * bperm_col;
-	Matrix3 matrixBA_perm = transpose(matrixAB_perm);
-
-	Vector3 offsetAB_perm, offsetBA_perm;
-
-	offsetAB_perm = aperm_row * offsetAB;
-	offsetBA_perm = bperm_row * offsetBA;
-
-	Vector3 halfA_perm, halfB_perm;
-
-	halfA_perm = aperm_row * boxA.half;
-	halfB_perm = bperm_row * boxB.half;
-
-	// compute the vector between the centers of each face, in each face's coordinate frame
-
-	Vector3 signsA_perm, signsB_perm, scalesA_perm, scalesB_perm, faceOffsetAB_perm, faceOffsetBA_perm;
-
-	signsA_perm = copySignPerElem(Vector3(1.0f),aperm_row * axisA);
-	signsB_perm = copySignPerElem(Vector3(1.0f),bperm_row * axisB);
-	scalesA_perm = mulPerElem( signsA_perm, halfA_perm );
-	scalesB_perm = mulPerElem( signsB_perm, halfB_perm );
-
-	faceOffsetAB_perm = offsetAB_perm + matrixAB_perm.getCol2() * scalesB_perm.getZ();
-	faceOffsetAB_perm.setZ( faceOffsetAB_perm.getZ() - scalesA_perm.getZ() );
-
-	faceOffsetBA_perm = offsetBA_perm + matrixBA_perm.getCol2() * scalesA_perm.getZ();
-	faceOffsetBA_perm.setZ( faceOffsetBA_perm.getZ() - scalesB_perm.getZ() );
-
-	if ( maxGap < 0.0f ) {
-		// if boxes overlap, this will separate the faces for finding points of penetration.
-
-		faceOffsetAB_perm -= aperm_row * axisA * maxGap * 1.01f;
-		faceOffsetBA_perm -= bperm_row * axisB * maxGap * 1.01f;
-	}
-
-	// for each vertex/face or edge/edge pair of the two faces, find the closest points.
-	//
-	// these points each have an associated box feature (vertex, edge, or face).  if each
-	// point is in the external Voronoi region of the other's feature, they are the
-	// closest points of the boxes, and the algorithm can exit.
-	//
-	// the feature pairs are arranged so that in the general case, the first test will
-	// succeed.  degenerate cases (parallel faces) may require up to all tests in the
-	// worst case.
-	//
-	// if for some reason no case passes the Voronoi test, the features with the minimum
-	// distance are returned.
-
-	Point3 localPointA_perm, localPointB_perm;
-	float minDistSqr;	// no initial value here; NOTE(review): presumably written by the *Tests calls below, which receive it as an argument - confirm
-	bool done;	// same as minDistSqr: first argument of every *Tests call below
-
-	Vector3 hA_perm( halfA_perm ), hB_perm( halfB_perm );
-
-	localPointA_perm.setZ( scalesA_perm.getZ() );
-	localPointB_perm.setZ( scalesB_perm.getZ() );
-	scalesA_perm.setZ(0.0f);
-	scalesB_perm.setZ(0.0f);
-
-	int otherFaceDimA, otherFaceDimB;
-	FeatureType featureA, featureB;
-
-	if ( axisType == CROSS_AXIS ) {
-		EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
-					   otherFaceDimA, otherFaceDimB, featureA, featureB,
-					   hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-					   matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
-					   scalesA_perm, scalesB_perm, true );
-
-		if ( !done ) {
-			VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
-							   featureA, featureB,
-							   hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-							   matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false );
-
-			if ( !done ) {
-				VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
-								   featureA, featureB,
-								   hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-								   matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false );
-			}
-		}
-	} else if ( axisType == B_AXIS ) {
-		VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
-						   featureA, featureB,
-						   hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-						   matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, true );
-
-		if ( !done ) {
-			VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
-							   featureA, featureB,
-							   hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-							   matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false );
-
-			if ( !done ) {
-				EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
-							   otherFaceDimA, otherFaceDimB, featureA, featureB,
-							   hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-							   matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
-							   scalesA_perm, scalesB_perm, false );
-			}
-		}
-	} else {
-		VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
-						   featureA, featureB,
-						   hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-						   matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, true );
-
-		if ( !done ) {
-			VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
-							   featureA, featureB,
-							   hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-							   matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false );
-
-			if ( !done ) {
-				EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
-							   otherFaceDimA, otherFaceDimB, featureA, featureB,
-							   hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
-							   matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
-							   scalesA_perm, scalesB_perm, false );
-			}
-		}
-	}
-
-	// convert local points from face-local to box-local coordinate system
-
-	boxPointA.localPoint = Point3( aperm_col * Vector3( localPointA_perm ) );
-	boxPointB.localPoint = Point3( bperm_col * Vector3( localPointB_perm ) );
-
-	// find which features of the boxes are involved.
-	// the only feature pairs which occur in this function are VF, FV, and EE, even though the
-	// closest points might actually lie on sub-features, as in a VF contact might be used for
-	// what's actually a VV contact.  this means some feature pairs could possibly seem distinct
-	// from others, although their contact positions are the same.  don't know yet whether this
-	// matters.
-
-	int sA[3], sB[3];
-
-	sA[0] = boxPointA.localPoint.getX() > 0.0f;
-	sA[1] = boxPointA.localPoint.getY() > 0.0f;
-	sA[2] = boxPointA.localPoint.getZ() > 0.0f;
-
-	sB[0] = boxPointB.localPoint.getX() > 0.0f;
-	sB[1] = boxPointB.localPoint.getY() > 0.0f;
-	sB[2] = boxPointB.localPoint.getZ() > 0.0f;
-
-	if ( featureA == F ) {
-		boxPointA.setFaceFeature( dimA[2], sA[dimA[2]] );
-	} else if ( featureA == E ) {
-		boxPointA.setEdgeFeature( dimA[2], sA[dimA[2]], dimA[otherFaceDimA], sA[dimA[otherFaceDimA]] );
-	} else {
-		boxPointA.setVertexFeature( sA[0], sA[1], sA[2] );
-	}
-
-	if ( featureB == F ) {
-		boxPointB.setFaceFeature( dimB[2], sB[dimB[2]] );
-	} else if ( featureB == E ) {
-		boxPointB.setEdgeFeature( dimB[2], sB[dimB[2]], dimB[otherFaceDimB], sB[dimB[otherFaceDimB]] );
-	} else {
-		boxPointB.setVertexFeature( sB[0], sB[1], sB[2] );
-	}
-
-	normal = transformA * axisA;
-
-	if ( maxGap < 0.0f ) {
-		return (maxGap);	// overlapping: the result is the negative largest separating-axis gap
-	} else {
-		return (sqrtf( minDistSqr ));	// separated: distance between the closest points selected above
-	}
-}
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
deleted file mode 100644
index c58e257c026..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
-   Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-
-#ifndef __BOXBOXDISTANCE_H__
-#define __BOXBOXDISTANCE_H__
-
-
-#include "Box.h"
-
-using namespace Vectormath::Aos;
-
-//---------------------------------------------------------------------------
-// boxBoxDistance:
-//
-// description:
-//    this computes info that can be used for the collision response of two boxes.  when the boxes
-//    do not overlap, the points are set to the closest points of the boxes, and a positive
-//    distance between them is returned.  if the boxes do overlap, a negative distance is returned
-//    and the points are set to two points that would touch after the boxes are translated apart.
-//    the contact normal gives the direction to repel or separate the boxes when they touch or
-//    overlap (it's being approximated here as one of the 15 "separating axis" directions).
-//
-// returns:
-//    positive or negative distance between two boxes.
-//
-// args:
-//    Vector3& normal: set to a unit contact normal pointing from box A to box B.
-//
-//    BoxPoint& boxPointA, BoxPoint& boxPointB:
-//       set to a closest point or point of penetration on each box.
-//
-//    Box boxA, Box boxB:
-//       boxes, represented as 3 half-widths
-//
-//    const Transform3& transformA, const Transform3& transformB:
-//       box transformations, in world coordinates
-//
-//    float distanceThreshold:
-//       the algorithm will exit early if it finds that the boxes are more distant than this
-//       threshold, and not compute a contact normal or points.  if the returned distance
-//       exceeds the threshold, the other output data may not have been computed.  by
-//       default, this is set to FLT_MAX so it will have no effect.
-//
-//---------------------------------------------------------------------------
-
-// Prototype only; the implementation lives in boxBoxDistance.cpp.  Leaving
-// distanceThreshold at its default of FLT_MAX keeps the early exit disabled.
-float boxBoxDistance(
-	Vector3& normal,
-	BoxPoint& boxPointA,
-	BoxPoint& boxPointB,
-	PE_REF(Box) boxA,
-	const Transform3& transformA,
-	PE_REF(Box) boxB,
-	const Transform3& transformB,
-	float distanceThreshold = FLT_MAX );
-
-#endif /* __BOXBOXDISTANCE_H__ */
diff --git a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt b/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt
deleted file mode 100644
index 5b4a907058f..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/readme.txt
+++ /dev/null
@@ -1 +0,0 @@
-Empty placeholder for future Libspe2 SPU task
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp
deleted file mode 100644
index fe61955572f..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-
-#include "SpuSampleTask.h"
-#include "BulletDynamics/Dynamics/btRigidBody.h"
-#include "../PlatformDefinitions.h"
-#include "../SpuFakeDma.h"
-#include "LinearMath/btMinMax.h"
-
-#ifdef __SPU__
-#include <spu_printf.h>
-#else
-#include <stdio.h>
-#define spu_printf printf
-#endif
-
-#define MAX_NUM_BODIES 8192
-
-struct SampleTask_LocalStoreMemory	// scratch storage that processSampleTask receives through its lsMemory argument
-{
-	ATTRIBUTE_ALIGNED16(char gLocalRigidBody [sizeof(btRigidBody)+16]);	// staging buffer: one btRigidBody at a time is DMA'd in, updated and DMA'd back out
-	ATTRIBUTE_ALIGNED16(void* gPointerArray[MAX_NUM_BODIES]);	// local copy of the batch's body-pointer array; its capacity is why batches are capped at MAX_NUM_BODIES
-
-};
-
-
-
-
-/// Copies the batch's array of main-memory body pointers into
-/// localMemory->gPointerArray.
-///
-/// Returns false, after logging, when the batch cannot be served: either it
-/// has more entries than gPointerArray can hold (MAX_NUM_BODIES) or the count
-/// is negative.  The negative case used to fall through to the DMA call with
-/// a negative byte count.
-static bool fetchSampleBodyPointers(SampleTask_LocalStoreMemory* localMemory, const SpuSampleTaskDesc& taskDesc, int batchSize)
-{
-	if (batchSize>MAX_NUM_BODIES)
-	{
-		spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
-		return false;
-	}
-	if (batchSize<0)
-	{
-		spu_printf("SPU Error: negative number of bodies (%d) in SpuSampleTask.cpp\n", batchSize);
-		return false;
-	}
-
-	// the address takes the same round trip through a pointer type as before, so it is unchanged on every platform
-	btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
-	uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
-	int dmaArraySize = batchSize*sizeof(void*);
-
-	// transfers below 16 bytes go through the unaligned small-get path
-	if (dmaArraySize>=16)
-	{
-		cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize, DMA_TAG(1), 0, 0);
-		cellDmaWaitTagStatusAll(DMA_MASK(1));
-	} else
-	{
-		stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize);
-	}
-	return true;
-}
-
-//-- MAIN METHOD
-/// Runs one sample task.
-///
-/// userPtr  points at the SpuSampleTaskDesc describing the work.
-/// lsMemory points at the SampleTask_LocalStoreMemory scratch area.
-///
-/// For both commands m_sampleValue is the number of bodies and
-/// m_mainMemoryPtr the address of an array of that many body pointers.  Each
-/// body is copied into the scratch buffer, updated with a fixed 1/60 s time
-/// step and, if it was modified, copied back:
-///  - CMD_SAMPLE_INTEGRATE_BODIES: predictIntegratedTransform followed by
-///    proceedToTransform, for active bodies that are not static or kinematic.
-///  - CMD_SAMPLE_PREDICT_MOTION_BODIES: integrateVelocities, applyDamping and
-///    predictIntegratedTransform into the body's interpolation transform, for
-///    the same set of bodies.
-/// Any other command is ignored.
-void processSampleTask(void* userPtr, void* lsMemory)
-{
-	//	BT_PROFILE("processSampleTask");
-
-	SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory;
-	SpuSampleTaskDesc& taskDesc = *(SpuSampleTaskDesc*)userPtr;
-
-	// every body is staged through the same local buffer, so these are loop invariant.
-	// (the former "if (body)" test could never fail: body is the address of that buffer.)
-	void* localPtr = &localMemory->gLocalRigidBody[0];
-	btRigidBody* body = (btRigidBody*) localPtr;
-	const int dmaBodySize = sizeof(btRigidBody);
-	const float timeStep = 1.f/60.f;
-	const int batchSize = taskDesc.m_sampleValue;
-
-	switch (taskDesc.m_sampleCommand)
-	{
-	case CMD_SAMPLE_INTEGRATE_BODIES:
-		{
-			if (!fetchSampleBodyPointers(localMemory, taskDesc, batchSize))
-				break;
-
-			btTransform predictedTrans;
-
-			for ( int i=0;i<batchSize;i++)
-			{
-				///DMA rigid body
-				uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(localMemory->gPointerArray[i]);
-
-				cellDmaGet((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
-				cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-				if (body->isActive() && (!body->isStaticOrKinematicObject()))
-				{
-					body->predictIntegratedTransform(timeStep, predictedTrans);
-					body->proceedToTransform( predictedTrans);
-
-					// write the updated body back to main memory
-					cellDmaLargePut((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
-					cellDmaWaitTagStatusAll(DMA_MASK(1));
-				}
-			}
-			break;
-		}
-
-	case CMD_SAMPLE_PREDICT_MOTION_BODIES:
-		{
-			if (!fetchSampleBodyPointers(localMemory, taskDesc, batchSize))
-				break;
-
-			for ( int i=0;i<batchSize;i++)
-			{
-				///DMA rigid body
-				uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(localMemory->gPointerArray[i]);
-
-				cellDmaGet((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
-				cellDmaWaitTagStatusAll(DMA_MASK(1));
-
-				if ((!body->isStaticOrKinematicObject()) && body->isActive())
-				{
-					body->integrateVelocities( timeStep);
-					//damping
-					body->applyDamping(timeStep);
-
-					body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform());
-
-					// write the updated body back to main memory
-					cellDmaLargePut((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
-					cellDmaWaitTagStatusAll(DMA_MASK(1));
-				}
-			}
-			break;
-		}
-
-	default:
-		// unknown command: nothing to do
-		break;
-	}
-}
-
-
-#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
-
-// Cell builds use one statically allocated scratch area, declared through ATTRIBUTE_ALIGNED16.
-ATTRIBUTE_ALIGNED16(SampleTask_LocalStoreMemory	gLocalStoreMemory);
-
-/// Returns the scratch memory that processSampleTask expects as its lsMemory
-/// argument.  In this configuration every call returns the same static object.
-void* createSampleLocalStoreMemory()
-{
-	return &gLocalStoreMemory;
-}
-#else
-/// Returns the scratch memory that processSampleTask expects as its lsMemory
-/// argument.  In this configuration each call allocates a new object with
-/// plain new; nothing in this file frees it.
-/// NOTE(review): plain new may not honour the alignment the struct's members
-/// request through ATTRIBUTE_ALIGNED16 on every platform - confirm.
-void* createSampleLocalStoreMemory()
-{
-	return new SampleTask_LocalStoreMemory;
-}
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h
deleted file mode 100644
index c8ebdfd6232..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef SPU_SAMPLE_TASK_H
-#define SPU_SAMPLE_TASK_H
-
-#include "../PlatformDefinitions.h"
-#include "LinearMath/btScalar.h"
-#include "LinearMath/btVector3.h"
-#include "LinearMath/btMatrix3x3.h"
-
-#include "LinearMath/btAlignedAllocator.h"
-
-
-enum	// command codes carried in SpuSampleTaskDesc::m_sampleCommand and dispatched on by processSampleTask
-{
-	CMD_SAMPLE_INTEGRATE_BODIES = 1,	// predict each active dynamic body's transform and move the body to it
-	CMD_SAMPLE_PREDICT_MOTION_BODIES	// integrate velocities, apply damping and predict the interpolation transform
-};
-
-
-
-ATTRIBUTE_ALIGNED16(struct) SpuSampleTaskDesc	// description of one sample task; filled in by SpuSampleTaskProcess::issueTask and read by processSampleTask
-{
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	uint32_t						m_sampleCommand;	// one of the CMD_SAMPLE_* codes
-	uint32_t						m_taskId;	// index of the task slot that issued this task
-
-	uint64_t 	m_mainMemoryPtr;	// main-memory address of the array of body pointers
-	int			m_sampleValue;	// number of entries in that array (the batch size)
-	
-
-};
-
-
-void	processSampleTask(void* userPtr, void* lsMemory);	// userPtr: SpuSampleTaskDesc to execute; lsMemory: scratch area from createSampleLocalStoreMemory
-void*	createSampleLocalStoreMemory();	// returns the SampleTask_LocalStoreMemory scratch area to pass as lsMemory
-
-
-#endif //SPU_SAMPLE_TASK_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt b/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt
deleted file mode 100644
index 5b4a907058f..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuSampleTask/readme.txt
+++ /dev/null
@@ -1 +0,0 @@
-Empty placeholder for future Libspe2 SPU task
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp
deleted file mode 100644
index 11cb9e7c3f5..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//#define __CELLOS_LV2__ 1
-
-#define USE_SAMPLE_PROCESS 1
-#ifdef USE_SAMPLE_PROCESS
-
-
-#include "SpuSampleTaskProcess.h"
-#include <stdio.h>
-
-#ifdef __SPU__
-
-
-
-/// Placeholder thread function for __SPU__ builds: both arguments are ignored
-/// and the only effect is a greeting printed to stdout.
-void	SampleThreadFunc(void* userPtr,void* lsMemory)
-{
-	(void)userPtr;
-	(void)lsMemory;
-
-	printf("hello world\n");
-}
-
-
-/// Placeholder local-store factory for __SPU__ builds: no memory is created,
-/// the result is always a null pointer.
-void*	SamplelsMemoryFunc()
-{
-	void* const noLocalStore = 0;
-	return noLocalStore;
-}
-
-
-#else
-
-
-#include "btThreadSupportInterface.h"
-
-//#	include "SPUAssert.h"
-#include <string.h>
-
-
-
-extern "C" {	// symbol with C linkage that is defined outside this file
-	extern char SPU_SAMPLE_ELF_SYMBOL[];	// NOTE(review): presumably the embedded SPU program image; not referenced in the visible part of this file - confirm
-}
-
-
-
-
-
-/// Sizes the per-task bookkeeping for maxNumOutstandingTasks slots, marks
-/// every slot idle and then starts the SPU side through the thread interface.
-/// The object is left flagged as not yet initialized; see initialize().
-SpuSampleTaskProcess::SpuSampleTaskProcess(btThreadSupportInterface*	threadInterface,  int maxNumOutstandingTasks)
-:m_threadInterface(threadInterface),
-m_maxNumOutstandingTasks(maxNumOutstandingTasks)
-{
-	m_taskBusy.resize(m_maxNumOutstandingTasks);
-	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
-
-	m_initialized = false;
-	m_currentTask = 0;
-	m_numBusyTasks = 0;
-
-	// no task has been issued yet, so every slot starts out free
-	int slot = 0;
-	while (slot < m_maxNumOutstandingTasks)
-	{
-		m_taskBusy[slot] = false;
-		++slot;
-	}
-
-	m_threadInterface->startSPU();
-}
-
-/// Stops the SPU side that the constructor started.
-SpuSampleTaskProcess::~SpuSampleTaskProcess()
-{
-	btThreadSupportInterface* const threads = m_threadInterface;
-	threads->stopSPU();
-}
-
-
-
-/// Resets the scheduler state: all slots idle, no busy tasks, slot 0 is the
-/// next one to use, and the object is flagged as initialized.
-void	SpuSampleTaskProcess::initialize()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("SpuSampleTaskProcess::initialize()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	int slot = 0;
-	while (slot < m_maxNumOutstandingTasks)
-	{
-		m_taskBusy[slot] = false;
-		++slot;
-	}
-
-	m_currentTask = 0;
-	m_numBusyTasks = 0;
-	m_initialized = true;
-}
-
-
-/// Issues one sample task and selects the slot for the next one.
-///
-/// The descriptor in slot m_currentTask is filled from the arguments
-/// (sampleMainMemPtr -> m_mainMemoryPtr, sampleValue -> m_sampleValue,
-/// sampleCommand -> m_sampleCommand) and handed to the thread interface with
-/// sendRequest.  If that leaves every slot busy, one response is collected
-/// with waitForResponse and its slot is released.  Finally m_currentTask is
-/// moved to the first free slot.
-void SpuSampleTaskProcess::issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand)
-{
-
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	// the format string previously ended in "\)n": an invalid escape sequence and no newline
-	printf("SpuSampleTaskProcess::issueTask (m_currentTask= %d)\n", m_currentTask);
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	m_taskBusy[m_currentTask] = true;
-	m_numBusyTasks++;
-
-	SpuSampleTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
-	{
-		// send task description in event message
-		// no error checking here...
-		// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
-	
-		taskDesc.m_mainMemoryPtr = reinterpret_cast<uint64_t>(sampleMainMemPtr);
-		taskDesc.m_sampleValue = sampleValue;
-		taskDesc.m_sampleCommand = sampleCommand;
-
-		//some bookkeeping to recognize finished tasks
-		taskDesc.m_taskId = m_currentTask;
-	}
-
-
-	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
-
-	// if all tasks busy, wait for spu event to clear the task.
-	
-	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
-	{
-		// both start from a defined value so waitForResponse never sees indeterminate input
-		unsigned int taskId = 0;
-		unsigned int outputSize = 0;
-
-		// pass the first busy slot in as the starting value of taskId
-		for (int i=0;i<m_maxNumOutstandingTasks;i++)
-		{
-			if (m_taskBusy[i])
-			{
-				taskId = i;
-				break;
-			}
-		}
-		m_threadInterface->waitForResponse(&taskId, &outputSize);
-
-		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
-
-		postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-
-	// find new task buffer
-	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
-	{
-		if (!m_taskBusy[i])
-		{
-			m_currentTask = i;
-			break;
-		}
-	}
-}
-
-
-///Optional PPU-side post processing for each task; called by issueTask and flush with the finished task's id and output size. Currently does nothing.
-void SpuSampleTaskProcess::postProcess(int taskId, int outputSize)
-{
-
-}
-
-
-/// Collects responses until no issued task is outstanding.
-///
-/// Each iteration collects one response with waitForResponse, runs
-/// postProcess on it, marks that slot free and decrements m_numBusyTasks.
-void SpuSampleTaskProcess::flush()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	// the message previously named SpuCollisionTaskProcess, a different class
-	printf("\nSpuSampleTaskProcess::flush()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	// all tasks are issued, wait for all tasks to be complete
-	while(m_numBusyTasks > 0)
-	{
-		// both start from a defined value so waitForResponse never sees indeterminate input
-		unsigned int taskId = 0;
-		unsigned int outputSize = 0;
-
-		// pass the first busy slot in as the starting value of taskId
-		for (int i=0;i<m_maxNumOutstandingTasks;i++)
-		{
-			if (m_taskBusy[i])
-			{
-				taskId = i;
-				break;
-			}
-		}
-
-		m_threadInterface->waitForResponse(&taskId, &outputSize);
-
-		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
-
-		postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-}
-
-#endif
-
-
-#endif //USE_SAMPLE_PROCESS
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h b/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h
deleted file mode 100644
index d733a9a8528..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuSampleTaskProcess.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef SPU_SAMPLE_TASK_PROCESS_H
-#define SPU_SAMPLE_TASK_PROCESS_H
-
-#include <assert.h>
-
-
-#include "PlatformDefinitions.h"
-
-#include <stdlib.h>
-
-#include "LinearMath/btAlignedObjectArray.h"
-
-
-#include "SpuSampleTask/SpuSampleTask.h"
-
-
-//just add your commands here, try to keep them globally unique for debugging purposes
-#define CMD_SAMPLE_TASK_COMMAND 10
-
-
-
-/// SpuSampleTaskProcess issues sample tasks (SpuSampleTaskDesc) through a btThreadSupportInterface.
-/// It tracks which task buffers are busy; when all are busy, issuing waits for one response first.
-/// Every received response is passed to postProcess(), which is currently an empty hook.
-class SpuSampleTaskProcess
-{
-	// track task buffers that are being used, and total busy tasks
-	btAlignedObjectArray<bool>	m_taskBusy; // one flag per task buffer, true while that task is outstanding
-	btAlignedObjectArray<SpuSampleTaskDesc>m_spuSampleTaskDesc; // task descriptors; presumably one per task buffer - TODO confirm in the constructor
-	
-	int   m_numBusyTasks; // number of entries in m_taskBusy that are currently true
-
-	// the current task and the current entry to insert a new work unit
-	int   m_currentTask; // index of the task buffer the next issueTask() will use
-
-	bool m_initialized;
-
-	void postProcess(int taskId, int outputSize); // per-task completion hook, called with the values returned by waitForResponse()
-	
-	class	btThreadSupportInterface*	m_threadInterface; // backend used for sendRequest()/waitForResponse()
-
-	int	m_maxNumOutstandingTasks; // upper bound on simultaneously busy tasks; also the range scanned in m_taskBusy
-
-
-
-public:
-	SpuSampleTaskProcess(btThreadSupportInterface*	threadInterface, int maxNumOutstandingTasks);
-	
-	~SpuSampleTaskProcess();
-	
-	///call initialize in the beginning of the frame, before the first issueTask
-	void initialize();
-
-	void issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand); // sends one task; waits for a response first when all task buffers are busy
-
-	///call flush to wait until every outstanding task has reported completion
-	void flush();
-};
-
-
-#if defined(USE_LIBSPE2) && defined(__SPU__)
-////////////////////MAIN/////////////////////////////
-#include "../SpuLibspe2Support.h"
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-#include <SpuFakeDma.h>
-
-void * SamplelsMemoryFunc();
-void SampleThreadFunc(void* userPtr,void* lsMemory);
-
-//#define DEBUG_LIBSPE2_MAINLOOP
-
-int main(unsigned long long speid, addr64 argp, addr64 envp) // SPU-side main loop: argp is the main-memory address of this SPU's btSpuStatus
-{
-	printf("SPU is up \n");
-	
-	ATTRIBUTE_ALIGNED128(btSpuStatus status); // local copy of the status block that lives at argp
-	ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ; // local copy of the task descriptor fetched per task
-	unsigned int received_message = Spu_Mailbox_Event_Nothing;
-        bool shutdown = false;
-
-	cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); // fetch the initial status block
-	cellDmaWaitTagStatusAll(DMA_MASK(3)); // wait for the transfer on tag 3 before touching 'status'
-
-	status.m_status = Spu_Status_Free;
-	status.m_lsMemory.p = SamplelsMemoryFunc(); // publish the local-store memory pointer through the status block
-
-	cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0); // write the updated status back to argp
-	cellDmaWaitTagStatusAll(DMA_MASK(3));
-	
-	
-	while (!shutdown)
-	{
-		received_message = spu_read_in_mbox(); // next command from the inbound mailbox
-		
-
-		
-		switch(received_message)
-		{
-		case Spu_Mailbox_Event_Shutdown:
-			shutdown = true; // leave the loop after the status write-back below
-			break; 
-		case Spu_Mailbox_Event_Task:
-			// refresh the status
-#ifdef DEBUG_LIBSPE2_MAINLOOP
-			printf("SPU recieved Task \n");
-#endif //DEBUG_LIBSPE2_MAINLOOP
-			cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-			cellDmaWaitTagStatusAll(DMA_MASK(3));
-		
-			btAssert(status.m_status==Spu_Status_Occupied); // the status block is expected to be marked occupied when a task message arrives
-			
-			cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0); // fetch the descriptor the status block points at
-			cellDmaWaitTagStatusAll(DMA_MASK(3));
-			
-			SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) ); // run the task on the local descriptor copy
-			break;
-		case Spu_Mailbox_Event_Nothing:
-		default:
-			break; // nothing to do for unknown or empty messages
-		}
-
-		// set to status free and wait for next task (this write-back also runs after a shutdown message)
-		status.m_status = Spu_Status_Free;
-		cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
-		cellDmaWaitTagStatusAll(DMA_MASK(3));		
-				
-		
-  	}
-  	return 0;
-}
-//////////////////////////////////////////////////////
-#endif
-
-
-
-#endif // SPU_SAMPLE_TASK_PROCESS_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/SpuSync.h b/extern/bullet2/BulletMultiThreaded/SpuSync.h
deleted file mode 100644
index b90d0fcbfd4..00000000000
--- a/extern/bullet2/BulletMultiThreaded/SpuSync.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2007 Starbreeze Studios
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-Written by: Marten Svanfeldt
-*/
-
-#ifndef SPU_SYNC_H
-#define	SPU_SYNC_H
-
-
-#include "PlatformDefinitions.h"
-
-
-#if defined(WIN32)
-
-#define WIN32_LEAN_AND_MEAN
-#ifdef _XBOX
-#include <Xtl.h>
-#else
-#include <Windows.h>
-#endif
-
-///Win32 flavour of btSpinlock, the multi-platform synchronization helper used to port SPU tasks.
-///It is a thin, non-owning wrapper around a caller-provided CRITICAL_SECTION: Lock()/Unlock()
-///forward to EnterCriticalSection()/LeaveCriticalSection(), and the section is never deleted here.
-class btSpinlock
-{
-public:
-	//typedef volatile LONG SpinVariable;
-	typedef CRITICAL_SECTION SpinVariable;
-
-	///Wraps 'var' without taking ownership; call Init() before the first Lock().
-	btSpinlock (SpinVariable* var) : m_section (var) {}
-
-	///Initializes the wrapped critical section.
-	void Init ()   { InitializeCriticalSection(m_section); }
-
-	///Enters the wrapped critical section.
-	void Lock ()   { EnterCriticalSection(m_section); }
-
-	///Leaves the wrapped critical section.
-	void Unlock () { LeaveCriticalSection(m_section); }
-
-private:
-	SpinVariable* m_section;
-};
-
-
-#elif defined (__CELLOS_LV2__)
-
-//#include <cell/atomic.h>
-#include <cell/sync/mutex.h>
-
-///Cell (__CELLOS_LV2__) flavour of btSpinlock, the multi-platform synchronization helper: wraps a caller-provided CellSyncMutex.
-class btSpinlock
-{
-public:
-	typedef CellSyncMutex SpinVariable;
-
-	btSpinlock (SpinVariable* var) // stores the pointer only; no ownership is taken
-		: spinVariable (var)
-	{}
-
-	void Init () // initializes the mutex; compiled to a no-op when building for the SPU
-	{
-#ifndef __SPU__
-		//*spinVariable = 1;
-		cellSyncMutexInitialize(spinVariable);
-#endif
-	}
-
-
-
-	void Lock () // only locks when compiled for the SPU; on the PPU side this body is empty
-	{
-#ifdef __SPU__
-		// lock semaphore
-		/*while (cellAtomicTestAndDecr32(atomic_buf, (uint64_t)spinVariable) == 0) 
-		{
-
-		};*/
-		cellSyncMutexLock((uint64_t)spinVariable); // the SPU-side call takes the mutex address as a 64-bit value
-#endif
-	}
-
-	void Unlock () // only unlocks when compiled for the SPU; on the PPU side this body is empty
-	{
-#ifdef __SPU__
-		//cellAtomicIncr32(atomic_buf, (uint64_t)spinVariable);
-		cellSyncMutexUnlock((uint64_t)spinVariable);
-#endif 
-	}
-
-
-private:
-	SpinVariable*	spinVariable;
-	ATTRIBUTE_ALIGNED128(uint32_t		atomic_buf[32]); // only referenced by the commented-out cellAtomic* code above
-};
-
-#else
-//Fallback for platforms without a dedicated implementation: same interface, but no locking at all.
-//Only suitable for serial processing, where nothing can contend for the protected data.
-class btSpinlock
-{
-public:
-	typedef int  SpinVariable;
-
-	//The pointer is stored but never dereferenced by this implementation.
-	btSpinlock (SpinVariable* var) : m_unused (var) {}
-
-	void Init ()   {}	//nothing to initialize
-	void Lock ()   {}	//no-op
-	void Unlock () {}	//no-op
-
-private:
-	SpinVariable* m_unused;
-};
-
-
-#endif
-
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp
deleted file mode 100644
index 42b60a460e0..00000000000
--- a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "Win32ThreadSupport.h"
-
-#ifdef USE_WIN32_THREADING
-
-#include <windows.h>
-
-#include "SpuCollisionTaskProcess.h"
-
-#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
-
-
-
-///The number of threads should be equal to the number of available cores
-///@todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
-
-///Win32ThreadSupport runs the user task function on worker threads created with the Win32 API.
-///The constructor records the requested thread count and immediately starts the workers.
-Win32ThreadSupport::Win32ThreadSupport(const Win32ThreadConstructionInfo & threadConstructionInfo)
-	: m_maxNumTasks(threadConstructionInfo.m_numThreads)
-{
-	startThreads(threadConstructionInfo);
-}
-
-///Asks every worker thread to exit and releases its event and thread handles (all done by stopSPU).
-Win32ThreadSupport::~Win32ThreadSupport()
-{
-	this->stopSPU();
-}
-
-
-
-
-#include <stdio.h>
-
-///Worker thread entry point. lpParam points at the btSpuStatus slot that belongs to this worker.
-///The worker sleeps on the slot's start event. A non-null m_userPtr means "run m_userThreadFunc
-///on it" (status becomes 2 and the complete event is signalled); a null m_userPtr is the exit
-///request (status becomes 3, the complete event is signalled and the thread returns 0).
-DWORD WINAPI Thread_no_1( LPVOID lpParam ) 
-{
-	Win32ThreadSupport::btSpuStatus* status = (Win32ThreadSupport::btSpuStatus*)lpParam;
-
-	for (;;)
-	{
-		WaitForSingleObject(status->m_eventStartHandle,INFINITE);
-
-		void* userPtr = status->m_userPtr;
-
-		if (!userPtr)
-		{
-			//exit Thread
-			//Log BEFORE signalling completion: once the complete event is set, stopSPU() may close
-			//the handles and clear m_activeSpuStatus, so 'status' must not be read afterwards.
-			printf("Thread with taskId %i with handle %p exiting\n",status->m_taskId, status->m_threadHandle);
-			status->m_status = 3;
-			SetEvent(status->m_eventCompletetHandle);
-			break;
-		}
-
-		btAssert(status->m_status);
-		status->m_userThreadFunc(userPtr,status->m_lsMemory);
-		status->m_status = 2;
-		SetEvent(status->m_eventCompletetHandle);
-	}
-
-	//'status' is deliberately not touched from here on (see the exit branch above)
-	printf("Thread TERMINATED\n");
-	return 0;
-
-}
-
-///send messages to SPUs
-///Hands the task description at uiArgument0 to the worker identified by taskId and wakes that
-///worker through its start event. Only CMD_GATHER_AND_PROCESS_PAIRLIST is implemented; any other
-///command asserts. When SINGLE_THREADED is defined the task runs inline on slot 0 instead.
-void Win32ThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
-{
-	///	gMidphaseSPU.sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc);
-
-	///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
-
-	switch (uiCommand)
-	{
-	case 	CMD_GATHER_AND_PROCESS_PAIRLIST:
-		{
-
-//#define SINGLE_THREADED 1
-#ifdef SINGLE_THREADED
-
-			btSpuStatus&	spuStatus = m_activeSpuStatus[0];
-			spuStatus.m_userPtr=(void*)uiArgument0;
-			spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory);
-#else
-
-			//validate the slot index BEFORE it is used to index the status array
-			//(taskId is unsigned, so only the upper bound needs checking)
-			btAssert(int(taskId)<m_activeSpuStatus.size());
-			btSpuStatus&	spuStatus = m_activeSpuStatus[taskId];
-
-			spuStatus.m_commandId = uiCommand;
-			spuStatus.m_status = 1;
-			spuStatus.m_userPtr = (void*)uiArgument0;
-
-			///fire event to start new task
-			SetEvent(spuStatus.m_eventStartHandle);
-
-#endif //SINGLE_THREADED
-
-			break;
-		}
-	default:
-		{
-			///not implemented
-			btAssert(0);
-		}
-
-	};
-
-
-}
-
-
-///check for messages from SPUs
-///Blocks until any worker signals its complete event, then reports that worker's task id in
-///*puiArgument0 and its status in *puiArgument1. In the threaded build the status is reset to 0
-///before it is reported, so *puiArgument1 is always 0 there.
-void Win32ThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
-{
-	///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
-
-	///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
-
-
-	btAssert(m_activeSpuStatus.size());
-
-	int last = -1;
-#ifndef SINGLE_THREADED
-	//NOTE(review): WaitForMultipleObjects is documented to accept at most MAXIMUM_WAIT_OBJECTS handles
-	DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
-	btAssert(res != WAIT_FAILED);
-	last = res - WAIT_OBJECT_0;
-
-	///need to find an active spu: validate the index BEFORE it is used to index the status array
-	btAssert(last>=0);
-	btAssert(last<m_activeSpuStatus.size());
-
-	btSpuStatus& spuStatus = m_activeSpuStatus[last];
-	btAssert(spuStatus.m_threadHandle);
-	btAssert(spuStatus.m_eventCompletetHandle);
-
-	//WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
-	btAssert(spuStatus.m_status > 1);
-	spuStatus.m_status = 0;
-
-#else
-	last=0;
-	btSpuStatus& spuStatus = m_activeSpuStatus[last];
-#endif //SINGLE_THREADED
-
-
-
-	*puiArgument0 = spuStatus.m_taskId;
-	*puiArgument1 = spuStatus.m_status;
-
-
-}
-
-
-
-///Formats "<prefix><uniqueName><index>" into nameBuf without ever writing past bufSize bytes.
-///Returns nameBuf on success. If the name does not fit, nameBuf is set to "" and 0 is returned,
-///so the caller can create an unnamed event instead of a truncated, possibly colliding name.
-static const char* formatEventName(char* nameBuf, size_t bufSize, const char* prefix, const char* uniqueName, int index)
-{
-	int written = _snprintf(nameBuf, bufSize, "%s%s%d", prefix, uniqueName ? uniqueName : "", index);
-	//_snprintf returns a negative value on truncation and omits the terminator when the text fills the buffer exactly
-	if (written < 0 || size_t(written) >= bufSize)
-	{
-		nameBuf[0] = 0;
-		return 0;
-	}
-	return nameBuf;
-}
-
-///Creates threadConstructionInfo.m_numThreads worker threads running Thread_no_1.
-///Each worker gets its own btSpuStatus slot, a start event and a complete event (both auto-reset,
-///initially non-signalled); the complete events are also collected in m_completeHandles so that
-///waitForResponse() can wait on all of them at once.
-void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo)
-{
-
-	//both arrays are sized once up front, so the &spuStatus handed to each thread stays valid
-	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
-	m_completeHandles.resize(threadConstructionInfo.m_numThreads);
-
-	m_maxNumTasks = threadConstructionInfo.m_numThreads;
-
-	for (int i=0;i<threadConstructionInfo.m_numThreads;i++)
-	{
-		printf("starting thread %d\n",i);
-
-		btSpuStatus&	spuStatus = m_activeSpuStatus[i];
-
-		LPSECURITY_ATTRIBUTES lpThreadAttributes=NULL;
-		SIZE_T dwStackSize=threadConstructionInfo.m_threadStackSize;
-		LPTHREAD_START_ROUTINE lpStartAddress=&Thread_no_1;
-		LPVOID lpParameter=&spuStatus;
-		DWORD dwCreationFlags=0;
-		LPDWORD lpThreadId=0;
-
-		spuStatus.m_userPtr=0;
-
-		//bounded formatting: the name buffers are fixed-size, a long m_uniqueName must not overflow them
-		const char* startName = formatEventName(spuStatus.m_eventStartHandleName, sizeof(spuStatus.m_eventStartHandleName),
-												"eventStart", threadConstructionInfo.m_uniqueName, i);
-		spuStatus.m_eventStartHandle = CreateEventA (0,false,false,startName);
-
-		const char* completeName = formatEventName(spuStatus.m_eventCompletetHandleName, sizeof(spuStatus.m_eventCompletetHandleName),
-												"eventComplete", threadConstructionInfo.m_uniqueName, i);
-		spuStatus.m_eventCompletetHandle = CreateEventA (0,false,false,completeName);
-
-		m_completeHandles[i] = spuStatus.m_eventCompletetHandle;
-
-		HANDLE handle = CreateThread(lpThreadAttributes,dwStackSize,lpStartAddress,lpParameter,	dwCreationFlags,lpThreadId);
-		btAssert(handle);
-		SetThreadPriority(handle,THREAD_PRIORITY_HIGHEST);
-		//SetThreadPriority(handle,THREAD_PRIORITY_TIME_CRITICAL);
-
-		//pin worker i to logical processor i; skip when the bit does not fit into the mask type
-		if (i < int(sizeof(DWORD_PTR) * 8))
-		{
-			SetThreadAffinityMask(handle, DWORD_PTR(1)<<i);
-		}
-
-		//the worker blocks on its start event first, so these fields are set before it reads them
-		spuStatus.m_taskId = i;
-		spuStatus.m_commandId = 0;
-		spuStatus.m_status = 0;
-		spuStatus.m_threadHandle = handle;
-		spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
-		spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
-
-		printf("started thread %d with threadHandle %p\n",i,handle);
-
-	}
-
-}
-
-///Nothing to do here: the worker threads are already created by startThreads() in the constructor.
-void Win32ThreadSupport::startSPU()
-{
-	//intentionally empty
-}
-
-
-///tell the task scheduler we are done with the SPU tasks
-///For every worker: wait for a task that is still pending, send the exit request (null user
-///pointer plus start event), wait for the acknowledgement and close the worker's three handles.
-void Win32ThreadSupport::stopSPU()
-{
-	const int numWorkers = m_activeSpuStatus.size();
-	for (int worker = 0; worker < numWorkers; ++worker)
-	{
-		btSpuStatus& slot = m_activeSpuStatus[worker];
-
-		//a non-zero status means a response has not been collected yet: let it complete first
-		if (slot.m_status > 0)
-		{
-			WaitForSingleObject(slot.m_eventCompletetHandle, INFINITE);
-		}
-
-		//a null user pointer is the exit request understood by the worker loop
-		slot.m_userPtr = 0;
-		SetEvent(slot.m_eventStartHandle);
-		WaitForSingleObject(slot.m_eventCompletetHandle, INFINITE);
-
-		CloseHandle(slot.m_eventCompletetHandle);
-		CloseHandle(slot.m_eventStartHandle);
-		CloseHandle(slot.m_threadHandle);
-	}
-
-	m_activeSpuStatus.clear();
-	m_completeHandles.clear();
-}
-
-#endif //USE_WIN32_THREADING
diff --git a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h b/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h
deleted file mode 100644
index c61ad901c07..00000000000
--- a/extern/bullet2/BulletMultiThreaded/Win32ThreadSupport.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "LinearMath/btScalar.h"
-#include "PlatformDefinitions.h"
-
-#ifdef USE_WIN32_THREADING  //platform specific defines are defined in PlatformDefinitions.h
-
-#ifndef WIN32_THREAD_SUPPORT_H
-#define WIN32_THREAD_SUPPORT_H
-
-#include "LinearMath/btAlignedObjectArray.h"
-
-#include "btThreadSupportInterface.h"
-
-
-typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory);
-typedef void* (*Win32lsMemorySetupFunc)();
-
-
-
-
-
-
-///Win32ThreadSupport implements btThreadSupportInterface on top of Win32 threads and events (no libspe2 involved)
-class Win32ThreadSupport : public btThreadSupportInterface 
-{
-public:
-	///per-worker bookkeeping; one instance per thread, handed to the thread as its start parameter
-	struct	btSpuStatus
-	{
-		uint32_t	m_taskId; // index of this worker, assigned in startThreads
-		uint32_t	m_commandId; // last command passed to sendRequest
-		uint32_t	m_status; // 0 = idle, 1 = task issued, 2 = task finished, 3 = worker exiting
-
-		Win32ThreadFunc	m_userThreadFunc;
-		void*	m_userPtr; //for taskDesc etc; a null value is the exit request for the worker
-		void*	m_lsMemory; //initialized using Win32lsMemorySetupFunc
-
-		void*	m_threadHandle; //this one is calling 'Win32ThreadFunc'
-
-		void*	m_eventStartHandle; // signalled to wake the worker
-		char	m_eventStartHandleName[32];
-
-		void*	m_eventCompletetHandle; // signalled by the worker when a task (or the exit request) has been handled
-		char	m_eventCompletetHandleName[32];
-		
-
-	};
-private:
-
-	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus; // one slot per worker thread
-	btAlignedObjectArray<void*>			m_completeHandles; // the workers' complete events, in slot order, for WaitForMultipleObjects
-	
-	int m_maxNumTasks;
-public:
-	///parameters describing the worker threads to create
-
-	struct	Win32ThreadConstructionInfo
-	{
-		Win32ThreadConstructionInfo(char* uniqueName, // NOTE(review): never written through in the visible code; const char* looks sufficient - confirm callers
-									Win32ThreadFunc userThreadFunc,
-									Win32lsMemorySetupFunc	lsMemoryFunc,
-									int numThreads=1,
-									int threadStackSize=65535
-									)
-									:m_uniqueName(uniqueName),
-									m_userThreadFunc(userThreadFunc),
-									m_lsMemoryFunc(lsMemoryFunc),
-									m_numThreads(numThreads),
-									m_threadStackSize(threadStackSize)
-		{
-
-		}
-
-		char*					m_uniqueName; // becomes part of the event names built in startThreads
-		Win32ThreadFunc			m_userThreadFunc; // function every worker runs per task
-		Win32lsMemorySetupFunc	m_lsMemoryFunc; // called once per worker to obtain its m_lsMemory
-		int						m_numThreads;
-		int						m_threadStackSize; // passed to CreateThread as the stack size
-
-	};
-
-
-
-	Win32ThreadSupport(const Win32ThreadConstructionInfo& threadConstructionInfo);
-
-///stops the worker threads and releases their handles
-	virtual	~Win32ThreadSupport();
-
-	void	startThreads(const Win32ThreadConstructionInfo&	threadInfo);
-
-
-///send messages to SPUs (here: hand a task to the worker thread selected by uiArgument1)
-	virtual	void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
-
-///check for messages from SPUs (here: block until one worker signals completion)
-	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
-
-///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-	virtual	void startSPU();
-
-///tell the task scheduler we are done with the SPU tasks
-	virtual	void stopSPU();
-
-	virtual	void	setNumTasks(int numTasks) // only updates the stored count; no threads are created or stopped here
-	{
-		m_maxNumTasks = numTasks;
-	}
-
-	virtual int getNumTasks() const
-	{
-		return m_maxNumTasks;
-	}
-
-};
-
-#endif //WIN32_THREAD_SUPPORT_H
-
-#endif //USE_WIN32_THREADING
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
deleted file mode 100644
index 84a5e59f0af..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
-#include "BulletMultiThreaded/btGpuDefines.h"
-#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h"
-#include "BulletMultiThreaded/btGpuUtilsSharedCode.h"
-
-
-
-#include "LinearMath/btAlignedAllocator.h"
-#include "LinearMath/btQuickprof.h"
-#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
-
-
-
-#include "btGpuDefines.h"
-#include "btGpuUtilsSharedDefs.h"
-
-#include "btGpu3DGridBroadphaseSharedDefs.h"
-
-#include "btGpu3DGridBroadphase.h"
-#include <string.h> //for memset
-
-
-#include <stdio.h>
-
-
-
-static bt3DGridBroadphaseParams s3DGridBroadphaseParams;
-
-
-
-/// Builds a grid broadphase with its own pair cache: a btHashedOverlappingPairCache is
-/// placement-constructed in 16-byte aligned memory and passed to the btSimpleBroadphase base.
-/// All grid, proxy and buffer parameters are forwarded unchanged to _initialize().
-btGpu3DGridBroadphase::btGpu3DGridBroadphase(	const btVector3& worldAabbMin,const btVector3& worldAabbMax,
-										int gridSizeX, int gridSizeY, int gridSizeZ,
-										int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-										int maxBodiesPerCell,
-										btScalar cellFactorAABB)
-	: btSimpleBroadphase(maxSmallProxies,
-						 new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache)
-	, m_bInitialized(false)
-	, m_numBodies(0)
-{
-	_initialize(worldAabbMin, worldAabbMax,
-				gridSizeX, gridSizeY, gridSizeZ,
-				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
-				maxBodiesPerCell, cellFactorAABB);
-}
-
-
-
-/// Builds a grid broadphase around a caller-supplied pair cache; every other parameter is
-/// forwarded unchanged to _initialize().
-/// NOTE(review): _initialize() sets m_ownsPairCache to true even for this constructor - confirm
-/// that the base class releasing the caller's cache is the intended ownership.
-btGpu3DGridBroadphase::btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
-										const btVector3& worldAabbMin,const btVector3& worldAabbMax,
-										int gridSizeX, int gridSizeY, int gridSizeZ,
-										int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-										int maxBodiesPerCell,
-										btScalar cellFactorAABB)
-	: btSimpleBroadphase(maxSmallProxies, overlappingPairCache)
-	, m_bInitialized(false)
-	, m_numBodies(0)
-{
-	_initialize(worldAabbMin, worldAabbMax,
-				gridSizeX, gridSizeY, gridSizeZ,
-				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
-				maxBodiesPerCell, cellFactorAABB);
-}
-
-
-
-/// Releases the host-side buffers through _finalize(); the broadphase must still be initialized.
-btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
-{
-	// the pair cache itself is left to btSimpleBroadphase, which frees it because of m_ownsPairCache
-	assert(m_bInitialized);
-	this->_finalize();
-}
-
-
-
-/// Stores the grid parameters and allocates every host-side buffer of the broadphase.
-/// worldAabbMin/worldAabbMax span the grid, which is divided into gridSizeX*Y*Z cells.
-/// Small proxies use the handle pool of btSimpleBroadphase (m_maxHandles entries); proxies whose
-/// bounding radius exceeds half of the smallest cell edge go to a separate pool of maxLargeProxies.
-/// maxPairsPerBody sizes the per-body pair buffers. Must be called exactly once, before any use;
-/// _finalize() releases what is allocated here.
-void btGpu3DGridBroadphase::_initialize(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-										int gridSizeX, int gridSizeY, int gridSizeZ, 
-										int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-										int maxBodiesPerCell,
-										btScalar cellFactorAABB)
-{
-	// set various parameters
-	m_ownsPairCache = true;
-	m_params.m_gridSizeX = gridSizeX;
-	m_params.m_gridSizeY = gridSizeY;
-	m_params.m_gridSizeZ = gridSizeZ;
-	m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
-	btVector3 w_org = worldAabbMin;
-	m_params.m_worldOriginX = w_org.getX();
-	m_params.m_worldOriginY = w_org.getY();
-	m_params.m_worldOriginZ = w_org.getZ();
-	btVector3 w_size = worldAabbMax - worldAabbMin;
-	m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX;
-	m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY;
-	m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ;
-	// a proxy counts as 'small' while its (scaled) bounding radius fits half of the smallest cell edge
-	m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ);
-	m_maxRadius *= btScalar(0.5f);
-	m_params.m_numBodies = m_numBodies;
-	m_params.m_maxBodiesPerCell = maxBodiesPerCell;
-
-	m_numLargeHandles = 0;						
-	m_maxLargeHandles = maxLargeProxies;
-
-	m_maxPairsPerBody = maxPairsPerBody;
-
-	m_cellFactorAABB = cellFactorAABB;
-
-	m_LastLargeHandleIndex = -1;
-
-    assert(!m_bInitialized);
-    // allocate host storage
-    m_hBodiesHash = new unsigned int[m_maxHandles * 2];
-    memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
-
-    m_hCellStart = new unsigned int[m_params.m_numCells];
-    memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
-
-	m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
-	// --------------- for now, init with m_maxPairsPerBody for each body
-	// entry [2*i] is the start offset of body i inside m_hPairBuff, entry [2*i+1] is reset to 0
-	m_hPairBuffStartCurr[0] = 0;
-	m_hPairBuffStartCurr[1] = 0;
-	for(int i = 1; i <= m_maxHandles; i++) 
-	{
-		m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
-		m_hPairBuffStartCurr[i * 2 + 1] = 0;
-	}
-	//----------------
-	unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
-	m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
-
-	m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
-	memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
-
-	m_hPairScan = new unsigned int[m_maxHandles + 1];
-
-	m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody];
-
-// large proxies
-
-	// allocate handles buffer and put all handles on free list
-	m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
-	m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
-	m_firstFreeLargeHandle = 0;
-	{
-		for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
-		{
-			m_pLargeHandles[i].SetNextFree(i + 1);
-			// large proxies get ids starting at m_maxHandles+2; isLargeProxy(proxy) relies on this
-			m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
-		}
-		// terminate the free list; with an empty large-proxy pool there is no last element,
-		// and indexing [m_maxLargeHandles - 1] would write in front of the buffer
-		if (m_maxLargeHandles > 0)
-		{
-			m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
-		}
-	}
-
-// debug data
-	m_numPairsAdded = 0;
-	m_numOverflows = 0;
-
-    m_bInitialized = true;
-}
-
-
-
-/// Frees every buffer allocated by _initialize() and marks the broadphase as uninitialized.
-void btGpu3DGridBroadphase::_finalize()
-{
-	assert(m_bInitialized);
-
-	// host arrays created with new[]
-	delete [] m_hBodiesHash;
-	delete [] m_hCellStart;
-	delete [] m_hPairBuffStartCurr;
-	delete [] m_hAABB;
-	delete [] m_hPairBuff;
-	delete [] m_hPairScan;
-	delete [] m_hPairOut;
-
-	// raw storage of the large-proxy pool (obtained from btAlignedAlloc)
-	btAlignedFree(m_pLargeHandlesRawPtr);
-
-	m_bInitialized = false;
-}
-
-
-
-void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher) // runs the grid pipeline below and updates the pair cache
-{
-	if(m_numHandles <= 0) // no small proxies: only the large/large pass is needed
-	{
-		BT_PROFILE("addLarge2LargePairsToCache");
-		addLarge2LargePairsToCache(dispatcher);
-		return;
-	}
-	// update constants (hands the current m_params to the kernels)
-	setParameters(&m_params);
-	// prepare AABB array (fills m_hAABB from the small and large proxy pools)
-	prepareAABB();
-	// calculate hash
-	calcHashAABB();
-	// sort bodies based on hash
-	sortHash();
-	// find start of each cell
-	findCellStart();
-	// findOverlappingPairs (small/small)
-	findOverlappingPairs();
-	// findOverlappingPairs (small/large)
-	findPairsLarge();
-	// add pairs to CPU cache: the three steps below prepare the buffers that addPairsToCache consumes
-	computePairCacheChanges();
-	scanOverlappingPairBuff();
-	squeezeOverlappingPairBuff();
-	addPairsToCache(dispatcher);
-	// find and add large/large pairs to CPU cache
-	addLarge2LargePairsToCache(dispatcher);
-	return;
-}
-
-
-
-/// Replays the per-body pair changes stored in m_hPairOut into the CPU pair cache.
-/// m_hPairScan[i] .. m_hPairScan[i+1] delimits the entries of body i. Each entry packs the index of
-/// the other proxy together with flag bits: BT_3DGRID_PAIR_NEW_FLG set means "add the pair",
-/// otherwise the pair is removed. m_numPairsAdded / m_numPairsRemoved count both kinds.
-void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
-{
-	m_numPairsAdded = 0;
-	m_numPairsRemoved = 0;
-
-	for(int body = 0; body < m_numHandles; body++)
-	{
-		const unsigned int changeCount = m_hPairScan[body+1] - m_hPairScan[body];
-		if(changeCount == 0)
-		{
-			continue;
-		}
-
-		unsigned int* changes = m_hPairOut + m_hPairScan[body];
-		// the .uw field of the 'min' AABB entry holds the handle index of this body
-		unsigned int handleIndex = m_hAABB[body * 2].uw;
-		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[handleIndex];
-
-		for(unsigned int k = 0; k < changeCount; k++)
-		{
-			const unsigned int packed = changes[k];
-			unsigned int otherIndex = packed & (~BT_3DGRID_PAIR_ANY_FLG);
-
-			// indices at or above m_maxHandles address the large-proxy pool
-			btSimpleBroadphaseProxy* proxy1;
-			if(otherIndex >= (unsigned int)m_maxHandles)
-			{
-				otherIndex -= m_maxHandles;
-				btAssert((otherIndex >= 0) && (otherIndex < (unsigned int)m_maxLargeHandles));
-				proxy1 = &m_pLargeHandles[otherIndex];
-			}
-			else
-			{
-				proxy1 = &m_pHandles[otherIndex];
-			}
-
-			if(packed & BT_3DGRID_PAIR_NEW_FLG)
-			{
-				m_pairCache->addOverlappingPair(proxy0,proxy1);
-				m_numPairsAdded++;
-			}
-			else
-			{
-				m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
-				m_numPairsRemoved++;
-			}
-		}
-	}
-}
-
-
-
-/// Creates a broadphase proxy for the given AABB. Boxes classified as 'large' by
-/// isLargeProxy(aabbMin, aabbMax) are constructed in the large-proxy pool; everything else is
-/// delegated to btSimpleBroadphase::createProxy. Returns 0 when the large pool is exhausted.
-btBroadphaseProxy* btGpu3DGridBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
-{
-	if(!isLargeProxy(aabbMin, aabbMax))
-	{
-		// small proxy: handled by the base class pool
-		return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
-	}
-
-	if (m_numLargeHandles >= m_maxLargeHandles)
-	{
-		///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell)
-		btAssert(0);
-		return 0; //should never happen, but don't let the game crash ;-)
-	}
-
-	btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));
-
-	// construct the proxy in place inside the slot taken from the large-handle free list
-	const int slot = allocLargeHandle();
-	btBroadphaseProxy* largeProxy = new (&m_pLargeHandles[slot])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
-	return largeProxy;
-}
-
-
-
-/// Destroys a proxy created by createProxy(). Large proxies are returned to the large-handle
-/// free list and their pairs are removed from the cache; small ones are handled by the base class.
-void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
-{
-	if(!isLargeProxy(proxy))
-	{
-		btSimpleBroadphase::destroyProxy(proxy, dispatcher);
-		return;
-	}
-
-	btSimpleBroadphaseProxy* largeProxy = static_cast<btSimpleBroadphaseProxy*>(proxy);
-	freeLargeHandle(largeProxy);
-	m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher);
-}
-
-
-
-/// Re-initializes m_hPairBuffStartCurr: entry [2*i] becomes the start offset of body i
-/// (i * m_maxPairsPerBody, built up cumulatively) and entry [2*i+1] is reset to 0.
-/// The dispatcher argument is not used.
-void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
-{
-	unsigned int* entry = m_hPairBuffStartCurr;
-	entry[0] = 0;
-	entry[1] = 0;
-	for(int body = 1; body <= m_maxHandles; body++)
-	{
-		entry += 2;
-		// start of this body = start of the previous body + one full pair slot range
-		entry[0] = entry[-2] + m_maxPairsPerBody;
-		entry[1] = 0;
-	}
-}
-
-
-
-/// Classifies an AABB as 'large' when the radius of its bounding sphere, scaled by the
-/// user-defined m_cellFactorAABB, exceeds m_maxRadius.
-bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax)
-{
-	const btVector3 extent = aabbMax - aabbMin;
-
-	///use the bounding sphere radius of this bounding box, to include rotation
-	btScalar scaledRadius = extent.length() * btScalar(0.5f);
-	scaledRadius *= m_cellFactorAABB; // user-defined factor
-
-	if (scaledRadius > m_maxRadius)
-	{
-		return true;
-	}
-	return false;
-}
-
-
-
-/// Tells whether an existing proxy lives in the large-proxy pool, judging by its unique id
-/// alone: ids at or above m_maxHandles+2 are treated as large.
-bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
-{
-	const int firstLargeId = m_maxHandles+2;
-	return (proxy->getUid() >= firstLargeId);
-}
-
-
-
-/// Brute-force pass over every pair of live large proxies: overlapping pairs missing from the
-/// cache are added, cached pairs that no longer overlap are removed. Slots without a client
-/// object are skipped, and m_LastLargeHandleIndex is lowered to the highest live slot seen.
-void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
-{
-	if (m_numLargeHandles <= 0)
-	{
-		return;
-	}
-
-	int highestLive = -1;
-	for(int a = 0; a <= m_LastLargeHandleIndex; a++)
-	{
-		btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[a];
-		if(!proxy0->m_clientObject)
-		{
-			continue;	// unused slot
-		}
-		highestLive = a;
-
-		for(int b = a + 1; b <= m_LastLargeHandleIndex; b++)
-		{
-			btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[b];
-			if(!proxy1->m_clientObject)
-			{
-				continue;	// unused slot
-			}
-			btAssert(proxy0 != proxy1);
-
-			btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
-			btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
-
-			const bool overlapping = aabbOverlap(p0,p1);
-			const bool cached = (m_pairCache->findPair(proxy0,proxy1) != 0);
-
-			if(overlapping && !cached)
-			{
-				m_pairCache->addOverlappingPair(proxy0,proxy1);
-			}
-			else if(!overlapping && cached)
-			{
-				m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
-			}
-		}
-	}
-	m_LastLargeHandleIndex = highestLive;
-}
-
-
-
-void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
-{
-	btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
-	for (int i=0; i <= m_LastLargeHandleIndex; i++)
-	{
-		btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i];
-		if(!proxy->m_clientObject)
-		{
-			continue;
-		}
-		rayCallback.process(proxy);
-	}
-}
-
-
-
-//
-// overrides for CPU version
-//
-
-
-
-void btGpu3DGridBroadphase::prepareAABB()
-{
-	BT_PROFILE("prepareAABB");
-	bt3DGrid3F1U* pBB = m_hAABB;
-	int i;
-	int new_largest_index = -1;
-	unsigned int num_small = 0;
-	for(i = 0; i <= m_LastHandleIndex; i++) 
-	{
-		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
-		if(!proxy0->m_clientObject)
-		{
-			continue;
-		}
-		new_largest_index = i;
-		pBB->fx = proxy0->m_aabbMin.getX();
-		pBB->fy = proxy0->m_aabbMin.getY();
-		pBB->fz = proxy0->m_aabbMin.getZ();
-		pBB->uw = i;
-		pBB++;
-		pBB->fx = proxy0->m_aabbMax.getX();
-		pBB->fy = proxy0->m_aabbMax.getY();
-		pBB->fz = proxy0->m_aabbMax.getZ();
-		pBB->uw = num_small;
-		pBB++;
-		num_small++;
-	}
-	m_LastHandleIndex = new_largest_index;
-	new_largest_index = -1;
-	unsigned int num_large = 0;
-	for(i = 0; i <= m_LastLargeHandleIndex; i++) 
-	{
-		btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
-		if(!proxy0->m_clientObject)
-		{
-			continue;
-		}
-		new_largest_index = i;
-		pBB->fx = proxy0->m_aabbMin.getX();
-		pBB->fy = proxy0->m_aabbMin.getY();
-		pBB->fz = proxy0->m_aabbMin.getZ();
-		pBB->uw = i + m_maxHandles;
-		pBB++;
-		pBB->fx = proxy0->m_aabbMax.getX();
-		pBB->fy = proxy0->m_aabbMax.getY();
-		pBB->fz = proxy0->m_aabbMax.getZ();
-		pBB->uw = num_large + m_maxHandles;
-		pBB++;
-		num_large++;
-	}
-	m_LastLargeHandleIndex = new_largest_index;
-	// paranoid checks
-	btAssert(num_small == m_numHandles);
-	btAssert(num_large == m_numLargeHandles);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
-{
-	s3DGridBroadphaseParams = *hostParams;
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::calcHashAABB()
-{
-	BT_PROFILE("bt3DGrid_calcHashAABB");
-	btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::sortHash()
-{
-	class bt3DGridHashKey
-	{
-	public:
-	   unsigned int hash;
-	   unsigned int index;
-	   void quickSort(bt3DGridHashKey* pData, int lo, int hi)
-	   {
-			int i=lo, j=hi;
-			bt3DGridHashKey x = pData[(lo+hi)/2];
-			do
-			{    
-				while(pData[i].hash > x.hash) i++; 
-				while(x.hash > pData[j].hash) j--;
-				if(i <= j)
-				{
-					bt3DGridHashKey t = pData[i];
-					pData[i] = pData[j];
-					pData[j] = t;
-					i++; j--;
-				}
-			} while(i <= j);
-			if(lo < j) pData->quickSort(pData, lo, j);
-			if(i < hi) pData->quickSort(pData, i, hi);
-	   }
-	};
-	BT_PROFILE("bt3DGrid_sortHash");
-	bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash;
-	pHash->quickSort(pHash, 0, m_numHandles - 1);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::findCellStart()
-{
-	BT_PROFILE("bt3DGrid_findCellStart");
-	btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::findOverlappingPairs()
-{
-	BT_PROFILE("bt3DGrid_findOverlappingPairs");
-	btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::findPairsLarge()
-{
-	BT_PROFILE("bt3DGrid_findPairsLarge");
-	btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr,	m_numHandles, m_numLargeHandles);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::computePairCacheChanges()
-{
-	BT_PROFILE("bt3DGrid_computePairCacheChanges");
-	btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hAABB, m_numHandles);
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::scanOverlappingPairBuff()
-{
-	BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
-	m_hPairScan[0] = 0;
-	for(int i = 1; i <= m_numHandles; i++) 
-	{
-		unsigned int delta = m_hPairScan[i];
-		m_hPairScan[i] = m_hPairScan[i-1] + delta;
-	}
-	return;
-}
-
-
-
-void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
-{
-	BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
-	btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_hAABB, m_numHandles);
-	return;
-}
-
-
-
-#include "btGpu3DGridBroadphaseSharedCode.h"
-
-
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h
deleted file mode 100644
index 1d49a0557ae..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphase.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-#ifndef BTGPU3DGRIDBROADPHASE_H
-#define BTGPU3DGRIDBROADPHASE_H
-
-//----------------------------------------------------------------------------------------
-
-#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
-
-#include "btGpu3DGridBroadphaseSharedTypes.h"
-
-//----------------------------------------------------------------------------------------
-
-///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
-
-class btGpu3DGridBroadphase : public btSimpleBroadphase
-{
-protected:
-	bool			m_bInitialized;
-    unsigned int	m_numBodies;
-    unsigned int	m_numCells;
-	unsigned int	m_maxPairsPerBody;
-	btScalar		m_cellFactorAABB;
-    unsigned int	m_maxBodiesPerCell;
-	bt3DGridBroadphaseParams m_params;
-	btScalar		m_maxRadius;
-	// CPU data
-    unsigned int*	m_hBodiesHash;
-    unsigned int*	m_hCellStart;
-	unsigned int*	m_hPairBuffStartCurr;
-	bt3DGrid3F1U*		m_hAABB;
-	unsigned int*	m_hPairBuff;
-	unsigned int*	m_hPairScan;
-	unsigned int*	m_hPairOut;
-// large proxies
-	int		m_numLargeHandles;						
-	int		m_maxLargeHandles;						
-	int		m_LastLargeHandleIndex;							
-	btSimpleBroadphaseProxy* m_pLargeHandles;
-	void* m_pLargeHandlesRawPtr;
-	int		m_firstFreeLargeHandle;
-	int allocLargeHandle()
-	{
-		btAssert(m_numLargeHandles < m_maxLargeHandles);
-		int freeLargeHandle = m_firstFreeLargeHandle;
-		m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
-		m_numLargeHandles++;
-		if(freeLargeHandle > m_LastLargeHandleIndex)
-		{
-			m_LastLargeHandleIndex = freeLargeHandle;
-		}
-		return freeLargeHandle;
-	}
-	void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
-	{
-		int handle = int(proxy - m_pLargeHandles);
-		btAssert((handle >= 0) && (handle < m_maxHandles));
-		if(handle == m_LastLargeHandleIndex)
-		{
-			m_LastLargeHandleIndex--;
-		}
-		proxy->SetNextFree(m_firstFreeLargeHandle);
-		m_firstFreeLargeHandle = handle;
-		proxy->m_clientObject = 0;
-		m_numLargeHandles--;
-	}
-	bool isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax);
-	bool isLargeProxy(btBroadphaseProxy* proxy);
-// debug
-	unsigned int	m_numPairsAdded;
-	unsigned int	m_numPairsRemoved;
-	unsigned int	m_numOverflows;
-// 
-public:
-	btGpu3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-					   int gridSizeX, int gridSizeY, int gridSizeZ, 
-					   int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-					   int maxBodiesPerCell = 8,
-					   btScalar cellFactorAABB = btScalar(1.0f));
-	btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
-						const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-						int gridSizeX, int gridSizeY, int gridSizeZ, 
-						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-						int maxBodiesPerCell = 8,
-						btScalar cellFactorAABB = btScalar(1.0f));
-	virtual ~btGpu3DGridBroadphase();
-	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
-
-	virtual btBroadphaseProxy*	createProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
-	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
-	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
-	virtual void	resetPool(btDispatcher* dispatcher);
-
-protected:
-	void _initialize(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-						int gridSizeX, int gridSizeY, int gridSizeZ, 
-						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-						int maxBodiesPerCell = 8,
-						btScalar cellFactorAABB = btScalar(1.0f));
-	void _finalize();
-	void addPairsToCache(btDispatcher* dispatcher);
-	void addLarge2LargePairsToCache(btDispatcher* dispatcher);
-
-// overrides for CPU version
-	virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
-	virtual void prepareAABB();
-	virtual void calcHashAABB();
-	virtual void sortHash();	
-	virtual void findCellStart();
-	virtual void findOverlappingPairs();
-	virtual void findPairsLarge();
-	virtual void computePairCacheChanges();
-	virtual void scanOverlappingPairBuff();
-	virtual void squeezeOverlappingPairBuff();
-};
-
-//----------------------------------------------------------------------------------------
-
-#endif //BTGPU3DGRIDBROADPHASE_H
-
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
deleted file mode 100644
index e0afb87bb82..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//               K E R N E L    F U N C T I O N S 
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-
-// calculate position in uniform grid
-BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
-{
-    int3 gridPos;
-    gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
-    gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
-    gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
-    return gridPos;
-} // bt3DGrid_calcGridPos()
-
-//----------------------------------------------------------------------------------------
-
-// calculate address in grid from position (clamping to edges)
-BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
-{
-    gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1));
-    gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
-    gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
-    return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
-} // bt3DGrid_calcGridHash()
-
-//----------------------------------------------------------------------------------------
-
-// calculate grid hash value for each body using its AABB
-BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-	bt3DGrid3F1U bbMin = pAABB[index*2];
-	bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
-	float4 pos;
-	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
-	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
-	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
-    // get address in grid
-    int3 gridPos = bt3DGrid_calcGridPos(pos);
-    uint gridHash = bt3DGrid_calcGridHash(gridPos);
-    // store grid hash and body index
-    pHash[index] = BT_GPU_make_uint2(gridHash, index);
-} // calcHashAABBD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-    uint2 sortedData = pHash[index];
-	// Load hash data into shared memory so that we can look 
-	// at neighboring body's hash value without loading
-	// two hash values per thread
-	BT_GPU___shared__ uint sharedHash[257];
-	sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
-	if((index > 0) && (BT_GPU_threadIdx.x == 0))
-	{
-		// first thread in block must load neighbor body hash
-		volatile uint2 prevData = pHash[index-1];
-		sharedHash[0] = prevData.x;
-	}
-	BT_GPU___syncthreads();
-	if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
-	{
-		cellStart[sortedData.x] = index;
-	}
-} // findCellStartD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
-{
-	return	(min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && 
-			(min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && 
-			(min0.fz <= max1.fz)&& (min1.fz <= max0.fz); 
-} // cudaTestAABBOverlap()
- 
-//----------------------------------------------------------------------------------------
-
-BT_GPU___device__ void findPairsInCell(	int3	gridPos,
-										uint    index,
-										uint2*  pHash,
-										uint*   pCellStart,
-										bt3DGrid3F1U* pAABB, 
-										uint*   pPairBuff,
-										uint2*	pPairBuffStartCurr,
-										uint	numBodies)
-{
-    if (	(gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1)
-		||	(gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1)
-		||  (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1)) 
-    {
-		return;
-	}
-    uint gridHash = bt3DGrid_calcGridHash(gridPos);
-    // get start of bucket for this cell
-    uint bucketStart = pCellStart[gridHash];
-    if (bucketStart == 0xffffffff)
-	{
-        return;   // cell empty
-	}
-	// iterate over bodies in this cell
-    uint2 sortedData = pHash[index];
-	uint unsorted_indx = sortedData.y;
-    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); 
-	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
-	uint handleIndex =  min0.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
-	uint curr_max = start_curr_next.x - start - 1;
-	uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
-	bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
-	for(uint index2 = bucketStart; index2 < bucketEnd; index2++) 
-	{
-        uint2 cellData = pHash[index2];
-        if (cellData.x != gridHash)
-        {
-			break;   // no longer in same bucket
-		}
-		uint unsorted_indx2 = cellData.y;
-        if (unsorted_indx2 < unsorted_indx) // check not colliding with self
-        {   
-			bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
-			bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
-			if(cudaTestAABBOverlap(min0, max0, min1, max1))
-			{
-				uint handleIndex2 = min1.uw;
-				uint k;
-				for(k = 0; k < curr; k++)
-				{
-					uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
-					if(old_pair == handleIndex2)
-					{
-						pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
-						break;
-					}
-				}
-				if(k == curr)
-				{
-					if(curr >= curr_max) 
-					{ // not a good solution, but let's avoid crash
-						break;
-					}
-					pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
-					curr++;
-				}
-			}
-		}
-	}
-	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
-    return;
-} // findPairsInCell()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U*	pAABB, uint2* pHash, uint* pCellStart, 
-												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-    uint2 sortedData = pHash[index];
-	uint unsorted_indx = sortedData.y;
-	bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
-	bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
-	float4 pos;
-	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
-	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
-	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
-    // get address in grid
-    int3 gridPos = bt3DGrid_calcGridPos(pos);
-    // examine only neighbouring cells
-    for(int z=-1; z<=1; z++) {
-        for(int y=-1; y<=1; y++) {
-            for(int x=-1; x<=1; x++) {
-                findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
-            }
-        }
-    }
-} // findOverlappingPairsD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, 
-										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-    uint2 sortedData = pHash[index];
-	uint unsorted_indx = sortedData.y;
-	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
-	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
-	uint handleIndex =  min0.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
-	uint curr_max = start_curr_next.x - start - 1;
-    for(uint i = 0; i < numLarge; i++)
-    {
-		uint indx2 = numBodies + i;
-		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
-		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
-		if(cudaTestAABBOverlap(min0, max0, min1, max1))
-		{
-			uint k;
-			uint handleIndex2 =  min1.uw;
-			for(k = 0; k < curr; k++)
-			{
-				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
-				if(old_pair == handleIndex2)
-				{
-					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
-					break;
-				}
-			}
-			if(k == curr)
-			{
-				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
-				if(curr >= curr_max) 
-				{ // not a good solution, but let's avoid crash
-					break;
-				}
-				curr++;
-			}
-		}
-    }
-	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
-    return;
-} // findPairsLargeD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, 
-												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-	bt3DGrid3F1U bbMin = pAABB[index * 2];
-	uint handleIndex = bbMin.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint *pInp = pPairBuff + start;
-	uint num_changes = 0;
-	for(uint k = 0; k < curr; k++, pInp++)
-	{
-		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
-		{
-			num_changes++;
-		}
-	}
-	pPairScan[index+1] = num_changes;
-} // computePairCacheChangesD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
-												   uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-	bt3DGrid3F1U bbMin = pAABB[index * 2];
-	uint handleIndex = bbMin.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint* pInp = pPairBuff + start;
-	uint* pOut = pPairOut + pPairScan[index];
-	uint* pOut2 = pInp;
-	uint num = 0; 
-	for(uint k = 0; k < curr; k++, pInp++)
-	{
-		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
-		{
-			*pOut = *pInp;
-			pOut++;
-		}
-		if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
-		{
-			*pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
-			pOut2++;
-			num++;
-		}
-	}
-	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num);
-} // squeezeOverlappingPairBuffD()
-
-
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//               E N D   O F    K E R N E L    F U N C T I O N S 
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-
-extern "C"
-{
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies)
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-    // execute the kernel
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
-    // check if kernel invocation generated an error
-    BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
-} // calcHashAABB()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
-	BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
-} // findCellStart()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies))
-{
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
-#endif
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
-#endif
-} // findOverlappingPairs()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
-{
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
-#endif
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
-#endif
-} // findPairsLarge()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies))
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
-} // computePairCacheChanges()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies))
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
-} // btCuda_squeezeOverlappingPairBuff()
-
-//------------------------------------------------------------------------------------------------
-
-} // extern "C"
-
-//------------------------------------------------------------------------------------------------
-//------------------------------------------------------------------------------------------------
-//------------------------------------------------------------------------------------------------
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
deleted file mode 100644
index 607bda7edfd..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-// Shared definitions for GPU-based 3D Grid collision detection broadphase
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  it is included into both CUDA and CPU code
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-//----------------------------------------------------------------------------------------
-
-#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
-#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
-
-//----------------------------------------------------------------------------------------
-
-#include "btGpu3DGridBroadphaseSharedTypes.h"
-
-//----------------------------------------------------------------------------------------
-
-extern "C"
-{
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies);
-
-void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
-
-void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies);
-
-void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
-
-void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
-
-void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
-
-
-//----------------------------------------------------------------------------------------
-
-} // extern "C"
-
-//----------------------------------------------------------------------------------------
-
-#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h b/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
deleted file mode 100644
index 616a40094ca..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-// Shared definitions for GPU-based 3D Grid collision detection broadphase
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  it is included into both CUDA and CPU code
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-//----------------------------------------------------------------------------------------
-
-#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
-#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
-
-//----------------------------------------------------------------------------------------
-
-#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000)
-#define BT_3DGRID_PAIR_NEW_FLG   (0x20000000)
-#define BT_3DGRID_PAIR_ANY_FLG   (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)
-
-//----------------------------------------------------------------------------------------
-
-struct bt3DGridBroadphaseParams 
-{
-	unsigned int	m_gridSizeX;
-	unsigned int	m_gridSizeY;
-	unsigned int	m_gridSizeZ;
-	unsigned int	m_numCells;
-	float			m_worldOriginX;
-	float			m_worldOriginY;
-	float			m_worldOriginZ;
-	float			m_cellSizeX;
-	float			m_cellSizeY;
-	float			m_cellSizeZ;
-	unsigned int	m_numBodies;
-	unsigned int	m_maxBodiesPerCell;
-};
-
-//----------------------------------------------------------------------------------------
-
-struct bt3DGrid3F1U
-{
-	float			fx;
-	float			fy;
-	float			fz;
-	unsigned int	uw;
-};
-
-//----------------------------------------------------------------------------------------
-
-#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/btGpuDefines.h b/extern/bullet2/BulletMultiThreaded/btGpuDefines.h
deleted file mode 100644
index f9315ab6496..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpuDefines.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-
-// definitions for "GPU on CPU" code
-
-
-#ifndef BT_GPU_DEFINES_H
-#define BT_GPU_DEFINES_H
-
-typedef unsigned int uint;
-
-struct int2
-{
-	int x, y;
-};
-
-struct uint2
-{
-	unsigned int x, y;
-};
-
-struct int3
-{
-	int x, y, z;
-};
-
-struct uint3
-{
-	unsigned int x, y, z;
-};
-
-struct float4
-{
-	float x, y, z, w;
-};
-
-struct float3
-{
-	float x, y, z;
-};
-
-
-#define BT_GPU___device__ inline
-#define BT_GPU___devdata__
-#define BT_GPU___constant__
-#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b))
-#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b))
-#define BT_GPU_params s3DGridBroadphaseParams
-#define BT_GPU___mul24(a, b) ((a)*(b))
-#define BT_GPU___global__ inline
-#define BT_GPU___shared__ static
-#define BT_GPU___syncthreads()
-#define CUDART_PI_F SIMD_PI
-
-static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
-{
-  uint2 t; t.x = x; t.y = y; return t;
-}
-#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
-
-static inline int3 bt3dGrid_make_int3(int x, int y, int z)
-{
-  int3 t; t.x = x; t.y = y; t.z = z; return t;
-}
-#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
-
-static inline float3 bt3dGrid_make_float3(float x, float y, float z)
-{
-  float3 t; t.x = x; t.y = y; t.z = z; return t;
-}
-#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
-
-static inline float3 bt3dGrid_make_float34(float4 f)
-{
-  float3 t; t.x = f.x; t.y = f.y; t.z = f.z; return t;
-}
-#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
-
-static inline float3 bt3dGrid_make_float31(float f)
-{
-  float3 t; t.x = t.y = t.z = f; return t;
-}
-#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
-
-static inline float4 bt3dGrid_make_float42(float3 v, float f)
-{
-  float4 t; t.x = v.x; t.y = v.y; t.z = v.z; t.w = f; return t;
-}
-#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b) 
-
-static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
-{
-  float4 t; t.x = a; t.y = b; t.z = c; t.w = d; return t;
-}
-#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d) 
-
-inline int3 operator+(int3 a, int3 b)
-{
-    return bt3dGrid_make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-
-inline float4 operator+(const float4& a, const float4& b)
-{
-	float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r;
-}
-inline float4 operator*(const float4& a, float fact)
-{
-	float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r;
-}
-inline float4 operator*(float fact, float4& a)
-{
-	return (a * fact);
-}
-inline float4& operator*=(float4& a, float fact)
-{
-	a = fact * a;
-	return a;
-}
-inline float4& operator+=(float4& a, const float4& b)
-{
-	a = a + b;
-	return a;
-}
-
-inline float3 operator+(const float3& a, const float3& b)
-{
-	float3 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; return r;
-}
-inline float3 operator-(const float3& a, const float3& b)
-{
-	float3 r; r.x = a.x-b.x; r.y = a.y-b.y; r.z = a.z-b.z; return r;
-}
-static inline float bt3dGrid_dot(float3& a, float3& b)
-{
-	return a.x*b.x+a.y*b.y+a.z*b.z;
-}
-#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
-
-static inline float bt3dGrid_dot4(float4& a, float4& b)
-{
-	return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
-}
-#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
-
-static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
-{
-	float3 r; r.x = a.y*b.z-a.z*b.y; r.y = -a.x*b.z+a.z*b.x; r.z = a.x*b.y-a.y*b.x;	return r;
-}
-#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
-
-
-inline float3 operator*(const float3& a, float fact)
-{
-	float3 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; return r;
-}
-
-
-inline float3& operator+=(float3& a, const float3& b)
-{
-	a = a + b;
-	return a;
-}
-inline float3& operator-=(float3& a, const float3& b)
-{
-	a = a - b;
-	return a;
-}
-inline float3& operator*=(float3& a, float fact)
-{
-	a = a * fact;
-	return a;
-}
-inline float3 operator-(const float3& v)
-{
-	float3 r; r.x = -v.x; r.y = -v.y; r.z = -v.z; return r;
-}
-
-
-#define BT_GPU_FETCH(a, b) a[b]
-#define BT_GPU_FETCH4(a, b) a[b]
-#define BT_GPU_PREF(func) btGpu_##func
-#define BT_GPU_SAFE_CALL(func) func
-#define BT_GPU_Memset memset
-#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c)
-#define BT_GPU_BindTexture(a, b, c, d)
-#define BT_GPU_UnbindTexture(a)
-
-static uint2 s_blockIdx, s_blockDim, s_threadIdx;
-#define BT_GPU_blockIdx s_blockIdx
-#define BT_GPU_blockDim s_blockDim
-#define BT_GPU_threadIdx s_threadIdx
-#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}}
-
-#define BT_GPU_CHECK_ERROR(s)
-
-
-#endif //BT_GPU_DEFINES_H
diff --git a/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h b/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h
deleted file mode 100644
index 5761e7901ee..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedCode.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-// Shared code for GPU-based utilities
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  will be compiled by both CPU and CUDA compilers
-//	file with definitions of BT_GPU_xxx should be included first
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-//----------------------------------------------------------------------------------------
-
-#include "btGpuUtilsSharedDefs.h"
-
-//----------------------------------------------------------------------------------------
-
-extern "C"
-{
-
-//----------------------------------------------------------------------------------------
-
-//Round a / b to nearest higher integer value
-int BT_GPU_PREF(iDivUp)(int a, int b)
-{
-    return (a % b != 0) ? (a / b + 1) : (a / b);
-} // iDivUp()
-
-//----------------------------------------------------------------------------------------
-
-// compute grid and thread block size for a given number of elements
-void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
-{
-    numThreads = BT_GPU_min(blockSize, n);
-    numBlocks = BT_GPU_PREF(iDivUp)(n, numThreads);
-} // computeGridSize()
-
-//----------------------------------------------------------------------------------------
-
-} // extern "C"
-
diff --git a/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h b/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h
deleted file mode 100644
index dccfda54cbc..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btGpuUtilsSharedDefs.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-// Shared definitions for GPU-based utilities
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  it is included into both CUDA and CPU code
-//	file with definitions of BT_GPU_xxx should be included first
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-#ifndef BTGPUUTILSDHAREDDEFS_H
-#define BTGPUUTILSDHAREDDEFS_H
-
-
-extern "C"
-{
-
-
-//Round a / b to nearest higher integer value
-int BT_GPU_PREF(iDivUp)(int a, int b);
-
-// compute grid and thread block size for a given number of elements
-void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
-
-void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
-void BT_GPU_PREF(freeArray)(void* devPtr);
-void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
-void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
-void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
-void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
-void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
-
-
-} // extern "C"
-
-
-#endif // BTGPUUTILSDHAREDDEFS_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp b/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp
deleted file mode 100644
index 84774b22706..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-   Copyright (C) 2010 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#include "btParallelConstraintSolver.h"
-#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
-
-btParallelConstraintSolver::btParallelConstraintSolver()
-{
-
-	//initialize MiniCL here
-
-}
-	
-btParallelConstraintSolver::~btParallelConstraintSolver()
-{
-	//exit MiniCL
-
-}
-
-	
-btScalar btParallelConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
-{
-	{
-			int i;
-			btPersistentManifold* manifold = 0;
-//			btCollisionObject* colObj0=0,*colObj1=0;
-
-
-			for (i=0;i<numManifolds;i++)
-			{
-				manifold = manifoldPtr[i];
-				convertContact(manifold,infoGlobal);
-			}
-		
-	}
-
-	btContactSolverInfo info = infoGlobal;
-
-
-
-	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
-	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
-
-	///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
-	m_orderTmpConstraintPool.resize(numConstraintPool);
-	m_orderFrictionConstraintPool.resize(numFrictionPool);
-	{
-		int i;
-		for (i=0;i<numConstraintPool;i++)
-		{
-			m_orderTmpConstraintPool[i] = i;
-		}
-		for (i=0;i<numFrictionPool;i++)
-		{
-			m_orderFrictionConstraintPool[i] = i;
-		}
-	}
-
-	return 0.f;
-}
-
diff --git a/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h b/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h
deleted file mode 100644
index c347f96f5a0..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btParallelConstraintSolver.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-   Copyright (C) 2010 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H
-#define __BT_PARALLEL_CONSTRAINT_SOLVER_H
-
-#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
-									      
-class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver
-{
-protected:
-
-public:
-
-	btParallelConstraintSolver();
-	
-	virtual ~btParallelConstraintSolver();
-
-	//virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
-	
-	btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
-
-
-
-};
-
-
-
-#endif //__BT_PARALLEL_CONSTRAINT_SOLVER_H
-\ No newline at end of file
diff --git a/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp b/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp
deleted file mode 100644
index 8192aa4684a..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#include "btThreadSupportInterface.h"
-
-btThreadSupportInterface::~btThreadSupportInterface()
-{
-
-}
-
diff --git a/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h b/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h
deleted file mode 100644
index 730ffa9ea0b..00000000000
--- a/extern/bullet2/BulletMultiThreaded/btThreadSupportInterface.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef THREAD_SUPPORT_INTERFACE_H
-#define THREAD_SUPPORT_INTERFACE_H
-
-
-//#include <LinearMath/btScalar.h> //for uint32_t etc.
-#include "PlatformDefinitions.h"
-#include "PpuAddressSpace.h"
-
-class btThreadSupportInterface
-{
-public:
-
-	virtual ~btThreadSupportInterface();
-
-///send messages to SPUs
-	virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1) =0;
-
-///check for messages from SPUs
-	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0;
-
-///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
-	virtual	void startSPU() =0;
-
-///tell the task scheduler we are done with the SPU tasks
-	virtual	void stopSPU()=0;
-
-	///tell the task scheduler to use no more than numTasks tasks
-	virtual void	setNumTasks(int numTasks)=0;
-
-	virtual int		getNumTasks() const = 0;
-
-};
-
-#endif //THREAD_SUPPORT_INTERFACE_H
-
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h
deleted file mode 100644
index c5eeeebd7a1..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <math.h>
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-private:
-    unsigned int mData;
-
-public:
-    // Default constructor; does no initialization
-    //
-    inline boolInVec( ) { };
-
-    // Construct from a value converted from float
-    //
-    inline boolInVec(floatInVec vec);
-
-    // Explicit cast from bool
-    //
-    explicit inline boolInVec(bool scalar);
-
-    // Explicit cast to bool
-    //
-    inline bool getAsBool() const;
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-    // Implicit cast to bool
-    //
-    inline operator bool() const;
-#endif
-
-    // Boolean negation operator
-    //
-    inline const boolInVec operator ! () const;
-
-    // Assignment operator
-    //
-    inline boolInVec& operator = (boolInVec vec);
-
-    // Boolean and assignment operator
-    //
-    inline boolInVec& operator &= (boolInVec vec);
-
-    // Boolean exclusive or assignment operator
-    //
-    inline boolInVec& operator ^= (boolInVec vec);
-
-    // Boolean or assignment operator
-    //
-    inline boolInVec& operator |= (boolInVec vec);
-
-};
-
-// Equal operator
-//
-inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-
-// Not equal operator
-//
-inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-
-// And operator
-//
-inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-
-// Exclusive or operator
-//
-inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-
-// Or operator
-//
-inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-
-// Conditionally select between two values
-//
-inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-
-
-} // namespace Vectormath
-
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(floatInVec vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-    mData = -(int)scalar;
-}
-
-inline
-bool
-boolInVec::getAsBool() const
-{
-    return (mData > 0);
-}
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-inline
-boolInVec::operator bool() const
-{
-    return getAsBool();
-}
-#endif
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(!mData);
-}
-
-inline
-boolInVec&
-boolInVec::operator = (boolInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (boolInVec vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (boolInVec vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (boolInVec vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() == vec1.getAsBool());
-}
-
-inline
-const boolInVec
-operator != (boolInVec vec0, boolInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-
-inline
-const boolInVec
-operator & (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() & vec1.getAsBool());
-}
-
-inline
-const boolInVec
-operator | (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() | vec1.getAsBool());
-}
-
-inline
-const boolInVec
-operator ^ (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() ^ vec1.getAsBool());
-}
-
-inline
-const boolInVec
-select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
-{
-    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
-}
-
-} // namespace Vectormath
-
-#endif // boolInVec_h
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h
deleted file mode 100644
index 12d89e43d3e..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-// A class representing a scalar float value contained in a vector register
-// This class does not support fastmath
-class floatInVec
-{
-private:
-    float mData;
-
-public:
-    // Default constructor; does no initialization
-    //
-    inline floatInVec( ) { };
-
-    // Construct from a value converted from bool
-    //
-    inline floatInVec(boolInVec vec);
-
-    // Explicit cast from float
-    //
-    explicit inline floatInVec(float scalar);
-
-    // Explicit cast to float
-    //
-    inline float getAsFloat() const;
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-    // Implicit cast to float
-    //
-    inline operator float() const;
-#endif
-
-    // Post increment (add 1.0f)
-    //
-    inline const floatInVec operator ++ (int);
-
-    // Post decrement (subtract 1.0f)
-    //
-    inline const floatInVec operator -- (int);
-
-    // Pre increment (add 1.0f)
-    //
-    inline floatInVec& operator ++ ();
-
-    // Pre decrement (subtract 1.0f)
-    //
-    inline floatInVec& operator -- ();
-
-    // Negation operator
-    //
-    inline const floatInVec operator - () const;
-
-    // Assignment operator
-    //
-    inline floatInVec& operator = (floatInVec vec);
-
-    // Multiplication assignment operator
-    //
-    inline floatInVec& operator *= (floatInVec vec);
-
-    // Division assignment operator
-    //
-    inline floatInVec& operator /= (floatInVec vec);
-
-    // Addition assignment operator
-    //
-    inline floatInVec& operator += (floatInVec vec);
-
-    // Subtraction assignment operator
-    //
-    inline floatInVec& operator -= (floatInVec vec);
-
-};
-
-// Multiplication operator
-//
-inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-
-// Division operator
-//
-inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-
-// Addition operator
-//
-inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-
-// Subtraction operator
-//
-inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-
-// Less than operator
-//
-inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-
-// Less than or equal operator
-//
-inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-
-// Greater than operator
-//
-inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-
-// Greater than or equal operator
-//
-inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-
-// Equal operator
-//
-inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-
-// Not equal operator
-//
-inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-
-// Conditionally select between two values
-//
-inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-
-
-} // namespace Vectormath
-
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(boolInVec vec)
-{
-    mData = float(vec.getAsBool());
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-    mData = scalar;
-}
-
-inline
-float
-floatInVec::getAsFloat() const
-{
-    return mData;
-}
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-inline
-floatInVec::operator float() const
-{
-    return getAsFloat();
-}
-#endif
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    float olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    float olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec(1.0f);
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec(1.0f);
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec(-mData);
-}
-
-inline
-floatInVec&
-floatInVec::operator = (floatInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (floatInVec vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (floatInVec vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (floatInVec vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (floatInVec vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec0.getAsFloat() * vec1.getAsFloat());
-}
-
-inline
-const floatInVec
-operator / (floatInVec num, floatInVec den)
-{
-    return floatInVec(num.getAsFloat() / den.getAsFloat());
-}
-
-inline
-const floatInVec
-operator + (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec0.getAsFloat() + vec1.getAsFloat());
-}
-
-inline
-const floatInVec
-operator - (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec0.getAsFloat() - vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator < (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(vec0.getAsFloat() < vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator <= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 > vec1);
-}
-
-inline
-const boolInVec
-operator > (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(vec0.getAsFloat() > vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator >= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 < vec1);
-}
-
-inline
-const boolInVec
-operator == (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(vec0.getAsFloat() == vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator != (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-
-inline
-const floatInVec
-select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
-{
-    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h
deleted file mode 100644
index e103243d1e0..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h
+++ /dev/null
@@ -1,1630 +0,0 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = ( qx + qx );
-    qy2 = ( qy + qy );
-    qz2 = ( qz + qz );
-    qxqx2 = ( qx * qx2 );
-    qxqy2 = ( qx * qy2 );
-    qxqz2 = ( qx * qz2 );
-    qxqw2 = ( qw * qx2 );
-    qyqy2 = ( qy * qy2 );
-    qyqz2 = ( qy * qz2 );
-    qyqw2 = ( qw * qy2 );
-    qzqz2 = ( qz * qz2 );
-    qzqw2 = ( qw * qz2 );
-    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
-    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
-    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    float detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ),
-        Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
-        Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
-    );
-}
-
-inline float determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( 0.0f, c, s ),
-        Vector3( 0.0f, -s, c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3( c, 0.0f, -s ),
-        Vector3::yAxis( ),
-        Vector3( s, 0.0f, c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3( c, s, 0.0f ),
-        Vector3( -s, c, 0.0f ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Matrix3(
-        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
-        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
-        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    return Matrix3(
-        Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ),
-        Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ),
-        Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
-        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
-        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
-    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
-    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
-    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
-    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) );
-    res1.setX( ( mI * tmp1 ) );
-    res1.setY( ( mM * tmp0 ) );
-    res1.setZ( ( mA * tmp1 ) );
-    res1.setW( ( mE * tmp0 ) );
-    res3.setX( ( mI * tmp3 ) );
-    res3.setY( ( mM * tmp2 ) );
-    res3.setZ( ( mA * tmp3 ) );
-    res3.setW( ( mE * tmp2 ) );
-    res2.setX( ( mI * tmp5 ) );
-    res2.setY( ( mM * tmp4 ) );
-    res2.setZ( ( mA * tmp5 ) );
-    res2.setW( ( mE * tmp4 ) );
-    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
-    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
-    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
-    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
-    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
-    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
-    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
-    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
-    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
-    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
-    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
-    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
-    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
-    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
-    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
-    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
-    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
-    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline float determinant( const Matrix4 & mat )
-{
-    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
-    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
-    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
-    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
-    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ),
-        ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ),
-        ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ),
-        ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ),
-        ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
-        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
-        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ),
-        ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( 0.0f, c, s, 0.0f ),
-        Vector4( 0.0f, -s, c, 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4( c, 0.0f, -s, 0.0f ),
-        Vector4::yAxis( ),
-        Vector4( s, 0.0f, c, 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4( c, s, 0.0f, 0.0f ),
-        Vector4( -s, c, 0.0f, 0.0f ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Matrix4(
-        Vector4( ( cZ * cY ), ( sZ * cY ), -sY, 0.0f ),
-        Vector4( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f ),
-        Vector4( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    return Matrix4(
-        Vector4( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f ),
-        Vector4( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f ),
-        Vector4( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
-    rangeInv = ( 1.0f / ( zNear - zFar ) );
-    return Matrix4(
-        Vector4( ( f / aspect ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, f, 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f ),
-        Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    n2 = ( zNear + zNear );
-    return Matrix4(
-        Vector4( ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f ),
-        Vector4( ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ),
-        Vector4( 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    return Matrix4(
-        Vector4( ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ),
-        Vector4( ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    float detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = ( 1.0f / dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) );
-    inv1 = Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) );
-    inv2 = Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
-        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
-        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( 0.0f, c, s ),
-        Vector3( 0.0f, -s, c ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3( c, 0.0f, -s ),
-        Vector3::yAxis( ),
-        Vector3( s, 0.0f, c ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3( c, s, 0.0f ),
-        Vector3( -s, c, 0.0f ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Transform3(
-        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
-        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
-        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
-        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
-        Vector3( 0.0f, 0.0f, scaleVec.getZ() ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    int negTrace, ZgtX, ZgtY, YgtX;
-    int largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = ( ( xx + yy ) + zz );
-
-    negTrace = ( trace < 0.0f );
-    ZgtX = zz > xx;
-    ZgtY = zz > yy;
-    YgtX = yy > xx;
-    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
-    largestYorZ = ( YgtX || ZgtX ) && negTrace;
-    largestZorX = ( ZgtY || !YgtX ) && negTrace;
-    
-    if ( largestXorY )
-    {
-        zz = -zz;
-        xy = -xy;
-    }
-    if ( largestYorZ )
-    {
-        xx = -xx;
-        yz = -yz;
-    }
-    if ( largestZorX )
-    {
-        yy = -yy;
-        zx = -zx;
-    }
-
-    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
-    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
-
-    tmpx = ( ( zy - yz ) * scale );
-    tmpy = ( ( xz - zx ) * scale );
-    tmpz = ( ( yx - xy ) * scale );
-    tmpw = ( radicand * scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    if ( largestXorY )
-    {
-        qx = tmpw;
-        qy = tmpz;
-        qz = tmpy;
-        qw = tmpx;
-    }
-    if ( largestYorZ )
-    {
-        tmpx = qx;
-        tmpz = qz;
-        qx = qy;
-        qy = tmpx;
-        qz = qw;
-        qw = tmpz;
-    }
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        ( ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() ) ),
-        ( ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() ) ),
-        ( ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( 0.0f, vec.getZ(), -vec.getY() ),
-        Vector3( -vec.getZ(), 0.0f, vec.getX() ),
-        Vector3( vec.getY(), -vec.getX(), 0.0f )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h
deleted file mode 100644
index 764e01708f9..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, float _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    if ( cosAngle < 0.0f ) {
-        cosAngle = -cosAngle;
-        start = ( -unitQuat0 );
-    } else {
-        start = unitQuat0;
-    }
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
-}
-
-inline void loadXYZW( Quat & quat, const float * fptr )
-{
-    quat = Quat( fptr[0], fptr[1], fptr[2], fptr[3] );
-}
-
-inline void storeXYZW( const Quat & quat, float * fptr )
-{
-    fptr[0] = quat.getX();
-    fptr[1] = quat.getY();
-    fptr[2] = quat.getZ();
-    fptr[3] = quat.getW();
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline float Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        ( mX + quat.mX ),
-        ( mY + quat.mY ),
-        ( mZ + quat.mZ ),
-        ( mW + quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        ( mX - quat.mX ),
-        ( mY - quat.mY ),
-        ( mZ - quat.mZ ),
-        ( mW - quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return Quat(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar ),
-        ( mW * scalar )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return Quat(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar ),
-        ( mW / scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        -mX,
-        -mY,
-        -mZ,
-        -mW
-    );
-}
-
-inline const Quat operator *( float scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline float dot( const Quat & quat0, const Quat & quat1 )
-{
-    float result;
-    result = ( quat0.getX() * quat1.getX() );
-    result = ( result + ( quat0.getY() * quat1.getY() ) );
-    result = ( result + ( quat0.getZ() * quat1.getZ() ) );
-    result = ( result + ( quat0.getW() * quat1.getW() ) );
-    return result;
-}
-
-inline float norm( const Quat & quat )
-{
-    float result;
-    result = ( quat.getX() * quat.getX() );
-    result = ( result + ( quat.getY() * quat.getY() ) );
-    result = ( result + ( quat.getZ() * quat.getZ() ) );
-    result = ( result + ( quat.getW() * quat.getW() ) );
-    return result;
-}
-
-inline float length( const Quat & quat )
-{
-    return ::sqrtf( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    float lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Quat(
-        ( quat.getX() * lenInv ),
-        ( quat.getY() * lenInv ),
-        ( quat.getZ() * lenInv ),
-        ( quat.getW() * lenInv )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    float cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
-}
-
-inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( s, 0.0f, 0.0f, c );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( 0.0f, s, 0.0f, c );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( 0.0f, 0.0f, s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
-        ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
-        ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
-        ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
-    tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
-    tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
-    tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
-    return Vector3(
-        ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
-        ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
-        ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
-{
-    return Quat(
-        ( select1 )? quat1.getX() : quat0.getX(),
-        ( select1 )? quat1.getY() : quat0.getY(),
-        ( select1 )? quat1.getZ() : quat0.getZ(),
-        ( select1 )? quat1.getW() : quat0.getW()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h
deleted file mode 100644
index 46d4d6b3e5c..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h
+++ /dev/null
@@ -1,1426 +0,0 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( 1.0f, 0.0f, 0.0f );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( 0.0f, 1.0f, 0.0f );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( 0.0f, 0.0f, 1.0f );
-}
-
-inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void loadXYZ( Vector3 & vec, const float * fptr )
-{
-    vec = Vector3( fptr[0], fptr[1], fptr[2] );
-}
-
-inline void storeXYZ( const Vector3 & vec, float * fptr )
-{
-    fptr[0] = vec.getX();
-    fptr[1] = vec.getY();
-    fptr[2] = vec.getZ();
-}
-
-inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr )
-{
-    union Data32 {
-        unsigned int u32;
-        float f32;
-    };
-
-    for (int i = 0; i < 3; i++) {
-        unsigned short fp16 = hfptr[i];
-        unsigned int sign = fp16 >> 15;
-        unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1);
-        unsigned int mantissa = fp16 & ((1 << 10) - 1);
-
-        if (exponent == 0) {
-            // zero
-            mantissa = 0;
-
-        } else if (exponent == 31) {
-            // infinity or nan -> infinity
-            exponent = 255;
-	    mantissa = 0;
-
-        } else {
-            exponent += 127 - 15;
-            mantissa <<= 13;
-        }
-
-        Data32 d;
-        d.u32 = (sign << 31) | (exponent << 23) | mantissa;
-        vec[i] = d.f32;
-    }
-}
-
-inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr )
-{
-    union Data32 {
-        unsigned int u32;
-        float f32;
-    };
-
-    for (int i = 0; i < 3; i++) {
-        Data32 d;
-        d.f32 = vec[i];
-
-        unsigned int sign = d.u32 >> 31;
-        unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1);
-        unsigned int mantissa = d.u32 & ((1 << 23) - 1);;
-
-        if (exponent == 0) {
-            // zero or denorm -> zero
-            mantissa = 0;
-
-        } else if (exponent == 255 && mantissa != 0) {
-            // nan -> infinity
-            exponent = 31;
-            mantissa = 0;
-
-        } else if (exponent >= 127 - 15 + 31) {
-            // overflow or infinity -> infinity
-            exponent = 31;
-            mantissa = 0;
-
-        } else if (exponent <= 127 - 15) {
-            // underflow -> zero
-            exponent = 0;
-            mantissa = 0;
-
-        } else {
-            exponent -= 127 - 15;
-            mantissa >>= 13;
-        }
-
-        hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa);
-    }
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( mX + vec.mX ),
-        ( mY + vec.mY ),
-        ( mZ + vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( mX - vec.mX ),
-        ( mY - vec.mY ),
-        ( mZ - vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        ( mX + pnt.getX() ),
-        ( mY + pnt.getY() ),
-        ( mZ + pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return Vector3(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return Vector3(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        -mX,
-        -mY,
-        -mZ
-    );
-}
-
-inline const Vector3 operator *( float scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec0.getX() * vec1.getX() ),
-        ( vec0.getY() * vec1.getY() ),
-        ( vec0.getZ() * vec1.getZ() )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec0.getX() / vec1.getX() ),
-        ( vec0.getY() / vec1.getY() ),
-        ( vec0.getZ() / vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        ( 1.0f / vec.getX() ),
-        ( 1.0f / vec.getY() ),
-        ( 1.0f / vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf( vec.getX() ),
-        sqrtf( vec.getY() ),
-        sqrtf( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        ( 1.0f / sqrtf( vec.getX() ) ),
-        ( 1.0f / sqrtf( vec.getY() ) ),
-        ( 1.0f / sqrtf( vec.getZ() ) )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf( vec.getX() ),
-        fabsf( vec.getY() ),
-        fabsf( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
-        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
-        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ()
-    );
-}
-
-inline float maxElem( const Vector3 & vec )
-{
-    float result;
-    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() > result)? vec.getZ() : result;
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ()
-    );
-}
-
-inline float minElem( const Vector3 & vec )
-{
-    float result;
-    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() < result)? vec.getZ() : result;
-    return result;
-}
-
-inline float sum( const Vector3 & vec )
-{
-    float result;
-    result = ( vec.getX() + vec.getY() );
-    result = ( result + vec.getZ() );
-    return result;
-}
-
-inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    float result;
-    result = ( vec0.getX() * vec1.getX() );
-    result = ( result + ( vec0.getY() * vec1.getY() ) );
-    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
-    return result;
-}
-
-inline float lengthSqr( const Vector3 & vec )
-{
-    float result;
-    result = ( vec.getX() * vec.getX() );
-    result = ( result + ( vec.getY() * vec.getY() ) );
-    result = ( result + ( vec.getZ() * vec.getZ() ) );
-    return result;
-}
-
-inline float length( const Vector3 & vec )
-{
-    return ::sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Vector3(
-        ( vec.getX() * lenInv ),
-        ( vec.getY() * lenInv ),
-        ( vec.getZ() * lenInv )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ),
-        ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ),
-        ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
-{
-    return Vector3(
-        ( select1 )? vec1.getX() : vec0.getX(),
-        ( select1 )? vec1.getY() : vec0.getY(),
-        ( select1 )? vec1.getZ() : vec0.getZ()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, float _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = 0.0f;
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = 1.0f;
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void loadXYZW( Vector4 & vec, const float * fptr )
-{
-    vec = Vector4( fptr[0], fptr[1], fptr[2], fptr[3] );
-}
-
-inline void storeXYZW( const Vector4 & vec, float * fptr )
-{
-    fptr[0] = vec.getX();
-    fptr[1] = vec.getY();
-    fptr[2] = vec.getZ();
-    fptr[3] = vec.getW();
-}
-
-inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr )
-{
-    union Data32 {
-        unsigned int u32;
-        float f32;
-    };
-
-    for (int i = 0; i < 4; i++) {
-        unsigned short fp16 = hfptr[i];
-        unsigned int sign = fp16 >> 15;
-        unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1);
-        unsigned int mantissa = fp16 & ((1 << 10) - 1);
-
-        if (exponent == 0) {
-            // zero
-            mantissa = 0;
-
-        } else if (exponent == 31) {
-            // infinity or nan -> infinity
-            exponent = 255;
-	    mantissa = 0;
-
-        } else {
-            exponent += 127 - 15;
-            mantissa <<= 13;
-        }
-
-        Data32 d;
-        d.u32 = (sign << 31) | (exponent << 23) | mantissa;
-        vec[i] = d.f32;
-    }
-}
-
-inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr )
-{
-    union Data32 {
-        unsigned int u32;
-        float f32;
-    };
-
-    for (int i = 0; i < 4; i++) {
-        Data32 d;
-        d.f32 = vec[i];
-
-        unsigned int sign = d.u32 >> 31;
-        unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1);
-        unsigned int mantissa = d.u32 & ((1 << 23) - 1);;
-
-        if (exponent == 0) {
-            // zero or denorm -> zero
-            mantissa = 0;
-
-        } else if (exponent == 255 && mantissa != 0) {
-            // nan -> infinity
-            exponent = 31;
-            mantissa = 0;
-
-        } else if (exponent >= 127 - 15 + 31) {
-            // overflow or infinity -> infinity
-            exponent = 31;
-            mantissa = 0;
-
-        } else if (exponent <= 127 - 15) {
-            // underflow -> zero
-            exponent = 0;
-            mantissa = 0;
-
-        } else {
-            exponent -= 127 - 15;
-            mantissa >>= 13;
-        }
-
-        hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa);
-    }
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline float Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( mX + vec.mX ),
-        ( mY + vec.mY ),
-        ( mZ + vec.mZ ),
-        ( mW + vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( mX - vec.mX ),
-        ( mY - vec.mY ),
-        ( mZ - vec.mZ ),
-        ( mW - vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return Vector4(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar ),
-        ( mW * scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return Vector4(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar ),
-        ( mW / scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        -mX,
-        -mY,
-        -mZ,
-        -mW
-    );
-}
-
-inline const Vector4 operator *( float scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec0.getX() * vec1.getX() ),
-        ( vec0.getY() * vec1.getY() ),
-        ( vec0.getZ() * vec1.getZ() ),
-        ( vec0.getW() * vec1.getW() )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec0.getX() / vec1.getX() ),
-        ( vec0.getY() / vec1.getY() ),
-        ( vec0.getZ() / vec1.getZ() ),
-        ( vec0.getW() / vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        ( 1.0f / vec.getX() ),
-        ( 1.0f / vec.getY() ),
-        ( 1.0f / vec.getZ() ),
-        ( 1.0f / vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf( vec.getX() ),
-        sqrtf( vec.getY() ),
-        sqrtf( vec.getZ() ),
-        sqrtf( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        ( 1.0f / sqrtf( vec.getX() ) ),
-        ( 1.0f / sqrtf( vec.getY() ) ),
-        ( 1.0f / sqrtf( vec.getZ() ) ),
-        ( 1.0f / sqrtf( vec.getW() ) )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf( vec.getX() ),
-        fabsf( vec.getY() ),
-        fabsf( vec.getZ() ),
-        fabsf( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
-        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
-        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ),
-        ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(),
-        (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW()
-    );
-}
-
-inline float maxElem( const Vector4 & vec )
-{
-    float result;
-    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() > result)? vec.getZ() : result;
-    result = (vec.getW() > result)? vec.getW() : result;
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(),
-        (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW()
-    );
-}
-
-inline float minElem( const Vector4 & vec )
-{
-    float result;
-    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() < result)? vec.getZ() : result;
-    result = (vec.getW() < result)? vec.getW() : result;
-    return result;
-}
-
-inline float sum( const Vector4 & vec )
-{
-    float result;
-    result = ( vec.getX() + vec.getY() );
-    result = ( result + vec.getZ() );
-    result = ( result + vec.getW() );
-    return result;
-}
-
-inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    float result;
-    result = ( vec0.getX() * vec1.getX() );
-    result = ( result + ( vec0.getY() * vec1.getY() ) );
-    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
-    result = ( result + ( vec0.getW() * vec1.getW() ) );
-    return result;
-}
-
-inline float lengthSqr( const Vector4 & vec )
-{
-    float result;
-    result = ( vec.getX() * vec.getX() );
-    result = ( result + ( vec.getY() * vec.getY() ) );
-    result = ( result + ( vec.getZ() * vec.getZ() ) );
-    result = ( result + ( vec.getW() * vec.getW() ) );
-    return result;
-}
-
-inline float length( const Vector4 & vec )
-{
-    return ::sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Vector4(
-        ( vec.getX() * lenInv ),
-        ( vec.getY() * lenInv ),
-        ( vec.getZ() * lenInv ),
-        ( vec.getW() * lenInv )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
-{
-    return Vector4(
-        ( select1 )? vec1.getX() : vec0.getX(),
-        ( select1 )? vec1.getY() : vec0.getY(),
-        ( select1 )? vec1.getZ() : vec0.getZ(),
-        ( select1 )? vec1.getW() : vec0.getW()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline void loadXYZ( Point3 & pnt, const float * fptr )
-{
-    pnt = Point3( fptr[0], fptr[1], fptr[2] );
-}
-
-inline void storeXYZ( const Point3 & pnt, float * fptr )
-{
-    fptr[0] = pnt.getX();
-    fptr[1] = pnt.getY();
-    fptr[2] = pnt.getZ();
-}
-
-inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr )
-{
-    union Data32 {
-        unsigned int u32;
-        float f32;
-    };
-
-    for (int i = 0; i < 3; i++) {
-        unsigned short fp16 = hfptr[i];
-        unsigned int sign = fp16 >> 15;
-        unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1);
-        unsigned int mantissa = fp16 & ((1 << 10) - 1);
-
-        if (exponent == 0) {
-            // zero
-            mantissa = 0;
-
-        } else if (exponent == 31) {
-            // infinity or nan -> infinity
-            exponent = 255;
-	    mantissa = 0;
-
-        } else {
-            exponent += 127 - 15;
-            mantissa <<= 13;
-        }
-
-        Data32 d;
-        d.u32 = (sign << 31) | (exponent << 23) | mantissa;
-        vec[i] = d.f32;
-    }
-}
-
-inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr )
-{
-    union Data32 {
-        unsigned int u32;
-        float f32;
-    };
-
-    for (int i = 0; i < 3; i++) {
-        Data32 d;
-        d.f32 = vec[i];
-
-        unsigned int sign = d.u32 >> 31;
-        unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1);
-        unsigned int mantissa = d.u32 & ((1 << 23) - 1);;
-
-        if (exponent == 0) {
-            // zero or denorm -> zero
-            mantissa = 0;
-
-        } else if (exponent == 255 && mantissa != 0) {
-            // nan -> infinity
-            exponent = 31;
-            mantissa = 0;
-
-        } else if (exponent >= 127 - 15 + 31) {
-            // overflow or infinity -> infinity
-            exponent = 31;
-            mantissa = 0;
-
-        } else if (exponent <= 127 - 15) {
-            // underflow -> zero
-            exponent = 0;
-            mantissa = 0;
-
-        } else {
-            exponent -= 127 - 15;
-            mantissa >>= 13;
-        }
-
-        hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa);
-    }
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        ( mX - pnt.mX ),
-        ( mY - pnt.mY ),
-        ( mZ - pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        ( mX + vec.getX() ),
-        ( mY + vec.getY() ),
-        ( mZ + vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        ( mX - vec.getX() ),
-        ( mY - vec.getY() ),
-        ( mZ - vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt0.getX() * pnt1.getX() ),
-        ( pnt0.getY() * pnt1.getY() ),
-        ( pnt0.getZ() * pnt1.getZ() )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt0.getX() / pnt1.getX() ),
-        ( pnt0.getY() / pnt1.getY() ),
-        ( pnt0.getZ() / pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        ( 1.0f / pnt.getX() ),
-        ( 1.0f / pnt.getY() ),
-        ( 1.0f / pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf( pnt.getX() ),
-        sqrtf( pnt.getY() ),
-        sqrtf( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        ( 1.0f / sqrtf( pnt.getX() ) ),
-        ( 1.0f / sqrtf( pnt.getY() ) ),
-        ( 1.0f / sqrtf( pnt.getZ() ) )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf( pnt.getX() ),
-        fabsf( pnt.getY() ),
-        fabsf( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ),
-        ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ),
-        ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(),
-        (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(),
-        (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
-    );
-}
-
-inline float maxElem( const Point3 & pnt )
-{
-    float result;
-    result = (pnt.getX() > pnt.getY())? pnt.getX() : pnt.getY();
-    result = (pnt.getZ() > result)? pnt.getZ() : result;
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(),
-        (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(),
-        (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
-    );
-}
-
-inline float minElem( const Point3 & pnt )
-{
-    float result;
-    result = (pnt.getX() < pnt.getY())? pnt.getX() : pnt.getY();
-    result = (pnt.getZ() < result)? pnt.getZ() : result;
-    return result;
-}
-
-inline float sum( const Point3 & pnt )
-{
-    float result;
-    result = ( pnt.getX() + pnt.getY() );
-    result = ( result + pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, float scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline float projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    float result;
-    result = ( pnt.getX() * unitVec.getX() );
-    result = ( result + ( pnt.getY() * unitVec.getY() ) );
-    result = ( result + ( pnt.getZ() * unitVec.getZ() ) );
-    return result;
-}
-
-inline float distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline float distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
-{
-    return Point3(
-        ( select1 )? pnt1.getX() : pnt0.getX(),
-        ( select1 )? pnt1.getY() : pnt0.getY(),
-        ( select1 )? pnt1.getZ() : pnt0.getZ()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h b/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h
deleted file mode 100644
index d00456dfeb4..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h
+++ /dev/null
@@ -1,1872 +0,0 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_H
-#define _VECTORMATH_AOS_CPP_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    float mX;
-    float mY;
-    float mZ;
-#ifndef __GNUC__
-    float d;
-#endif
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline float maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline float minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline float sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline float lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline float length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
-
-// Load x, y, and z elements from the first three words of a float array.
-// 
-// 
-inline void loadXYZ( Vector3 & vec, const float * fptr );
-
-// Store x, y, and z elements of a 3-D vector in the first three words of a float array.
-// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
-// 
-inline void storeXYZ( const Vector3 & vec, float * fptr );
-
-// Load three-half-floats as a 3-D vector
-// NOTE: 
-// This transformation does not support either denormalized numbers or NaNs.
-// 
-inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr );
-
-// Store a 3-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from <code><i>hfptr</i></code> might be accessed.
-// NOTE: 
-// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
-// 
-inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    float mX;
-    float mY;
-    float mZ;
-    float mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, float w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline float maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline float minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline float sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline float lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline float length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
-
-// Load x, y, z, and w elements from the first four words of a float array.
-// 
-// 
-inline void loadXYZW( Vector4 & vec, const float * fptr );
-
-// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array.
-// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
-// 
-inline void storeXYZW( const Vector4 & vec, float * fptr );
-
-// Load four-half-floats as a 4-D vector
-// NOTE: 
-// This transformation does not support either denormalized numbers or NaNs.
-// 
-inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr );
-
-// Store a 4-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from <code><i>hfptr</i></code> might be accessed.
-// NOTE: 
-// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
-// 
-inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    float mX;
-    float mY;
-    float mZ;
-#ifndef __GNUC__
-    float d;
-#endif
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline float maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline float minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline float sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, float scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline float projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
-
-// Load x, y, and z elements from the first three words of a float array.
-// 
-// 
-inline void loadXYZ( Point3 & pnt, const float * fptr );
-
-// Store x, y, and z elements of a 3-D point in the first three words of a float array.
-// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
-// 
-inline void storeXYZ( const Point3 & pnt, float * fptr );
-
-// Load three-half-floats as a 3-D point
-// NOTE: 
-// This transformation does not support either denormalized numbers or NaNs.
-// 
-inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr );
-
-// Store a 3-D point as half-floats. Memory area of previous 16 bytes and next 32 bytes from <code><i>hfptr</i></code> might be accessed.
-// NOTE: 
-// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
-// 
-inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    float mX;
-    float mY;
-    float mZ;
-    float mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, float w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Get the x element of a quaternion
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline float dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline float norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline float length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// 
-inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
-
-// Load x, y, z, and w elements from the first four words of a float array.
-// 
-// 
-inline void loadXYZW( Quat & quat, const float * fptr );
-
-// Store x, y, z, and w elements of a quaternion in the first four words of a float array.
-// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
-// 
-inline void storeXYZW( const Quat & quat, float * fptr );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline float determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline float determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
diff --git a/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h b/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h
deleted file mode 100644
index 5a4944a5500..00000000000
--- a/extern/bullet2/BulletMultiThreaded/vectormath2bullet.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef AOS_VECTORMATH_BULLET_CONVERT_H
-#define AOS_VECTORMATH_BULLET_CONVERT_H
-
-
-///only use a system-wide vectormath_aos.h on CELLOS_LV2 or if USE_SYSTEM_VECTORMATH
-#if defined(__CELLOS_LV2__) || defined (USE_SYSTEM_VECTORMATH)
-#include <vectormath_aos.h>
-#else
-#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
-#endif
-
-#include "LinearMath/btVector3.h"
-#include "LinearMath/btQuaternion.h"
-#include "LinearMath/btMatrix3x3.h"
-
-inline Vectormath::Aos::Vector3	getVmVector3(const btVector3& bulletVec)
-{
-	return Vectormath::Aos::Vector3(bulletVec.getX(),bulletVec.getY(),bulletVec.getZ());
-}
-
-inline btVector3 getBtVector3(const Vectormath::Aos::Vector3& vmVec)
-{
-	return btVector3(vmVec.getX(),vmVec.getY(),vmVec.getZ());
-}
-inline btVector3 getBtVector3(const Vectormath::Aos::Point3& vmVec)
-{
-	return btVector3(vmVec.getX(),vmVec.getY(),vmVec.getZ());
-}
-
-inline Vectormath::Aos::Quat	getVmQuat(const btQuaternion& bulletQuat)
-{
-	Vectormath::Aos::Quat vmQuat(bulletQuat.getX(),bulletQuat.getY(),bulletQuat.getZ(),bulletQuat.getW());
-	return vmQuat;
-}
-
-inline btQuaternion	getBtQuat(const Vectormath::Aos::Quat& vmQuat)
-{
-	return btQuaternion (vmQuat.getX(),vmQuat.getY(),vmQuat.getZ(),vmQuat.getW());
-}
-
-inline Vectormath::Aos::Matrix3	getVmMatrix3(const btMatrix3x3& btMat)
-{
-	Vectormath::Aos::Matrix3 mat(
-		getVmVector3(btMat.getColumn(0)),
-		getVmVector3(btMat.getColumn(1)),
-		getVmVector3(btMat.getColumn(2)));
-		return mat;
-}
-
-
-#endif //AOS_VECTORMATH_BULLET_CONVERT_H