diff options
Diffstat (limited to 'extern/bullet2/src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp')
-rw-r--r-- | extern/bullet2/src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp | 1084 |
1 files changed, 1084 insertions, 0 deletions
diff --git a/extern/bullet2/src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp b/extern/bullet2/src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp new file mode 100644 index 00000000000..27f76b8425c --- /dev/null +++ b/extern/bullet2/src/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp @@ -0,0 +1,1084 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "btBatchedConstraints.h" + +#include "LinearMath/btIDebugDraw.h" +#include "LinearMath/btMinMax.h" +#include "LinearMath/btStackAlloc.h" +#include "LinearMath/btQuickprof.h" + +#include <string.h> //for memset + +#include <cmath> + +const int kNoMerge = -1; + +bool btBatchedConstraints::s_debugDrawBatches = false; + +struct btBatchedConstraintInfo +{ + int constraintIndex; + int numConstraintRows; + int bodyIds[2]; +}; + +struct btBatchInfo +{ + int numConstraints; + int mergeIndex; + + btBatchInfo() : numConstraints(0), mergeIndex(kNoMerge) {} +}; + +bool btBatchedConstraints::validate(btConstraintArray* constraints, const btAlignedObjectArray<btSolverBody>& bodies) const +{ + // + // validate: for debugging only. Verify coloring of bodies, that no body is touched by more than one batch in any given phase + // + int errors = 0; + const int kUnassignedBatch = -1; + + btAlignedObjectArray<int> bodyBatchId; + for (int iPhase = 0; iPhase < m_phases.size(); ++iPhase) + { + bodyBatchId.resizeNoInitialize(0); + bodyBatchId.resize(bodies.size(), kUnassignedBatch); + const Range& phase = m_phases[iPhase]; + for (int iBatch = phase.begin; iBatch < phase.end; ++iBatch) + { + const Range& batch = m_batches[iBatch]; + for (int iiCons = batch.begin; iiCons < batch.end; ++iiCons) + { + int iCons = m_constraintIndices[iiCons]; + const btSolverConstraint& cons = constraints->at(iCons); + const btSolverBody& bodyA = bodies[cons.m_solverBodyIdA]; + const btSolverBody& bodyB = bodies[cons.m_solverBodyIdB]; + if (!bodyA.internalGetInvMass().isZero()) + { + int thisBodyBatchId = bodyBatchId[cons.m_solverBodyIdA]; + if (thisBodyBatchId == kUnassignedBatch) + { + bodyBatchId[cons.m_solverBodyIdA] = iBatch; + } + else if (thisBodyBatchId != iBatch) + { + btAssert(!"dynamic body is used in 2 different batches in the same phase"); + errors++; + } + } + if (!bodyB.internalGetInvMass().isZero()) + { + int thisBodyBatchId = bodyBatchId[cons.m_solverBodyIdB]; + if (thisBodyBatchId == kUnassignedBatch) + { + bodyBatchId[cons.m_solverBodyIdB] = iBatch; + } + else if (thisBodyBatchId != iBatch) + { + btAssert(!"dynamic body is used in 2 different batches in the same phase"); + errors++; + } + } + } + } + } + return errors == 0; +} + +static void debugDrawSingleBatch(const btBatchedConstraints* bc, + btConstraintArray* constraints, + const btAlignedObjectArray<btSolverBody>& bodies, + int iBatch, + const btVector3& color, + const btVector3& offset) +{ + if (bc && bc->m_debugDrawer && iBatch < bc->m_batches.size()) + { + const btBatchedConstraints::Range& b = bc->m_batches[iBatch]; + for (int iiCon = b.begin; iiCon < b.end; ++iiCon) + { + int iCon = bc->m_constraintIndices[iiCon]; + const btSolverConstraint& con = constraints->at(iCon); + int iBody0 = con.m_solverBodyIdA; + int iBody1 = con.m_solverBodyIdB; + btVector3 pos0 = bodies[iBody0].getWorldTransform().getOrigin() + offset; + btVector3 pos1 = bodies[iBody1].getWorldTransform().getOrigin() + offset; + bc->m_debugDrawer->drawLine(pos0, pos1, color); + } + } +} + +static void debugDrawPhase(const btBatchedConstraints* bc, + btConstraintArray* constraints, + const btAlignedObjectArray<btSolverBody>& bodies, + int iPhase, + const btVector3& color0, + const btVector3& color1, + const btVector3& offset) +{ + BT_PROFILE("debugDrawPhase"); + if (bc && bc->m_debugDrawer && iPhase < bc->m_phases.size()) + { + const btBatchedConstraints::Range& phase = bc->m_phases[iPhase]; + for (int iBatch = phase.begin; iBatch < phase.end; ++iBatch) + { + float tt = float(iBatch - phase.begin) / float(btMax(1, phase.end - phase.begin - 1)); + btVector3 col = lerp(color0, color1, tt); + debugDrawSingleBatch(bc, constraints, bodies, iBatch, col, offset); + } + } +} + +static void debugDrawAllBatches(const btBatchedConstraints* bc, + btConstraintArray* constraints, + const btAlignedObjectArray<btSolverBody>& bodies) +{ + BT_PROFILE("debugDrawAllBatches"); + if (bc && bc->m_debugDrawer && bc->m_phases.size() > 0) + { + btVector3 bboxMin(BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT); + btVector3 bboxMax = -bboxMin; + for (int iBody = 0; iBody < bodies.size(); ++iBody) + { + const btVector3& pos = bodies[iBody].getWorldTransform().getOrigin(); + bboxMin.setMin(pos); + bboxMax.setMax(pos); + } + btVector3 bboxExtent = bboxMax - bboxMin; + btVector3 offsetBase = btVector3(0, bboxExtent.y() * 1.1f, 0); + btVector3 offsetStep = btVector3(0, 0, bboxExtent.z() * 1.1f); + int numPhases = bc->m_phases.size(); + for (int iPhase = 0; iPhase < numPhases; ++iPhase) + { + float b = float(iPhase) / float(numPhases - 1); + btVector3 color0 = btVector3(1, 0, b); + btVector3 color1 = btVector3(0, 1, b); + btVector3 offset = offsetBase + offsetStep * (float(iPhase) - float(numPhases - 1) * 0.5); + debugDrawPhase(bc, constraints, bodies, iPhase, color0, color1, offset); + } + } +} + +static void initBatchedBodyDynamicFlags(btAlignedObjectArray<bool>* outBodyDynamicFlags, const btAlignedObjectArray<btSolverBody>& bodies) +{ + BT_PROFILE("initBatchedBodyDynamicFlags"); + btAlignedObjectArray<bool>& bodyDynamicFlags = *outBodyDynamicFlags; + bodyDynamicFlags.resizeNoInitialize(bodies.size()); + for (int i = 0; i < bodies.size(); ++i) + { + const btSolverBody& body = bodies[i]; + bodyDynamicFlags[i] = (body.internalGetInvMass().x() > btScalar(0)); + } +} + +static int runLengthEncodeConstraintInfo(btBatchedConstraintInfo* outConInfos, int numConstraints) +{ + BT_PROFILE("runLengthEncodeConstraintInfo"); + // detect and run-length encode constraint rows that repeat the same bodies + int iDest = 0; + int iSrc = 0; + while (iSrc < numConstraints) + { + const btBatchedConstraintInfo& srcConInfo = outConInfos[iSrc]; + btBatchedConstraintInfo& conInfo = outConInfos[iDest]; + conInfo.constraintIndex = iSrc; + conInfo.bodyIds[0] = srcConInfo.bodyIds[0]; + conInfo.bodyIds[1] = srcConInfo.bodyIds[1]; + while (iSrc < numConstraints && outConInfos[iSrc].bodyIds[0] == srcConInfo.bodyIds[0] && outConInfos[iSrc].bodyIds[1] == srcConInfo.bodyIds[1]) + { + ++iSrc; + } + conInfo.numConstraintRows = iSrc - conInfo.constraintIndex; + ++iDest; + } + return iDest; +} + +struct ReadSolverConstraintsLoop : public btIParallelForBody +{ + btBatchedConstraintInfo* m_outConInfos; + btConstraintArray* m_constraints; + + ReadSolverConstraintsLoop(btBatchedConstraintInfo* outConInfos, btConstraintArray* constraints) + { + m_outConInfos = outConInfos; + m_constraints = constraints; + } + void forLoop(int iBegin, int iEnd) const BT_OVERRIDE + { + for (int i = iBegin; i < iEnd; ++i) + { + btBatchedConstraintInfo& conInfo = m_outConInfos[i]; + const btSolverConstraint& con = m_constraints->at(i); + conInfo.bodyIds[0] = con.m_solverBodyIdA; + conInfo.bodyIds[1] = con.m_solverBodyIdB; + conInfo.constraintIndex = i; + conInfo.numConstraintRows = 1; + } + } +}; + +static int initBatchedConstraintInfo(btBatchedConstraintInfo* outConInfos, btConstraintArray* constraints) +{ + BT_PROFILE("initBatchedConstraintInfo"); + int numConstraints = constraints->size(); + bool inParallel = true; + if (inParallel) + { + ReadSolverConstraintsLoop loop(outConInfos, constraints); + int grainSize = 1200; + btParallelFor(0, numConstraints, grainSize, loop); + } + else + { + for (int i = 0; i < numConstraints; ++i) + { + btBatchedConstraintInfo& conInfo = outConInfos[i]; + const btSolverConstraint& con = constraints->at(i); + conInfo.bodyIds[0] = con.m_solverBodyIdA; + conInfo.bodyIds[1] = con.m_solverBodyIdB; + conInfo.constraintIndex = i; + conInfo.numConstraintRows = 1; + } + } + bool useRunLengthEncoding = true; + if (useRunLengthEncoding) + { + numConstraints = runLengthEncodeConstraintInfo(outConInfos, numConstraints); + } + return numConstraints; +} + +static void expandConstraintRowsInPlace(int* constraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows) +{ + BT_PROFILE("expandConstraintRowsInPlace"); + if (numConstraintRows > numConstraints) + { + // we walk the array in reverse to avoid overwriteing + for (int iCon = numConstraints - 1; iCon >= 0; --iCon) + { + const btBatchedConstraintInfo& conInfo = conInfos[iCon]; + int iBatch = constraintBatchIds[iCon]; + for (int i = conInfo.numConstraintRows - 1; i >= 0; --i) + { + int iDest = conInfo.constraintIndex + i; + btAssert(iDest >= iCon); + btAssert(iDest >= 0 && iDest < numConstraintRows); + constraintBatchIds[iDest] = iBatch; + } + } + } +} + +static void expandConstraintRows(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows) +{ + BT_PROFILE("expandConstraintRows"); + for (int iCon = 0; iCon < numConstraints; ++iCon) + { + const btBatchedConstraintInfo& conInfo = conInfos[iCon]; + int iBatch = srcConstraintBatchIds[iCon]; + for (int i = 0; i < conInfo.numConstraintRows; ++i) + { + int iDest = conInfo.constraintIndex + i; + btAssert(iDest >= iCon); + btAssert(iDest >= 0 && iDest < numConstraintRows); + destConstraintBatchIds[iDest] = iBatch; + } + } +} + +struct ExpandConstraintRowsLoop : public btIParallelForBody +{ + int* m_destConstraintBatchIds; + const int* m_srcConstraintBatchIds; + const btBatchedConstraintInfo* m_conInfos; + int m_numConstraintRows; + + ExpandConstraintRowsLoop(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraintRows) + { + m_destConstraintBatchIds = destConstraintBatchIds; + m_srcConstraintBatchIds = srcConstraintBatchIds; + m_conInfos = conInfos; + m_numConstraintRows = numConstraintRows; + } + void forLoop(int iBegin, int iEnd) const BT_OVERRIDE + { + expandConstraintRows(m_destConstraintBatchIds, m_srcConstraintBatchIds + iBegin, m_conInfos + iBegin, iEnd - iBegin, m_numConstraintRows); + } +}; + +static void expandConstraintRowsMt(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows) +{ + BT_PROFILE("expandConstraintRowsMt"); + ExpandConstraintRowsLoop loop(destConstraintBatchIds, srcConstraintBatchIds, conInfos, numConstraintRows); + int grainSize = 600; + btParallelFor(0, numConstraints, grainSize, loop); +} + +static void initBatchedConstraintInfoArray(btAlignedObjectArray<btBatchedConstraintInfo>* outConInfos, btConstraintArray* constraints) +{ + BT_PROFILE("initBatchedConstraintInfoArray"); + btAlignedObjectArray<btBatchedConstraintInfo>& conInfos = *outConInfos; + int numConstraints = constraints->size(); + conInfos.resizeNoInitialize(numConstraints); + + int newSize = initBatchedConstraintInfo(&outConInfos->at(0), constraints); + conInfos.resizeNoInitialize(newSize); +} + +static void mergeSmallBatches(btBatchInfo* batches, int iBeginBatch, int iEndBatch, int minBatchSize, int maxBatchSize) +{ + BT_PROFILE("mergeSmallBatches"); + for (int iBatch = iEndBatch - 1; iBatch >= iBeginBatch; --iBatch) + { + btBatchInfo& batch = batches[iBatch]; + if (batch.mergeIndex == kNoMerge && batch.numConstraints > 0 && batch.numConstraints < minBatchSize) + { + for (int iDestBatch = iBatch - 1; iDestBatch >= iBeginBatch; --iDestBatch) + { + btBatchInfo& destBatch = batches[iDestBatch]; + if (destBatch.mergeIndex == kNoMerge && (destBatch.numConstraints + batch.numConstraints) < maxBatchSize) + { + destBatch.numConstraints += batch.numConstraints; + batch.numConstraints = 0; + batch.mergeIndex = iDestBatch; + break; + } + } + } + } + // flatten mergeIndexes + // e.g. in case where A was merged into B and then B was merged into C, we need A to point to C instead of B + // Note: loop goes forward through batches because batches always merge from higher indexes to lower, + // so by going from low to high it reduces the amount of trail-following + for (int iBatch = iBeginBatch; iBatch < iEndBatch; ++iBatch) + { + btBatchInfo& batch = batches[iBatch]; + if (batch.mergeIndex != kNoMerge) + { + int iMergeDest = batches[batch.mergeIndex].mergeIndex; + // follow trail of merges to the end + while (iMergeDest != kNoMerge) + { + int iNext = batches[iMergeDest].mergeIndex; + if (iNext == kNoMerge) + { + batch.mergeIndex = iMergeDest; + break; + } + iMergeDest = iNext; + } + } + } +} + +static void updateConstraintBatchIdsForMerges(int* constraintBatchIds, int numConstraints, const btBatchInfo* batches, int numBatches) +{ + BT_PROFILE("updateConstraintBatchIdsForMerges"); + // update batchIds to account for merges + for (int i = 0; i < numConstraints; ++i) + { + int iBatch = constraintBatchIds[i]; + btAssert(iBatch < numBatches); + // if this constraint references a batch that was merged into another batch + if (batches[iBatch].mergeIndex != kNoMerge) + { + // update batchId + constraintBatchIds[i] = batches[iBatch].mergeIndex; + } + } +} + +struct UpdateConstraintBatchIdsForMergesLoop : public btIParallelForBody +{ + int* m_constraintBatchIds; + const btBatchInfo* m_batches; + int m_numBatches; + + UpdateConstraintBatchIdsForMergesLoop(int* constraintBatchIds, const btBatchInfo* batches, int numBatches) + { + m_constraintBatchIds = constraintBatchIds; + m_batches = batches; + m_numBatches = numBatches; + } + void forLoop(int iBegin, int iEnd) const BT_OVERRIDE + { + BT_PROFILE("UpdateConstraintBatchIdsForMergesLoop"); + updateConstraintBatchIdsForMerges(m_constraintBatchIds + iBegin, iEnd - iBegin, m_batches, m_numBatches); + } +}; + +static void updateConstraintBatchIdsForMergesMt(int* constraintBatchIds, int numConstraints, const btBatchInfo* batches, int numBatches) +{ + BT_PROFILE("updateConstraintBatchIdsForMergesMt"); + UpdateConstraintBatchIdsForMergesLoop loop(constraintBatchIds, batches, numBatches); + int grainSize = 800; + btParallelFor(0, numConstraints, grainSize, loop); +} + +inline bool BatchCompare(const btBatchedConstraints::Range& a, const btBatchedConstraints::Range& b) +{ + int lenA = a.end - a.begin; + int lenB = b.end - b.begin; + return lenA > lenB; +} + +static void writeOutConstraintIndicesForRangeOfBatches(btBatchedConstraints* bc, + const int* constraintBatchIds, + int numConstraints, + int* constraintIdPerBatch, + int batchBegin, + int batchEnd) +{ + BT_PROFILE("writeOutConstraintIndicesForRangeOfBatches"); + for (int iCon = 0; iCon < numConstraints; ++iCon) + { + int iBatch = constraintBatchIds[iCon]; + if (iBatch >= batchBegin && iBatch < batchEnd) + { + int iDestCon = constraintIdPerBatch[iBatch]; + constraintIdPerBatch[iBatch] = iDestCon + 1; + bc->m_constraintIndices[iDestCon] = iCon; + } + } +} + +struct WriteOutConstraintIndicesLoop : public btIParallelForBody +{ + btBatchedConstraints* m_batchedConstraints; + const int* m_constraintBatchIds; + int m_numConstraints; + int* m_constraintIdPerBatch; + int m_maxNumBatchesPerPhase; + + WriteOutConstraintIndicesLoop(btBatchedConstraints* bc, const int* constraintBatchIds, int numConstraints, int* constraintIdPerBatch, int maxNumBatchesPerPhase) + { + m_batchedConstraints = bc; + m_constraintBatchIds = constraintBatchIds; + m_numConstraints = numConstraints; + m_constraintIdPerBatch = constraintIdPerBatch; + m_maxNumBatchesPerPhase = maxNumBatchesPerPhase; + } + void forLoop(int iBegin, int iEnd) const BT_OVERRIDE + { + BT_PROFILE("WriteOutConstraintIndicesLoop"); + int batchBegin = iBegin * m_maxNumBatchesPerPhase; + int batchEnd = iEnd * m_maxNumBatchesPerPhase; + writeOutConstraintIndicesForRangeOfBatches(m_batchedConstraints, + m_constraintBatchIds, + m_numConstraints, + m_constraintIdPerBatch, + batchBegin, + batchEnd); + } +}; + +static void writeOutConstraintIndicesMt(btBatchedConstraints* bc, + const int* constraintBatchIds, + int numConstraints, + int* constraintIdPerBatch, + int maxNumBatchesPerPhase, + int numPhases) +{ + BT_PROFILE("writeOutConstraintIndicesMt"); + bool inParallel = true; + if (inParallel) + { + WriteOutConstraintIndicesLoop loop(bc, constraintBatchIds, numConstraints, constraintIdPerBatch, maxNumBatchesPerPhase); + btParallelFor(0, numPhases, 1, loop); + } + else + { + for (int iCon = 0; iCon < numConstraints; ++iCon) + { + int iBatch = constraintBatchIds[iCon]; + int iDestCon = constraintIdPerBatch[iBatch]; + constraintIdPerBatch[iBatch] = iDestCon + 1; + bc->m_constraintIndices[iDestCon] = iCon; + } + } +} + +static void writeGrainSizes(btBatchedConstraints* bc) +{ + typedef btBatchedConstraints::Range Range; + int numPhases = bc->m_phases.size(); + bc->m_phaseGrainSize.resizeNoInitialize(numPhases); + int numThreads = btGetTaskScheduler()->getNumThreads(); + for (int iPhase = 0; iPhase < numPhases; ++iPhase) + { + const Range& phase = bc->m_phases[iPhase]; + int numBatches = phase.end - phase.begin; + float grainSize = std::floor((0.25f * numBatches / float(numThreads)) + 0.0f); + bc->m_phaseGrainSize[iPhase] = btMax(1, int(grainSize)); + } +} + +static void writeOutBatches(btBatchedConstraints* bc, + const int* constraintBatchIds, + int numConstraints, + const btBatchInfo* batches, + int* batchWork, + int maxNumBatchesPerPhase, + int numPhases) +{ + BT_PROFILE("writeOutBatches"); + typedef btBatchedConstraints::Range Range; + bc->m_constraintIndices.reserve(numConstraints); + bc->m_batches.resizeNoInitialize(0); + bc->m_phases.resizeNoInitialize(0); + + //int maxNumBatches = numPhases * maxNumBatchesPerPhase; + { + int* constraintIdPerBatch = batchWork; // for each batch, keep an index into the next available slot in the m_constraintIndices array + int iConstraint = 0; + for (int iPhase = 0; iPhase < numPhases; ++iPhase) + { + int curPhaseBegin = bc->m_batches.size(); + int iBegin = iPhase * maxNumBatchesPerPhase; + int iEnd = iBegin + maxNumBatchesPerPhase; + for (int i = iBegin; i < iEnd; ++i) + { + const btBatchInfo& batch = batches[i]; + int curBatchBegin = iConstraint; + constraintIdPerBatch[i] = curBatchBegin; // record the start of each batch in m_constraintIndices array + int numConstraints = batch.numConstraints; + iConstraint += numConstraints; + if (numConstraints > 0) + { + bc->m_batches.push_back(Range(curBatchBegin, iConstraint)); + } + } + // if any batches were emitted this phase, + if (bc->m_batches.size() > curPhaseBegin) + { + // output phase + bc->m_phases.push_back(Range(curPhaseBegin, bc->m_batches.size())); + } + } + + btAssert(iConstraint == numConstraints); + bc->m_constraintIndices.resizeNoInitialize(numConstraints); + writeOutConstraintIndicesMt(bc, constraintBatchIds, numConstraints, constraintIdPerBatch, maxNumBatchesPerPhase, numPhases); + } + // for each phase + for (int iPhase = 0; iPhase < bc->m_phases.size(); ++iPhase) + { + // sort the batches from largest to smallest (can be helpful to some task schedulers) + const Range& curBatches = bc->m_phases[iPhase]; + bc->m_batches.quickSortInternal(BatchCompare, curBatches.begin, curBatches.end - 1); + } + bc->m_phaseOrder.resize(bc->m_phases.size()); + for (int i = 0; i < bc->m_phases.size(); ++i) + { + bc->m_phaseOrder[i] = i; + } + writeGrainSizes(bc); +} + +// +// PreallocatedMemoryHelper -- helper object for allocating a number of chunks of memory in a single contiguous block. +// It is generally more efficient to do a single larger allocation than many smaller allocations. +// +// Example Usage: +// +// btVector3* bodyPositions = NULL; +// btBatchedConstraintInfo* conInfos = NULL; +// { +// PreallocatedMemoryHelper<8> memHelper; +// memHelper.addChunk( (void**) &bodyPositions, sizeof( btVector3 ) * bodies.size() ); +// memHelper.addChunk( (void**) &conInfos, sizeof( btBatchedConstraintInfo ) * numConstraints ); +// void* memPtr = malloc( memHelper.getSizeToAllocate() ); // allocate the memory +// memHelper.setChunkPointers( memPtr ); // update pointers to chunks +// } +template <int N> +class PreallocatedMemoryHelper +{ + struct Chunk + { + void** ptr; + size_t size; + }; + Chunk m_chunks[N]; + int m_numChunks; + +public: + PreallocatedMemoryHelper() { m_numChunks = 0; } + void addChunk(void** ptr, size_t sz) + { + btAssert(m_numChunks < N); + if (m_numChunks < N) + { + Chunk& chunk = m_chunks[m_numChunks]; + chunk.ptr = ptr; + chunk.size = sz; + m_numChunks++; + } + } + size_t getSizeToAllocate() const + { + size_t totalSize = 0; + for (int i = 0; i < m_numChunks; ++i) + { + totalSize += m_chunks[i].size; + } + return totalSize; + } + void setChunkPointers(void* mem) const + { + size_t totalSize = 0; + for (int i = 0; i < m_numChunks; ++i) + { + const Chunk& chunk = m_chunks[i]; + char* chunkPtr = static_cast<char*>(mem) + totalSize; + *chunk.ptr = chunkPtr; + totalSize += chunk.size; + } + } +}; + +static btVector3 findMaxDynamicConstraintExtent( + btVector3* bodyPositions, + bool* bodyDynamicFlags, + btBatchedConstraintInfo* conInfos, + int numConstraints, + int numBodies) +{ + BT_PROFILE("findMaxDynamicConstraintExtent"); + btVector3 consExtent = btVector3(1, 1, 1) * 0.001; + for (int iCon = 0; iCon < numConstraints; ++iCon) + { + const btBatchedConstraintInfo& con = conInfos[iCon]; + int iBody0 = con.bodyIds[0]; + int iBody1 = con.bodyIds[1]; + btAssert(iBody0 >= 0 && iBody0 < numBodies); + btAssert(iBody1 >= 0 && iBody1 < numBodies); + // is it a dynamic constraint? + if (bodyDynamicFlags[iBody0] && bodyDynamicFlags[iBody1]) + { + btVector3 delta = bodyPositions[iBody1] - bodyPositions[iBody0]; + consExtent.setMax(delta.absolute()); + } + } + return consExtent; +} + +struct btIntVec3 +{ + int m_ints[3]; + + SIMD_FORCE_INLINE const int& operator[](int i) const { return m_ints[i]; } + SIMD_FORCE_INLINE int& operator[](int i) { return m_ints[i]; } +}; + +struct AssignConstraintsToGridBatchesParams +{ + bool* bodyDynamicFlags; + btIntVec3* bodyGridCoords; + int numBodies; + btBatchedConstraintInfo* conInfos; + int* constraintBatchIds; + btIntVec3 gridChunkDim; + int maxNumBatchesPerPhase; + int numPhases; + int phaseMask; + + AssignConstraintsToGridBatchesParams() + { + memset(this, 0, sizeof(*this)); + } +}; + +static void assignConstraintsToGridBatches(const AssignConstraintsToGridBatchesParams& params, int iConBegin, int iConEnd) +{ + BT_PROFILE("assignConstraintsToGridBatches"); + // (can be done in parallel) + for (int iCon = iConBegin; iCon < iConEnd; ++iCon) + { + const btBatchedConstraintInfo& con = params.conInfos[iCon]; + int iBody0 = con.bodyIds[0]; + int iBody1 = con.bodyIds[1]; + int iPhase = iCon; //iBody0; // pseudorandom choice to distribute evenly amongst phases + iPhase &= params.phaseMask; + int gridCoord[3]; + // is it a dynamic constraint? + if (params.bodyDynamicFlags[iBody0] && params.bodyDynamicFlags[iBody1]) + { + const btIntVec3& body0Coords = params.bodyGridCoords[iBody0]; + const btIntVec3& body1Coords = params.bodyGridCoords[iBody1]; + // for each dimension x,y,z, + for (int i = 0; i < 3; ++i) + { + int coordMin = btMin(body0Coords.m_ints[i], body1Coords.m_ints[i]); + int coordMax = btMax(body0Coords.m_ints[i], body1Coords.m_ints[i]); + if (coordMin != coordMax) + { + btAssert(coordMax == coordMin + 1); + if ((coordMin & 1) == 0) + { + iPhase &= ~(1 << i); // force bit off + } + else + { + iPhase |= (1 << i); // force bit on + iPhase &= params.phaseMask; + } + } + gridCoord[i] = coordMin; + } + } + else + { + if (!params.bodyDynamicFlags[iBody0]) + { + iBody0 = con.bodyIds[1]; + } + btAssert(params.bodyDynamicFlags[iBody0]); + const btIntVec3& body0Coords = params.bodyGridCoords[iBody0]; + // for each dimension x,y,z, + for (int i = 0; i < 3; ++i) + { + gridCoord[i] = body0Coords.m_ints[i]; + } + } + // calculate chunk coordinates + int chunkCoord[3]; + btIntVec3 gridChunkDim = params.gridChunkDim; + // for each dimension x,y,z, + for (int i = 0; i < 3; ++i) + { + int coordOffset = (iPhase >> i) & 1; + chunkCoord[i] = (gridCoord[i] - coordOffset) / 2; + btClamp(chunkCoord[i], 0, gridChunkDim[i] - 1); + btAssert(chunkCoord[i] < gridChunkDim[i]); + } + int iBatch = iPhase * params.maxNumBatchesPerPhase + chunkCoord[0] + chunkCoord[1] * gridChunkDim[0] + chunkCoord[2] * gridChunkDim[0] * gridChunkDim[1]; + btAssert(iBatch >= 0 && iBatch < params.maxNumBatchesPerPhase * params.numPhases); + params.constraintBatchIds[iCon] = iBatch; + } +} + +struct AssignConstraintsToGridBatchesLoop : public btIParallelForBody +{ + const AssignConstraintsToGridBatchesParams* m_params; + + AssignConstraintsToGridBatchesLoop(const AssignConstraintsToGridBatchesParams& params) + { + m_params = ¶ms; + } + void forLoop(int iBegin, int iEnd) const BT_OVERRIDE + { + assignConstraintsToGridBatches(*m_params, iBegin, iEnd); + } +}; + +// +// setupSpatialGridBatchesMt -- generate batches using a uniform 3D grid +// +/* + +Bodies are treated as 3D points at their center of mass. We only consider dynamic bodies at this stage, +because only dynamic bodies are mutated when a constraint is solved, thus subject to race conditions. + +1. Compute a bounding box around all dynamic bodies +2. Compute the maximum extent of all dynamic constraints. Each dynamic constraint is treated as a line segment, and we need the size of + box that will fully enclose any single dynamic constraint + +3. Establish the cell size of our grid, the cell size in each dimension must be at least as large as the dynamic constraints max-extent, + so that no dynamic constraint can span more than 2 cells of our grid on any axis of the grid. The cell size should be adjusted + larger in order to keep the total number of cells from being excessively high + +Key idea: Given that each constraint spans 1 or 2 grid cells in each dimension, we can handle all constraints by processing + in chunks of 2x2x2 cells with 8 different 1-cell offsets ((0,0,0),(0,0,1),(0,1,0),(0,1,1),(1,0,0)...). + For each of the 8 offsets, we create a phase, and for each 2x2x2 chunk with dynamic constraints becomes a batch in that phase. + +4. Once the grid is established, we can calculate for each constraint which phase and batch it belongs in. + +5. Do a merge small batches on the batches of each phase separately, to try to even out the sizes of batches + +Optionally, we can "collapse" one dimension of our 3D grid to turn it into a 2D grid, which reduces the number of phases +to 4. With fewer phases, there are more constraints per phase and this makes it easier to create batches of a useful size. +*/ +// +static void setupSpatialGridBatchesMt( + btBatchedConstraints* batchedConstraints, + btAlignedObjectArray<char>* scratchMemory, + btConstraintArray* constraints, + const btAlignedObjectArray<btSolverBody>& bodies, + int minBatchSize, + int maxBatchSize, + bool use2DGrid) +{ + BT_PROFILE("setupSpatialGridBatchesMt"); + const int numPhases = 8; + int numConstraints = constraints->size(); + int numConstraintRows = constraints->size(); + + const int maxGridChunkCount = 128; + int allocNumBatchesPerPhase = maxGridChunkCount; + int minNumBatchesPerPhase = 16; + int allocNumBatches = allocNumBatchesPerPhase * numPhases; + + btVector3* bodyPositions = NULL; + bool* bodyDynamicFlags = NULL; + btIntVec3* bodyGridCoords = NULL; + btBatchInfo* batches = NULL; + int* batchWork = NULL; + btBatchedConstraintInfo* conInfos = NULL; + int* constraintBatchIds = NULL; + int* constraintRowBatchIds = NULL; + { + PreallocatedMemoryHelper<10> memHelper; + memHelper.addChunk((void**)&bodyPositions, sizeof(btVector3) * bodies.size()); + memHelper.addChunk((void**)&bodyDynamicFlags, sizeof(bool) * bodies.size()); + memHelper.addChunk((void**)&bodyGridCoords, sizeof(btIntVec3) * bodies.size()); + memHelper.addChunk((void**)&batches, sizeof(btBatchInfo) * allocNumBatches); + memHelper.addChunk((void**)&batchWork, sizeof(int) * allocNumBatches); + memHelper.addChunk((void**)&conInfos, sizeof(btBatchedConstraintInfo) * numConstraints); + memHelper.addChunk((void**)&constraintBatchIds, sizeof(int) * numConstraints); + memHelper.addChunk((void**)&constraintRowBatchIds, sizeof(int) * numConstraintRows); + size_t scratchSize = memHelper.getSizeToAllocate(); + // if we need to reallocate + if (scratchMemory->capacity() < scratchSize) + { + // allocate 6.25% extra to avoid repeated reallocs + scratchMemory->reserve(scratchSize + scratchSize / 16); + } + scratchMemory->resizeNoInitialize(scratchSize); + char* memPtr = &scratchMemory->at(0); + memHelper.setChunkPointers(memPtr); + } + + numConstraints = initBatchedConstraintInfo(conInfos, constraints); + + // compute bounding box around all dynamic bodies + // (could be done in parallel) + btVector3 bboxMin(BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT); + btVector3 bboxMax = -bboxMin; + //int dynamicBodyCount = 0; + for (int i = 0; i < bodies.size(); ++i) + { + const btSolverBody& body = bodies[i]; + btVector3 bodyPos = body.getWorldTransform().getOrigin(); + bool isDynamic = (body.internalGetInvMass().x() > btScalar(0)); + bodyPositions[i] = bodyPos; + bodyDynamicFlags[i] = isDynamic; + if (isDynamic) + { + //dynamicBodyCount++; + bboxMin.setMin(bodyPos); + bboxMax.setMax(bodyPos); + } + } + + // find max extent of all dynamic constraints + // (could be done in parallel) + btVector3 consExtent = findMaxDynamicConstraintExtent(bodyPositions, bodyDynamicFlags, conInfos, numConstraints, bodies.size()); + + btVector3 gridExtent = bboxMax - bboxMin; + + gridExtent.setMax(btVector3(btScalar(1), btScalar(1), btScalar(1))); + + btVector3 gridCellSize = consExtent; + int gridDim[3]; + gridDim[0] = int(1.0 + gridExtent.x() / gridCellSize.x()); + gridDim[1] = int(1.0 + gridExtent.y() / gridCellSize.y()); + gridDim[2] = int(1.0 + gridExtent.z() / gridCellSize.z()); + + // if we can collapse an axis, it will cut our number of phases in half which could be more efficient + int phaseMask = 7; + bool collapseAxis = use2DGrid; + if (collapseAxis) + { + // pick the smallest axis to collapse, leaving us with the greatest number of cells in our grid + int iAxisToCollapse = 0; + int axisDim = gridDim[iAxisToCollapse]; + //for each dimension + for (int i = 0; i < 3; ++i) + { + if (gridDim[i] < axisDim) + { + iAxisToCollapse = i; + axisDim = gridDim[i]; + } + } + // collapse it + gridCellSize[iAxisToCollapse] = gridExtent[iAxisToCollapse] * 2.0f; + phaseMask &= ~(1 << iAxisToCollapse); + } + + int numGridChunks = 0; + btIntVec3 gridChunkDim; // each chunk is 2x2x2 group of cells + while (true) + { + gridDim[0] = int(1.0 + gridExtent.x() / gridCellSize.x()); + gridDim[1] = int(1.0 + gridExtent.y() / gridCellSize.y()); + gridDim[2] = int(1.0 + gridExtent.z() / gridCellSize.z()); + gridChunkDim[0] = btMax(1, (gridDim[0] + 0) / 2); + gridChunkDim[1] = btMax(1, (gridDim[1] + 0) / 2); + gridChunkDim[2] = btMax(1, (gridDim[2] + 0) / 2); + numGridChunks = gridChunkDim[0] * gridChunkDim[1] * gridChunkDim[2]; + float nChunks = float(gridChunkDim[0]) * float(gridChunkDim[1]) * float(gridChunkDim[2]); // suceptible to integer overflow + if (numGridChunks <= maxGridChunkCount && nChunks <= maxGridChunkCount) + { + break; + } + gridCellSize *= 1.25; // should roughly cut numCells in half + } + btAssert(numGridChunks <= maxGridChunkCount); + int maxNumBatchesPerPhase = numGridChunks; + + // for each dynamic body, compute grid coords + btVector3 invGridCellSize = btVector3(1, 1, 1) / gridCellSize; + // (can be done in parallel) + for (int iBody = 0; iBody < bodies.size(); ++iBody) + { + btIntVec3& coords = bodyGridCoords[iBody]; + if (bodyDynamicFlags[iBody]) + { + btVector3 v = (bodyPositions[iBody] - bboxMin) * invGridCellSize; + coords.m_ints[0] = int(v.x()); + coords.m_ints[1] = int(v.y()); + coords.m_ints[2] = int(v.z()); + btAssert(coords.m_ints[0] >= 0 && coords.m_ints[0] < gridDim[0]); + btAssert(coords.m_ints[1] >= 0 && coords.m_ints[1] < gridDim[1]); + btAssert(coords.m_ints[2] >= 0 && coords.m_ints[2] < gridDim[2]); + } + else + { + coords.m_ints[0] = -1; + coords.m_ints[1] = -1; + coords.m_ints[2] = -1; + } + } + + for (int iPhase = 0; iPhase < numPhases; ++iPhase) + { + int batchBegin = iPhase * maxNumBatchesPerPhase; + int batchEnd = batchBegin + maxNumBatchesPerPhase; + for (int iBatch = batchBegin; iBatch < batchEnd; ++iBatch) + { + btBatchInfo& batch = batches[iBatch]; + batch = btBatchInfo(); + } + } + + { + AssignConstraintsToGridBatchesParams params; + params.bodyDynamicFlags = bodyDynamicFlags; + params.bodyGridCoords = bodyGridCoords; + params.numBodies = bodies.size(); + params.conInfos = conInfos; + params.constraintBatchIds = constraintBatchIds; + params.gridChunkDim = gridChunkDim; + params.maxNumBatchesPerPhase = maxNumBatchesPerPhase; + params.numPhases = numPhases; + params.phaseMask = phaseMask; + bool inParallel = true; + if (inParallel) + { + AssignConstraintsToGridBatchesLoop loop(params); + int grainSize = 250; + btParallelFor(0, numConstraints, grainSize, loop); + } + else + { + assignConstraintsToGridBatches(params, 0, numConstraints); + } + } + for (int iCon = 0; iCon < numConstraints; ++iCon) + { + const btBatchedConstraintInfo& con = conInfos[iCon]; + int iBatch = constraintBatchIds[iCon]; + btBatchInfo& batch = batches[iBatch]; + batch.numConstraints += con.numConstraintRows; + } + + for (int iPhase = 0; iPhase < numPhases; ++iPhase) + { + // if phase is legit, + if (iPhase == (iPhase & phaseMask)) + { + int iBeginBatch = iPhase * maxNumBatchesPerPhase; + int iEndBatch = iBeginBatch + maxNumBatchesPerPhase; + mergeSmallBatches(batches, iBeginBatch, iEndBatch, minBatchSize, maxBatchSize); + } + } + // all constraints have been assigned a batchId + updateConstraintBatchIdsForMergesMt(constraintBatchIds, numConstraints, batches, maxNumBatchesPerPhase * numPhases); + + if (numConstraintRows > numConstraints) + { + expandConstraintRowsMt(&constraintRowBatchIds[0], &constraintBatchIds[0], &conInfos[0], numConstraints, numConstraintRows); + } + else + { + constraintRowBatchIds = constraintBatchIds; + } + + writeOutBatches(batchedConstraints, constraintRowBatchIds, numConstraintRows, batches, batchWork, maxNumBatchesPerPhase, numPhases); + btAssert(batchedConstraints->validate(constraints, bodies)); +} + +static void setupSingleBatch( + btBatchedConstraints* bc, + int numConstraints) +{ + BT_PROFILE("setupSingleBatch"); + typedef btBatchedConstraints::Range Range; + + bc->m_constraintIndices.resize(numConstraints); + for (int i = 0; i < numConstraints; ++i) + { + bc->m_constraintIndices[i] = i; + } + + bc->m_batches.resizeNoInitialize(0); + bc->m_phases.resizeNoInitialize(0); + bc->m_phaseOrder.resizeNoInitialize(0); + bc->m_phaseGrainSize.resizeNoInitialize(0); + + if (numConstraints > 0) + { + bc->m_batches.push_back(Range(0, numConstraints)); + bc->m_phases.push_back(Range(0, 1)); + bc->m_phaseOrder.push_back(0); + bc->m_phaseGrainSize.push_back(1); + } +} + +void btBatchedConstraints::setup( + btConstraintArray* constraints, + const btAlignedObjectArray<btSolverBody>& bodies, + BatchingMethod batchingMethod, + int minBatchSize, + int maxBatchSize, + btAlignedObjectArray<char>* scratchMemory) +{ + if (constraints->size() >= minBatchSize * 4) + { + bool use2DGrid = batchingMethod == BATCHING_METHOD_SPATIAL_GRID_2D; + setupSpatialGridBatchesMt(this, scratchMemory, constraints, bodies, minBatchSize, maxBatchSize, use2DGrid); + if (s_debugDrawBatches) + { + debugDrawAllBatches(this, constraints, bodies); + } + } + else + { + setupSingleBatch(this, constraints->size()); + } +} |