diff options
author | Sebastian Parborg <darkdefende@gmail.com> | 2020-09-02 21:41:30 +0300 |
---|---|---|
committer | Sebastian Parborg <darkdefende@gmail.com> | 2020-09-02 21:41:30 +0300 |
commit | 4446c3a593c51603e135e38951607b9b668ddec5 (patch) | |
tree | 9552dbd903b4fd05ea740e1bba9b1b87d97414a1 /extern/bullet2/src/LinearMath/btThreads.cpp | |
parent | 6f6f6ee18695dad66ad8aa0eb2bcab72501df597 (diff) |
Sync Bullet to upstream
This syncs Bullet to the latest upstream git version as of writing this.
(commit 47b0259b9700455022b5cf79b651cc1dc71dd59e).
Diffstat (limited to 'extern/bullet2/src/LinearMath/btThreads.cpp')
-rw-r--r-- | extern/bullet2/src/LinearMath/btThreads.cpp | 792 |
1 files changed, 792 insertions, 0 deletions
diff --git a/extern/bullet2/src/LinearMath/btThreads.cpp b/extern/bullet2/src/LinearMath/btThreads.cpp new file mode 100644 index 00000000000..69a86799fad --- /dev/null +++ b/extern/bullet2/src/LinearMath/btThreads.cpp @@ -0,0 +1,792 @@ +/* +Copyright (c) 2003-2014 Erwin Coumans http://bullet.googlecode.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "btThreads.h" +#include "btQuickprof.h" +#include <algorithm> // for min and max + +#if BT_USE_OPENMP && BT_THREADSAFE + +#include <omp.h> + +#endif // #if BT_USE_OPENMP && BT_THREADSAFE + +#if BT_USE_PPL && BT_THREADSAFE + +// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later) +#include <ppl.h> // if you get a compile error here, check whether your version of Visual Studio includes PPL +// Visual Studio 2010 and later should come with it +#include <concrtrm.h> // for GetProcessorCount() + +#endif // #if BT_USE_PPL && BT_THREADSAFE + +#if BT_USE_TBB && BT_THREADSAFE + +// use Intel Threading Building Blocks for thread management +#define __TBB_NO_IMPLICIT_LINKAGE 1 +#include <tbb/tbb.h> +#include <tbb/task_scheduler_init.h> +#include <tbb/parallel_for.h> +#include <tbb/blocked_range.h> + +#endif // #if BT_USE_TBB && BT_THREADSAFE + +#if BT_THREADSAFE +// +// Lightweight spin-mutex based on atomics +// Using ordinary system-provided mutexes like Windows critical sections was noticeably slower +// presumably because when it fails to lock at first it would sleep the thread and trigger costly +// context switching. +// + +#if __cplusplus >= 201103L + +// for anything claiming full C++11 compliance, use C++11 atomics +// on GCC or Clang you need to compile with -std=c++11 +#define USE_CPP11_ATOMICS 1 + +#elif defined(_MSC_VER) + +// on MSVC, use intrinsics instead +#define USE_MSVC_INTRINSICS 1 + +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + +// available since GCC 4.7 and some versions of clang +// todo: check for clang +#define USE_GCC_BUILTIN_ATOMICS 1 + +#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) + +// available since GCC 4.1 +#define USE_GCC_BUILTIN_ATOMICS_OLD 1 + +#endif + +#if USE_CPP11_ATOMICS + +#include <atomic> +#include <thread> + +#define THREAD_LOCAL_STATIC thread_local static + +bool btSpinMutex::tryLock() +{ + std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock); + int expected = 0; + return std::atomic_compare_exchange_weak_explicit(aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire); +} + +void btSpinMutex::lock() +{ + // note: this lock does not sleep the thread. + while (!tryLock()) + { + // spin + } +} + +void btSpinMutex::unlock() +{ + std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock); + std::atomic_store_explicit(aDest, int(0), std::memory_order_release); +} + +#elif USE_MSVC_INTRINSICS + +#define WIN32_LEAN_AND_MEAN + +#include <windows.h> +#include <intrin.h> + +#define THREAD_LOCAL_STATIC __declspec(thread) static + +bool btSpinMutex::tryLock() +{ + volatile long* aDest = reinterpret_cast<long*>(&mLock); + return (0 == _InterlockedCompareExchange(aDest, 1, 0)); +} + +void btSpinMutex::lock() +{ + // note: this lock does not sleep the thread + while (!tryLock()) + { + // spin + } +} + +void btSpinMutex::unlock() +{ + volatile long* aDest = reinterpret_cast<long*>(&mLock); + _InterlockedExchange(aDest, 0); +} + +#elif USE_GCC_BUILTIN_ATOMICS + +#define THREAD_LOCAL_STATIC static __thread + +bool btSpinMutex::tryLock() +{ + int expected = 0; + bool weak = false; + const int memOrderSuccess = __ATOMIC_ACQ_REL; + const int memOrderFail = __ATOMIC_ACQUIRE; + return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail); +} + +void btSpinMutex::lock() +{ + // note: this lock does not sleep the thread + while (!tryLock()) + { + // spin + } +} + +void btSpinMutex::unlock() +{ + __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE); +} + +#elif USE_GCC_BUILTIN_ATOMICS_OLD + +#define THREAD_LOCAL_STATIC static __thread + +bool btSpinMutex::tryLock() +{ + return __sync_bool_compare_and_swap(&mLock, int(0), int(1)); +} + +void btSpinMutex::lock() +{ + // note: this lock does not sleep the thread + while (!tryLock()) + { + // spin + } +} + +void btSpinMutex::unlock() +{ + // write 0 + __sync_fetch_and_and(&mLock, int(0)); +} + +#else //#elif USE_MSVC_INTRINSICS + +#error "no threading primitives defined -- unknown platform" + +#endif //#else //#elif USE_MSVC_INTRINSICS + +#else //#if BT_THREADSAFE + +// These should not be called ever +void btSpinMutex::lock() +{ + btAssert(!"unimplemented btSpinMutex::lock() called"); +} + +void btSpinMutex::unlock() +{ + btAssert(!"unimplemented btSpinMutex::unlock() called"); +} + +bool btSpinMutex::tryLock() +{ + btAssert(!"unimplemented btSpinMutex::tryLock() called"); + return true; +} + +#define THREAD_LOCAL_STATIC static + +#endif // #else //#if BT_THREADSAFE + +struct ThreadsafeCounter +{ + unsigned int mCounter; + btSpinMutex mMutex; + + ThreadsafeCounter() + { + mCounter = 0; + --mCounter; // first count should come back 0 + } + + unsigned int getNext() + { + // no need to optimize this with atomics, it is only called ONCE per thread! + mMutex.lock(); + mCounter++; + if (mCounter >= BT_MAX_THREAD_COUNT) + { + btAssert(!"thread counter exceeded"); + // wrap back to the first worker index + mCounter = 1; + } + unsigned int val = mCounter; + mMutex.unlock(); + return val; + } +}; + +static btITaskScheduler* gBtTaskScheduler=0; +static int gThreadsRunningCounter = 0; // useful for detecting if we are trying to do nested parallel-for calls +static btSpinMutex gThreadsRunningCounterMutex; +static ThreadsafeCounter gThreadCounter; + +// +// BT_DETECT_BAD_THREAD_INDEX tries to detect when there are multiple threads assigned the same thread index. +// +// BT_DETECT_BAD_THREAD_INDEX is a developer option to test if +// certain assumptions about how the task scheduler manages its threads +// holds true. +// The main assumption is: +// - when the threadpool is resized, the task scheduler either +// 1. destroys all worker threads and creates all new ones in the correct number, OR +// 2. never destroys a worker thread +// +// We make that assumption because we can't easily enumerate the worker threads of a task scheduler +// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed, +// so we can't tell when a thread-index is no longer being used. +// We allocate thread-indexes as needed with a sequential global thread counter. +// +// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but +// continues to re-use other threads and the application repeatedly resizes the thread pool of the +// task scheduler. +// In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we +// wrap the thread counter back to 1. This should only happen if the worker threads have all been +// destroyed and re-created. +// +// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now, +// but could be adapted to work with pthreads +#define BT_DETECT_BAD_THREAD_INDEX 0 + +#if BT_DETECT_BAD_THREAD_INDEX + +typedef DWORD ThreadId_t; +const static ThreadId_t kInvalidThreadId = 0; +ThreadId_t gDebugThreadIds[BT_MAX_THREAD_COUNT]; + +static ThreadId_t getDebugThreadId() +{ + return GetCurrentThreadId(); +} + +#endif // #if BT_DETECT_BAD_THREAD_INDEX + +// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT) +unsigned int btGetCurrentThreadIndex() +{ + const unsigned int kNullIndex = ~0U; + THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex; + if (sThreadIndex == kNullIndex) + { + sThreadIndex = gThreadCounter.getNext(); + btAssert(sThreadIndex < BT_MAX_THREAD_COUNT); + } +#if BT_DETECT_BAD_THREAD_INDEX + if (gBtTaskScheduler && sThreadIndex > 0) + { + ThreadId_t tid = getDebugThreadId(); + // if not set + if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId) + { + // set it + gDebugThreadIds[sThreadIndex] = tid; + } + else + { + if (gDebugThreadIds[sThreadIndex] != tid) + { + // this could indicate the task scheduler is breaking our assumptions about + // how threads are managed when threadpool is resized + btAssert(!"there are 2 or more threads with the same thread-index!"); + __debugbreak(); + } + } + } +#endif // #if BT_DETECT_BAD_THREAD_INDEX + return sThreadIndex; +} + +bool btIsMainThread() +{ + return btGetCurrentThreadIndex() == 0; +} + +void btResetThreadIndexCounter() +{ + // for when all current worker threads are destroyed + btAssert(btIsMainThread()); + gThreadCounter.mCounter = 0; +} + +btITaskScheduler::btITaskScheduler(const char* name) +{ + m_name = name; + m_savedThreadCounter = 0; + m_isActive = false; +} + +void btITaskScheduler::activate() +{ + // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler. + // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1) + // The thread-indexes need to be unique amongst the threads that can be running simultaneously. + // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different + // task schedulers to share the same thread index because they can't be running at the same time. + // So each task scheduler needs to keep its own thread counter value + if (!m_isActive) + { + gThreadCounter.mCounter = m_savedThreadCounter; // restore saved thread counter + m_isActive = true; + } +} + +void btITaskScheduler::deactivate() +{ + if (m_isActive) + { + m_savedThreadCounter = gThreadCounter.mCounter; // save thread counter + m_isActive = false; + } +} + +void btPushThreadsAreRunning() +{ + gThreadsRunningCounterMutex.lock(); + gThreadsRunningCounter++; + gThreadsRunningCounterMutex.unlock(); +} + +void btPopThreadsAreRunning() +{ + gThreadsRunningCounterMutex.lock(); + gThreadsRunningCounter--; + gThreadsRunningCounterMutex.unlock(); +} + +bool btThreadsAreRunning() +{ + return gThreadsRunningCounter != 0; +} + +void btSetTaskScheduler(btITaskScheduler* ts) +{ + int threadId = btGetCurrentThreadIndex(); // make sure we call this on main thread at least once before any workers run + if (threadId != 0) + { + btAssert(!"btSetTaskScheduler must be called from the main thread!"); + return; + } + if (gBtTaskScheduler) + { + // deactivate old task scheduler + gBtTaskScheduler->deactivate(); + } + gBtTaskScheduler = ts; + if (ts) + { + // activate new task scheduler + ts->activate(); + } +} + +btITaskScheduler* btGetTaskScheduler() +{ + return gBtTaskScheduler; +} + +void btParallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) +{ +#if BT_THREADSAFE + +#if BT_DETECT_BAD_THREAD_INDEX + if (!btThreadsAreRunning()) + { + // clear out thread ids + for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i) + { + gDebugThreadIds[i] = kInvalidThreadId; + } + } +#endif // #if BT_DETECT_BAD_THREAD_INDEX + + btAssert(gBtTaskScheduler != NULL); // call btSetTaskScheduler() with a valid task scheduler first! + gBtTaskScheduler->parallelFor(iBegin, iEnd, grainSize, body); + +#else // #if BT_THREADSAFE + + // non-parallel version of btParallelFor + btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE"); + body.forLoop(iBegin, iEnd); + +#endif // #if BT_THREADSAFE +} + +btScalar btParallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) +{ +#if BT_THREADSAFE + +#if BT_DETECT_BAD_THREAD_INDEX + if (!btThreadsAreRunning()) + { + // clear out thread ids + for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i) + { + gDebugThreadIds[i] = kInvalidThreadId; + } + } +#endif // #if BT_DETECT_BAD_THREAD_INDEX + + btAssert(gBtTaskScheduler != NULL); // call btSetTaskScheduler() with a valid task scheduler first! + return gBtTaskScheduler->parallelSum(iBegin, iEnd, grainSize, body); + +#else // #if BT_THREADSAFE + + // non-parallel version of btParallelSum + btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE"); + return body.sumLoop(iBegin, iEnd); + +#endif //#else // #if BT_THREADSAFE +} + +/// +/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler +/// (really just useful for testing performance of single threaded vs multi) +/// +class btTaskSchedulerSequential : public btITaskScheduler +{ +public: + btTaskSchedulerSequential() : btITaskScheduler("Sequential") {} + virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; } + virtual int getNumThreads() const BT_OVERRIDE { return 1; } + virtual void setNumThreads(int numThreads) BT_OVERRIDE {} + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_sequential"); + body.forLoop(iBegin, iEnd); + } + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelSum_sequential"); + return body.sumLoop(iBegin, iEnd); + } +}; + +#if BT_USE_OPENMP && BT_THREADSAFE +/// +/// btTaskSchedulerOpenMP -- wrapper around OpenMP task scheduler +/// +class btTaskSchedulerOpenMP : public btITaskScheduler +{ + int m_numThreads; + +public: + btTaskSchedulerOpenMP() : btITaskScheduler("OpenMP") + { + m_numThreads = 0; + } + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return omp_get_max_threads(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads(int numThreads) BT_OVERRIDE + { + // With OpenMP, because it is a standard with various implementations, we can't + // know for sure if every implementation has the same behavior of destroying all + // previous threads when resizing the threadpool + m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads)); + omp_set_num_threads(1); // hopefully, all previous threads get destroyed here + omp_set_num_threads(m_numThreads); + m_savedThreadCounter = 0; + if (m_isActive) + { + btResetThreadIndexCounter(); + } + } + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_OpenMP"); + btPushThreadsAreRunning(); +#pragma omp parallel for schedule(static, 1) + for (int i = iBegin; i < iEnd; i += grainSize) + { + BT_PROFILE("OpenMP_forJob"); + body.forLoop(i, (std::min)(i + grainSize, iEnd)); + } + btPopThreadsAreRunning(); + } + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_OpenMP"); + btPushThreadsAreRunning(); + btScalar sum = btScalar(0); +#pragma omp parallel for schedule(static, 1) reduction(+ \ + : sum) + for (int i = iBegin; i < iEnd; i += grainSize) + { + BT_PROFILE("OpenMP_sumJob"); + sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd)); + } + btPopThreadsAreRunning(); + return sum; + } +}; +#endif // #if BT_USE_OPENMP && BT_THREADSAFE + +#if BT_USE_TBB && BT_THREADSAFE +/// +/// btTaskSchedulerTBB -- wrapper around Intel Threaded Building Blocks task scheduler +/// +class btTaskSchedulerTBB : public btITaskScheduler +{ + int m_numThreads; + tbb::task_scheduler_init* m_tbbSchedulerInit; + +public: + btTaskSchedulerTBB() : btITaskScheduler("IntelTBB") + { + m_numThreads = 0; + m_tbbSchedulerInit = NULL; + } + ~btTaskSchedulerTBB() + { + if (m_tbbSchedulerInit) + { + delete m_tbbSchedulerInit; + m_tbbSchedulerInit = NULL; + } + } + + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return tbb::task_scheduler_init::default_num_threads(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads(int numThreads) BT_OVERRIDE + { + m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads)); + if (m_tbbSchedulerInit) + { + // destroys all previous threads + delete m_tbbSchedulerInit; + m_tbbSchedulerInit = NULL; + } + m_tbbSchedulerInit = new tbb::task_scheduler_init(m_numThreads); + m_savedThreadCounter = 0; + if (m_isActive) + { + btResetThreadIndexCounter(); + } + } + struct ForBodyAdapter + { + const btIParallelForBody* mBody; + + ForBodyAdapter(const btIParallelForBody* body) : mBody(body) {} + void operator()(const tbb::blocked_range<int>& range) const + { + BT_PROFILE("TBB_forJob"); + mBody->forLoop(range.begin(), range.end()); + } + }; + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_TBB"); + ForBodyAdapter tbbBody(&body); + btPushThreadsAreRunning(); + tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize), + tbbBody, + tbb::simple_partitioner()); + btPopThreadsAreRunning(); + } + struct SumBodyAdapter + { + const btIParallelSumBody* mBody; + btScalar mSum; + + SumBodyAdapter(const btIParallelSumBody* body) : mBody(body), mSum(btScalar(0)) {} + SumBodyAdapter(const SumBodyAdapter& src, tbb::split) : mBody(src.mBody), mSum(btScalar(0)) {} + void join(const SumBodyAdapter& src) { mSum += src.mSum; } + void operator()(const tbb::blocked_range<int>& range) + { + BT_PROFILE("TBB_sumJob"); + mSum += mBody->sumLoop(range.begin(), range.end()); + } + }; + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelSum_TBB"); + SumBodyAdapter tbbBody(&body); + btPushThreadsAreRunning(); + tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody); + btPopThreadsAreRunning(); + return tbbBody.mSum; + } +}; +#endif // #if BT_USE_TBB && BT_THREADSAFE + +#if BT_USE_PPL && BT_THREADSAFE +/// +/// btTaskSchedulerPPL -- wrapper around Microsoft Parallel Patterns Lib task scheduler +/// +class btTaskSchedulerPPL : public btITaskScheduler +{ + int m_numThreads; + concurrency::combinable<btScalar> m_sum; // for parallelSum +public: + btTaskSchedulerPPL() : btITaskScheduler("PPL") + { + m_numThreads = 0; + } + virtual int getMaxNumThreads() const BT_OVERRIDE + { + return concurrency::GetProcessorCount(); + } + virtual int getNumThreads() const BT_OVERRIDE + { + return m_numThreads; + } + virtual void setNumThreads(int numThreads) BT_OVERRIDE + { + // capping the thread count for PPL due to a thread-index issue + const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31); + m_numThreads = (std::max)(1, (std::min)(maxThreadCount, numThreads)); + using namespace concurrency; + if (CurrentScheduler::Id() != -1) + { + CurrentScheduler::Detach(); + } + SchedulerPolicy policy; + { + // PPL seems to destroy threads when threadpool is shrunk, but keeps reusing old threads + // force it to destroy old threads + policy.SetConcurrencyLimits(1, 1); + CurrentScheduler::Create(policy); + CurrentScheduler::Detach(); + } + policy.SetConcurrencyLimits(m_numThreads, m_numThreads); + CurrentScheduler::Create(policy); + m_savedThreadCounter = 0; + if (m_isActive) + { + btResetThreadIndexCounter(); + } + } + struct ForBodyAdapter + { + const btIParallelForBody* mBody; + int mGrainSize; + int mIndexEnd; + + ForBodyAdapter(const btIParallelForBody* body, int grainSize, int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {} + void operator()(int i) const + { + BT_PROFILE("PPL_forJob"); + mBody->forLoop(i, (std::min)(i + mGrainSize, mIndexEnd)); + } + }; + virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelFor_PPL"); + // PPL dispatch + ForBodyAdapter pplBody(&body, grainSize, iEnd); + btPushThreadsAreRunning(); + // note: MSVC 2010 doesn't support partitioner args, so avoid them + concurrency::parallel_for(iBegin, + iEnd, + grainSize, + pplBody); + btPopThreadsAreRunning(); + } + struct SumBodyAdapter + { + const btIParallelSumBody* mBody; + concurrency::combinable<btScalar>* mSum; + int mGrainSize; + int mIndexEnd; + + SumBodyAdapter(const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end) : mBody(body), mSum(sum), mGrainSize(grainSize), mIndexEnd(end) {} + void operator()(int i) const + { + BT_PROFILE("PPL_sumJob"); + mSum->local() += mBody->sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd)); + } + }; + static btScalar sumFunc(btScalar a, btScalar b) { return a + b; } + virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE + { + BT_PROFILE("parallelSum_PPL"); + m_sum.clear(); + SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd); + btPushThreadsAreRunning(); + // note: MSVC 2010 doesn't support partitioner args, so avoid them + concurrency::parallel_for(iBegin, + iEnd, + grainSize, + pplBody); + btPopThreadsAreRunning(); + return m_sum.combine(sumFunc); + } +}; +#endif // #if BT_USE_PPL && BT_THREADSAFE + +// create a non-threaded task scheduler (always available) +btITaskScheduler* btGetSequentialTaskScheduler() +{ + static btTaskSchedulerSequential sTaskScheduler; + return &sTaskScheduler; +} + +// create an OpenMP task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetOpenMPTaskScheduler() +{ +#if BT_USE_OPENMP && BT_THREADSAFE + static btTaskSchedulerOpenMP sTaskScheduler; + return &sTaskScheduler; +#else + return NULL; +#endif +} + +// create an Intel TBB task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetTBBTaskScheduler() +{ +#if BT_USE_TBB && BT_THREADSAFE + static btTaskSchedulerTBB sTaskScheduler; + return &sTaskScheduler; +#else + return NULL; +#endif +} + +// create a PPL task scheduler (if available, otherwise returns null) +btITaskScheduler* btGetPPLTaskScheduler() +{ +#if BT_USE_PPL && BT_THREADSAFE + static btTaskSchedulerPPL sTaskScheduler; + return &sTaskScheduler; +#else + return NULL; +#endif +} |