Diffstat (limited to 'source/blender/blenlib/intern/threads.c')
-rw-r--r-- | source/blender/blenlib/intern/threads.c | 903
1 file changed, 452 insertions, 451 deletions
diff --git a/source/blender/blenlib/intern/threads.c b/source/blender/blenlib/intern/threads.c
index 0758af03193..abd59dac891 100644
--- a/source/blender/blenlib/intern/threads.c
+++ b/source/blender/blenlib/intern/threads.c
@@ -50,7 +50,8 @@
 #include "atomic_ops.h"
 #include "numaapi.h"
 
-#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
+#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && \
+    !defined(__clang__)
 #  define USE_APPLE_OMP_FIX
 #endif
 
@@ -122,60 +123,60 @@ static pthread_mutex_t _fftw_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _view3d_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t mainid;
 static bool is_numa_available = false;
-static unsigned int thread_levels = 0;  /* threads can be invoked inside threads */
+static unsigned int thread_levels = 0; /* threads can be invoked inside threads */
 
 static int num_threads_override = 0; /* just a max for security reasons */
 
 #define RE_MAX_THREAD BLENDER_MAX_THREADS
 
 typedef struct ThreadSlot {
-	struct ThreadSlot *next, *prev;
-	void *(*do_thread)(void *);
-	void *callerdata;
-	pthread_t pthread;
-	int avail;
+  struct ThreadSlot *next, *prev;
+  void *(*do_thread)(void *);
+  void *callerdata;
+  pthread_t pthread;
+  int avail;
 } ThreadSlot;
 
 static void BLI_lock_malloc_thread(void)
 {
-	BLI_spin_lock(&_malloc_lock);
+  BLI_spin_lock(&_malloc_lock);
 }
 
 static void BLI_unlock_malloc_thread(void)
 {
-	BLI_spin_unlock(&_malloc_lock);
+  BLI_spin_unlock(&_malloc_lock);
 }
 
 void BLI_threadapi_init(void)
 {
-	mainid = pthread_self();
+  mainid = pthread_self();
 
-	BLI_spin_init(&_malloc_lock);
-	if (numaAPI_Initialize() == NUMAAPI_SUCCESS) {
-		is_numa_available = true;
-	}
+  BLI_spin_init(&_malloc_lock);
+  if (numaAPI_Initialize() == NUMAAPI_SUCCESS) {
+    is_numa_available = true;
+  }
 }
 
 void BLI_threadapi_exit(void)
 {
-	if (task_scheduler) {
-		BLI_task_scheduler_free(task_scheduler);
-	}
-	BLI_spin_end(&_malloc_lock);
+  if (task_scheduler) {
+    BLI_task_scheduler_free(task_scheduler);
+  }
+  BLI_spin_end(&_malloc_lock);
 }
 
 TaskScheduler *BLI_task_scheduler_get(void)
 {
-	if (task_scheduler == NULL) {
-		int tot_thread = BLI_system_thread_count();
+  if (task_scheduler == NULL) {
+    int tot_thread = BLI_system_thread_count();
 
-		/* Do a lazy initialization, so it happens after
-		 * command line arguments parsing
-		 */
-		task_scheduler = BLI_task_scheduler_create(tot_thread);
-	}
+    /* Do a lazy initialization, so it happens after
+     * command line arguments parsing
+     */
+    task_scheduler = BLI_task_scheduler_create(tot_thread);
+  }
 
-	return task_scheduler;
+  return task_scheduler;
 }
 
 /* tot = 0 only initializes malloc mutex in a safe way (see sequence.c
@@ -184,161 +185,161 @@ TaskScheduler *BLI_task_scheduler_get(void)
 
 void BLI_threadpool_init(ListBase *threadbase, void *(*do_thread)(void *), int tot)
 {
-	int a;
+  int a;
 
-	if (threadbase != NULL && tot > 0) {
-		BLI_listbase_clear(threadbase);
+  if (threadbase != NULL && tot > 0) {
+    BLI_listbase_clear(threadbase);
 
-		if (tot > RE_MAX_THREAD) {
-			tot = RE_MAX_THREAD;
-		}
-		else if (tot < 1) {
-			tot = 1;
-		}
+    if (tot > RE_MAX_THREAD) {
+      tot = RE_MAX_THREAD;
+    }
+    else if (tot < 1) {
+      tot = 1;
+    }
 
-		for (a = 0; a < tot; a++) {
-			ThreadSlot *tslot = MEM_callocN(sizeof(ThreadSlot), "threadslot");
-			BLI_addtail(threadbase, tslot);
-			tslot->do_thread = do_thread;
-			tslot->avail = 1;
-		}
-	}
+    for (a = 0; a < tot; a++) {
+      ThreadSlot *tslot = MEM_callocN(sizeof(ThreadSlot), "threadslot");
+      BLI_addtail(threadbase, tslot);
+      tslot->do_thread = do_thread;
+      tslot->avail = 1;
+    }
+  }
 
-	unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1);
-	if (level == 0) {
-		MEM_set_lock_callback(BLI_lock_malloc_thread, BLI_unlock_malloc_thread);
+  unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1);
+  if (level == 0) {
+    MEM_set_lock_callback(BLI_lock_malloc_thread, BLI_unlock_malloc_thread);
 
 #ifdef USE_APPLE_OMP_FIX
-		/* workaround for Apple gcc 4.2.1 omp vs background thread bug,
-		 * we copy gomp thread local storage pointer to setting it again
-		 * inside the thread that we start */
-		thread_tls_data = pthread_getspecific(gomp_tls_key);
+    /* workaround for Apple gcc 4.2.1 omp vs background thread bug,
+     * we copy gomp thread local storage pointer to setting it again
+     * inside the thread that we start */
+    thread_tls_data = pthread_getspecific(gomp_tls_key);
 #endif
-	}
+  }
 }
 
 /* amount of available threads */
 int BLI_available_threads(ListBase *threadbase)
 {
-	ThreadSlot *tslot;
-	int counter = 0;
+  ThreadSlot *tslot;
+  int counter = 0;
 
-	for (tslot = threadbase->first; tslot; tslot = tslot->next) {
-		if (tslot->avail) {
-			counter++;
-		}
-	}
-	return counter;
+  for (tslot = threadbase->first; tslot; tslot = tslot->next) {
+    if (tslot->avail) {
+      counter++;
+    }
+  }
+  return counter;
 }
 
 /* returns thread number, for sample patterns or threadsafe tables */
 int BLI_threadpool_available_thread_index(ListBase *threadbase)
 {
-	ThreadSlot *tslot;
-	int counter = 0;
+  ThreadSlot *tslot;
+  int counter = 0;
 
-	for (tslot = threadbase->first; tslot; tslot = tslot->next, counter++) {
-		if (tslot->avail) {
-			return counter;
-		}
-	}
-	return 0;
+  for (tslot = threadbase->first; tslot; tslot = tslot->next, counter++) {
+    if (tslot->avail) {
+      return counter;
+    }
+  }
+  return 0;
 }
 
 static void *tslot_thread_start(void *tslot_p)
 {
-	ThreadSlot *tslot = (ThreadSlot *)tslot_p;
+  ThreadSlot *tslot = (ThreadSlot *)tslot_p;
 
 #ifdef USE_APPLE_OMP_FIX
-	/* workaround for Apple gcc 4.2.1 omp vs background thread bug,
-	 * set gomp thread local storage pointer which was copied beforehand */
-	pthread_setspecific(gomp_tls_key, thread_tls_data);
+  /* workaround for Apple gcc 4.2.1 omp vs background thread bug,
+   * set gomp thread local storage pointer which was copied beforehand */
+  pthread_setspecific(gomp_tls_key, thread_tls_data);
#endif
 
-	return tslot->do_thread(tslot->callerdata);
+  return tslot->do_thread(tslot->callerdata);
 }
 
 int BLI_thread_is_main(void)
 {
-	return pthread_equal(pthread_self(), mainid);
+  return pthread_equal(pthread_self(), mainid);
 }
 
 void BLI_threadpool_insert(ListBase *threadbase, void *callerdata)
 {
-	ThreadSlot *tslot;
+  ThreadSlot *tslot;
 
-	for (tslot = threadbase->first; tslot; tslot = tslot->next) {
-		if (tslot->avail) {
-			tslot->avail = 0;
-			tslot->callerdata = callerdata;
-			pthread_create(&tslot->pthread, NULL, tslot_thread_start, tslot);
-			return;
-		}
-	}
-	printf("ERROR: could not insert thread slot\n");
+  for (tslot = threadbase->first; tslot; tslot = tslot->next) {
+    if (tslot->avail) {
+      tslot->avail = 0;
+      tslot->callerdata = callerdata;
+      pthread_create(&tslot->pthread, NULL, tslot_thread_start, tslot);
+      return;
+    }
+  }
+  printf("ERROR: could not insert thread slot\n");
 }
 
 void BLI_threadpool_remove(ListBase *threadbase, void *callerdata)
 {
-	ThreadSlot *tslot;
+  ThreadSlot *tslot;
 
-	for (tslot = threadbase->first; tslot; tslot = tslot->next) {
-		if (tslot->callerdata == callerdata) {
-			pthread_join(tslot->pthread, NULL);
-			tslot->callerdata = NULL;
-			tslot->avail = 1;
-		}
-	}
+  for (tslot = threadbase->first; tslot; tslot = tslot->next) {
+    if (tslot->callerdata == callerdata) {
+      pthread_join(tslot->pthread, NULL);
+      tslot->callerdata = NULL;
+      tslot->avail = 1;
+    }
+  }
 }
 
 void BLI_threadpool_remove_index(ListBase *threadbase, int index)
 {
-	ThreadSlot *tslot;
-	int counter = 0;
+  ThreadSlot *tslot;
+  int counter = 0;
 
-	for (tslot = threadbase->first; tslot; tslot = tslot->next, counter++) {
-		if (counter == index && tslot->avail == 0) {
-			pthread_join(tslot->pthread, NULL);
-			tslot->callerdata = NULL;
-			tslot->avail = 1;
-			break;
-		}
-	}
+  for (tslot = threadbase->first; tslot; tslot = tslot->next, counter++) {
+    if (counter == index && tslot->avail == 0) {
+      pthread_join(tslot->pthread, NULL);
+      tslot->callerdata = NULL;
+      tslot->avail = 1;
+      break;
+    }
+  }
 }
 
 void BLI_threadpool_clear(ListBase *threadbase)
 {
-	ThreadSlot *tslot;
+  ThreadSlot *tslot;
 
-	for (tslot = threadbase->first; tslot; tslot = tslot->next) {
-		if (tslot->avail == 0) {
-			pthread_join(tslot->pthread, NULL);
-			tslot->callerdata = NULL;
-			tslot->avail = 1;
-		}
-	}
+  for (tslot = threadbase->first; tslot; tslot = tslot->next) {
+    if (tslot->avail == 0) {
+      pthread_join(tslot->pthread, NULL);
+      tslot->callerdata = NULL;
+      tslot->avail = 1;
+    }
+  }
 }
 
 void BLI_threadpool_end(ListBase *threadbase)
 {
-	ThreadSlot *tslot;
+  ThreadSlot *tslot;
 
-	/* only needed if there's actually some stuff to end
-	 * this way we don't end up decrementing thread_levels on an empty threadbase
-	 * */
-	if (threadbase && (BLI_listbase_is_empty(threadbase) == false)) {
-		for (tslot = threadbase->first; tslot; tslot = tslot->next) {
-			if (tslot->avail == 0) {
-				pthread_join(tslot->pthread, NULL);
-			}
-		}
-		BLI_freelistN(threadbase);
-	}
+  /* only needed if there's actually some stuff to end
+   * this way we don't end up decrementing thread_levels on an empty threadbase
+   * */
+  if (threadbase && (BLI_listbase_is_empty(threadbase) == false)) {
+    for (tslot = threadbase->first; tslot; tslot = tslot->next) {
+      if (tslot->avail == 0) {
+        pthread_join(tslot->pthread, NULL);
+      }
+    }
+    BLI_freelistN(threadbase);
+  }
 
-	unsigned int level = atomic_sub_and_fetch_u(&thread_levels, 1);
-	if (level == 0) {
-		MEM_set_lock_callback(NULL, NULL);
-	}
+  unsigned int level = atomic_sub_and_fetch_u(&thread_levels, 1);
+  if (level == 0) {
+    MEM_set_lock_callback(NULL, NULL);
+  }
 }
 
 /* System Information */
@@ -346,131 +347,131 @@ void BLI_threadpool_end(ListBase *threadbase)
 /* how many threads are native on this system? */
 int BLI_system_thread_count(void)
 {
-	static int t = -1;
+  static int t = -1;
 
-	if (num_threads_override != 0) {
-		return num_threads_override;
-	}
-	else if (LIKELY(t != -1)) {
-		return t;
-	}
+  if (num_threads_override != 0) {
+    return num_threads_override;
+  }
+  else if (LIKELY(t != -1)) {
+    return t;
+  }
 
-	{
+  {
 #ifdef WIN32
-		SYSTEM_INFO info;
-		GetSystemInfo(&info);
-		t = (int) info.dwNumberOfProcessors;
+    SYSTEM_INFO info;
+    GetSystemInfo(&info);
+    t = (int)info.dwNumberOfProcessors;
 #else
-#	ifdef __APPLE__
-		int mib[2];
-		size_t len;
-
-		mib[0] = CTL_HW;
-		mib[1] = HW_NCPU;
-		len = sizeof(t);
-		sysctl(mib, 2, &t, &len, NULL, 0);
-#	else
-		t = (int)sysconf(_SC_NPROCESSORS_ONLN);
-#	endif
+#  ifdef __APPLE__
+    int mib[2];
+    size_t len;
+
+    mib[0] = CTL_HW;
+    mib[1] = HW_NCPU;
+    len = sizeof(t);
+    sysctl(mib, 2, &t, &len, NULL, 0);
+#  else
+    t = (int)sysconf(_SC_NPROCESSORS_ONLN);
+#  endif
 #endif
-	}
+  }
 
-	CLAMP(t, 1, RE_MAX_THREAD);
+  CLAMP(t, 1, RE_MAX_THREAD);
 
-	return t;
+  return t;
 }
 
 void BLI_system_num_threads_override_set(int num)
 {
-	num_threads_override = num;
+  num_threads_override = num;
 }
 
 int BLI_system_num_threads_override_get(void)
 {
-	return num_threads_override;
+  return num_threads_override;
 }
 
 /* Global Mutex Locks */
 
 static ThreadMutex *global_mutex_from_type(const int type)
 {
-	switch (type) {
-		case LOCK_IMAGE:
-			return &_image_lock;
-		case LOCK_DRAW_IMAGE:
-			return &_image_draw_lock;
-		case LOCK_VIEWER:
-			return &_viewer_lock;
-		case LOCK_CUSTOM1:
-			return &_custom1_lock;
-		case LOCK_RCACHE:
-			return &_rcache_lock;
-		case LOCK_OPENGL:
-			return &_opengl_lock;
-		case LOCK_NODES:
-			return &_nodes_lock;
-		case LOCK_MOVIECLIP:
-			return &_movieclip_lock;
-		case LOCK_COLORMANAGE:
-			return &_colormanage_lock;
-		case LOCK_FFTW:
-			return &_fftw_lock;
-		case LOCK_VIEW3D:
-			return &_view3d_lock;
-		default:
-			BLI_assert(0);
-			return NULL;
-	}
+  switch (type) {
+    case LOCK_IMAGE:
+      return &_image_lock;
+    case LOCK_DRAW_IMAGE:
+      return &_image_draw_lock;
+    case LOCK_VIEWER:
+      return &_viewer_lock;
+    case LOCK_CUSTOM1:
+      return &_custom1_lock;
+    case LOCK_RCACHE:
+      return &_rcache_lock;
+    case LOCK_OPENGL:
+      return &_opengl_lock;
+    case LOCK_NODES:
+      return &_nodes_lock;
+    case LOCK_MOVIECLIP:
+      return &_movieclip_lock;
+    case LOCK_COLORMANAGE:
+      return &_colormanage_lock;
+    case LOCK_FFTW:
+      return &_fftw_lock;
+    case LOCK_VIEW3D:
+      return &_view3d_lock;
+    default:
+      BLI_assert(0);
+      return NULL;
+  }
 }
 
 void BLI_thread_lock(int type)
 {
-	pthread_mutex_lock(global_mutex_from_type(type));
+  pthread_mutex_lock(global_mutex_from_type(type));
 }
 
 void BLI_thread_unlock(int type)
 {
-	pthread_mutex_unlock(global_mutex_from_type(type));
+  pthread_mutex_unlock(global_mutex_from_type(type));
 }
 
 /* Mutex Locks */
 
 void BLI_mutex_init(ThreadMutex *mutex)
 {
-	pthread_mutex_init(mutex, NULL);
+  pthread_mutex_init(mutex, NULL);
 }
 
 void BLI_mutex_lock(ThreadMutex *mutex)
 {
-	pthread_mutex_lock(mutex);
+  pthread_mutex_lock(mutex);
 }
 
 void BLI_mutex_unlock(ThreadMutex *mutex)
 {
-	pthread_mutex_unlock(mutex);
+  pthread_mutex_unlock(mutex);
 }
 
 bool BLI_mutex_trylock(ThreadMutex *mutex)
 {
-	return (pthread_mutex_trylock(mutex) == 0);
+  return (pthread_mutex_trylock(mutex) == 0);
 }
 
 void BLI_mutex_end(ThreadMutex *mutex)
 {
-	pthread_mutex_destroy(mutex);
+  pthread_mutex_destroy(mutex);
 }
 
 ThreadMutex *BLI_mutex_alloc(void)
 {
-	ThreadMutex *mutex = MEM_callocN(sizeof(ThreadMutex), "ThreadMutex");
-	BLI_mutex_init(mutex);
-	return mutex;
+  ThreadMutex *mutex = MEM_callocN(sizeof(ThreadMutex), "ThreadMutex");
+  BLI_mutex_init(mutex);
+  return mutex;
 }
 
 void BLI_mutex_free(ThreadMutex *mutex)
 {
-	BLI_mutex_end(mutex);
-	MEM_freeN(mutex);
+  BLI_mutex_end(mutex);
+  MEM_freeN(mutex);
 }
 
 /* Spin Locks */
@@ -478,39 +479,39 @@ void BLI_mutex_free(ThreadMutex *mutex)
 void BLI_spin_init(SpinLock *spin)
 {
 #if defined(__APPLE__)
-	*spin = OS_SPINLOCK_INIT;
+  *spin = OS_SPINLOCK_INIT;
 #elif defined(_MSC_VER)
-	*spin = 0;
+  *spin = 0;
 #else
-	pthread_spin_init(spin, 0);
+  pthread_spin_init(spin, 0);
 #endif
 }
 
 void BLI_spin_lock(SpinLock *spin)
 {
 #if defined(__APPLE__)
-	OSSpinLockLock(spin);
+  OSSpinLockLock(spin);
 #elif defined(_MSC_VER)
-	while (InterlockedExchangeAcquire(spin, 1)) {
-		while (*spin) {
-			/* Spinlock hint for processors with hyperthreading. */
-			YieldProcessor();
-		}
-	}
+  while (InterlockedExchangeAcquire(spin, 1)) {
+    while (*spin) {
+      /* Spinlock hint for processors with hyperthreading. */
+      YieldProcessor();
+    }
+  }
 #else
-	pthread_spin_lock(spin);
+  pthread_spin_lock(spin);
 #endif
 }
 
 void BLI_spin_unlock(SpinLock *spin)
 {
 #if defined(__APPLE__)
-	OSSpinLockUnlock(spin);
+  OSSpinLockUnlock(spin);
 #elif defined(_MSC_VER)
-	_ReadWriteBarrier();
-	*spin = 0;
+  _ReadWriteBarrier();
+  *spin = 0;
 #else
-	pthread_spin_unlock(spin);
+  pthread_spin_unlock(spin);
 #endif
 }
 
@@ -521,7 +522,7 @@ void BLI_spin_end(SpinLock *UNUSED(spin))
 #else
 void BLI_spin_end(SpinLock *spin)
 {
-	pthread_spin_destroy(spin);
+  pthread_spin_destroy(spin);
 }
 #endif
 
@@ -529,87 +530,87 @@ void BLI_spin_end(SpinLock *spin)
 
 void BLI_rw_mutex_init(ThreadRWMutex *mutex)
 {
-	pthread_rwlock_init(mutex, NULL);
+  pthread_rwlock_init(mutex, NULL);
 }
 
 void BLI_rw_mutex_lock(ThreadRWMutex *mutex, int mode)
 {
-	if (mode == THREAD_LOCK_READ) {
-		pthread_rwlock_rdlock(mutex);
-	}
-	else {
-		pthread_rwlock_wrlock(mutex);
-	}
+  if (mode == THREAD_LOCK_READ) {
+    pthread_rwlock_rdlock(mutex);
+  }
+  else {
+    pthread_rwlock_wrlock(mutex);
+  }
 }
 
 void BLI_rw_mutex_unlock(ThreadRWMutex *mutex)
 {
-	pthread_rwlock_unlock(mutex);
+  pthread_rwlock_unlock(mutex);
 }
 
 void BLI_rw_mutex_end(ThreadRWMutex *mutex)
 {
-	pthread_rwlock_destroy(mutex);
+  pthread_rwlock_destroy(mutex);
 }
 
 ThreadRWMutex *BLI_rw_mutex_alloc(void)
 {
-	ThreadRWMutex *mutex = MEM_callocN(sizeof(ThreadRWMutex), "ThreadRWMutex");
-	BLI_rw_mutex_init(mutex);
-	return mutex;
+  ThreadRWMutex *mutex = MEM_callocN(sizeof(ThreadRWMutex), "ThreadRWMutex");
+  BLI_rw_mutex_init(mutex);
+  return mutex;
 }
 
 void BLI_rw_mutex_free(ThreadRWMutex *mutex)
 {
-	BLI_rw_mutex_end(mutex);
-	MEM_freeN(mutex);
+  BLI_rw_mutex_end(mutex);
+  MEM_freeN(mutex);
 }
 
 /* Ticket Mutex Lock */
 
 struct TicketMutex {
-	pthread_cond_t cond;
-	pthread_mutex_t mutex;
-	unsigned int queue_head, queue_tail;
+  pthread_cond_t cond;
+  pthread_mutex_t mutex;
+  unsigned int queue_head, queue_tail;
 };
 
 TicketMutex *BLI_ticket_mutex_alloc(void)
 {
-	TicketMutex *ticket = MEM_callocN(sizeof(TicketMutex), "TicketMutex");
+  TicketMutex *ticket = MEM_callocN(sizeof(TicketMutex), "TicketMutex");
 
-	pthread_cond_init(&ticket->cond, NULL);
-	pthread_mutex_init(&ticket->mutex, NULL);
+  pthread_cond_init(&ticket->cond, NULL);
+  pthread_mutex_init(&ticket->mutex, NULL);
 
-	return ticket;
+  return ticket;
 }
 
 void BLI_ticket_mutex_free(TicketMutex *ticket)
 {
-	pthread_mutex_destroy(&ticket->mutex);
-	pthread_cond_destroy(&ticket->cond);
-	MEM_freeN(ticket);
+  pthread_mutex_destroy(&ticket->mutex);
+  pthread_cond_destroy(&ticket->cond);
+  MEM_freeN(ticket);
 }
 
 void BLI_ticket_mutex_lock(TicketMutex *ticket)
 {
-	unsigned int queue_me;
+  unsigned int queue_me;
 
-	pthread_mutex_lock(&ticket->mutex);
-	queue_me = ticket->queue_tail++;
+  pthread_mutex_lock(&ticket->mutex);
+  queue_me = ticket->queue_tail++;
 
-	while (queue_me != ticket->queue_head) {
-		pthread_cond_wait(&ticket->cond, &ticket->mutex);
-	}
+  while (queue_me != ticket->queue_head) {
+    pthread_cond_wait(&ticket->cond, &ticket->mutex);
+  }
 
-	pthread_mutex_unlock(&ticket->mutex);
+  pthread_mutex_unlock(&ticket->mutex);
 }
 
 void BLI_ticket_mutex_unlock(TicketMutex *ticket)
 {
-	pthread_mutex_lock(&ticket->mutex);
-	ticket->queue_head++;
-	pthread_cond_broadcast(&ticket->cond);
-	pthread_mutex_unlock(&ticket->mutex);
+  pthread_mutex_lock(&ticket->mutex);
+  ticket->queue_head++;
+  pthread_cond_broadcast(&ticket->cond);
+  pthread_mutex_unlock(&ticket->mutex);
 }
 
 /* ************************************************ */
@@ -618,241 +619,241 @@ void BLI_ticket_mutex_unlock(TicketMutex *ticket)
 
 void BLI_condition_init(ThreadCondition *cond)
 {
-	pthread_cond_init(cond, NULL);
+  pthread_cond_init(cond, NULL);
 }
 
 void BLI_condition_wait(ThreadCondition *cond, ThreadMutex *mutex)
 {
-	pthread_cond_wait(cond, mutex);
+  pthread_cond_wait(cond, mutex);
 }
 
 void BLI_condition_wait_global_mutex(ThreadCondition *cond, const int type)
 {
-	pthread_cond_wait(cond, global_mutex_from_type(type));
+  pthread_cond_wait(cond, global_mutex_from_type(type));
 }
 
 void BLI_condition_notify_one(ThreadCondition *cond)
 {
-	pthread_cond_signal(cond);
+  pthread_cond_signal(cond);
 }
 
 void BLI_condition_notify_all(ThreadCondition *cond)
 {
-	pthread_cond_broadcast(cond);
+  pthread_cond_broadcast(cond);
 }
 
 void BLI_condition_end(ThreadCondition *cond)
 {
-	pthread_cond_destroy(cond);
+  pthread_cond_destroy(cond);
 }
 
 /* ************************************************ */
 
 struct ThreadQueue {
-	GSQueue *queue;
-	pthread_mutex_t mutex;
-	pthread_cond_t push_cond;
-	pthread_cond_t finish_cond;
-	volatile int nowait;
-	volatile int canceled;
+  GSQueue *queue;
+  pthread_mutex_t mutex;
+  pthread_cond_t push_cond;
+  pthread_cond_t finish_cond;
+  volatile int nowait;
+  volatile int canceled;
 };
 
 ThreadQueue *BLI_thread_queue_init(void)
 {
-	ThreadQueue *queue;
+  ThreadQueue *queue;
 
-	queue = MEM_callocN(sizeof(ThreadQueue), "ThreadQueue");
-	queue->queue = BLI_gsqueue_new(sizeof(void *));
+  queue = MEM_callocN(sizeof(ThreadQueue), "ThreadQueue");
+  queue->queue = BLI_gsqueue_new(sizeof(void *));
 
-	pthread_mutex_init(&queue->mutex, NULL);
-	pthread_cond_init(&queue->push_cond, NULL);
-	pthread_cond_init(&queue->finish_cond, NULL);
+  pthread_mutex_init(&queue->mutex, NULL);
+  pthread_cond_init(&queue->push_cond, NULL);
+  pthread_cond_init(&queue->finish_cond, NULL);
 
-	return queue;
+  return queue;
 }
 
 void BLI_thread_queue_free(ThreadQueue *queue)
 {
-	/* destroy everything, assumes no one is using queue anymore */
-	pthread_cond_destroy(&queue->finish_cond);
-	pthread_cond_destroy(&queue->push_cond);
-	pthread_mutex_destroy(&queue->mutex);
+  /* destroy everything, assumes no one is using queue anymore */
+  pthread_cond_destroy(&queue->finish_cond);
+  pthread_cond_destroy(&queue->push_cond);
+  pthread_mutex_destroy(&queue->mutex);
 
-	BLI_gsqueue_free(queue->queue);
+  BLI_gsqueue_free(queue->queue);
 
-	MEM_freeN(queue);
+  MEM_freeN(queue);
 }
 
 void BLI_thread_queue_push(ThreadQueue *queue, void *work)
 {
-	pthread_mutex_lock(&queue->mutex);
+  pthread_mutex_lock(&queue->mutex);
 
-	BLI_gsqueue_push(queue->queue, &work);
+  BLI_gsqueue_push(queue->queue, &work);
 
-	/* signal threads waiting to pop */
-	pthread_cond_signal(&queue->push_cond);
-	pthread_mutex_unlock(&queue->mutex);
+  /* signal threads waiting to pop */
+  pthread_cond_signal(&queue->push_cond);
+  pthread_mutex_unlock(&queue->mutex);
 }
 
 void *BLI_thread_queue_pop(ThreadQueue *queue)
 {
-	void *work = NULL;
+  void *work = NULL;
 
-	/* wait until there is work */
-	pthread_mutex_lock(&queue->mutex);
-	while (BLI_gsqueue_is_empty(queue->queue) && !queue->nowait) {
-		pthread_cond_wait(&queue->push_cond, &queue->mutex);
-	}
+  /* wait until there is work */
+  pthread_mutex_lock(&queue->mutex);
+  while (BLI_gsqueue_is_empty(queue->queue) && !queue->nowait) {
+    pthread_cond_wait(&queue->push_cond, &queue->mutex);
+  }
 
-	/* if we have something, pop it */
-	if (!BLI_gsqueue_is_empty(queue->queue)) {
-		BLI_gsqueue_pop(queue->queue, &work);
+  /* if we have something, pop it */
+  if (!BLI_gsqueue_is_empty(queue->queue)) {
+    BLI_gsqueue_pop(queue->queue, &work);
 
-		if (BLI_gsqueue_is_empty(queue->queue)) {
-			pthread_cond_broadcast(&queue->finish_cond);
-		}
-	}
+    if (BLI_gsqueue_is_empty(queue->queue)) {
+      pthread_cond_broadcast(&queue->finish_cond);
+    }
+  }
 
-	pthread_mutex_unlock(&queue->mutex);
+  pthread_mutex_unlock(&queue->mutex);
 
-	return work;
+  return work;
 }
 
 static void wait_timeout(struct timespec *timeout, int ms)
 {
-	ldiv_t div_result;
-	long sec, usec, x;
+  ldiv_t div_result;
+  long sec, usec, x;
 
 #ifdef WIN32
-	{
-		struct _timeb now;
-		_ftime(&now);
-		sec = now.time;
-		usec = now.millitm * 1000; /* microsecond precision would be better */
-	}
+  {
+    struct _timeb now;
+    _ftime(&now);
+    sec = now.time;
+    usec = now.millitm * 1000; /* microsecond precision would be better */
+  }
 #else
-	{
-		struct timeval now;
-		gettimeofday(&now, NULL);
-		sec = now.tv_sec;
-		usec = now.tv_usec;
-	}
+  {
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    sec = now.tv_sec;
+    usec = now.tv_usec;
+  }
 #endif
 
-	/* add current time + millisecond offset */
-	div_result = ldiv(ms, 1000);
-	timeout->tv_sec = sec + div_result.quot;
+  /* add current time + millisecond offset */
+  div_result = ldiv(ms, 1000);
+  timeout->tv_sec = sec + div_result.quot;
 
-	x = usec + (div_result.rem * 1000);
+  x = usec + (div_result.rem * 1000);
 
-	if (x >= 1000000) {
-		timeout->tv_sec++;
-		x -= 1000000;
-	}
+  if (x >= 1000000) {
+    timeout->tv_sec++;
+    x -= 1000000;
+  }
 
-	timeout->tv_nsec = x * 1000;
+  timeout->tv_nsec = x * 1000;
 }
 
 void *BLI_thread_queue_pop_timeout(ThreadQueue *queue, int ms)
 {
-	double t;
-	void *work = NULL;
-	struct timespec timeout;
+  double t;
+  void *work = NULL;
+  struct timespec timeout;
 
-	t = PIL_check_seconds_timer();
-	wait_timeout(&timeout, ms);
+  t = PIL_check_seconds_timer();
+  wait_timeout(&timeout, ms);
 
-	/* wait until there is work */
-	pthread_mutex_lock(&queue->mutex);
-	while (BLI_gsqueue_is_empty(queue->queue) && !queue->nowait) {
-		if (pthread_cond_timedwait(&queue->push_cond, &queue->mutex, &timeout) == ETIMEDOUT) {
-			break;
-		}
-		else if (PIL_check_seconds_timer() - t >= ms * 0.001) {
-			break;
-		}
-	}
+  /* wait until there is work */
+  pthread_mutex_lock(&queue->mutex);
+  while (BLI_gsqueue_is_empty(queue->queue) && !queue->nowait) {
+    if (pthread_cond_timedwait(&queue->push_cond, &queue->mutex, &timeout) == ETIMEDOUT) {
+      break;
+    }
+    else if (PIL_check_seconds_timer() - t >= ms * 0.001) {
+      break;
+    }
+  }
 
-	/* if we have something, pop it */
-	if (!BLI_gsqueue_is_empty(queue->queue)) {
-		BLI_gsqueue_pop(queue->queue, &work);
+  /* if we have something, pop it */
+  if (!BLI_gsqueue_is_empty(queue->queue)) {
+    BLI_gsqueue_pop(queue->queue, &work);
 
-		if (BLI_gsqueue_is_empty(queue->queue)) {
-			pthread_cond_broadcast(&queue->finish_cond);
-		}
-	}
+    if (BLI_gsqueue_is_empty(queue->queue)) {
+      pthread_cond_broadcast(&queue->finish_cond);
+    }
+  }
 
-	pthread_mutex_unlock(&queue->mutex);
+  pthread_mutex_unlock(&queue->mutex);
 
-	return work;
+  return work;
 }
 
 int BLI_thread_queue_len(ThreadQueue *queue)
 {
-	int size;
+  int size;
 
-	pthread_mutex_lock(&queue->mutex);
-	size = BLI_gsqueue_len(queue->queue);
-	pthread_mutex_unlock(&queue->mutex);
+  pthread_mutex_lock(&queue->mutex);
+  size = BLI_gsqueue_len(queue->queue);
+  pthread_mutex_unlock(&queue->mutex);
 
-	return size;
+  return size;
 }
 
 bool BLI_thread_queue_is_empty(ThreadQueue *queue)
 {
-	bool is_empty;
+  bool is_empty;
 
-	pthread_mutex_lock(&queue->mutex);
-	is_empty = BLI_gsqueue_is_empty(queue->queue);
-	pthread_mutex_unlock(&queue->mutex);
+  pthread_mutex_lock(&queue->mutex);
+  is_empty = BLI_gsqueue_is_empty(queue->queue);
+  pthread_mutex_unlock(&queue->mutex);
 
-	return is_empty;
+  return is_empty;
 }
 
 void BLI_thread_queue_nowait(ThreadQueue *queue)
 {
-	pthread_mutex_lock(&queue->mutex);
+  pthread_mutex_lock(&queue->mutex);
 
-	queue->nowait = 1;
+  queue->nowait = 1;
 
-	/* signal threads waiting to pop */
-	pthread_cond_broadcast(&queue->push_cond);
-	pthread_mutex_unlock(&queue->mutex);
+  /* signal threads waiting to pop */
+  pthread_cond_broadcast(&queue->push_cond);
+  pthread_mutex_unlock(&queue->mutex);
 }
 
 void BLI_thread_queue_wait_finish(ThreadQueue *queue)
 {
-	/* wait for finish condition */
-	pthread_mutex_lock(&queue->mutex);
+  /* wait for finish condition */
+  pthread_mutex_lock(&queue->mutex);
 
-	while (!BLI_gsqueue_is_empty(queue->queue)) {
-		pthread_cond_wait(&queue->finish_cond, &queue->mutex);
-	}
+  while (!BLI_gsqueue_is_empty(queue->queue)) {
+    pthread_cond_wait(&queue->finish_cond, &queue->mutex);
+  }
 
-	pthread_mutex_unlock(&queue->mutex);
+  pthread_mutex_unlock(&queue->mutex);
 }
 
 /* ************************************************ */
 
 void BLI_threaded_malloc_begin(void)
 {
-	unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1);
-	if (level == 0) {
-		MEM_set_lock_callback(BLI_lock_malloc_thread, BLI_unlock_malloc_thread);
-		/* There is a little chance that two threads will need to access to a
-		 * scheduler which was not yet created from main thread. which could
-		 * cause scheduler created multiple times.
-		 */
-		BLI_task_scheduler_get();
-	}
+  unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1);
+  if (level == 0) {
+    MEM_set_lock_callback(BLI_lock_malloc_thread, BLI_unlock_malloc_thread);
+    /* There is a little chance that two threads will need to access to a
+     * scheduler which was not yet created from main thread. which could
+     * cause scheduler created multiple times.
+     */
+    BLI_task_scheduler_get();
+  }
 }
 
 void BLI_threaded_malloc_end(void)
 {
-	unsigned int level = atomic_sub_and_fetch_u(&thread_levels, 1);
-	if (level == 0) {
-		MEM_set_lock_callback(NULL, NULL);
-	}
+  unsigned int level = atomic_sub_and_fetch_u(&thread_levels, 1);
+  if (level == 0) {
+    MEM_set_lock_callback(NULL, NULL);
+  }
 }
 
 /* **** Special functions to help performance on crazy NUMA setups. **** */
@@ -860,107 +861,107 @@ void BLI_threaded_malloc_end(void)
 #if 0 /* UNUSED */
 static bool check_is_threadripper2_alike_topology(void)
 {
-	/* NOTE: We hope operating system does not support CPU hotswap to
-	 * a different brand. And that SMP of different types is also not
-	 * encouraged by the system. */
-	static bool is_initialized = false;
-	static bool is_threadripper2 = false;
-	if (is_initialized) {
-		return is_threadripper2;
-	}
-	is_initialized = true;
-	char *cpu_brand = BLI_cpu_brand_string();
-	if (cpu_brand == NULL) {
-		return false;
-	}
-	if (strstr(cpu_brand, "Threadripper")) {
-		/* NOTE: We consider all Thread-rippers having similar topology to
-		 * the second one. This is because we are trying to utilize NUMA node
-		 * 0 as much as possible. This node does exist on earlier versions of
-		 * thread-ripper and setting affinity to it should not have negative
-		 * effect.
-		 * This allows us to avoid per-model check, making the code more
-		 * reliable for the CPUs which are not yet released.
-		 */
-		if (strstr(cpu_brand, "2990WX") || strstr(cpu_brand, "2950X")) {
-			is_threadripper2 = true;
-		}
-	}
-	/* NOTE: While all dies of EPYC has memory controller, only two f them
-	 * has access to a lower-indexed DDR slots. Those dies are same as on
-	 * Threadripper2 with the memory controller.
-	 * Now, it is rather likely that reasonable amount of users don't max
-	 * up their DR slots, making it only two dies connected to a DDR slot
-	 * with actual memory in it. */
-	if (strstr(cpu_brand, "EPYC")) {
-		/* NOTE: Similarly to Thread-ripper we do not do model check. */
-		is_threadripper2 = true;
-	}
-	MEM_freeN(cpu_brand);
-	return is_threadripper2;
+  /* NOTE: We hope operating system does not support CPU hotswap to
+   * a different brand. And that SMP of different types is also not
+   * encouraged by the system. */
+  static bool is_initialized = false;
+  static bool is_threadripper2 = false;
+  if (is_initialized) {
+    return is_threadripper2;
+  }
+  is_initialized = true;
+  char *cpu_brand = BLI_cpu_brand_string();
+  if (cpu_brand == NULL) {
+    return false;
+  }
+  if (strstr(cpu_brand, "Threadripper")) {
+    /* NOTE: We consider all Thread-rippers having similar topology to
+     * the second one. This is because we are trying to utilize NUMA node
+     * 0 as much as possible. This node does exist on earlier versions of
+     * thread-ripper and setting affinity to it should not have negative
+     * effect.
+     * This allows us to avoid per-model check, making the code more
+     * reliable for the CPUs which are not yet released.
+     */
+    if (strstr(cpu_brand, "2990WX") || strstr(cpu_brand, "2950X")) {
+      is_threadripper2 = true;
+    }
+  }
+  /* NOTE: While all dies of EPYC has memory controller, only two f them
+   * has access to a lower-indexed DDR slots. Those dies are same as on
+   * Threadripper2 with the memory controller.
+   * Now, it is rather likely that reasonable amount of users don't max
+   * up their DR slots, making it only two dies connected to a DDR slot
+   * with actual memory in it. */
+  if (strstr(cpu_brand, "EPYC")) {
+    /* NOTE: Similarly to Thread-ripper we do not do model check. */
+    is_threadripper2 = true;
+  }
+  MEM_freeN(cpu_brand);
+  return is_threadripper2;
 }
 
 static void threadripper_put_process_on_fast_node(void)
 {
-	if (!is_numa_available) {
-		return;
-	}
-	/* NOTE: Technically, we can use NUMA nodes 0 and 2 and using both of
-	 * them in the affinity mask will allow OS to schedule threads more
-	 * flexible,possibly increasing overall performance when multiple apps
-	 * are crunching numbers.
-	 *
-	 * However, if scene fits into memory adjacent to a single die we don't
-	 * want OS to re-schedule the process to another die since that will make
-	 * it further away from memory allocated for .blend file. */
-	/* NOTE: Even if NUMA is available in the API but is disabled in BIOS on
-	 * this workstation we still process here. If NUMA is disabled it will be a
-	 * single node, so our action is no-visible-changes, but allows to keep
-	 * things simple and unified. */
-	numaAPI_RunProcessOnNode(0);
+  if (!is_numa_available) {
+    return;
+  }
+  /* NOTE: Technically, we can use NUMA nodes 0 and 2 and using both of
+   * them in the affinity mask will allow OS to schedule threads more
+   * flexible,possibly increasing overall performance when multiple apps
+   * are crunching numbers.
+   *
+   * However, if scene fits into memory adjacent to a single die we don't
+   * want OS to re-schedule the process to another die since that will make
+   * it further away from memory allocated for .blend file. */
+  /* NOTE: Even if NUMA is available in the API but is disabled in BIOS on
+   * this workstation we still process here. If NUMA is disabled it will be a
+   * single node, so our action is no-visible-changes, but allows to keep
+   * things simple and unified. */
+  numaAPI_RunProcessOnNode(0);
 }
 
 static void threadripper_put_thread_on_fast_node(void)
 {
-	if (!is_numa_available) {
-		return;
-	}
-	/* NOTE: This is where things becomes more interesting. On the one hand
-	 * we can use nodes 0 and 2 and allow operating system to do balancing
-	 * of processes/threads for the maximum performance when multiple apps
-	 * are running.
-	 * On another hand, however, we probably want to use same node as the
-	 * main thread since that's where the memory of .blend file is likely
-	 * to be allocated.
-	 * Since the main thread is currently on node 0, we also put thread on
-	 * same node. */
-	/* See additional note about NUMA disabled in BIOS above. */
-	numaAPI_RunThreadOnNode(0);
-}
-#endif  /* UNUSED */
+  if (!is_numa_available) {
+    return;
+  }
+  /* NOTE: This is where things becomes more interesting. On the one hand
+   * we can use nodes 0 and 2 and allow operating system to do balancing
+   * of processes/threads for the maximum performance when multiple apps
+   * are running.
+   * On another hand, however, we probably want to use same node as the
+   * main thread since that's where the memory of .blend file is likely
+   * to be allocated.
+   * Since the main thread is currently on node 0, we also put thread on
+   * same node. */
+  /* See additional note about NUMA disabled in BIOS above. */
+  numaAPI_RunThreadOnNode(0);
+}
+#endif /* UNUSED */
 
 void BLI_thread_put_process_on_fast_node(void)
 {
-	/* Disabled for now since this causes only 16 threads to be used on a
-	 * thread-ripper for computations like sculpting and fluid sim. The problem
-	 * is that all threads created as children from this thread will inherit
-	 * the NUMA node and so will end up on the same node. This can be fixed
-	 * case-by-case by assigning the NUMA node for every child thread, however
-	 * this is difficult for external libraries and OpenMP, and out of our
-	 * control for plugins like external renderers. */
+  /* Disabled for now since this causes only 16 threads to be used on a
+   * thread-ripper for computations like sculpting and fluid sim. The problem
+   * is that all threads created as children from this thread will inherit
+   * the NUMA node and so will end up on the same node. This can be fixed
+   * case-by-case by assigning the NUMA node for every child thread, however
+   * this is difficult for external libraries and OpenMP, and out of our
+   * control for plugins like external renderers. */
 #if 0
-	if (check_is_threadripper2_alike_topology()) {
-		threadripper_put_process_on_fast_node();
-	}
+  if (check_is_threadripper2_alike_topology()) {
+    threadripper_put_process_on_fast_node();
+  }
 #endif
 }
 
 void BLI_thread_put_thread_on_fast_node(void)
 {
-	/* Disabled for now, see comment above. */
+  /* Disabled for now, see comment above. */
 #if 0
-	if (check_is_threadripper2_alike_topology()) {
-		threadripper_put_thread_on_fast_node();
-	}
+  if (check_is_threadripper2_alike_topology()) {
+    threadripper_put_thread_on_fast_node();
+  }
 #endif
 }
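
As a usage note for the APIs touched above: the ThreadQueue functions form a simple producer/consumer pair with the thread pool. The sketch below is illustrative only; it assumes it is compiled inside the Blender source tree (for BLI_threads.h and DNA_listBase.h), and consumer() and example_run() are hypothetical names, not part of the Blender API.

/* Producer/consumer sketch for the ThreadQueue API above. Illustrative only:
 * assumes a build inside the Blender source tree; consumer() and
 * example_run() are hypothetical names. */
#include <stdio.h>

#include "BLI_threads.h"
#include "DNA_listBase.h"

static void *consumer(void *queue_v)
{
  ThreadQueue *queue = queue_v;
  void *work;

  /* BLI_thread_queue_pop() blocks until work is available; after
   * BLI_thread_queue_nowait() it returns NULL once the queue drains. */
  while ((work = BLI_thread_queue_pop(queue))) {
    printf("consumed job %d\n", *(int *)work);
  }
  return NULL;
}

void example_run(void)
{
  ThreadQueue *queue = BLI_thread_queue_init();
  ListBase threads = {NULL, NULL};
  int jobs[4] = {1, 2, 3, 4};

  /* Two consumer threads; each slot is started with the queue as callerdata. */
  BLI_threadpool_init(&threads, consumer, 2);
  for (int i = 0; i < 2; i++) {
    BLI_threadpool_insert(&threads, queue);
  }

  for (int i = 0; i < 4; i++) {
    BLI_thread_queue_push(queue, &jobs[i]);
  }

  /* Tell blocked consumers no more work is coming, then join and clean up. */
  BLI_thread_queue_nowait(queue);
  BLI_threadpool_end(&threads);
  BLI_thread_queue_free(queue);
}

Of the primitives in this file, TicketMutex is the only algorithmically interesting one: each waiter takes a ticket from queue_tail and sleeps on the condition variable until queue_head reaches its number, so the lock is granted in strict FIFO order; a plain pthread_mutex_t makes no such fairness guarantee.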