diff options
Diffstat (limited to 'intern/numaapi/source/numaapi_linux.c')
-rw-r--r-- | intern/numaapi/source/numaapi_linux.c | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c new file mode 100644 index 00000000000..559e97b67d3 --- /dev/null +++ b/intern/numaapi/source/numaapi_linux.c @@ -0,0 +1,272 @@ +// Copyright (c) 2016, libnumaapi authors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +// +// Author: Sergey Sharybin (sergey.vfx@gmail.com) + +#include "build_config.h" + +#if OS_LINUX + +#include "numaapi.h" + +#include <stdlib.h> + +#ifndef WITH_DYNLOAD +# include <numa.h> +#else +# include <dlfcn.h> +#endif + +#ifdef WITH_DYNLOAD + +// Descriptor numa library. +static void* numa_lib; + +// Types of all symbols which are read from the library. +struct bitmask; +typedef int tnuma_available(void); +typedef int tnuma_max_node(void); +typedef int tnuma_node_to_cpus(int node, struct bitmask* mask); +typedef long tnuma_node_size(int node, long* freep); +typedef int tnuma_run_on_node(int node); +typedef void* tnuma_alloc_onnode(size_t size, int node); +typedef void* tnuma_alloc_local(size_t size); +typedef void tnuma_free(void* start, size_t size); +typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask); +typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask, + unsigned int n); +typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask, + unsigned int n); +typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask); +typedef void tnuma_bitmask_free(struct bitmask *bitmask); +typedef struct bitmask* tnuma_allocate_cpumask(void); +typedef struct bitmask* tnuma_allocate_nodemask(void); +typedef void tnuma_free_cpumask(struct bitmask* bitmask); +typedef void tnuma_free_nodemask(struct bitmask* bitmask); +typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); +typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); +typedef void tnuma_set_localalloc(void); + +// Actual symbols. +static tnuma_available* numa_available; +static tnuma_max_node* numa_max_node; +static tnuma_node_to_cpus* numa_node_to_cpus; +static tnuma_node_size* numa_node_size; +static tnuma_run_on_node* numa_run_on_node; +static tnuma_alloc_onnode* numa_alloc_onnode; +static tnuma_alloc_local* numa_alloc_local; +static tnuma_free* numa_free; +static tnuma_bitmask_clearall* numa_bitmask_clearall; +static tnuma_bitmask_isbitset* numa_bitmask_isbitset; +static tnuma_bitmask_setbit* numa_bitmask_setbit; +static tnuma_bitmask_nbytes* numa_bitmask_nbytes; +static tnuma_bitmask_free* numa_bitmask_free; +static tnuma_allocate_cpumask* numa_allocate_cpumask; +static tnuma_allocate_nodemask* numa_allocate_nodemask; +static tnuma_free_nodemask* numa_free_nodemask; +static tnuma_free_cpumask* numa_free_cpumask; +static tnuma_run_on_node_mask* numa_run_on_node_mask; +static tnuma_set_interleave_mask* numa_set_interleave_mask; +static tnuma_set_localalloc* numa_set_localalloc; + +static void* findLibrary(const char** paths) { + int i = 0; + while (paths[i] != NULL) { + void* lib = dlopen(paths[i], RTLD_LAZY); + if (lib != NULL) { + return lib; + } + ++i; + } + return NULL; +} + +static void numaExit(void) { + if (numa_lib == NULL) { + return; + } + dlclose(numa_lib); + numa_lib = NULL; +} + +static NUMAAPI_Result loadNumaSymbols(void) { + // Prevent multiple initializations. + static bool initialized = false; + static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE; + if (initialized) { + return result; + } + initialized = true; + // Find appropriate .so library. + const char* numa_paths[] = { + "libnuma.so.1", + "libnuma.so", + NULL}; + // Register de-initialization. + const int error = atexit(numaExit); + if (error) { + result = NUMAAPI_ERROR_ATEXIT; + return result; + } + // Load library. + numa_lib = findLibrary(numa_paths); + if (numa_lib == NULL) { + result = NUMAAPI_NOT_AVAILABLE; + return result; + } + // Load symbols. + +#define _LIBRARY_FIND(lib, name) \ + do { \ + name = (t##name *)dlsym(lib, #name); \ + } while (0) +#define NUMA_LIBRARY_FIND(name) _LIBRARY_FIND(numa_lib, name) + + NUMA_LIBRARY_FIND(numa_available); + NUMA_LIBRARY_FIND(numa_max_node); + NUMA_LIBRARY_FIND(numa_node_to_cpus); + NUMA_LIBRARY_FIND(numa_node_size); + NUMA_LIBRARY_FIND(numa_run_on_node); + NUMA_LIBRARY_FIND(numa_alloc_onnode); + NUMA_LIBRARY_FIND(numa_alloc_local); + NUMA_LIBRARY_FIND(numa_free); + NUMA_LIBRARY_FIND(numa_bitmask_clearall); + NUMA_LIBRARY_FIND(numa_bitmask_isbitset); + NUMA_LIBRARY_FIND(numa_bitmask_setbit); + NUMA_LIBRARY_FIND(numa_bitmask_nbytes); + NUMA_LIBRARY_FIND(numa_bitmask_free); + NUMA_LIBRARY_FIND(numa_allocate_cpumask); + NUMA_LIBRARY_FIND(numa_allocate_nodemask); + NUMA_LIBRARY_FIND(numa_free_cpumask); + NUMA_LIBRARY_FIND(numa_free_nodemask); + NUMA_LIBRARY_FIND(numa_run_on_node_mask); + NUMA_LIBRARY_FIND(numa_set_interleave_mask); + NUMA_LIBRARY_FIND(numa_set_localalloc); + +#undef NUMA_LIBRARY_FIND +#undef _LIBRARY_FIND + + result = NUMAAPI_SUCCESS; + return result; +} +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Initialization. + +NUMAAPI_Result numaAPI_Initialize(void) { +#ifdef WITH_DYNLOAD + NUMAAPI_Result result = loadNumaSymbols(); + if (result != NUMAAPI_SUCCESS) { + return result; + } +#endif + if (numa_available() < 0) { + return NUMAAPI_NOT_AVAILABLE; + } + return NUMAAPI_SUCCESS; +} + +//////////////////////////////////////////////////////////////////////////////// +// Topology query. + +int numaAPI_GetNumNodes(void) { + return numa_max_node() + 1; +} + +bool numaAPI_IsNodeAvailable(int node) { + if (numa_node_size(node, NULL) > 0) { + return true; + } + return false; +} + +int numaAPI_GetNumNodeProcessors(int node) { + struct bitmask* cpu_mask = numa_allocate_cpumask(); + numa_node_to_cpus(node, cpu_mask); + const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask); + const unsigned int num_bits = num_bytes *8; + // TODO(sergey): There might be faster way calculating number of set bits. + int num_processors = 0; + for (unsigned int bit = 0; bit < num_bits; ++bit) { + if (numa_bitmask_isbitset(cpu_mask, bit)) { + ++num_processors; + } + } +#ifdef WITH_DYNLOAD + if (numa_free_cpumask != NULL) { + numa_free_cpumask(cpu_mask); + } else { + numa_bitmask_free(cpu_mask); + } +#else + numa_free_cpumask(cpu_mask); +#endif + return num_processors; +} + +//////////////////////////////////////////////////////////////////////////////// +// Affinities. + +bool numaAPI_RunProcessOnNode(int node) { + numaAPI_RunThreadOnNode(node); + return true; +} + +bool numaAPI_RunThreadOnNode(int node) { + // Construct bit mask from node index. + struct bitmask* node_mask = numa_allocate_nodemask(); + numa_bitmask_clearall(node_mask); + numa_bitmask_setbit(node_mask, node); + numa_run_on_node_mask(node_mask); + // TODO(sergey): The following commands are based on x265 code, we might want + // to make those optional, or require to call those explicitly. + // + // Current assumption is that this is similar to SetThreadGroupAffinity(). + numa_set_interleave_mask(node_mask); + numa_set_localalloc(); +#ifdef WITH_DYNLOAD + if (numa_free_nodemask != NULL) { + numa_free_nodemask(node_mask); + } else { + numa_bitmask_free(node_mask); + } +#else + numa_free_nodemask(node_mask); +#endif + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// Memory management. + +void* numaAPI_AllocateOnNode(size_t size, int node) { + return numa_alloc_onnode(size, node); +} + +void* numaAPI_AllocateLocal(size_t size) { + return numa_alloc_local(size); +} + +void numaAPI_Free(void* start, size_t size) { + numa_free(start, size); +} + +#endif // OS_LINUX |