Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/nccl.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/misc/ibvwrap.cc')
-rw-r--r--src/misc/ibvwrap.cc290
1 files changed, 290 insertions, 0 deletions
diff --git a/src/misc/ibvwrap.cc b/src/misc/ibvwrap.cc
new file mode 100644
index 0000000..f47c141
--- /dev/null
+++ b/src/misc/ibvwrap.cc
@@ -0,0 +1,290 @@
+/*************************************************************************
+ * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * See LICENSE.txt for license information
+ ************************************************************************/
+
+#include "ibvwrap.h"
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <dlfcn.h>
+#include "core.h"
+
+static enum { ibvUninitialized, ibvInitializing, ibvInitialized, ibvError } ibvState = ibvUninitialized;
+
+/*Function Pointers*/
+int (*ibv_internal_fork_init)(void);
+struct ibv_device** (*ibv_internal_get_device_list)(int *num_devices);
+void (*ibv_internal_free_device_list)(struct ibv_device **list);
+const char * (*ibv_internal_get_device_name)(struct ibv_device *device);
+struct ibv_context* (*ibv_internal_open_device)(struct ibv_device* device);
+int (*ibv_internal_close_device)(struct ibv_context *context);
+int (*ibv_internal_get_async_event)(struct ibv_context *context, struct ibv_async_event *event);
+void (*ibv_internal_ack_async_event)(struct ibv_async_event *event);
+int (*ibv_internal_query_device)(struct ibv_context *context, struct ibv_device_attr *device_attr);
+int (*ibv_internal_query_port)(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr);
+int (*ibv_internal_query_gid)(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid);
+int (*ibv_internal_query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr);
+struct ibv_pd * (*ibv_internal_alloc_pd)(struct ibv_context *context);
+int (*ibv_internal_dealloc_pd)(struct ibv_pd *pd);
+struct ibv_mr * (*ibv_internal_reg_mr)(struct ibv_pd *pd, void *addr, size_t length, int access);
+int (*ibv_internal_dereg_mr)(struct ibv_mr *mr);
+struct ibv_cq * (*ibv_internal_create_cq)(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector);
+int (*ibv_internal_destroy_cq)(struct ibv_cq *cq);
+struct ibv_qp * (*ibv_internal_create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr);
+int (*ibv_internal_modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
+int (*ibv_internal_destroy_qp)(struct ibv_qp *qp);
+const char * (*ibv_internal_event_type_str)(enum ibv_event_type event);
+
+// IBVERBS Library versioning
+#define IBVERBS_VERSION "IBVERBS_1.1"
+
+ncclResult_t wrap_ibv_symbols(void) {
+ if (ibvState == ibvInitialized)
+ return ncclSuccess;
+ if (ibvState == ibvError)
+ return ncclSystemError;
+
+ if (__sync_bool_compare_and_swap(&ibvState, ibvUninitialized, ibvInitializing) == false) {
+ // Another thread raced in front of us. Wait for it to be done.
+ while (ibvState == ibvInitializing) pthread_yield();
+ return (ibvState == ibvInitialized) ? ncclSuccess : ncclSystemError;
+ }
+
+ static void* ibvhandle = NULL;
+ void* tmp;
+ void** cast;
+
+ ibvhandle=dlopen("libibverbs.so", RTLD_NOW);
+ if (!ibvhandle) {
+ ibvhandle=dlopen("libibverbs.so.1", RTLD_NOW);
+ if (!ibvhandle) {
+ WARN("Failed to open libibverbs.so[.1]");
+ goto teardown;
+ }
+ }
+
+#define LOAD_SYM(handle, symbol, funcptr) do { \
+ cast = (void**)&funcptr; \
+ tmp = dlvsym(handle, symbol, IBVERBS_VERSION); \
+ if (tmp == NULL) { \
+ WARN("dlvsym failed on %s - %s version %s", symbol, dlerror(), IBVERBS_VERSION); \
+ goto teardown; \
+ } \
+ *cast = tmp; \
+ } while (0)
+
+ LOAD_SYM(ibvhandle, "ibv_get_device_list", ibv_internal_get_device_list);
+ LOAD_SYM(ibvhandle, "ibv_free_device_list", ibv_internal_free_device_list);
+ LOAD_SYM(ibvhandle, "ibv_get_device_name", ibv_internal_get_device_name);
+ LOAD_SYM(ibvhandle, "ibv_open_device", ibv_internal_open_device);
+ LOAD_SYM(ibvhandle, "ibv_close_device", ibv_internal_close_device);
+ LOAD_SYM(ibvhandle, "ibv_get_async_event", ibv_internal_get_async_event);
+ LOAD_SYM(ibvhandle, "ibv_ack_async_event", ibv_internal_ack_async_event);
+ LOAD_SYM(ibvhandle, "ibv_query_device", ibv_internal_query_device);
+ LOAD_SYM(ibvhandle, "ibv_query_port", ibv_internal_query_port);
+ LOAD_SYM(ibvhandle, "ibv_query_gid", ibv_internal_query_gid);
+ LOAD_SYM(ibvhandle, "ibv_query_qp", ibv_internal_query_qp);
+ LOAD_SYM(ibvhandle, "ibv_alloc_pd", ibv_internal_alloc_pd);
+ LOAD_SYM(ibvhandle, "ibv_dealloc_pd", ibv_internal_dealloc_pd);
+ LOAD_SYM(ibvhandle, "ibv_reg_mr", ibv_internal_reg_mr);
+ LOAD_SYM(ibvhandle, "ibv_dereg_mr", ibv_internal_dereg_mr);
+ LOAD_SYM(ibvhandle, "ibv_create_cq", ibv_internal_create_cq);
+ LOAD_SYM(ibvhandle, "ibv_destroy_cq", ibv_internal_destroy_cq);
+ LOAD_SYM(ibvhandle, "ibv_create_qp", ibv_internal_create_qp);
+ LOAD_SYM(ibvhandle, "ibv_modify_qp", ibv_internal_modify_qp);
+ LOAD_SYM(ibvhandle, "ibv_destroy_qp", ibv_internal_destroy_qp);
+ LOAD_SYM(ibvhandle, "ibv_fork_init", ibv_internal_fork_init);
+ LOAD_SYM(ibvhandle, "ibv_event_type_str", ibv_internal_event_type_str);
+
+ ibvState = ibvInitialized;
+ return ncclSuccess;
+
+teardown:
+ ibv_internal_get_device_list = NULL;
+ ibv_internal_free_device_list = NULL;
+ ibv_internal_get_device_name = NULL;
+ ibv_internal_open_device = NULL;
+ ibv_internal_close_device = NULL;
+ ibv_internal_get_async_event = NULL;
+ ibv_internal_ack_async_event = NULL;
+ ibv_internal_query_device = NULL;
+ ibv_internal_query_port = NULL;
+ ibv_internal_query_gid = NULL;
+ ibv_internal_query_qp = NULL;
+ ibv_internal_alloc_pd = NULL;
+ ibv_internal_dealloc_pd = NULL;
+ ibv_internal_reg_mr = NULL;
+ ibv_internal_dereg_mr = NULL;
+ ibv_internal_create_cq = NULL;
+ ibv_internal_destroy_cq = NULL;
+ ibv_internal_create_qp = NULL;
+ ibv_internal_modify_qp = NULL;
+ ibv_internal_destroy_qp = NULL;
+ ibv_internal_fork_init = NULL;
+ ibv_internal_event_type_str = NULL;
+
+ if (ibvhandle != NULL) dlclose(ibvhandle);
+ ibvState = ibvError;
+ return ncclSystemError;
+}
+
+#define IBV_PTR_CHECK_ERRNO(name_internal, call, retval, error_retval, name) \
+ if (name_internal == NULL) { \
+ WARN("lib wrapper not initialized."); \
+ return ncclInternalError; \
+ } \
+ retval = call; \
+ if (retval == error_retval) { \
+ WARN("Call to " name " failed with error %s", strerror(errno)); \
+ return ncclSystemError; \
+ } \
+ return ncclSuccess;
+
+#define IBV_PTR_CHECK(name_internal, call, retval, error_retval, name) \
+ if (name_internal == NULL) { \
+ WARN("lib wrapper not initialized."); \
+ return ncclInternalError; \
+ } \
+ retval = call; \
+ if (retval == error_retval) { \
+ WARN("Call to " name " failed"); \
+ return ncclSystemError; \
+ } \
+ return ncclSuccess;
+
+#define IBV_INT_CHECK_RET_ERRNO(name_internal, call, success_retval, name) \
+ if (name_internal == NULL) { \
+ WARN("lib wrapper not initialized."); \
+ return ncclInternalError; \
+ } \
+ int ret = call; \
+ if (ret != success_retval) { \
+ WARN("Call to " name " failed with error %s", strerror(ret)); \
+ return ncclSystemError; \
+ } \
+ return ncclSuccess;
+
+#define IBV_INT_CHECK(name_internal, call, error_retval, name) \
+ if (name_internal == NULL) { \
+ WARN("lib wrapper not initialized."); \
+ return ncclInternalError; \
+ } \
+ int ret = call; \
+ if (ret == error_retval) { \
+ WARN("Call to " name " failed"); \
+ return ncclSystemError; \
+ } \
+ return ncclSuccess;
+
+#define IBV_PASSTHRU(name_internal, call) \
+ if (name_internal == NULL) { \
+ WARN("lib wrapper not initialized."); \
+ return ncclInternalError; \
+ } \
+ call; \
+ return ncclSuccess;
+
+ncclResult_t wrap_ibv_fork_init() {
+ IBV_INT_CHECK(ibv_internal_fork_init, ibv_internal_fork_init(), -1, "ibv_fork_init");
+}
+
+ncclResult_t wrap_ibv_get_device_list(struct ibv_device ***ret, int *num_devices) {
+ *ret = ibv_internal_get_device_list(num_devices);
+ if (*ret == NULL) *num_devices = 0;
+ return ncclSuccess;
+}
+
+ncclResult_t wrap_ibv_free_device_list(struct ibv_device **list) {
+ IBV_PASSTHRU(ibv_internal_free_device_list, ibv_internal_free_device_list(list));
+}
+
+const char *wrap_ibv_get_device_name(struct ibv_device *device) {
+ if (ibv_internal_get_device_name == NULL) {
+ WARN("lib wrapper not initialized.");
+ exit(-1);
+ }
+ return ibv_internal_get_device_name(device);
+}
+
+ncclResult_t wrap_ibv_open_device(struct ibv_context **ret, struct ibv_device *device) { /*returns 0 on success, -1 on failure*/
+ IBV_PTR_CHECK(ibv_internal_open_device, ibv_internal_open_device(device), *ret, NULL, "ibv_open_device");
+}
+
+ncclResult_t wrap_ibv_close_device(struct ibv_context *context) { /*returns 0 on success, -1 on failure*/
+ IBV_INT_CHECK(ibv_internal_close_device, ibv_internal_close_device(context), -1, "ibv_close_device");
+}
+
+ncclResult_t wrap_ibv_get_async_event(struct ibv_context *context, struct ibv_async_event *event) { /*returns 0 on success, and -1 on error*/
+ IBV_INT_CHECK(ibv_internal_get_async_event, ibv_internal_get_async_event(context, event), -1, "ibv_get_async_event");
+}
+
+ncclResult_t wrap_ibv_ack_async_event(struct ibv_async_event *event) {
+ IBV_PASSTHRU(ibv_internal_ack_async_event, ibv_internal_ack_async_event(event));
+}
+
+ncclResult_t wrap_ibv_query_device(struct ibv_context *context, struct ibv_device_attr *device_attr) { /*returns 0 on success, or the value of errno on failure (which indicates the failure reason)*/
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_query_device, ibv_internal_query_device(context, device_attr), 0, "ibv_query_device");
+}
+
+ncclResult_t wrap_ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) { /*returns 0 on success, or the value of errno on failure (which indicates the failure reason)*/
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_query_port, ibv_internal_query_port(context, port_num, port_attr), 0, "ibv_query_port");
+}
+
+ncclResult_t wrap_ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid) {
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_query_gid, ibv_internal_query_gid(context, port_num, index, gid), 0, "ibv_query_gid");
+}
+
+ncclResult_t wrap_ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) {
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_query_qp, ibv_internal_query_qp(qp, attr, attr_mask, init_attr), 0, "ibv_query_qp");
+}
+
+ncclResult_t wrap_ibv_alloc_pd(struct ibv_pd **ret, struct ibv_context *context) {
+ IBV_PTR_CHECK(ibv_internal_alloc_pd, ibv_internal_alloc_pd(context), *ret, NULL, "ibv_alloc_pd");
+}
+
+ncclResult_t wrap_ibv_dealloc_pd(struct ibv_pd *pd) { /*returns 0 on success, or the value of errno on failure (which indicates the failure reason)*/
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_dealloc_pd, ibv_internal_dealloc_pd(pd), 0, "ibv_dealloc_pd");
+}
+
+ncclResult_t wrap_ibv_reg_mr(struct ibv_mr **ret, struct ibv_pd *pd, void *addr, size_t length, int access) {
+ IBV_PTR_CHECK(ibv_internal_reg_mr, ibv_internal_reg_mr(pd, addr, length, access), *ret, NULL, "ibv_reg_mr");
+}
+
+struct ibv_mr * wrap_direct_ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) {
+ if (ibv_internal_reg_mr == NULL) {
+ WARN("lib wrapper not initialized.");
+ return NULL;
+ }
+ return ibv_internal_reg_mr(pd, addr, length, access);
+}
+
+ncclResult_t wrap_ibv_dereg_mr(struct ibv_mr *mr) { /*returns 0 on success, or the value of errno on failure (which indicates the failure reason)*/
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_dereg_mr, ibv_internal_dereg_mr(mr), 0, "ibv_dereg_mr");
+}
+
+ncclResult_t wrap_ibv_create_cq(struct ibv_cq **ret, struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector) {
+ IBV_PTR_CHECK(ibv_internal_create_cq, ibv_internal_create_cq(context, cqe, cq_context, channel, comp_vector), *ret, NULL, "ibv_create_cq");
+}
+
+ncclResult_t wrap_ibv_destroy_cq(struct ibv_cq *cq) {
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_destroy_cq, ibv_internal_destroy_cq(cq), 0, "ibv_destroy_cq");
+}
+
+ncclResult_t wrap_ibv_destroy_qp(struct ibv_qp *qp) {
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_destroy_qp, ibv_internal_destroy_qp(qp), 0, "ibv_destroy_qp");
+}
+
+ncclResult_t wrap_ibv_create_qp(struct ibv_qp **ret, struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) {
+ IBV_PTR_CHECK(ibv_internal_create_qp, ibv_internal_create_qp(pd, qp_init_attr), *ret, NULL, "ibv_create_qp");
+}
+
+ncclResult_t wrap_ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) { /*returns 0 on success, or the value of errno on failure (which indicates the failure reason)*/
+ IBV_INT_CHECK_RET_ERRNO(ibv_internal_modify_qp, ibv_internal_modify_qp(qp, attr, attr_mask), 0, "ibv_modify_qp");
+}
+
+ncclResult_t wrap_ibv_event_type_str(char **ret, enum ibv_event_type event) {
+ *ret = (char *) ibv_internal_event_type_str(event);
+ return ncclSuccess;
+}