diff options
author | David Yat Sin <david.yatsin@amd.com> | 2021-12-22 10:48:45 +0300 |
---|---|---|
committer | Andrei Vagin <avagin@gmail.com> | 2022-04-29 03:53:52 +0300 |
commit | a218fe0baa0ec652ab5f226c736f2e2f72b27bdc (patch) | |
tree | 210bc502b730dca63a35221ddb88c4709799fcb8 /plugins | |
parent | ba9c62df24160743d8082cfc5f6af4e92a4e629a (diff) |
criu/plugin: Read and write BO contents in parallel
Implement multi-threaded code to read and write the contents of each GPU
VRAM BO in parallel in order to speed up the dumping process when using
multiple GPUs.
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/amdgpu/Makefile | 3 | ||||
-rw-r--r-- | plugins/amdgpu/amdgpu_plugin.c | 427 | ||||
-rw-r--r-- | plugins/amdgpu/amdgpu_plugin_topology.c | 11 | ||||
-rw-r--r-- | plugins/amdgpu/amdgpu_plugin_topology.h | 1 |
4 files changed, 305 insertions, 137 deletions
diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile index daaab35aa..e2f6b8a69 100644 --- a/plugins/amdgpu/Makefile +++ b/plugins/amdgpu/Makefile @@ -12,6 +12,7 @@ include $(__nmk_dir)msg.mk CC := gcc PLUGIN_CFLAGS := -g -Wall -Werror -D _GNU_SOURCE -shared -nostartfiles -fPIC +PLUGIN_LDFLAGS := -lpthread ifeq ($(CONFIG_AMDGPU),y) all: $(DEPS_OK) @@ -23,7 +24,7 @@ criu-amdgpu.pb-c.c: criu-amdgpu.proto protoc-c --proto_path=. --c_out=. criu-amdgpu.proto amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_topology.c criu-amdgpu.pb-c.c - $(CC) $(PLUGIN_CFLAGS) $^ -o $@ $(PLUGIN_INCLUDE) + $(CC) $(PLUGIN_CFLAGS) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) amdgpu_plugin_clean: $(call msg-clean, $@) diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index ab2cac20c..07c89d92c 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -13,6 +13,7 @@ #include <sys/mman.h> #include <sys/types.h> #include <stdint.h> +#include <pthread.h> #include "criu-plugin.h" #include "plugin.h" @@ -443,6 +444,17 @@ void amdgpu_plugin_fini(int stage, int ret) CR_PLUGIN_REGISTER("amdgpu_plugin", amdgpu_plugin_init, amdgpu_plugin_fini) +struct thread_data { + pthread_t thread; + uint64_t num_of_bos; + uint32_t gpu_id; + pid_t pid; + struct kfd_criu_bo_bucket *bo_buckets; + BoEntry **bo_entries; + int drm_fd; + int ret; +}; + int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf) { struct stat st_kfd, st_dri_min; @@ -480,6 +492,181 @@ int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf) } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, amdgpu_plugin_handle_device_vma) +void *dump_bo_contents(void *_thread_data) +{ + int i, ret = 0; + int num_bos = 0; + struct thread_data *thread_data = (struct thread_data *)_thread_data; + struct kfd_criu_bo_bucket *bo_buckets = thread_data->bo_buckets; + BoEntry **bo_info = thread_data->bo_entries; + char *fname; + int mem_fd = -1; + + 
pr_info("amdgpu_plugin: Thread[0x%x] started\n", thread_data->gpu_id); + + if (asprintf(&fname, PROCPIDMEM, thread_data->pid) < 0) { + pr_perror("failed in asprintf, %s", fname); + ret = -1; + goto exit; + } + mem_fd = open(fname, O_RDONLY); + if (mem_fd < 0) { + pr_perror("Can't open %s for pid %d", fname, thread_data->pid); + free(fname); + ret = -errno; + goto exit; + } + plugin_log_msg("Opened %s file for pid = %d\n", fname, thread_data->pid); + free(fname); + + for (i = 0; i < thread_data->num_of_bos; i++) { + if (bo_buckets[i].gpu_id != thread_data->gpu_id) + continue; + + num_bos++; + if (!(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && + !(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT)) + continue; + + if (bo_info[i]->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) { + void *addr; + + plugin_log_msg("amdgpu_plugin: large bar read possible\n"); + + addr = mmap(NULL, bo_buckets[i].size, PROT_READ, MAP_SHARED, thread_data->drm_fd, + bo_buckets[i].offset); + if (addr == MAP_FAILED) { + pr_perror("amdgpu_plugin: mmap failed"); + ret = -errno; + goto exit; + } + + /* direct memcpy is possible on large bars */ + memcpy(bo_info[i]->rawdata.data, addr, bo_buckets[i].size); + munmap(addr, bo_buckets[i].size); + } else { + size_t bo_size; + plugin_log_msg("Reading BO contents with /proc/pid/mem\n"); + if (lseek(mem_fd, (off_t)bo_buckets[i].addr, SEEK_SET) == -1) { + pr_perror("Can't lseek for BO offset for pid = %d", thread_data->pid); + ret = -errno; + goto exit; + } + + bo_size = read(mem_fd, bo_info[i]->rawdata.data, bo_info[i]->size); + if (bo_size != bo_info[i]->size) { + pr_perror("Can't read buffer"); + ret = -errno; + goto exit; + } + } /* PROCPIDMEM read done */ + } + +exit: + pr_info("amdgpu_plugin: Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret); + + if (mem_fd >= 0) + close(mem_fd); + + thread_data->ret = ret; + return NULL; +}; + +void *restore_bo_contents(void *_thread_data) +{ + int i, ret = 0; + int 
num_bos = 0; + struct thread_data *thread_data = (struct thread_data *)_thread_data; + struct kfd_criu_bo_bucket *bo_buckets = thread_data->bo_buckets; + BoEntry **bo_info = thread_data->bo_entries; + char *fname; + int mem_fd = -1; + + pr_info("amdgpu_plugin: Thread[0x%x] started\n", thread_data->gpu_id); + + if (asprintf(&fname, PROCPIDMEM, thread_data->pid) < 0) { + pr_perror("failed in asprintf, %s", fname); + ret = -1; + goto exit; + } + + mem_fd = open(fname, O_RDWR); + if (mem_fd < 0) { + pr_perror("Can't open %s for pid %d", fname, thread_data->pid); + free(fname); + ret = -errno; + goto exit; + } + plugin_log_msg("Opened %s file for pid = %d\n", fname, thread_data->pid); + free(fname); + + for (i = 0; i < thread_data->num_of_bos; i++) { + void *addr; + + if (bo_buckets[i].gpu_id != thread_data->gpu_id) + continue; + + num_bos++; + + if (!(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && + !(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT)) + continue; + + if (bo_info[i]->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) { + plugin_log_msg("amdgpu_plugin: large bar write possible\n"); + + addr = mmap(NULL, bo_buckets[i].size, PROT_WRITE, MAP_SHARED, thread_data->drm_fd, + bo_buckets[i].restored_offset); + if (addr == MAP_FAILED) { + pr_perror("amdgpu_plugin: mmap failed"); + ret = -errno; + goto exit; + } + + /* direct memcpy is possible on large bars */ + memcpy(addr, (void *)bo_info[i]->rawdata.data, bo_info[i]->size); + munmap(addr, bo_info[i]->size); + } else { + size_t bo_size; + /* Use indirect host data path via /proc/pid/mem on small pci bar GPUs or + * for Buffer Objects that don't have HostAccess permissions. 
+ */ + plugin_log_msg("amdgpu_plugin: using PROCPIDMEM to restore BO contents\n"); + addr = mmap(NULL, bo_info[i]->size, PROT_NONE, MAP_SHARED, thread_data->drm_fd, + bo_buckets[i].restored_offset); + + if (addr == MAP_FAILED) { + pr_perror("amdgpu_plugin: mmap failed"); + ret = -errno; + goto exit; + } + + if (lseek(mem_fd, (off_t)addr, SEEK_SET) == -1) { + pr_perror("Can't lseek for BO offset for pid = %d", thread_data->pid); + ret = -errno; + goto exit; + } + + plugin_log_msg("Attempt writing now\n"); + bo_size = write(mem_fd, bo_info[i]->rawdata.data, bo_info[i]->size); + if (bo_size != bo_info[i]->size) { + pr_perror("Can't write buffer"); + ret = -errno; + goto exit; + } + munmap(addr, bo_info[i]->size); + } + } + +exit: + pr_info("amdgpu_plugin: Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret); + + if (mem_fd >= 0) + close(mem_fd); + thread_data->ret = ret; + return NULL; +}; + static int unpause_process(int fd) { int ret = 0; @@ -539,11 +726,17 @@ exit: static int save_bos(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e) { + struct thread_data *thread_datas; int ret = 0, i; - char *fname; pr_debug("Dumping %d BOs\n", args->num_bos); + thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); + if (!thread_datas) { + ret = -ENOMEM; + goto exit; + } + e->num_of_bos = args->num_bos; ret = allocate_bo_entries(e, e->num_of_bos, bo_buckets); if (ret) @@ -558,66 +751,48 @@ static int save_bos(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_bo boinfo->size = bo_bucket->size; boinfo->offset = bo_bucket->offset; boinfo->alloc_flags = bo_bucket->alloc_flags; + } - if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM || - bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) { - void *addr; - - pr_info("amdgpu_plugin: large bar read possible\n"); - - addr = mmap(NULL, boinfo->size, PROT_READ, MAP_SHARED, fd, 
boinfo->offset); - if (addr == MAP_FAILED) { - pr_perror("amdgpu_plugin: mmap failed\n"); - ret = -errno; - goto exit; - } - - /* direct memcpy is possible on large bars */ - memcpy(boinfo->rawdata.data, addr, boinfo->size); - munmap(addr, boinfo->size); - } else { - size_t bo_size; - int mem_fd; - - pr_info("Now try reading BO contents with /proc/pid/mem\n"); - if (asprintf(&fname, PROCPIDMEM, args->pid) < 0) { - pr_perror("failed in asprintf, %s", fname); - ret = -1; - goto exit; - } - - mem_fd = open(fname, O_RDONLY); - if (mem_fd < 0) { - pr_perror("Can't open %s for pid %d", fname, args->pid); - free(fname); - close(mem_fd); - ret = -1; - goto exit; - } - - pr_info("Opened %s file for pid = %d\n", fname, args->pid); - free(fname); - - if (lseek(mem_fd, (off_t)bo_bucket->addr, SEEK_SET) == -1) { - pr_perror("Can't lseek for bo_offset for pid = %d", args->pid); - close(mem_fd); - ret = -1; - goto exit; - } - - bo_size = read(mem_fd, boinfo->rawdata.data, boinfo->size); - if (bo_size != boinfo->size) { - close(mem_fd); - pr_perror("Can't read buffer"); - ret = -1; - goto exit; - } - close(mem_fd); - } + for (int i = 0; i < e->num_of_gpus; i++) { + struct tp_node *dev; + int ret_thread = 0; + + dev = sys_get_node_by_index(&src_topology, i); + if (!dev) { + ret = -ENODEV; + goto exit; + } + + thread_datas[i].gpu_id = dev->gpu_id; + thread_datas[i].bo_buckets = bo_buckets; + thread_datas[i].bo_entries = e->bo_entries; + thread_datas[i].pid = e->pid; + thread_datas[i].num_of_bos = args->num_bos; + thread_datas[i].drm_fd = node_get_drm_render_device(dev); + if (thread_datas[i].drm_fd < 0) { + ret = thread_datas[i].drm_fd; + goto exit; + } + + ret_thread = pthread_create(&thread_datas[i].thread, NULL, dump_bo_contents, (void *)&thread_datas[i]); + if (ret_thread) { + pr_err("Failed to create thread[%i]\n", i); + ret = -ret_thread; + goto exit; + } + } + + for (int i = 0; i < e->num_of_gpus; i++) { + pthread_join(thread_datas[i].thread, NULL); + pr_info("Thread[0x%x] 
finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); + + if (thread_datas[i].ret) { + ret = thread_datas[i].ret; + goto exit; } } exit: + xfree(thread_datas); pr_info("Dumped bos %s (ret:%d)\n", ret ? "failed" : "ok", ret); return ret; } @@ -922,15 +1097,20 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) return 0; } -static int restore_bo_data(int fd, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e) +static int restore_bo_data(struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e) { - int mem_fd = -1, ret = 0; + struct thread_data *thread_datas; + int thread_i, ret = 0; + + thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); + if (!thread_datas) { + ret = -ENOMEM; + goto exit; + } for (int i = 0; i < e->num_of_bos; i++) { - void *addr; struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i]; struct tp_node *tp_node; - BoEntry *bo_entry = e->bo_entries[i]; if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP | KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)) { @@ -967,86 +1147,61 @@ static int restore_bo_data(int fd, struct kfd_criu_bo_bucket *bo_buckets, CriuKf list_add_tail(&vma_md->list, &update_vma_info_list); } + } - if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { - pr_info("amdgpu_plugin: Trying mmap in stage 2\n"); - if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC || - bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - plugin_log_msg("amdgpu_plugin: large bar write possible\n"); - addr = mmap(NULL, bo_bucket->size, PROT_WRITE, MAP_SHARED, fd, - bo_bucket->restored_offset); - if (addr == MAP_FAILED) { - pr_perror("amdgpu_plugin: mmap failed"); - fd = -EBADFD; - goto exit; - } - - /* direct memcpy is possible on large bars */ - memcpy(addr, (void *)bo_entry->rawdata.data, bo_entry->size); - munmap(addr, bo_entry->size); - } else { - size_t bo_size; - char *fname; - /* Use indirect host data path 
via /proc/pid/mem - * on small pci bar GPUs or for Buffer Objects - * that don't have HostAccess permissions. - */ - plugin_log_msg("amdgpu_plugin: using PROCPIDMEM to restore BO contents\n"); - addr = mmap(NULL, bo_bucket->size, PROT_NONE, MAP_SHARED, fd, - bo_bucket->restored_offset); - if (addr == MAP_FAILED) { - pr_perror("amdgpu_plugin: mmap failed"); - fd = -EBADFD; - goto exit; - } - - if (asprintf(&fname, PROCPIDMEM, e->pid) < 0) { - pr_perror("failed in asprintf, %s", fname); - munmap(addr, bo_bucket->size); - fd = -EBADFD; - goto exit; - } - - mem_fd = open(fname, O_RDWR); - if (mem_fd < 0) { - pr_perror("Can't open %s for pid %d", fname, e->pid); - free(fname); - munmap(addr, bo_bucket->size); - fd = -EBADFD; - goto exit; - } - - plugin_log_msg("Opened %s file for pid = %d", fname, e->pid); - free(fname); - - if (lseek(mem_fd, (off_t)addr, SEEK_SET) == -1) { - pr_perror("Can't lseek for bo_offset for pid = %d", e->pid); - munmap(addr, bo_entry->size); - fd = -EBADFD; - goto exit; - } - - plugin_log_msg("Attempt writing now"); - bo_size = write(mem_fd, bo_entry->rawdata.data, bo_entry->size); - if (bo_size != bo_entry->size) { - pr_perror("Can't write buffer"); - munmap(addr, bo_entry->size); - fd = -EBADFD; - goto exit; - } - munmap(addr, bo_entry->size); - close(mem_fd); - } - } else { - plugin_log_msg("Not a VRAM BO\n"); + thread_i = 0; + for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { + struct tp_node *dev; + int ret_thread = 0; + uint32_t target_gpu_id; + + if (!e->device_entries[i]->gpu_id) continue; + + /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ + target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); + + /* We need the fd for actual_gpu_id */ + dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); + if (!dev) { + pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); + ret = -ENODEV; + goto exit; + } + + thread_datas[thread_i].gpu_id = 
e->device_entries[i]->gpu_id; + thread_datas[thread_i].bo_buckets = bo_buckets; + thread_datas[thread_i].bo_entries = e->bo_entries; + thread_datas[thread_i].pid = e->pid; + thread_datas[thread_i].num_of_bos = e->num_of_bos; + + thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); + if (thread_datas[thread_i].drm_fd < 0) { + ret = -thread_datas[thread_i].drm_fd; + goto exit; } + + ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, + (void *)&thread_datas[thread_i]); + if (ret_thread) { + pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); + ret = -ret_thread; + goto exit; + } + thread_i++; } -exit: - if (mem_fd > 0) - close(mem_fd); + for (int i = 0; i < e->num_of_gpus; i++) { + pthread_join(thread_datas[i].thread, NULL); + pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); + if (thread_datas[i].ret) { + ret = thread_datas[i].ret; + goto exit; + } + } +exit: + xfree(thread_datas); return ret; } @@ -1197,7 +1352,7 @@ int amdgpu_plugin_restore_file(int id) goto exit; } - ret = restore_bo_data(fd, (struct kfd_criu_bo_bucket *)args.bos, e); + ret = restore_bo_data((struct kfd_criu_bo_bucket *)args.bos, e); if (ret) goto exit; diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c index 0bd620c62..04b495bd8 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.c +++ b/plugins/amdgpu/amdgpu_plugin_topology.c @@ -151,6 +151,17 @@ struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const return NULL; } +struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index) +{ + struct tp_node *node; + + list_for_each_entry(node, &sys->nodes, listm_system) { + if (NODE_IS_GPU(node) && index-- == 0) + return node; + } + return NULL; +} + struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32_t gpu_id) { struct tp_node *node; diff --git a/plugins/amdgpu/amdgpu_plugin_topology.h 
b/plugins/amdgpu/amdgpu_plugin_topology.h index c032af696..9d99cda1c 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.h +++ b/plugins/amdgpu/amdgpu_plugin_topology.h @@ -114,6 +114,7 @@ struct tp_iolink *node_add_iolink(struct tp_node *node, uint32_t type, uint32_t struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32_t gpu_id); struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor); +struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index); int node_get_drm_render_device(struct tp_node *node); void sys_close_drm_render_devices(struct tp_system *sys); |