criu/plugin: Implement sDMA based buffer access

AMD Radeon GPUs have dedicated sDMA (system DMA) engine IPs that can be used to speed up read and write operations on VRAM and GTT memory.

Depends on:
* The kernel mode driver (kfd) creating the dmabuf objects for the kfd BOs in both checkpoint and restore operations.
* The libdrm and libdrm_amdgpu libraries.

Suggested-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
author: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> 2021-06-25 18:29:17 +0300
committer: Andrei Vagin <avagin@gmail.com> 2022-04-29 03:53:52 +0300
commit: bd83330095600adf5ea92c610e699c20a9693218 (patch)
tree: 42e6213373d94a049ed3d6d99e8bca120572af4e /plugins
parent: 6d79266229ec4938be6ecce976bcee278a7c6952 (diff)
2 files changed, 411 insertions, 117 deletions
diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile
index 8f192a3a0..288f2a450 100644
--- a/plugins/amdgpu/Makefile
+++ b/plugins/amdgpu/Makefile
@@ -12,7 +12,7 @@ include $(__nmk_dir)msg.mk
 
 CC      		:= gcc
 PLUGIN_CFLAGS  		:= -g -Wall -Werror -D _GNU_SOURCE -shared -nostartfiles -fPIC
-PLUGIN_LDFLAGS		:= -lpthread -lrt
+PLUGIN_LDFLAGS		:= -lpthread -lrt -ldrm -ldrm_amdgpu
 
 ifeq ($(CONFIG_AMDGPU),y)
         all: $(DEPS_OK)
@@ -24,7 +24,7 @@ criu-amdgpu.pb-c.c: criu-amdgpu.proto
 		protoc-c --proto_path=. --c_out=. criu-amdgpu.proto
 
 amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_topology.c criu-amdgpu.pb-c.c
-	$(CC) $(PLUGIN_CFLAGS) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
+	$(CC) $(PLUGIN_CFLAGS) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
 
 amdgpu_plugin_clean:
 	$(call msg-clean, $@)
diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index 67caac2fc..7aa5450c3 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -16,6 +16,10 @@
 #include <pthread.h>
 #include <semaphore.h>
 
+#include <xf86drm.h>
+#include <libdrm/amdgpu.h>
+#include <libdrm/amdgpu_drm.h>
+
 #include "criu-plugin.h"
 #include "plugin.h"
 #include "criu-amdgpu.pb-c.h"
@@ -35,7 +39,7 @@
 #define HSAKMT_SEM	  "hsakmt_semaphore"
 
 #define KFD_IOCTL_MAJOR_VERSION	    1
-#define MIN_KFD_IOCTL_MINOR_VERSION 7
+#define MIN_KFD_IOCTL_MINOR_VERSION 8
 
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE 1
@@ -54,6 +58,18 @@
 	}
 #endif
 
+/*
+ * Build the first dword of an sDMA packet: opcode in bits 0-7, sub-opcode in
+ * bits 8-15 and the 16-bit extra field `e` in bits 16-31.
+ */
+#define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0))
+
+/* Opcode / sub-opcode values placed in the packet header by sdma_copy_bo() */
+#define SDMA_OPCODE_COPY	    1
+#define SDMA_COPY_SUB_OPCODE_LINEAR 0
+/* Filler dword used to pad the indirect buffer */
+#define SDMA_NOP		    0
+/* Upper bound (2 MiB) on the byte count carried by one linear copy packet */
+#define SDMA_LINEAR_COPY_MAX_SIZE   (1ULL << 21)
+
+/* Direction of an sdma_copy_bo() transfer */
+enum sdma_op_type {
+	SDMA_OP_VRAM_READ, /* BO contents -> host buffer (used by dump_bo_contents) */
+	SDMA_OP_VRAM_WRITE, /* host buffer -> BO contents (used by restore_bo_contents) */
+};
+
 struct vma_metadata {
 	struct list_head list;
 	uint64_t old_pgoff;
@@ -497,81 +513,389 @@ int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf)
 }
 CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, amdgpu_plugin_handle_device_vma)
 
+/*
+ * Allocate a buffer object of `size` bytes in heap `domain`, reserve a GPU
+ * virtual address range for it and map the BO there. When `p_cpu_addr` is
+ * non-NULL the BO is additionally mapped for CPU access.
+ *
+ * On success returns 0 and stores the BO handle, VA handle and GPU address
+ * (plus the CPU address, if requested) through the output pointers. On
+ * failure returns the error code of the failing libdrm call after releasing
+ * everything acquired so far.
+ */
+int alloc_and_map(amdgpu_device_handle h_dev, uint64_t size, uint32_t domain, amdgpu_bo_handle *ph_bo,
+		  amdgpu_va_handle *ph_va, uint64_t *p_gpu_addr, void **p_cpu_addr)
+{
+	struct amdgpu_bo_alloc_request request;
+	amdgpu_bo_handle bo;
+	amdgpu_va_handle va;
+	uint64_t gpu_va;
+	void *cpu_va;
+	int ret;
+
+	memset(&request, 0, sizeof(request));
+	request.alloc_size = size;
+	request.phys_alignment = 0x1000;
+	request.preferred_heap = domain;
+	request.flags = 0;
+
+	ret = amdgpu_bo_alloc(h_dev, &request, &bo);
+	if (ret) {
+		pr_perror("failed to alloc BO");
+		return ret;
+	}
+
+	ret = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, size, 0x1000, 0, &gpu_va, &va, 0);
+	if (ret) {
+		pr_perror("failed to alloc VA");
+		goto release_bo;
+	}
+
+	ret = amdgpu_bo_va_op(bo, 0, size, gpu_va, 0, AMDGPU_VA_OP_MAP);
+	if (ret) {
+		pr_perror("failed to GPU map BO");
+		goto release_va;
+	}
+
+	/* CPU mapping is optional: only done when the caller asks for the address */
+	if (p_cpu_addr != NULL) {
+		ret = amdgpu_bo_cpu_map(bo, &cpu_va);
+		if (ret) {
+			pr_perror("failed to CPU map BO");
+			goto unmap_gpu;
+		}
+		*p_cpu_addr = cpu_va;
+	}
+
+	*p_gpu_addr = gpu_va;
+	*ph_va = va;
+	*ph_bo = bo;
+	return 0;
+
+	/* unwind in reverse order of acquisition */
+unmap_gpu:
+	amdgpu_bo_va_op(bo, 0, size, gpu_va, 0, AMDGPU_VA_OP_UNMAP);
+release_va:
+	amdgpu_va_range_free(va);
+release_bo:
+	amdgpu_bo_free(bo);
+	return ret;
+}
+
+/*
+ * Release a BO set up by alloc_and_map(): drop the CPU mapping (only when
+ * `cpu_addr` is non-NULL), unmap it from the GPU address space, free the VA
+ * range and finally free the BO itself. Return codes of the libdrm calls are
+ * not checked.
+ */
+void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va, uint64_t gpu_addr, void *cpu_addr)
+{
+	if (cpu_addr != NULL) {
+		amdgpu_bo_cpu_unmap(h_bo);
+	}
+
+	amdgpu_bo_va_op(h_bo, 0, size, gpu_addr, 0, AMDGPU_VA_OP_UNMAP);
+	amdgpu_va_range_free(h_va);
+	amdgpu_bo_free(h_bo);
+}
+
+/*
+ * Copy the contents of one BO between GPU memory and its host-side image
+ * buffer using the sDMA engine.
+ *
+ * @bo_buckets:    BO bucket array; entry @i supplies the dmabuf fd and size
+ * @bo_info_test:  BO entries; entry @i's rawdata.data is the host-side image
+ * @i:             index of the BO to copy
+ * @h_dev:         libdrm device handle used for all allocations/submissions
+ * @max_copy_size: largest byte count placed in a single copy packet
+ * @type:          SDMA_OP_VRAM_READ copies BO -> rawdata.data,
+ *                 SDMA_OP_VRAM_WRITE copies rawdata.data -> BO
+ *
+ * A page-aligned bounce buffer is wrapped in a userptr BO, the dmabuf is
+ * imported as the other BO, both are mapped into the GPU VA space and a
+ * sequence of linear copy packets is submitted on DMA ring 0 and waited for.
+ *
+ * Returns 0 on success or a non-zero error code. The first error encountered
+ * is the one returned; failures while releasing resources are logged and only
+ * reported when nothing failed earlier.
+ */
+int sdma_copy_bo(struct kfd_criu_bo_bucket *bo_buckets, BoEntry **bo_info_test, int i, amdgpu_device_handle h_dev,
+		 uint64_t max_copy_size, enum sdma_op_type type)
+{
+	uint64_t size, gpu_addr_src, gpu_addr_dest, gpu_addr_ib;
+	uint64_t gpu_addr_src_orig, gpu_addr_dest_orig;
+	amdgpu_va_handle h_va_src, h_va_dest, h_va_ib;
+	amdgpu_bo_handle h_bo_src, h_bo_dest, h_bo_ib;
+	struct amdgpu_bo_import_result res = { 0 };
+	uint64_t copy_size, bytes_remain, j = 0;
+	uint64_t n_packets, ib_size;
+	struct amdgpu_cs_ib_info ib_info;
+	amdgpu_bo_list_handle h_bo_list;
+	struct amdgpu_cs_request cs_req;
+	amdgpu_bo_handle resources[3];
+	struct amdgpu_cs_fence fence;
+	uint32_t expired;
+	amdgpu_context_handle h_ctx;
+	void *userptr = NULL;
+	uint32_t *ib = NULL;
+	int err, ret, shared_fd;
+
+	shared_fd = bo_buckets[i].dmabuf_fd;
+	size = bo_buckets[i].size;
+
+	plugin_log_msg("Enter %s\n", __func__);
+
+	/* prepare src buffer */
+	switch (type) {
+	case SDMA_OP_VRAM_WRITE:
+		/* create the userptr BO and prepare the src buffer */
+		err = posix_memalign(&userptr, sysconf(_SC_PAGE_SIZE), size);
+		if (err) {
+			/* posix_memalign() reports failure via its return value, not errno */
+			errno = err;
+			userptr = NULL;
+		}
+		if (!userptr) {
+			pr_perror("failed to alloc memory for userptr");
+			return -ENOMEM;
+		}
+
+		memcpy(userptr, bo_info_test[i]->rawdata.data, size);
+		plugin_log_msg("data copied to userptr from protobuf buffer\n");
+
+		err = amdgpu_create_bo_from_user_mem(h_dev, userptr, size, &h_bo_src);
+		if (err) {
+			pr_perror("failed to create userptr for sdma");
+			free(userptr);
+			return -EFAULT;
+		}
+
+		break;
+
+	case SDMA_OP_VRAM_READ:
+		err = amdgpu_bo_import(h_dev, amdgpu_bo_handle_type_dma_buf_fd, shared_fd, &res);
+		if (err) {
+			pr_perror("failed to import dmabuf handle from libdrm");
+			return -EFAULT;
+		}
+
+		h_bo_src = res.buf_handle;
+		break;
+
+	default:
+		pr_perror("Invalid sdma operation");
+		return -EINVAL;
+	}
+
+	err = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, size, 0x1000, 0, &gpu_addr_src, &h_va_src, 0);
+	if (err) {
+		pr_perror("failed to alloc VA for src bo");
+		goto err_src_va;
+	}
+	err = amdgpu_bo_va_op(h_bo_src, 0, size, gpu_addr_src, 0, AMDGPU_VA_OP_MAP);
+	if (err) {
+		pr_perror("failed to GPU map the src BO");
+		goto err_src_bo_map;
+	}
+	plugin_log_msg("Source BO: GPU VA: %lx, size: %lx\n", gpu_addr_src, size);
+	/* prepare dest buffer */
+	switch (type) {
+	case SDMA_OP_VRAM_WRITE:
+		err = amdgpu_bo_import(h_dev, amdgpu_bo_handle_type_dma_buf_fd, shared_fd, &res);
+		if (err) {
+			pr_perror("failed to import dmabuf handle from libdrm");
+			goto err_dest_bo_prep;
+		}
+
+		h_bo_dest = res.buf_handle;
+		break;
+
+	case SDMA_OP_VRAM_READ:
+		err = posix_memalign(&userptr, sysconf(_SC_PAGE_SIZE), size);
+		if (err) {
+			/* posix_memalign() reports failure via its return value, not errno */
+			errno = err;
+			userptr = NULL;
+		}
+		if (!userptr) {
+			pr_perror("failed to alloc memory for userptr");
+			/* make sure the failure is reported to the caller */
+			err = -ENOMEM;
+			goto err_dest_bo_prep;
+		}
+		memset(userptr, 0, size);
+		err = amdgpu_create_bo_from_user_mem(h_dev, userptr, size, &h_bo_dest);
+		if (err) {
+			pr_perror("failed to create userptr for sdma");
+			free(userptr);
+			userptr = NULL;
+			goto err_dest_bo_prep;
+		}
+		break;
+
+	default:
+		pr_perror("Invalid sdma operation");
+		err = -EINVAL;
+		goto err_dest_bo_prep;
+	}
+
+	err = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, size, 0x1000, 0, &gpu_addr_dest, &h_va_dest, 0);
+	if (err) {
+		pr_perror("failed to alloc VA for dest bo");
+		goto err_dest_va;
+	}
+	err = amdgpu_bo_va_op(h_bo_dest, 0, size, gpu_addr_dest, 0, AMDGPU_VA_OP_MAP);
+	if (err) {
+		pr_perror("failed to GPU map the dest BO");
+		goto err_dest_bo_map;
+	}
+	plugin_log_msg("Dest BO: GPU VA: %lx, size: %lx\n", gpu_addr_dest, size);
+
+	n_packets = (size + max_copy_size) / max_copy_size;
+	/* prepare ring buffer/indirect buffer for command submission
+	 * each copy packet is 7 dwords; the dword count is rounded up to a
+	 * multiple of 8 so that the NOP padding written below always fits
+	 */
+	ib_size = ((n_packets * 7 + 7) & ~7ULL) * sizeof(uint32_t);
+	err = alloc_and_map(h_dev, ib_size, AMDGPU_GEM_DOMAIN_GTT, &h_bo_ib, &h_va_ib, &gpu_addr_ib,
+			    (void **)&ib);
+	if (err) {
+		pr_perror("failed to allocate and map ib/rb");
+		goto err_ib_gpu_alloc;
+	}
+
+	plugin_log_msg("Indirect BO: GPU VA: %lx, size: %lx\n", gpu_addr_ib, ib_size);
+
+	resources[0] = h_bo_src;
+	resources[1] = h_bo_dest;
+	resources[2] = h_bo_ib;
+	err = amdgpu_bo_list_create(h_dev, 3, resources, NULL, &h_bo_list);
+	if (err) {
+		pr_perror("failed to create BO resources list");
+		goto err_bo_list;
+	}
+
+	memset(&cs_req, 0, sizeof(cs_req));
+	memset(&fence, 0, sizeof(fence));
+	memset(ib, 0, ib_size);
+
+	plugin_log_msg("setting up sdma packets for command submission\n");
+	bytes_remain = size;
+	/* the loop advances the addresses; remember them for the unmap calls */
+	gpu_addr_src_orig = gpu_addr_src;
+	gpu_addr_dest_orig = gpu_addr_dest;
+	while (bytes_remain > 0) {
+		copy_size = min(bytes_remain, max_copy_size);
+
+		ib[j++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+		ib[j++] = copy_size;
+		ib[j++] = 0;
+		ib[j++] = 0xffffffff & gpu_addr_src;
+		ib[j++] = (0xffffffff00000000 & gpu_addr_src) >> 32;
+		ib[j++] = 0xffffffff & gpu_addr_dest;
+		ib[j++] = (0xffffffff00000000 & gpu_addr_dest) >> 32;
+
+		gpu_addr_src += copy_size;
+		gpu_addr_dest += copy_size;
+		bytes_remain -= copy_size;
+	}
+
+	gpu_addr_src = gpu_addr_src_orig;
+	gpu_addr_dest = gpu_addr_dest_orig;
+	plugin_log_msg("pad the IB to align on 8 dw boundary\n");
+	/* pad the IB to the required number of dw with SDMA_NOP */
+	while (j & 7)
+		ib[j++] = SDMA_NOP;
+
+	ib_info.ib_mc_address = gpu_addr_ib;
+	ib_info.size = j;
+
+	cs_req.ip_type = AMDGPU_HW_IP_DMA;
+	/* possible future optimization: may use other rings, info available in
+	 * amdgpu_query_hw_ip_info()
+	 */
+	cs_req.ring = 0;
+	cs_req.number_of_ibs = 1;
+	cs_req.ibs = &ib_info;
+	cs_req.resources = h_bo_list;
+	cs_req.fence_info.handle = NULL;
+
+	plugin_log_msg("create the context\n");
+	err = amdgpu_cs_ctx_create(h_dev, &h_ctx);
+	if (err) {
+		pr_perror("failed to create context for SDMA command submission");
+		goto err_ctx;
+	}
+
+	plugin_log_msg("initiate sdma command submission\n");
+	err = amdgpu_cs_submit(h_ctx, 0, &cs_req, 1);
+	if (err) {
+		pr_perror("failed to submit command for SDMA IB");
+		goto err_cs_submit_ib;
+	}
+
+	fence.context = h_ctx;
+	fence.ip_type = AMDGPU_HW_IP_DMA;
+	fence.ip_instance = 0;
+	fence.ring = 0;
+	fence.fence = cs_req.seq_no;
+	err = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+	if (err) {
+		pr_perror("failed to query fence status");
+		goto err_cs_submit_ib;
+	}
+
+	if (!expired) {
+		pr_err("IB execution did not complete\n");
+		err = -EBUSY;
+		goto err_cs_submit_ib;
+	}
+
+	plugin_log_msg("done querying fence status\n");
+
+	if (type == SDMA_OP_VRAM_READ) {
+		memcpy(bo_info_test[i]->rawdata.data, userptr, size);
+		plugin_log_msg("data copied to protobuf buffer\n");
+	}
+
+	/*
+	 * Common teardown, also reached on success with err == 0. Cleanup
+	 * results go through `ret` so that they never overwrite an earlier
+	 * failure stored in `err`.
+	 */
+err_cs_submit_ib:
+	amdgpu_cs_ctx_free(h_ctx);
+err_ctx:
+	amdgpu_bo_list_destroy(h_bo_list);
+err_bo_list:
+	free_and_unmap(ib_size, h_bo_ib, h_va_ib, gpu_addr_ib, ib);
+err_ib_gpu_alloc:
+	ret = amdgpu_bo_va_op(h_bo_dest, 0, size, gpu_addr_dest, 0, AMDGPU_VA_OP_UNMAP);
+	if (ret) {
+		pr_perror("failed to GPU unmap the dest BO %lx, size = %lx", gpu_addr_dest, size);
+		if (!err)
+			err = ret;
+	}
+err_dest_bo_map:
+	ret = amdgpu_va_range_free(h_va_dest);
+	if (ret) {
+		pr_perror("dest range free failed");
+		if (!err)
+			err = ret;
+	}
+err_dest_va:
+	ret = amdgpu_bo_free(h_bo_dest);
+	if (ret) {
+		pr_perror("dest bo free failed");
+		if (!err)
+			err = ret;
+	}
+
+	/* on a read the bounce buffer backs the dest BO, so it is freed here */
+	if (userptr && (type == SDMA_OP_VRAM_READ)) {
+		free(userptr);
+		userptr = NULL;
+	}
+
+err_dest_bo_prep:
+	ret = amdgpu_bo_va_op(h_bo_src, 0, size, gpu_addr_src, 0, AMDGPU_VA_OP_UNMAP);
+	if (ret) {
+		pr_perror("failed to GPU unmap the src BO %lx, size = %lx", gpu_addr_src, size);
+		if (!err)
+			err = ret;
+	}
+err_src_bo_map:
+	ret = amdgpu_va_range_free(h_va_src);
+	if (ret) {
+		pr_perror("src range free failed");
+		if (!err)
+			err = ret;
+	}
+err_src_va:
+	ret = amdgpu_bo_free(h_bo_src);
+	if (ret) {
+		pr_perror("src bo free failed");
+		if (!err)
+			err = ret;
+	}
+
+	/* on a write the bounce buffer backs the src BO, so it is freed here */
+	if (userptr && (type == SDMA_OP_VRAM_WRITE)) {
+		free(userptr);
+		userptr = NULL;
+	}
+
+	plugin_log_msg("Leaving sdma_copy_bo, err = %d\n", err);
+	return err;
+}
+
 void *dump_bo_contents(void *_thread_data)
 {
-	int i, ret = 0;
-	int num_bos = 0;
 	struct thread_data *thread_data = (struct thread_data *)_thread_data;
 	struct kfd_criu_bo_bucket *bo_buckets = thread_data->bo_buckets;
 	BoEntry **bo_info = thread_data->bo_entries;
-	char *fname;
-	int mem_fd = -1;
+	struct amdgpu_gpu_info gpu_info = { 0 };
+	amdgpu_device_handle h_dev;
+	uint64_t max_copy_size;
+	uint32_t major, minor;
+	int num_bos = 0;
+	int i, ret = 0;
 
 	pr_info("amdgpu_plugin: Thread[0x%x] started\n", thread_data->gpu_id);
 
-	if (asprintf(&fname, PROCPIDMEM, thread_data->pid) < 0) {
-		pr_perror("failed in asprintf, %s", fname);
-		ret = -1;
+	ret = amdgpu_device_initialize(thread_data->drm_fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_perror("failed to initialize device");
 		goto exit;
 	}
-	mem_fd = open(fname, O_RDONLY);
-	if (mem_fd < 0) {
-		pr_perror("Can't open %s for pid %d", fname, thread_data->pid);
-		free(fname);
-		ret = -errno;
+	plugin_log_msg("libdrm initialized successfully\n");
+
+	ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
+	if (ret) {
+		pr_perror("failed to query gpuinfo via libdrm");
 		goto exit;
 	}
-	plugin_log_msg("Opened %s file for pid = %d\n", fname, thread_data->pid);
-	free(fname);
+
+	max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
+									 SDMA_LINEAR_COPY_MAX_SIZE - 1;
 
 	for (i = 0; i < thread_data->num_of_bos; i++) {
 		if (bo_buckets[i].gpu_id != thread_data->gpu_id)
 			continue;
 
-		num_bos++;
-		if (!(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
-		    !(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+		if (!(bo_buckets[i].alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)))
 			continue;
 
-		if (bo_info[i]->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) {
-			void *addr;
-
-			plugin_log_msg("amdgpu_plugin: large bar read possible\n");
-
-			addr = mmap(NULL, bo_buckets[i].size, PROT_READ, MAP_SHARED, thread_data->drm_fd,
-				    bo_buckets[i].offset);
-			if (addr == MAP_FAILED) {
-				pr_perror("amdgpu_plugin: mmap failed");
-				ret = -errno;
-				goto exit;
-			}
-
-			/* direct memcpy is possible on large bars */
-			memcpy(bo_info[i]->rawdata.data, addr, bo_buckets[i].size);
-			munmap(addr, bo_buckets[i].size);
-		} else {
-			size_t bo_size;
-			plugin_log_msg("Reading BO contents with /proc/pid/mem\n");
-			if (lseek(mem_fd, (off_t)bo_buckets[i].addr, SEEK_SET) == -1) {
-				pr_perror("Can't lseek for BO offset for pid = %d", thread_data->pid);
-				ret = -errno;
-				goto exit;
-			}
+		num_bos++;
 
-			bo_size = read(mem_fd, bo_info[i]->rawdata.data, bo_info[i]->size);
-			if (bo_size != bo_info[i]->size) {
-				pr_perror("Can't read buffer");
-				ret = -errno;
-				goto exit;
-			}
-		} /* PROCPIDMEM read done */
+		/* perform sDMA based vram copy */
+		ret = sdma_copy_bo(bo_buckets, bo_info, i, h_dev, max_copy_size, SDMA_OP_VRAM_READ);
+		if (ret) {
+			pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i);
+			break;
+		}
+		plugin_log_msg("** Successfully drained the BO using sDMA: bo_buckets[%d] **\n", i);
 	}
 
 exit:
 	pr_info("amdgpu_plugin: Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret);
 
-	if (mem_fd >= 0)
-		close(mem_fd);
+	amdgpu_device_deinitialize(h_dev);
 
 	thread_data->ret = ret;
 	return NULL;
@@ -579,95 +903,55 @@ exit:
 
 void *restore_bo_contents(void *_thread_data)
 {
-	int i, ret = 0;
-	int num_bos = 0;
 	struct thread_data *thread_data = (struct thread_data *)_thread_data;
 	struct kfd_criu_bo_bucket *bo_buckets = thread_data->bo_buckets;
 	BoEntry **bo_info = thread_data->bo_entries;
-	char *fname;
-	int mem_fd = -1;
+	struct amdgpu_gpu_info gpu_info = { 0 };
+	amdgpu_device_handle h_dev;
+	uint64_t max_copy_size;
+	uint32_t major, minor;
+	int num_bos = 0;
+	int i, ret = 0;
 
 	pr_info("amdgpu_plugin: Thread[0x%x] started\n", thread_data->gpu_id);
 
-	if (asprintf(&fname, PROCPIDMEM, thread_data->pid) < 0) {
-		pr_perror("failed in asprintf, %s", fname);
-		ret = -1;
+	ret = amdgpu_device_initialize(thread_data->drm_fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_perror("failed to initialize device");
 		goto exit;
 	}
+	plugin_log_msg("libdrm initialized successfully\n");
 
-	mem_fd = open(fname, O_RDWR);
-	if (mem_fd < 0) {
-		pr_perror("Can't open %s for pid %d", fname, thread_data->pid);
-		free(fname);
-		ret = -errno;
+	ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
+	if (ret) {
+		pr_perror("failed to query gpuinfo via libdrm");
 		goto exit;
 	}
-	plugin_log_msg("Opened %s file for pid = %d\n", fname, thread_data->pid);
-	free(fname);
 
-	for (i = 0; i < thread_data->num_of_bos; i++) {
-		void *addr;
+	max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
+									 SDMA_LINEAR_COPY_MAX_SIZE - 1;
 
+	for (i = 0; i < thread_data->num_of_bos; i++) {
 		if (bo_buckets[i].gpu_id != thread_data->gpu_id)
 			continue;
 
-		num_bos++;
-
-		if (!(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
-		    !(bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+		if (!(bo_buckets[i].alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)))
 			continue;
 
-		if (bo_info[i]->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) {
-			plugin_log_msg("amdgpu_plugin: large bar write possible\n");
-
-			addr = mmap(NULL, bo_buckets[i].size, PROT_WRITE, MAP_SHARED, thread_data->drm_fd,
-				    bo_buckets[i].restored_offset);
-			if (addr == MAP_FAILED) {
-				pr_perror("amdgpu_plugin: mmap failed");
-				ret = -errno;
-				goto exit;
-			}
-
-			/* direct memcpy is possible on large bars */
-			memcpy(addr, (void *)bo_info[i]->rawdata.data, bo_info[i]->size);
-			munmap(addr, bo_info[i]->size);
-		} else {
-			size_t bo_size;
-			/* Use indirect host data path via /proc/pid/mem on small pci bar GPUs or
-			 * for Buffer Objects that don't have HostAccess permissions.
-			 */
-			plugin_log_msg("amdgpu_plugin: using PROCPIDMEM to restore BO contents\n");
-			addr = mmap(NULL, bo_info[i]->size, PROT_NONE, MAP_SHARED, thread_data->drm_fd,
-				    bo_buckets[i].restored_offset);
-
-			if (addr == MAP_FAILED) {
-				pr_perror("amdgpu_plugin: mmap failed");
-				ret = -errno;
-				goto exit;
-			}
-
-			if (lseek(mem_fd, (off_t)addr, SEEK_SET) == -1) {
-				pr_perror("Can't lseek for BO offset for pid = %d", thread_data->pid);
-				ret = -errno;
-				goto exit;
-			}
+		num_bos++;
 
-			plugin_log_msg("Attempt writing now\n");
-			bo_size = write(mem_fd, bo_info[i]->rawdata.data, bo_info[i]->size);
-			if (bo_size != bo_info[i]->size) {
-				pr_perror("Can't write buffer");
-				ret = -errno;
-				goto exit;
-			}
-			munmap(addr, bo_info[i]->size);
+		ret = sdma_copy_bo(bo_buckets, bo_info, i, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE);
+		if (ret) {
+			pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
+			break;
 		}
+		plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i);
 	}
 
 exit:
 	pr_info("amdgpu_plugin: Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret);
 
-	if (mem_fd >= 0)
-		close(mem_fd);
+	amdgpu_device_deinitialize(h_dev);
 	thread_data->ret = ret;
 	return NULL;
 };
@@ -862,6 +1146,11 @@ static int save_bos(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_bo
 		}
 	}
 exit:
+	for (int i = 0; i < e->num_of_bos; i++) {
+		if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD)
+			close(bo_buckets[i].dmabuf_fd);
+	}
+
 	xfree(thread_datas);
 	pr_info("Dumped bos %s (ret:%d)\n", ret ? "failed" : "ok", ret);
 	return ret;
@@ -1275,6 +1564,11 @@ static int restore_bo_data(struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e)
 		}
 	}
 exit:
+	for (int i = 0; i < e->num_of_bos; i++) {
+		if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD)
+			close(bo_buckets[i].dmabuf_fd);
+	}
+
 	xfree(thread_datas);
 	return ret;
 }
author	Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>	2021-06-25 18:29:17 +0300
committer	Andrei Vagin <avagin@gmail.com>	2022-04-29 03:53:52 +0300
commit	bd83330095600adf5ea92c610e699c20a9693218 (patch)
tree	42e6213373d94a049ed3d6d99e8bca120572af4e /plugins
parent	6d79266229ec4938be6ecce976bcee278a7c6952 (diff)