Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/checkpoint-restore/criu.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Yat Sin <david.yatsin@amd.com> 2021-09-29 15:26:04 +0300
committer Andrei Vagin <avagin@gmail.com> 2022-04-29 03:53:52 +0300
commit 2095de9f0369a547a478751506efbc719bb23e80 (patch)
tree ddef5313b20e840241d8cad4091c5ed4ff948dc9 /plugins
parent bd83330095600adf5ea92c610e699c20a9693218 (diff)
criu/plugin: Fix for FDs not allowed to mmap
On newer kernels (> 5.13), the KFD and DRM drivers only allow the /dev/renderD* file descriptors that were used during the CRIU_RESTORE ioctl to be used when calling mmap for the VMAs. During restore, after opening /dev/renderD*, amdgpu_plugin keeps the FDs open and instead returns a copy of the FDs to CRIU. The same FDs are then returned during the UPDATE_VMAMAP hooks so that they can be used by CRIU to call mmap. Duplicated FDs created using dup are references to the same struct file inside the kernel, so they are also allowed to mmap. To prevent the opened FDs inside amdgpu_plugin from conflicting with FDs used by the target restore application, we make sure that the lowest-numbered FD that amdgpu_plugin will use is greater than the highest-numbered FD that is used by the target application. Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/amdgpu/Makefile 14
-rw-r--r-- plugins/amdgpu/amdgpu_plugin.c 56
-rw-r--r-- plugins/amdgpu/amdgpu_plugin_topology.c 20
3 files changed, 55 insertions, 35 deletions
diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile
index 288f2a450..84b9f8714 100644
--- a/plugins/amdgpu/Makefile
+++ b/plugins/amdgpu/Makefile
@@ -1,10 +1,14 @@
PLUGIN_NAME := amdgpu_plugin
PLUGIN_SOBJ := amdgpu_plugin.so
-PLUGIN_INC := ../../../criu/include
-PLUGIN_INC_EXTRA := ../../criu/include
-PLUGIN_INCLUDE := -iquote$(PLUGIN_INC) -iquote$(PLUGIN_INC_EXTRA)
-LIBDRM_INC := -I/usr/include/libdrm
+
+PLUGIN_INCLUDE := -iquote../../../criu/include
+PLUGIN_INCLUDE += -iquote../../criu/include
+PLUGIN_INCLUDE += -iquote../../criu/arch/$(ARCH)/include/
+PLUGIN_INCLUDE += -iquote../../
+
+COMPEL := ../../compel/compel-host
+LIBDRM_INC := -I/usr/include/libdrm
DEPS_OK := amdgpu_plugin.so amdgpu_plugin_test
DEPS_NOK := ;
@@ -24,7 +28,7 @@ criu-amdgpu.pb-c.c: criu-amdgpu.proto
protoc-c --proto_path=. --c_out=. criu-amdgpu.proto
amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_topology.c criu-amdgpu.pb-c.c
- $(CC) $(PLUGIN_CFLAGS) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
+ $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
amdgpu_plugin_clean:
$(call msg-clean, $@)
diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index 7aa5450c3..13fb80aa2 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -27,6 +27,7 @@
#include "kfd_ioctl.h"
#include "xmalloc.h"
#include "criu-log.h"
+#include "files.h"
#include "common/list.h"
#include "amdgpu_plugin_topology.h"
@@ -86,6 +87,8 @@ struct tp_system dest_topology;
struct device_maps checkpoint_maps;
struct device_maps restore_maps;
+extern int fd_next;
+
static LIST_HEAD(update_vma_info_list);
extern bool kfd_fw_version_check;
@@ -98,31 +101,6 @@ extern bool kfd_capability_check;
/**************************************************************************************************/
-int open_drm_render_device(int minor)
-{
- char path[128];
- int fd;
-
- if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) {
- pr_perror("DRM render minor %d out of range [%d, %d]", minor, DRM_FIRST_RENDER_NODE,
- DRM_LAST_RENDER_NODE);
- return -EINVAL;
- }
-
- snprintf(path, sizeof(path), "/dev/dri/renderD%d", minor);
- fd = open(path, O_RDWR | O_CLOEXEC);
- if (fd < 0) {
- if (errno != ENOENT && errno != EPERM) {
- pr_err("Failed to open %s: %s\n", path, strerror(errno));
- if (errno == EACCES)
- pr_err("Check user is in \"video\" group\n");
- }
- return -EBADFD;
- }
-
- return fd;
-}
-
int write_file(const char *file_path, const void *buf, const size_t buf_len)
{
int fd;
@@ -456,6 +434,9 @@ void amdgpu_plugin_fini(int stage, int ret)
{
pr_info("amdgpu_plugin: finished %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
+ if (stage == CR_PLUGIN_STAGE__RESTORE)
+ sys_close_drm_render_devices(&dest_topology);
+
maps_free(&checkpoint_maps);
maps_free(&restore_maps);
@@ -1362,6 +1343,7 @@ exit:
/* Restore all queues */
unpause_process(fd);
+ sys_close_drm_render_devices(&src_topology);
xfree((void *)args.devices);
xfree((void *)args.bos);
xfree((void *)args.priv_data);
@@ -1646,7 +1628,15 @@ int amdgpu_plugin_restore_file(int id)
fail:
criu_render_node__free_unpacked(rd, NULL);
xfree(buf);
- return fd;
+ /*
+ * We need to use the file descriptor used to create the BOs for mmap later, otherwise the kernel DRM
+ * drivers will not allow the mmap. Therefore, we keep a copy of the file descriptor (stored in tp_node)
+ * so that we can return it in amdgpu_plugin_update_vmamap later. Also, CRIU core will dup and close the
+ * returned fd after this function returns, and this will make our fd invalid. So we return a dup'ed
+ * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in
+ * tp_node.
+ */
+ return dup(fd);
}
fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
@@ -1682,6 +1672,18 @@ int amdgpu_plugin_restore_file(int id)
plugin_log_msg("amdgpu_plugin: read image file data\n");
+ /*
+ * Initialize fd_next to be 1 greater than the biggest file descriptor in use by the target restore process.
+ * This way, we know that the file descriptors we store will not conflict with file descriptors inside core
+ * CRIU.
+ */
+ fd_next = find_unused_fd_pid(e->pid);
+ if (fd_next <= 0) {
+ pr_err("Failed to find unused fd (fd:%d)\n", fd_next);
+ ret = -EINVAL;
+ goto exit;
+ }
+
ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology);
if (ret) {
pr_err("Failed to convert stored device information to topology\n");
@@ -1727,8 +1729,6 @@ int amdgpu_plugin_restore_file(int id)
ret = restore_hsakmt_shared_mem(e->shared_mem_size, e->shared_mem_magic);
exit:
- sys_close_drm_render_devices(&dest_topology);
-
if (e)
criu_kfd__free_unpacked(e, NULL);
diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c
index 04b495bd8..1a6ed059f 100644
--- a/plugins/amdgpu/amdgpu_plugin_topology.c
+++ b/plugins/amdgpu/amdgpu_plugin_topology.c
@@ -61,10 +61,17 @@ bool kfd_numa_check = true;
/* Skip capability check */
bool kfd_capability_check = true;
+/*
+ * During dump, we can use any fd value so fd_next is always -1.
+ * During restore, we have to use a fd value that does not conflict with fd values in use by the target restore process.
+ * fd_next is initialized as 1 greater than the highest-numbered file descriptor used by the target restore process.
+ */
+int fd_next = -1;
+
static int open_drm_render_device(int minor)
{
char path[128];
- int fd;
+ int fd, ret_fd;
if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) {
pr_perror("DRM render minor %d out of range [%d, %d]", minor, DRM_FIRST_RENDER_NODE,
@@ -83,7 +90,16 @@ static int open_drm_render_device(int minor)
return -EBADFD;
}
- return fd;
+ if (fd_next < 0)
+ return fd;
+
+ ret_fd = fcntl(fd, F_DUPFD, fd_next++);
+ close(fd);
+
+ if (ret_fd < 0)
+ pr_perror("Failed to duplicate fd for minor:%d (fd_next:%d)", minor, fd_next);
+
+ return ret_fd;
}
static const char *link_type(uint32_t type)