Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/checkpoint-restore/criu.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@gmail.com>2018-06-09 16:06:54 +0300
committerAndrei Vagin <avagin@virtuozzo.com>2018-06-19 22:51:01 +0300
commit671336f34405d4c960cf4660b6ad121189175e0b (patch)
tree98a0377eeacd6dc1c30b17934a565249d692c801
parent6ed3df46d67fd9c29ec7e9f1d08ccf5236f59994 (diff)
unix: Add support of ghost sockets
Unix sockets may be connected via deleted socket name, moreover the name may be reused (ie same sun_addr but different inodes). To be able to handle them we do a few tricks: - when collecting sockets we figure out if "deleted" mark is present on the socket and if such we order this sockets creation and deletion with mutex, together with adding missing directories, and save this descriptors in fdstore if there are peers connected to - on restore we connect via procfs/fd/X as suggested by Andrew Vagin Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
-rw-r--r--criu/cr-restore.c4
-rw-r--r--criu/include/sockets.h1
-rw-r--r--criu/sk-unix.c415
3 files changed, 348 insertions, 72 deletions
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 3564a9b60..c2354c3d1 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -384,6 +384,10 @@ static int root_prepare_shared(void)
if (ret)
goto err;
+ ret = unix_prepare_root_shared();
+ if (ret)
+ goto err;
+
ret = add_fake_unix_queuers();
if (ret)
goto err;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 1d0e1f293..f2085ace7 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -60,6 +60,7 @@ extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg
extern int unix_sk_id_add(unsigned int ino);
extern int unix_sk_ids_parse(char *optarg);
+extern int unix_prepare_root_shared(void);
extern int do_dump_opt(int sk, int level, int name, void *val, int len);
#define dump_opt(s, l, n, f) do_dump_opt(s, l, n, f, sizeof(*f))
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 88859da02..c14c34589 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -9,6 +9,7 @@
#include <sys/un.h>
#include <stdlib.h>
#include <dlfcn.h>
+#include <libgen.h>
#include "libnetlink.h"
#include "cr_options.h"
@@ -31,6 +32,7 @@
#include "fdstore.h"
#include "fdinfo.h"
#include "kerndat.h"
+#include "rst-malloc.h"
#include "protobuf.h"
#include "images/sk-unix.pb-c.h"
@@ -89,11 +91,21 @@ struct unix_sk_desc {
UnixSkEntry *ue;
};
+/*
+ * The mutex_ghost is accessed from different tasks,
+ * so make sure it is in shared memory.
+ */
+static mutex_t *mutex_ghost;
+
static LIST_HEAD(unix_sockets);
+static LIST_HEAD(unix_ghost_addr);
static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
UnixSkEntry *ue, const struct fd_parms *p);
+struct unix_sk_info;
+static int unlink_sk(struct unix_sk_info *ui);
+
struct unix_sk_listen_icon {
unsigned int peer_ino;
struct unix_sk_desc *sk_desc;
@@ -886,12 +898,15 @@ struct unix_sk_info {
char *name;
char *name_dir;
unsigned flags;
+ int fdstore_id;
struct unix_sk_info *peer;
struct pprep_head peer_resolve; /* XXX : union with the above? */
struct file_desc d;
struct list_head connected; /* List of sockets, connected to me */
struct list_head node; /* To link in peer's connected list */
struct list_head scm_fles;
+ struct list_head ghost_node;
+ size_t ghost_dir_pos;
/*
* For DGRAM sockets with queues, we should only restore the queue
@@ -916,6 +931,7 @@ struct scm_fle {
#define USK_PAIR_MASTER 0x1
#define USK_PAIR_SLAVE 0x2
+#define USK_GHOST_FDSTORE 0x4 /* bound but removed address */
static struct unix_sk_info *find_unix_sk_by_ino(int ino)
{
@@ -1241,6 +1257,7 @@ err:
static int post_open_standalone(struct file_desc *d, int fd)
{
+ int fdstore_fd = -1, procfs_self_dir = -1, len;
struct unix_sk_info *ui;
struct unix_sk_info *peer;
struct sockaddr_un addr;
@@ -1269,22 +1286,49 @@ static int post_open_standalone(struct file_desc *d, int fd)
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
- memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
pr_info("\tConnect %d to %d\n", ui->ue->ino, peer->ue->ino);
- if (prep_unix_sk_cwd(peer, &cwd_fd, NULL, &ns_fd))
+ if (prep_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd))
return -1;
- if (connect(fd, (struct sockaddr *)&addr,
- sizeof(addr.sun_family) +
- peer->ue->name.len) < 0) {
+ if (peer->flags & USK_GHOST_FDSTORE) {
+ procfs_self_dir = open_proc(getpid(), "fd");
+ fdstore_fd = fdstore_get(peer->fdstore_id);
+
+ if (fdstore_fd < 0 || procfs_self_dir < 0)
+ goto err_revert_and_exit;
+
+ /*
+ * WARNING: After this call we rely on revert_unix_sk_cwd
+ * to restore the former directories so that connect
+ * will operate inside proc/$pid/fd/X.
+ */
+ if (fchdir(procfs_self_dir)) {
+ pr_perror("Can't change to procfs");
+ goto err_revert_and_exit;
+ }
+ len = snprintf(addr.sun_path, UNIX_PATH_MAX, "%d", fdstore_fd);
+ } else {
+ memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
+ len = peer->ue->name.len;
+ }
+
+ /*
+ * Make sure the target is not being renamed at the moment
+ * while we're connecting in sake of ghost sockets.
+ */
+ mutex_lock(mutex_ghost);
+ if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
pr_perror("Can't connect %d socket", ui->ue->ino);
- revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
- return -1;
+ goto err_revert_and_exit;
}
+ mutex_unlock(mutex_ghost);
+
ui->is_connected = true;
+ close_safe(&procfs_self_dir);
+ close_safe(&fdstore_fd);
revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
restore_queue:
@@ -1296,48 +1340,217 @@ restore_sk_common:
if (ui->queuer && !ui->queuer->peer_queue_restored)
return 1;
return restore_sk_common(fd, ui);
+
+err_revert_and_exit:
+ close_safe(&procfs_self_dir);
+ close_safe(&fdstore_fd);
+ revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
+ return -1;
}
-static int bind_deleted_unix_sk(int sk, struct unix_sk_info *ui,
- struct sockaddr_un *addr)
+static int restore_file_perms(struct unix_sk_info *ui)
{
- char temp[PATH_MAX];
- int ret;
+ if (ui->ue->file_perms) {
+ FilePermsEntry *perms = ui->ue->file_perms;
+ char fname[PATH_MAX];
+
+ if (ui->ue->name.len >= sizeof(fname)) {
+ pr_err("The file name is too long\n");
+ return -E2BIG;
+ }
+
+ memcpy(fname, ui->name, ui->ue->name.len);
+ fname[ui->ue->name.len] = '\0';
+
+ if (fchownat(AT_FDCWD, fname, perms->uid, perms->gid, 0) < 0) {
+ int errno_cpy = errno;
+ pr_perror("Unable to change file owner and group");
+ return -errno_cpy;
+ }
+
+ if (fchmodat(AT_FDCWD, fname, perms->mode, 0) < 0) {
+ int errno_cpy = errno;
+ pr_perror("Unable to change file mode bits");
+ return -errno_cpy;
+ }
+ }
- pr_info("found duplicate unix socket bound at %s\n", addr->sun_path);
+ return 0;
+}
- ret = snprintf(temp, sizeof(temp),
- "%s-%s-%d", addr->sun_path, "criu-temp", getpid());
- /* this shouldn't happen, since sun_addr is only 108 chars long */
- if (ret < 0 || ret >= sizeof(temp)) {
- pr_err("snprintf of %s failed?\n", addr->sun_path);
+static int keep_deleted(struct unix_sk_info *ui)
+{
+ int fd = open(ui->name, O_PATH);
+ if (fd < 0) {
+ pr_perror("ghost: Can't open id %#x ino %d addr %s",
+ ui->ue->id, ui->ue->ino, ui->name);
return -1;
}
+ ui->fdstore_id = fdstore_add(fd);
+ pr_debug("ghost: id %#x %d fdstore_id %d %s\n",
+ ui->ue->id, ui->ue->ino, ui->fdstore_id, ui->name);
+ close(fd);
+ return ui->fdstore_id;
+}
+
+
+#define UNIX_GHOST_FMT "%s.criu-sk-ghost"
+
+/*
+ * When path where socket lives is deleted, we need to reconstruct
+ * it back up but allow caller to remove it after.
+ */
+static int bind_on_deleted(int sk, struct unix_sk_info *ui)
+{
+ char path[PATH_MAX], path_parked[PATH_MAX], *pos;
+ struct sockaddr_un addr;
+ bool renamed = false;
+ int ret;
+
+ if (ui->ue->name.len >= sizeof(path)) {
+ pr_err("ghost: Too long name for socket id %#x ino %d name %s\n",
+ ui->ue->id, ui->ue->ino, ui->name);
+ return -ENOSPC;
+ }
+
+ memcpy(path, ui->name, ui->ue->name.len);
+ path[ui->ue->name.len] = '\0';
+
+ for (pos = strrchr(path, '/'); pos;
+ pos = strrchr(path, '/')) {
+ *pos = '\0';
+
+ ret = access(path, R_OK | W_OK | X_OK);
+ if (ret == 0) {
+ ui->ghost_dir_pos = pos - path;
+ pr_debug("ghost: socket id %#x ino %d name %s detected F_OK %s\n",
+ ui->ue->id, ui->ue->ino, ui->name, path);
+ break;
+ }
+
+ if (errno != ENOENT) {
+ ret = -errno;
+ pr_perror("ghost: Can't access %s for socket id %#x ino %d name %s",
+ path, ui->ue->id, ui->ue->ino, ui->name);
+ return ret;
+ }
+ }
- ret = rename(addr->sun_path, temp);
+ memcpy(path, ui->name, ui->ue->name.len);
+ path[ui->ue->name.len] = '\0';
+
+ pos = dirname(path);
+ pr_debug("ghost: socket id %#x ino %d name %s creating %s\n",
+ ui->ue->id, ui->ue->ino, ui->name, pos);
+ ret = mkdirpat(AT_FDCWD, pos, 0755);
+ if (ret) {
+ errno = -ret;
+ pr_perror("ghost: Can't create %s", pos);
+ return ret;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
+
+ ret = bind(sk, (struct sockaddr *)&addr,
+ sizeof(addr.sun_family) + ui->ue->name.len);
if (ret < 0) {
- pr_perror("couldn't move socket for binding");
- return -1;
+ /*
+ * In case if there some real living socket
+ * with same name just move it aside for a
+ * while, we will move it back once ghost
+ * socket is processed.
+ */
+ if (errno == EADDRINUSE) {
+ snprintf(path_parked, sizeof(path_parked), UNIX_GHOST_FMT, ui->name);
+ /*
+ * Say previous restore get killed in a middle due to
+ * any reason, be ready the file might already exist,
+ * clean it up.
+ */
+ if (unlinkat(AT_FDCWD, path_parked, 0) == 0)
+ pr_debug("ghost: Unlinked stale socket id %#x ino %d name %s\n",
+ ui->ue->id, ui->ue->ino, path_parked);
+ if (rename(ui->name, path_parked)) {
+ ret = -errno;
+ pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s",
+ ui->ue->id, ui->ue->ino, ui->name, path_parked);
+ return ret;
+ }
+ pr_debug("ghost: id %#x ino %d renamed %s -> %s\n",
+ ui->ue->id, ui->ue->ino, ui->name, path_parked);
+ renamed = true;
+ ret = bind(sk, (struct sockaddr *)&addr,
+ sizeof(addr.sun_family) + ui->ue->name.len);
+ }
+ if (ret < 0) {
+ ret = -errno;
+ pr_perror("ghost: Can't bind on socket id %#x ino %d addr %s",
+ ui->ue->id, ui->ue->ino, ui->name);
+ return ret;
+ }
}
- ret = bind(sk, (struct sockaddr *)addr,
- sizeof(addr->sun_family) + ui->ue->name.len);
+ ret = restore_file_perms(ui);
+ if (ret < 0)
+ return ret;
+
+ ret = keep_deleted(ui);
if (ret < 0) {
- pr_perror("Can't bind socket after move");
- return -1;
+ pr_err("ghost: Can't save socket %#x ino %d addr %s into fdstore\n",
+ ui->ue->id, ui->ue->ino, ui->name);
+ return -EIO;
}
- ret = rename(temp, addr->sun_path);
+ /*
+ * Once everything is ready, just remove the socket from the
+ * filesystem and rename back the original one if it were here.
+ */
+ ret = unlinkat(AT_FDCWD, ui->name, 0);
if (ret < 0) {
- pr_perror("couldn't move socket back");
- return -1;
+ ret = -errno;
+ pr_perror("ghost: Can't unlink socket %#x ino %d addr %s",
+ ui->ue->id, ui->ue->ino, ui->name);
+ return ret;
+ }
+
+ if (renamed) {
+ if (rename(path_parked, ui->name)) {
+ ret = -errno;
+ pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s",
+ ui->ue->id, ui->ue->ino, path_parked, ui->name);
+ return ret;
+ }
+
+ pr_debug("ghost: id %#x ino %d renamed %s -> %s\n",
+ ui->ue->id, ui->ue->ino, path_parked, ui->name);
}
- /* we've handled the deleted-ness of this
- * socket and we don't want to delete it later
- * since it's not /this/ socket.
+ /*
+ * Finally remove directories we've created.
*/
- ui->ue->deleted = false;
+ if (ui->ghost_dir_pos) {
+ char *pos;
+
+ memcpy(path, ui->name, ui->ue->name.len);
+ path[ui->ue->name.len] = '\0';
+
+ for (pos = strrchr(path, '/');
+ pos && (pos - path) > ui->ghost_dir_pos;
+ pos = strrchr(path, '/')) {
+ *pos = '\0';
+ if (rmdir(path)) {
+ ret = - errno;
+ pr_perror("ghost: Can't remove directory %s on id %#x ino %d",
+ path, ui->ue->id, ui->ue->ino);
+ return -1;
+ }
+ pr_debug("ghost: Removed %s on id %#x ino %d\n",
+ path, ui->ue->id, ui->ue->ino);
+ }
+ }
+
return 0;
}
@@ -1365,46 +1578,38 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
addr.sun_family = AF_UNIX;
memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
- if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, NULL, &ns_fd))
+ if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd))
return -1;
- ret = bind(sk, (struct sockaddr *)&addr,
- sizeof(addr.sun_family) + ui->ue->name.len);
- if (ret < 0) {
- if (ui->ue->has_deleted && ui->ue->deleted && errno == EADDRINUSE) {
- if (bind_deleted_unix_sk(sk, ui, &addr))
- goto done;
- } else {
- pr_perror("Can't bind socket");
- goto done;
- }
- }
-
- if (*ui->name && ui->ue->file_perms) {
- FilePermsEntry *perms = ui->ue->file_perms;
- char fname[PATH_MAX];
-
- if (ui->ue->name.len >= sizeof(fname)) {
- pr_err("The file name is too long\n");
- goto done;
- }
-
- memcpy(fname, ui->name, ui->ue->name.len);
- fname[ui->ue->name.len] = '\0';
-
- if (fchownat(AT_FDCWD, fname, perms->uid, perms->gid, 0) == -1) {
- pr_perror("Unable to change file owner and group");
- goto done;
- }
-
- if (fchmodat(AT_FDCWD, fname, perms->mode, 0) == -1) {
- pr_perror("Unable to change file mode bits");
+ /*
+ * Order binding for sake of ghost sockets. We might rename
+ * existing socket to some temp name, bind ghost, delete it,
+ * and finally move the former back, thus while we're doing
+ * this stuff we should not be interruped by connection
+ * from another sockets.
+ *
+ * FIXME: Probably wort make it per address rather for
+ * optimization sake.
+ */
+ mutex_lock(mutex_ghost);
+
+ if (ui->flags & USK_GHOST_FDSTORE) {
+ pr_debug("ghost: bind id %#x ino %d addr %s\n",
+ ui->ue->id, ui->ue->ino, ui->name);
+ ret = bind_on_deleted(sk, ui);
+ if (ret)
+ errno = -ret;
+ } else {
+ pr_debug("bind id %#x ino %d addr %s\n",
+ ui->ue->id, ui->ue->ino, ui->name);
+ ret = bind(sk, (struct sockaddr *)&addr,
+ sizeof(addr.sun_family) + ui->ue->name.len);
+ if (ret == 0 && restore_file_perms(ui))
goto done;
- }
}
-
- if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
- pr_perror("failed to unlink %s", ui->ue->name.data);
+ if (ret < 0) {
+ pr_perror("Can't bind id %#x ino %d addr %s",
+ ui->ue->id, ui->ue->ino, ui->name);
goto done;
}
@@ -1416,6 +1621,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
exit_code = 0;
done:
revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
+ mutex_unlock(mutex_ghost);
return exit_code;
}
@@ -1551,11 +1757,27 @@ static int setup_second_end(int *sks, struct fdinfo_list_entry *second_end)
static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
{
struct unix_sk_info *queuer = ui->queuer;
- struct fdinfo_list_entry *fle;
+ struct unix_sk_info *peer = ui->peer;
+ struct fdinfo_list_entry *fle, *fle_peer;
int sk;
fle = file_master(&ui->d);
pr_info_opening("standalone", ui, fle);
+
+ /*
+ * If we're about to connect to the peer which
+ * has been bound to removed address we should
+ * wait until it is processed and put into fdstore
+ * engine, later we will use the engine to connect
+ * into it in a special way.
+ */
+ if (peer && (peer->flags & USK_GHOST_FDSTORE)) {
+ fle_peer = file_master(&peer->d);
+ if (fle_peer->stage < FLE_OPEN) {
+ return 1;
+ }
+ }
+
if (fle->stage == FLE_OPEN)
return post_open_standalone(&ui->d, fle->fe->fd);
@@ -1758,15 +1980,15 @@ static struct file_desc_ops unix_desc_ops = {
* Make FS clean from sockets we're about to
* restore. See for how we bind them for details
*/
-static void unlink_sk(struct unix_sk_info *ui)
+static int unlink_sk(struct unix_sk_info *ui)
{
- int ret, cwd_fd = -1, root_fd = -1, ns_fd = -1;
+ int ret = 0, cwd_fd = -1, root_fd = -1, ns_fd = -1;
if (!ui->name || ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN))
- return;
+ return 0;
if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, NULL))
- return;
+ return -1;
ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0;
if (ret < 0 && errno != ENOENT) {
@@ -1774,13 +1996,17 @@ static void unlink_sk(struct unix_sk_info *ui)
ui->ue->ino, ui->ue->peer,
ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
ui->name_dir ? ui->name_dir : "-");
+ ret = -errno;
+ goto out;
} else if (ret == 0) {
pr_debug("Unlinked socket %d peer %d (name %s dir %s)\n",
ui->ue->ino, ui->ue->peer,
ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
ui->name_dir ? ui->name_dir : "-");
}
+out:
revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
+ return ret;
}
static void try_resolve_unix_peer(struct unix_sk_info *ui);
@@ -1812,6 +2038,8 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
ui->name_dir = (void *)ue->name_dir;
ui->flags = 0;
+ ui->fdstore_id = -1;
+ ui->ghost_dir_pos = 0;
ui->peer = NULL;
ui->queuer = NULL;
ui->bound = 0;
@@ -1826,6 +2054,40 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
INIT_LIST_HEAD(&ui->connected);
INIT_LIST_HEAD(&ui->node);
INIT_LIST_HEAD(&ui->scm_fles);
+ INIT_LIST_HEAD(&ui->ghost_node);
+
+ return 0;
+}
+
+int unix_prepare_root_shared(void)
+{
+ struct unix_sk_info *ui;
+
+ mutex_ghost = shmalloc(sizeof(*mutex_ghost));
+ if (!mutex_ghost) {
+ pr_err("ghost: Can't allocate mutex\n");
+ return -ENOMEM;
+ }
+ mutex_init(mutex_ghost);
+
+ pr_debug("ghost: Resolving addresses\n");
+
+ list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
+ pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n",
+ ui->ue->id, socket_type_name(ui->ue->type),
+ tcp_state_name(ui->ue->state),
+ ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
+ ui->name);
+
+ /*
+ * Drop any existing trash on the FS and mark the
+ * peer as a ghost one, so we will put it into
+ * fdstore to be able to connect into it even
+ * when the address is removed from the FS.
+ */
+ unlink_sk(ui);
+ ui->flags |= USK_GHOST_FDSTORE;
+ }
return 0;
}
@@ -1873,6 +2135,15 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
add_post_prepare_cb(&ui->peer_resolve);
}
+ if (ui->ue->deleted) {
+ if (!ui->name || !ui->ue->name.len || !ui->name[0]) {
+ pr_err("No name present, ino %d\n", ui->ue->ino);
+ return -1;
+ }
+
+ list_add_tail(&ui->ghost_node, &unix_ghost_addr);
+ }
+
list_add_tail(&ui->list, &unix_sockets);
return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
}