diff options
-rw-r--r-- | criu/include/sysctl.h | 7 | ||||
-rw-r--r-- | criu/ipc_ns.c | 11 | ||||
-rw-r--r-- | criu/sysctl.c | 35 |
3 files changed, 45 insertions, 8 deletions
diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h index ac7924dcd..cb3eba817 100644 --- a/criu/include/sysctl.h +++ b/criu/include/sysctl.h @@ -34,8 +34,9 @@ enum { /* * Some entries might be missing mark them as optional. */ -#define CTL_FLAGS_OPTIONAL 1 -#define CTL_FLAGS_HAS 2 -#define CTL_FLAGS_READ_EIO_SKIP 4 +#define CTL_FLAGS_OPTIONAL 1 +#define CTL_FLAGS_HAS 2 +#define CTL_FLAGS_READ_EIO_SKIP 4 +#define CTL_FLAGS_IPC_EACCES_SKIP 5 #endif /* __CR_SYSCTL_H__ */ diff --git a/criu/ipc_ns.c b/criu/ipc_ns.c index 4fe082fbb..7e95be8c5 100644 --- a/criu/ipc_ns.c +++ b/criu/ipc_ns.c @@ -292,6 +292,8 @@ static void pr_info_ipc_shm(const IpcShmEntry *shm) static int ipc_sysctl_req(IpcVarEntry *e, int op) { + int i; + struct sysctl_req req[] = { { "kernel/sem", e->sem_ctls, CTL_U32A(e->n_sem_ctls) }, { "kernel/msgmax", &e->msg_ctlmax, CTL_U32 }, @@ -332,6 +334,9 @@ static int ipc_sysctl_req(IpcVarEntry *e, int op) if (e->has_shm_next_id) req[nr++] = req[16]; + for (i = 0; i < nr; i++) + req[i].flags = CTL_FLAGS_IPC_EACCES_SKIP; + return sysctl_op(req, nr, op, CLONE_NEWIPC); } @@ -570,7 +575,7 @@ static int prepare_ipc_sem_desc(struct cr_img *img, const IpcSemEntry *sem) { int ret, id; struct sysctl_req req[] = { - { "kernel/sem_next_id", &sem->desc->id, CTL_U32 }, + { "kernel/sem_next_id", &sem->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP }, }; struct semid_ds semid; @@ -703,7 +708,7 @@ static int prepare_ipc_msg_queue(struct cr_img *img, const IpcMsgEntry *msq) { int ret, id; struct sysctl_req req[] = { - { "kernel/msg_next_id", &msq->desc->id, CTL_U32 }, + { "kernel/msg_next_id", &msq->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP }, }; struct msqid_ds msqid; @@ -841,7 +846,7 @@ static int prepare_ipc_shm_seg(struct cr_img *img, const IpcShmEntry *shm) { int ret, id, hugetlb_flag = 0; struct sysctl_req req[] = { - { "kernel/shm_next_id", &shm->desc->id, CTL_U32 }, + { "kernel/shm_next_id", &shm->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP }, }; struct 
shmid_ds shmid; diff --git a/criu/sysctl.c b/criu/sysctl.c index b06688712..99026acf4 100644 --- a/criu/sysctl.c +++ b/criu/sysctl.c @@ -203,6 +203,17 @@ static int __userns_sysctl_op(void *arg, int proc_fd, pid_t pid) * 2. forks a task * 3. setns()es to the UTS/IPC namespace of the caller * 4. write()s to the files and exits + * + * For the IPC namespace, since + * https://github.com/torvalds/linux/commit/5563cabdde, a user with + * sufficient capabilities can open IPC sysctl files and write to them. Later + * commits https://github.com/torvalds/linux/commit/1f5c135ee5 and + * https://github.com/torvalds/linux/commit/0889f44e28 bind the IPC + * namespace at open() time, so the changed value does not depend + * on the IPC namespace at write() time. Also, the permission check + * changed slightly, which makes the above approach unusable, but we + * can simply use the nonuserns version for restoring IPC sysctls, as the + * restored process currently has sufficient capabilities. */ dir = open("/proc/sys", O_RDONLY, O_DIRECTORY); if (dir < 0) { @@ -335,9 +346,12 @@ out: return ret; } -static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op) +/* exit_code = 1 in case nonuserns failed but we want to fall back to the userns approach */ +static int __nonuserns_sysctl_op(struct sysctl_req **orig_req, size_t *orig_nr_req, int op) { int ret, exit_code = -1; + struct sysctl_req *req = *orig_req; + size_t nr_req = *orig_nr_req; while (nr_req--) { int fd; @@ -351,6 +365,14 @@ static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op) req++; continue; } + if (errno == EACCES && (req->flags & CTL_FLAGS_IPC_EACCES_SKIP)) { + /* The remaining requests are restored using the userns approach */ + *orig_req = req; + *orig_nr_req = nr_req + 1; + exit_code = 1; + goto out; + } + pr_perror("Can't open sysctl %s", req->name); goto out; } @@ -404,7 +426,16 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns) * so we can do those in process 
as well. */ if (!ns || ns & CLONE_NEWNET || op == CTL_READ) - return __nonuserns_sysctl_op(req, nr_req, op); + return __nonuserns_sysctl_op(&req, &nr_req, op); + + /* Try to use nonuserns for restoring IPC sysctls and fall back to + * the userns approach when the returned code is 1. + */ + if (ns & CLONE_NEWIPC && op == CTL_WRITE) { + ret = __nonuserns_sysctl_op(&req, &nr_req, op); + if (ret <= 0) + return ret; + } /* * In order to avoid lots of opening of /proc/sys for each struct sysctl_req, |