Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/checkpoint-restore/criu.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBui Quang Minh <minhquangbui99@gmail.com>2022-10-23 10:16:21 +0300
committerAndrei Vagin <avagin@gmail.com>2022-11-02 07:31:58 +0300
commit5bcde6f5a48b45014a92f7b695e860b7005a585c (patch)
tree75180ce3a60e6d33e56ca68614111f41a7c8a57e
parent18c6426eaeebc5fe7d0f9ca0acb592a3ec828b0c (diff)
ipc_sysctl: Prioritize restoring IPC variables using non usernsd approach
Since commit https://github.com/torvalds/linux/commit/5563cabdde, a user with sufficient capabilities can open IPC sysctl files and write to them. Therefore, we no longer need the usernsd process in the outer user namespace to help with that. Furthermore, two later commits, https://github.com/torvalds/linux/commit/1f5c135ee5 and https://github.com/torvalds/linux/commit/0889f44e28, bind the IPC namespace to the opened file descriptor of an IPC sysctl file at open() time, so the changed value no longer depends on the IPC namespace at write() time. This breaks the current usernsd approach. So, we prioritize opening/writing IPC sysctl files directly in the context of the restored process, without usernsd's help. This approach succeeds on newer kernels since the restored process has enough capabilities at this restore stage. On older kernels, the open() fails and we fall back to the usernsd approach. Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
-rw-r--r--criu/include/sysctl.h7
-rw-r--r--criu/ipc_ns.c11
-rw-r--r--criu/sysctl.c35
3 files changed, 45 insertions, 8 deletions
diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h
index ac7924dcd..cb3eba817 100644
--- a/criu/include/sysctl.h
+++ b/criu/include/sysctl.h
@@ -34,8 +34,9 @@ enum {
/*
* Some entries might be missing mark them as optional.
*/
-#define CTL_FLAGS_OPTIONAL 1
-#define CTL_FLAGS_HAS 2
-#define CTL_FLAGS_READ_EIO_SKIP 4
+#define CTL_FLAGS_OPTIONAL 1
+#define CTL_FLAGS_HAS 2
+#define CTL_FLAGS_READ_EIO_SKIP 4
+#define CTL_FLAGS_IPC_EACCES_SKIP 5
#endif /* __CR_SYSCTL_H__ */
diff --git a/criu/ipc_ns.c b/criu/ipc_ns.c
index 4fe082fbb..7e95be8c5 100644
--- a/criu/ipc_ns.c
+++ b/criu/ipc_ns.c
@@ -292,6 +292,8 @@ static void pr_info_ipc_shm(const IpcShmEntry *shm)
static int ipc_sysctl_req(IpcVarEntry *e, int op)
{
+ int i;
+
struct sysctl_req req[] = {
{ "kernel/sem", e->sem_ctls, CTL_U32A(e->n_sem_ctls) },
{ "kernel/msgmax", &e->msg_ctlmax, CTL_U32 },
@@ -332,6 +334,9 @@ static int ipc_sysctl_req(IpcVarEntry *e, int op)
if (e->has_shm_next_id)
req[nr++] = req[16];
+ for (i = 0; i < nr; i++)
+ req[i].flags = CTL_FLAGS_IPC_EACCES_SKIP;
+
return sysctl_op(req, nr, op, CLONE_NEWIPC);
}
@@ -570,7 +575,7 @@ static int prepare_ipc_sem_desc(struct cr_img *img, const IpcSemEntry *sem)
{
int ret, id;
struct sysctl_req req[] = {
- { "kernel/sem_next_id", &sem->desc->id, CTL_U32 },
+ { "kernel/sem_next_id", &sem->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
};
struct semid_ds semid;
@@ -703,7 +708,7 @@ static int prepare_ipc_msg_queue(struct cr_img *img, const IpcMsgEntry *msq)
{
int ret, id;
struct sysctl_req req[] = {
- { "kernel/msg_next_id", &msq->desc->id, CTL_U32 },
+ { "kernel/msg_next_id", &msq->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
};
struct msqid_ds msqid;
@@ -841,7 +846,7 @@ static int prepare_ipc_shm_seg(struct cr_img *img, const IpcShmEntry *shm)
{
int ret, id, hugetlb_flag = 0;
struct sysctl_req req[] = {
- { "kernel/shm_next_id", &shm->desc->id, CTL_U32 },
+ { "kernel/shm_next_id", &shm->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
};
struct shmid_ds shmid;
diff --git a/criu/sysctl.c b/criu/sysctl.c
index b06688712..99026acf4 100644
--- a/criu/sysctl.c
+++ b/criu/sysctl.c
@@ -203,6 +203,17 @@ static int __userns_sysctl_op(void *arg, int proc_fd, pid_t pid)
* 2. forks a task
* 3. setns()es to the UTS/IPC namespace of the caller
* 4. write()s to the files and exits
+ *
+ * For the IPC namespace, since
+ * https://github.com/torvalds/linux/commit/5563cabdde, a user with
+ * sufficient capabilities can open IPC sysctl files and write to them.
+ * Later commits https://github.com/torvalds/linux/commit/1f5c135ee5 and
+ * https://github.com/torvalds/linux/commit/0889f44e28 bind the IPC
+ * namespace at open() time, so the changed value does not depend
+ * on the IPC namespace at write() time. The permission check also
+ * changed slightly, which makes the above approach unusable, but we
+ * can simply use the nonuserns version for restoring IPC sysctls, as
+ * the restored process currently has enough capabilities.
*/
dir = open("/proc/sys", O_RDONLY, O_DIRECTORY);
if (dir < 0) {
@@ -335,9 +346,12 @@ out:
return ret;
}
-static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
+/* exit_code = 1 if the nonuserns approach failed but we want to fall back to the userns approach */
+static int __nonuserns_sysctl_op(struct sysctl_req **orig_req, size_t *orig_nr_req, int op)
{
int ret, exit_code = -1;
+ struct sysctl_req *req = *orig_req;
+ size_t nr_req = *orig_nr_req;
while (nr_req--) {
int fd;
@@ -351,6 +365,14 @@ static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
req++;
continue;
}
+ if (errno == EACCES && (req->flags & CTL_FLAGS_IPC_EACCES_SKIP)) {
+ /* The remaining requests are restored using userns approach */
+ *orig_req = req;
+ *orig_nr_req = nr_req + 1;
+ exit_code = 1;
+ goto out;
+ }
+
pr_perror("Can't open sysctl %s", req->name);
goto out;
}
@@ -404,7 +426,16 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
* so we can do those in process as well.
*/
if (!ns || ns & CLONE_NEWNET || op == CTL_READ)
- return __nonuserns_sysctl_op(req, nr_req, op);
+ return __nonuserns_sysctl_op(&req, &nr_req, op);
+
+ /* Try to use nonuserns for restoring IPC sysctls and fall back to
+ * the userns approach when the returned code is 1.
+ */
+ if (ns & CLONE_NEWIPC && op == CTL_WRITE) {
+ ret = __nonuserns_sysctl_op(&req, &nr_req, op);
+ if (ret <= 0)
+ return ret;
+ }
/*
* In order to avoid lots of opening of /proc/sys for each struct sysctl_req,