diff options
author | Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> | 2022-04-08 23:04:08 +0300 |
---|---|---|
committer | Andrei Vagin <avagin@gmail.com> | 2022-04-29 03:53:52 +0300 |
commit | c5162cef529807f6fa57f23130c72f28aa838aa7 (patch) | |
tree | 72ebdbfa23208e55d47e7f07f730284fff3fb18a | |
parent | f81e3062ca6a4a0376b63b31749773f8f0c09949 (diff) |
rseq: fail dump if rseq is used but host doesn't support get_rseq_conf feature
A lot of kernel versions lack support for ptrace(PTRACE_GET_RSEQ_CONFIGURATION).
But the userspace may be newer (for instance, containers with a fresh Fedora
running on a CentOS 7 host). Consider two scenarios:
- kernel has no ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support
1. there is a process which uses rseq => fail dump
2. there is no process which uses rseq => we can dump without any problems
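But how can we determine whether a process uses rseq without the get_rseq_conf feature?
Let's just try to do an rseq registration from the parasite. If rseq is already
registered, the syscall fails (with EINVAL, because the parasite passes a fresh rseq area rather than the one already registered). If not, the registration succeeds and we immediately unregister it.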
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
-rw-r--r-- | criu/cr-dump.c | 30 | ||||
-rw-r--r-- | criu/include/parasite.h | 7 | ||||
-rw-r--r-- | criu/parasite-syscall.c | 11 | ||||
-rw-r--r-- | criu/pie/parasite.c | 98 |
4 files changed, 146 insertions, 0 deletions
diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 02a9ea4bb..c1df3c901 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -188,6 +188,25 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) return 0; } +static int check_thread_rseq(pid_t tid, const struct parasite_check_rseq *ti_rseq) +{ + if (!kdat.has_rseq || kdat.has_ptrace_get_rseq_conf) + return 0; + + pr_debug("%d has rseq_inited = %d\n", tid, ti_rseq->rseq_inited); + + /* + * We have no kdat.has_ptrace_get_rseq_conf and user + * process has rseq() used, let's fail dump. + */ + if (ti_rseq->rseq_inited) { + pr_err("%d has rseq but kernel lacks get_rseq_conf feature\n", tid); + return -1; + } + + return 0; +} + struct cr_imgset *glob_imgset; static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds) @@ -718,6 +737,17 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread if (!ret) ret = seccomp_dump_thread(pid, tc); + /* + * We are dumping rseq() in the dump_thread_rseq() function, + * *before* processes gets infected (because of ptrace requests + * API restriction). At this point, if the kernel lacks + * kdat.has_ptrace_get_rseq_conf support we have to ensure + * that dumpable processes haven't initialized rseq() or + * fail dump if rseq() was used. 
+ */ + if (!ret) + ret = check_thread_rseq(pid, &ti->rseq); + return ret; } diff --git a/criu/include/parasite.h b/criu/include/parasite.h index 8107aa49d..5fde80996 100644 --- a/criu/include/parasite.h +++ b/criu/include/parasite.h @@ -164,10 +164,17 @@ struct parasite_dump_creds { unsigned int groups[0]; }; +struct parasite_check_rseq { + bool has_rseq; + bool has_ptrace_get_rseq_conf; /* no need to check if supported */ + bool rseq_inited; +}; + struct parasite_dump_thread { unsigned int *tid_addr; pid_t tid; tls_t tls; + struct parasite_check_rseq rseq; stack_t sas; int pdeath_sig; char comm[TASK_COMM_LEN]; diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index 7175adee1..ee4fa86f4 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -132,6 +132,13 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c return ce->groups ? 0 : -ENOMEM; } +static void init_parasite_rseq_arg(struct parasite_check_rseq *rseq) +{ + rseq->has_rseq = kdat.has_rseq; + rseq->has_ptrace_get_rseq_conf = kdat.has_ptrace_get_rseq_conf; + rseq->rseq_inited = false; +} + int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core) { ThreadCoreEntry *tc = core->thread_core; @@ -144,6 +151,8 @@ int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEn pc = args->creds; pc->cap_last_cap = kdat.last_cap; + init_parasite_rseq_arg(&args->rseq); + ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_THREAD, ctl); if (ret < 0) return ret; @@ -197,6 +206,8 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit compel_arch_get_tls_thread(tctl, &args->tls); + init_parasite_rseq_arg(&args->rseq); + ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD); if (ret) { pr_err("Can't init thread in parasite %d\n", pid); diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index e17321894..e7eb1fcb6 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -169,6 +169,7 
@@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args) } static int dump_creds(struct parasite_dump_creds *args); +static int check_rseq(struct parasite_check_rseq *rseq); static int dump_thread_common(struct parasite_dump_thread *ti) { @@ -199,6 +200,12 @@ static int dump_thread_common(struct parasite_dump_thread *ti) goto out; } + ret = check_rseq(&ti->rseq); + if (ret) { + pr_err("Unable to check if rseq() is initialized: %d\n", ret); + goto out; + } + ret = dump_creds(ti->creds); out: return ret; @@ -315,6 +322,97 @@ grps_err: return -1; } +static int check_rseq(struct parasite_check_rseq *rseq) +{ + int ret; + unsigned long rseq_abi_pointer; + unsigned long rseq_abi_size; + uint32_t rseq_signature; + void *addr; + + /* no need to do hacky check if we can get all info from ptrace() */ + if (!rseq->has_rseq || rseq->has_ptrace_get_rseq_conf) + return 0; + + /* + * We need to determine if victim process has rseq() + * initialized, but we have no *any* proper kernel interface + * supported at this point. + * Our plan: + * 1. We know that if we call rseq() syscall and process already + * has current->rseq filled, then we get: + * -EINVAL if current->rseq != rseq || rseq_len != sizeof(*rseq), + * -EPERM if current->rseq_sig != sig), + * -EBUSY if current->rseq == rseq && rseq_len == sizeof(*rseq) && + * current->rseq_sig != sig + * if current->rseq == NULL (rseq() wasn't used) then we go to: + * IS_ALIGNED(rseq ...) check, if we fail it we get -EINVAL and it + * will be hard to distinguish case when rseq() was initialized or not. + * Let's construct arguments payload + * with: + * 1. correct rseq_abi_size + * 2. aligned and correct rseq_abi_pointer + * And see what rseq() return to us. + * If ret value is: + * 0: it means that rseq *wasn't* used and we successfully registered it, + * -EINVAL or : it means that rseq is already initialized, + * so we *have* to dump it. 
But as we have has_ptrace_get_rseq_conf = false, + * we should just fail dump as it's unsafe to skip rseq() dump for processes + * with rseq() initialized. + * -EPERM or -EBUSY: should not happen as we take a fresh memory area for rseq + */ + addr = (void *)sys_mmap(NULL, sizeof(struct criu_rseq), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (addr == MAP_FAILED) { + pr_err("mmap() failed for struct rseq ret = %lx\n", (unsigned long)addr); + return -1; + } + + memset(addr, 0, sizeof(struct criu_rseq)); + + /* sys_mmap returns page aligned addresses */ + rseq_abi_pointer = (unsigned long)addr; + rseq_abi_size = (unsigned long)sizeof(struct criu_rseq); + /* it's not so important to have unique signature for us, + * because rseq_abi_pointer is guaranteed to be unique + */ + rseq_signature = 0x12345612; + + pr_info("\ttrying sys_rseq(%lx, %lx, %x, %x)\n", rseq_abi_pointer, rseq_abi_size, 0, rseq_signature); + ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, 0, rseq_signature); + if (ret) { + if (ret == -EINVAL) { + pr_info("\trseq is initialized in the victim\n"); + rseq->rseq_inited = true; + + ret = 0; + } else { + pr_err("\tunexpected failure of sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer, + rseq_abi_size, 0, rseq_signature, ret); + + ret = -1; + } + } else { + ret = sys_rseq((void *)rseq_abi_pointer, sizeof(struct criu_rseq), RSEQ_FLAG_UNREGISTER, + rseq_signature); + if (ret) { + pr_err("\tfailed to unregister sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer, + rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature, ret); + + ret = -1; + /* we can't do munmap() because rseq is registered and we failed to unregister it */ + goto out_nounmap; + } + + rseq->rseq_inited = false; + ret = 0; + } + + sys_munmap(addr, sizeof(struct criu_rseq)); +out_nounmap: + return ret; +} + static int fill_fds_fown(int fd, struct fd_opts *p) { int flags, ret; |