Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/checkpoint-restore/criu.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> 2022-02-21 15:25:21 +0300
committer Andrei Vagin <avagin@gmail.com> 2022-04-29 03:53:52 +0300
commit 2d3354e7b63ea6f9566f0ff3b17e7cb9f282be9e (patch)
tree aa2888080b258d0e66d301ddcd269e2c0e55f619
parent 4c7ece0bb7e407116b8ae42fdfde94c50ed683d3 (diff)
cr-dump: fixup thread IP when inside rseq cs
If we catch the process while it is inside an rseq critical section, we have to handle it properly. From the kernel's point of view, if the process is executing inside the rseq cs and gets a signal, rseq critical section execution will be interrupted and, after the signal handler has run, execution will proceed to the rseq cs abort handler instead of continuing normal rseq cs execution (if RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL isn't set). When CRIU seizes a process, that is the same as getting a signal from the rseq point of view. So we need to fix up the instruction pointer to the rseq cs abort handler address. Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
-rw-r--r-- criu/cr-dump.c 153
-rw-r--r-- criu/include/parasite.h 2
-rw-r--r-- criu/include/pstree.h 1
3 files changed, 153 insertions, 3 deletions
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index c1df3c901..9a7060756 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1034,11 +1034,59 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
return 0;
}
-static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs)
+{
+ int ret;
+ uint64_t addr;
+
+ /* rseq is not registered */
+ if (!rseq->rseq_abi_pointer)
+ return 0;
+
+ /*
+ * We need to cover the case when victim process was inside rseq critical section
+ * at the moment when CRIU came and seized it. We need to determine the borders
+ * of rseq critical section at first. To achieve that we need to access thread
+ * memory and read pointer to struct rseq_cs.
+ *
+ * We have two ways to access thread memory: from the parasite and using ptrace().
+ * But in this case we can't use parasite, because if victim process returns to the
+ * execution, on the kernel side __rseq_handle_notify_resume hook will be called,
+ * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq
+ * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA).
+ */
+ ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct criu_rseq, rseq_cs)),
+ sizeof(uint64_t));
+ if (ret) {
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr,
+ (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct criu_rseq, rseq_cs)),
+ (unsigned long)sizeof(uint64_t));
+ return -1;
+ }
+
+ /* (struct rseq)->rseq_cs is NULL */
+ if (!addr)
+ return 0;
+
+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs));
+ if (ret) {
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
+ (unsigned long)rseq_cs, (unsigned long)addr, (unsigned long)sizeof(struct rseq_cs));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int dump_thread_rseq(struct pstree_item *item, int i)
{
struct __ptrace_rseq_configuration rseq;
RseqEntry *rseqe = NULL;
int ret;
+ CoreEntry *core = item->core[i];
+ RseqEntry **rseqep = &core->thread_core->rseq_entry;
+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
+ pid_t tid = item->threads[i].real;
/*
* If we are here it means that rseq() syscall is supported,
@@ -1076,25 +1124,118 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
rseqe->rseq_abi_size = rseq.rseq_abi_size;
rseqe->signature = rseq.signature;
+ if (read_rseq_cs(tid, &rseq, rseq_cs))
+ goto err;
+
+ /* save rseq entry to the image */
*rseqep = rseqe;
return 0;
+
+err:
+ xfree(rseqe);
+ return -1;
}
static int dump_task_rseq(pid_t pid, struct pstree_item *item)
{
int i;
+ struct rseq_cs *thread_rseq_cs;
/* if rseq() syscall isn't supported then nothing to dump */
if (!kdat.has_rseq)
return 0;
+ thread_rseq_cs = xzalloc(sizeof(*thread_rseq_cs) * item->nr_threads);
+ if (!thread_rseq_cs)
+ return -1;
+
+ dmpi(item)->thread_rseq_cs = thread_rseq_cs;
+
for (i = 0; i < item->nr_threads; i++) {
- if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry))
- return -1;
+ if (dump_thread_rseq(item, i))
+ goto free_rseq;
}
return 0;
+
+free_rseq:
+ xfree(thread_rseq_cs);
+ dmpi(item)->thread_rseq_cs = NULL;
+ return -1;
+}
+
+static bool task_in_rseq(struct rseq_cs *rseq_cs, uint64_t addr)
+{
+ return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset;
+}
+
+static int fixup_thread_rseq(struct pstree_item *item, int i)
+{
+ CoreEntry *core = item->core[i];
+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
+ pid_t tid = item->threads[i].real;
+
+ /* (struct rseq)->rseq_cs is NULL */
+ if (!rseq_cs->start_ip)
+ return 0;
+
+ pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
+ tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
+ rseq_cs->version, (unsigned long)TI_IP(core));
+
+ if (rseq_cs->version != 0) {
+ pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version);
+ return -1;
+ }
+
+ if (task_in_rseq(rseq_cs, TI_IP(core))) {
+ struct pid *tid = &item->threads[i];
+
+ pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
+ tid->real);
+
+ /*
+ * We need to fixup task instruction pointer from
+ * the original one (which lies inside rseq critical section)
+ * to rseq abort handler address.
+ *
+ * It's worth mentioning that we need to fixup IP in CoreEntry
+ * (used when full dump/restore is performed) and also in
+ * the parasite regs storage (used if --leave-running option is used,
+ * or if dump error occurred and process execution is resumed).
+ */
+ TI_IP(core) = rseq_cs->abort_ip;
+
+ if (item->pid->real == tid->real) {
+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
+ } else {
+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
+ }
+ }
+
+ return 0;
+}
+
+static int fixup_task_rseq(pid_t pid, struct pstree_item *item)
+{
+ int ret = 0;
+ int i;
+
+ if (!kdat.has_ptrace_get_rseq_conf)
+ return 0;
+
+ for (i = 0; i < item->nr_threads; i++) {
+ if (fixup_thread_rseq(item, i)) {
+ ret = -1;
+ goto exit;
+ }
+ }
+
+exit:
+ xfree(dmpi(item)->thread_rseq_cs);
+ dmpi(item)->thread_rseq_cs = NULL;
+ return ret;
}
static struct proc_pid_stat pps_buf;
@@ -1404,6 +1545,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
goto err;
}
+ ret = fixup_task_rseq(pid, item);
+ if (ret) {
+ pr_err("Fixup rseq for %d failed %d\n", pid, ret);
+ goto err;
+ }
+
if (fault_injected(FI_DUMP_EARLY)) {
pr_info("fault: CRIU sudden detach\n");
kill(getpid(), SIGKILL);
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
index 5fde80996..d2a06889f 100644
--- a/criu/include/parasite.h
+++ b/criu/include/parasite.h
@@ -10,6 +10,8 @@
#include <time.h>
#include <signal.h>
+#include "linux/rseq.h"
+
#include "image.h"
#include "util-pie.h"
#include "common/lock.h"
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
index c1c79867b..8ae750e1a 100644
--- a/criu/include/pstree.h
+++ b/criu/include/pstree.h
@@ -63,6 +63,7 @@ struct dmp_info {
struct parasite_ctl *parasite_ctl;
struct parasite_thread_ctl **thread_ctls;
uint64_t *thread_sp;
+ struct rseq_cs *thread_rseq_cs;
/*
* Although we don't support dumping different struct creds in general,