diff options
Diffstat (limited to 'compel/arch')
13 files changed, 1329 insertions, 8 deletions
diff --git a/compel/arch/aarch64/src/lib/include/ptrace.h b/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h index e18454df2..5f090490d 100644 --- a/compel/arch/aarch64/src/lib/include/ptrace.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h @@ -1,5 +1,5 @@ -#ifndef __COMPEL_PTRACE_H__ -#define __COMPEL_PTRACE_H__ +#ifndef __COMPEL_BREAKPOINTS_H__ +#define __COMPEL_BREAKPOINTS_H__ #define ARCH_SI_TRAP TRAP_BRKPT static inline int ptrace_set_breakpoint(pid_t pid, void *addr) diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h new file mode 100644 index 000000000..714881c57 --- /dev/null +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h @@ -0,0 +1,57 @@ +#ifndef UAPI_COMPEL_ASM_TYPES_H__ +#define UAPI_COMPEL_ASM_TYPES_H__ + +#include <stdint.h> +#include <signal.h> +#include <sys/mman.h> +#include <asm/ptrace.h> +#include "common/page.h" + +#define SIGMAX 64 +#define SIGMAX_OLD 31 + +/* + * Copied from the Linux kernel header arch/arm64/include/uapi/asm/ptrace.h + * + * A thread ARM CPU context + */ + +typedef struct user_pt_regs user_regs_struct_t; +typedef struct user_fpsimd_state user_fpregs_struct_t; + +#define REG_RES(r) ((uint64_t)(r).regs[0]) +#define REG_IP(r) ((uint64_t)(r).pc) +#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8]) + +#define user_regs_native(pregs) true + +/* + * Range for task size calculated from the following Linux kernel files: + * arch/arm64/include/asm/memory.h + * arch/arm64/Kconfig + * + * TODO: handle 32 bit tasks + */ +#define TASK_SIZE_MIN (1UL << 39) +#define TASK_SIZE_MAX (1UL << 48) + +static inline unsigned long task_size(void) +{ + unsigned long task_size; + + for (task_size = TASK_SIZE_MIN; task_size < TASK_SIZE_MAX; task_size <<= 1) + if (munmap((void *)task_size, page_size())) + break; + return task_size; +} + +#define AT_VECTOR_SIZE 40 + +typedef uint64_t auxv_t; +typedef uint64_t tls_t; + +#define 
ARCH_SI_TRAP TRAP_BRKPT + +#define __NR(syscall, compat) __NR_##syscall + +#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/aarch64/src/lib/infect.c b/compel/arch/aarch64/src/lib/infect.c new file mode 100644 index 000000000..fceea2816 --- /dev/null +++ b/compel/arch/aarch64/src/lib/infect.c @@ -0,0 +1,111 @@ +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <linux/elf.h> +#include <compel/plugins/std/syscall-codes.h> +#include "uapi/compel/asm/infect-types.h" +#include "log.h" +#include "errno.h" +#include "infect.h" +#include "infect-priv.h" + +/* + * Injected syscall instruction + */ +const char code_syscall[] = { + 0x01, 0x00, 0x00, 0xd4, /* SVC #0 */ + 0x00, 0x00, 0x20, 0xd4 /* BRK #0 */ +}; + +static const int +code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long)); + +static inline void __always_unused __check_code_syscall(void) +{ + BUILD_BUG_ON(code_syscall_aligned != BUILTIN_SYSCALL_SIZE); + BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); +} + +int compel_get_task_regs(pid_t pid, user_regs_struct_t regs, save_regs_t save, void *arg) +{ + struct iovec iov; + user_fpregs_struct_t fpsimd; + int ret; + + pr_info("Dumping GP/FPU registers for %d\n", pid); + + iov.iov_base = &regs; + iov.iov_len = sizeof(user_regs_struct_t); + if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))) { + pr_perror("Failed to obtain CPU registers for %d", pid); + goto err; + } + + iov.iov_base = &fpsimd; + iov.iov_len = sizeof(fpsimd); + if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) { + pr_perror("Failed to obtain FPU registers for %d", pid); + goto err; + } + + ret = save(arg, &regs, &fpsimd); +err: + return ret; +} + +int compel_syscall(struct parasite_ctl *ctl, int nr, unsigned long *ret, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6) +{ + user_regs_struct_t regs = ctl->orig.regs; + int err; + + regs.regs[8] = (unsigned 
long)nr; + regs.regs[0] = arg1; + regs.regs[1] = arg2; + regs.regs[2] = arg3; + regs.regs[3] = arg4; + regs.regs[4] = arg5; + regs.regs[5] = arg6; + regs.regs[6] = 0; + regs.regs[7] = 0; + + err = compel_execute_syscall(ctl, &regs, code_syscall); + + *ret = regs.regs[0]; + return err; +} + +void *remote_mmap(struct parasite_ctl *ctl, + void *addr, size_t length, int prot, + int flags, int fd, off_t offset) +{ + unsigned long map; + int err; + + err = compel_syscall(ctl, __NR_mmap, &map, + (unsigned long)addr, length, prot, flags, fd, offset); + if (err < 0 || (long)map < 0) + map = 0; + + return (void *)map; +} + +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) +{ + regs->pc = new_ip; + if (stack) + regs->sp = (unsigned long)stack; +} + +bool arch_can_dump_task(struct parasite_ctl *ctl) +{ + /* + * TODO: Add proper check here + */ + return true; +} diff --git a/compel/arch/arm/src/lib/include/ptrace.h b/compel/arch/arm/src/lib/include/uapi/asm/breakpoints.h index e18454df2..5f090490d 100644 --- a/compel/arch/arm/src/lib/include/ptrace.h +++ b/compel/arch/arm/src/lib/include/uapi/asm/breakpoints.h @@ -1,5 +1,5 @@ -#ifndef __COMPEL_PTRACE_H__ -#define __COMPEL_PTRACE_H__ +#ifndef __COMPEL_BREAKPOINTS_H__ +#define __COMPEL_BREAKPOINTS_H__ #define ARCH_SI_TRAP TRAP_BRKPT static inline int ptrace_set_breakpoint(pid_t pid, void *addr) diff --git a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h new file mode 100644 index 000000000..9c2092e5d --- /dev/null +++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h @@ -0,0 +1,91 @@ +#ifndef UAPI_COMPEL_ASM_TYPES_H__ +#define UAPI_COMPEL_ASM_TYPES_H__ + +#include <stdint.h> +#include <sys/mman.h> +#include "common/page.h" + +#define SIGMAX 64 +#define SIGMAX_OLD 31 + +/* + * Copied from the Linux kernel header arch/arm/include/asm/ptrace.h + * + * A thread ARM CPU context + */ + +typedef struct { + long uregs[18]; +} 
user_regs_struct_t; + +typedef struct user_vfp user_fpregs_struct_t; + +#define ARM_cpsr uregs[16] +#define ARM_pc uregs[15] +#define ARM_lr uregs[14] +#define ARM_sp uregs[13] +#define ARM_ip uregs[12] +#define ARM_fp uregs[11] +#define ARM_r10 uregs[10] +#define ARM_r9 uregs[9] +#define ARM_r8 uregs[8] +#define ARM_r7 uregs[7] +#define ARM_r6 uregs[6] +#define ARM_r5 uregs[5] +#define ARM_r4 uregs[4] +#define ARM_r3 uregs[3] +#define ARM_r2 uregs[2] +#define ARM_r1 uregs[1] +#define ARM_r0 uregs[0] +#define ARM_ORIG_r0 uregs[17] + + +/* Copied from arch/arm/include/asm/user.h */ + +struct user_vfp { + unsigned long long fpregs[32]; + unsigned long fpscr; +}; + +struct user_vfp_exc { + unsigned long fpexc; + unsigned long fpinst; + unsigned long fpinst2; +}; + +#define REG_RES(regs) ((regs).ARM_r0) +#define REG_IP(regs) ((regs).ARM_pc) +#define REG_SYSCALL_NR(regs) ((regs).ARM_r7) + +#define user_regs_native(pregs) true + +/* + * Range for task size calculated from the following Linux kernel files: + * arch/arm/include/asm/memory.h + * arch/arm/Kconfig (PAGE_OFFSET values in Memory split section) + */ +#define TASK_SIZE_MIN 0x3f000000 +#define TASK_SIZE_MAX 0xbf000000 +#define SZ_1G 0x40000000 + +static inline unsigned long task_size(void) +{ + unsigned long task_size; + + for (task_size = TASK_SIZE_MIN; task_size < TASK_SIZE_MAX; task_size += SZ_1G) + if (munmap((void *)task_size, page_size())) + break; + + return task_size; +} + +#define AT_VECTOR_SIZE 40 + +typedef uint32_t auxv_t; +typedef uint32_t tls_t; + +#define ARCH_SI_TRAP TRAP_BRKPT + +#define __NR(syscall, compat) __NR_##syscall + +#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h b/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h index 65ae8a8b9..3e7bc0104 100644 --- a/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h @@ -1,6 +1,8 @@ #ifndef UAPI_COMPEL_ASM_SIGFRAME_H__ #define 
UAPI_COMPEL_ASM_SIGFRAME_H__ +#include <compel/asm/infect-types.h> + /* Copied from the Linux kernel header arch/arm/include/asm/sigcontext.h */ struct rt_sigcontext { diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c new file mode 100644 index 000000000..b440ff736 --- /dev/null +++ b/compel/arch/arm/src/lib/infect.c @@ -0,0 +1,122 @@ +#include <sys/ptrace.h> +#include <sys/types.h> +#include <compel/plugins/std/syscall-codes.h> +#include <compel/asm/processor-flags.h> +#include "uapi/compel/asm/infect-types.h" +#include "log.h" +#include "errno.h" +#include "infect.h" +#include "infect-priv.h" + +/* + * Injected syscall instruction + */ +const char code_syscall[] = { + 0x00, 0x00, 0x00, 0xef, /* SVC #0 */ + 0xf0, 0x01, 0xf0, 0xe7 /* UDF #32 */ +}; + +static const int +code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long)); + +static inline __always_unused void __check_code_syscall(void) +{ + BUILD_BUG_ON(code_syscall_aligned != BUILTIN_SYSCALL_SIZE); + BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); +} + +#define PTRACE_GETVFPREGS 27 +int compel_get_task_regs(pid_t pid, user_regs_struct_t regs, save_regs_t save, void *arg) +{ + user_fpregs_struct_t vfp; + int ret = -1; + + pr_info("Dumping GP/FPU registers for %d\n", pid); + + if (ptrace(PTRACE_GETVFPREGS, pid, NULL, &vfp)) { + pr_perror("Can't obtain FPU registers for %d", pid); + goto err; + } + + /* Did we come from a system call? 
*/ + if ((int)regs.ARM_ORIG_r0 >= 0) { + /* Restart the system call */ + switch ((long)(int)regs.ARM_r0) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs.ARM_r0 = regs.ARM_ORIG_r0; + regs.ARM_pc -= 4; + break; + case -ERESTART_RESTARTBLOCK: + regs.ARM_r0 = __NR_restart_syscall; + regs.ARM_pc -= 4; + break; + } + } + + ret = save(arg, &regs, &vfp); +err: + return ret; +} + +int compel_syscall(struct parasite_ctl *ctl, int nr, unsigned long *ret, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6) +{ + user_regs_struct_t regs = ctl->orig.regs; + int err; + + regs.ARM_r7 = (unsigned long)nr; + regs.ARM_r0 = arg1; + regs.ARM_r1 = arg2; + regs.ARM_r2 = arg3; + regs.ARM_r3 = arg4; + regs.ARM_r4 = arg5; + regs.ARM_r5 = arg6; + + err = compel_execute_syscall(ctl, &regs, code_syscall); + + *ret = regs.ARM_r0; + return err; +} + +void *remote_mmap(struct parasite_ctl *ctl, + void *addr, size_t length, int prot, + int flags, int fd, off_t offset) +{ + unsigned long map; + int err; + + if (offset & ~PAGE_MASK) + return 0; + + err = compel_syscall(ctl, __NR_mmap2, &map, + (unsigned long)addr, length, prot, flags, fd, offset >> 12); + if (err < 0 || map > ctl->ictx.task_size) + map = 0; + + return (void *)map; +} + +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) +{ + regs->ARM_pc = new_ip; + if (stack) + regs->ARM_sp = (unsigned long)stack; + + /* Make sure flags are in known state */ + regs->ARM_cpsr &= PSR_f | PSR_s | PSR_x | MODE32_BIT; +} + +bool arch_can_dump_task(struct parasite_ctl *ctl) +{ + /* + * TODO: Add proper check here + */ + return true; +} diff --git a/compel/arch/ppc64/src/lib/include/ptrace.h b/compel/arch/ppc64/src/lib/include/uapi/asm/breakpoints.h index 0274c2675..1ab89af76 100644 --- a/compel/arch/ppc64/src/lib/include/ptrace.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/breakpoints.h @@ -1,5 +1,5 @@ -#ifndef 
__COMPEL_PTRACE_H__ -#define __COMPEL_PTRACE_H__ +#ifndef __COMPEL_BREAKPOINTS_H__ +#define __COMPEL_BREAKPOINTS_H__ #define ARCH_SI_TRAP TRAP_BRKPT static inline int ptrace_set_breakpoint(pid_t pid, void *addr) diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h new file mode 100644 index 000000000..f243def73 --- /dev/null +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h @@ -0,0 +1,110 @@ +#ifndef UAPI_COMPEL_ASM_TYPES_H__ +#define UAPI_COMPEL_ASM_TYPES_H__ + +#include <stdbool.h> +#include <signal.h> +#include <stdint.h> + +#define SIGMAX_OLD 31 +#define SIGMAX 64 + +/* + * Copied from kernel header arch/powerpc/include/uapi/asm/ptrace.h + */ +typedef struct { + unsigned long gpr[32]; + unsigned long nip; + unsigned long msr; + unsigned long orig_gpr3; /* Used for restarting system calls */ + unsigned long ctr; + unsigned long link; + unsigned long xer; + unsigned long ccr; + unsigned long softe; /* Soft enabled/disabled */ + unsigned long trap; /* Reason for being here */ + /* + * N.B. for critical exceptions on 4xx, the dar and dsisr + * fields are overloaded to hold srr0 and srr1. 
+ */ + unsigned long dar; /* Fault registers */ + unsigned long dsisr; /* on 4xx/Book-E used for ESR */ + unsigned long result; /* Result of a system call */ +} user_regs_struct_t; + +#define NVSXREG 32 + +#define USER_FPREGS_FL_FP 0x00001 +#define USER_FPREGS_FL_ALTIVEC 0x00002 +#define USER_FPREGS_FL_VSX 0x00004 +#define USER_FPREGS_FL_TM 0x00010 + +#ifndef NT_PPC_TM_SPR +# define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */ +# define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */ +# define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */ +# define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */ +# define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */ +#endif + +#define MSR_TMA (1UL<<34) /* bit 29 Trans Mem state: Transactional */ +#define MSR_TMS (1UL<<33) /* bit 30 Trans Mem state: Suspended */ +#define MSR_TM (1UL<<32) /* bit 31 Trans Mem Available */ +#define MSR_VEC (1UL<<25) +#define MSR_VSX (1UL<<23) + +#define MSR_TM_ACTIVE(x) ((((x) & MSR_TM) && ((x)&(MSR_TMA|MSR_TMS))) != 0) + +typedef struct { + uint64_t fpregs[NFPREG]; + __vector128 vrregs[NVRREG]; + uint64_t vsxregs[NVSXREG]; + + int flags; + struct tm_regs { + int flags; + struct { + uint64_t tfhar, texasr, tfiar; + } tm_spr_regs; + user_regs_struct_t regs; + uint64_t fpregs[NFPREG]; + __vector128 vrregs[NVRREG]; + uint64_t vsxregs[NVSXREG]; + } tm; +} user_fpregs_struct_t; + +#define REG_RES(regs) ((uint64_t)(regs).gpr[3]) +#define REG_IP(regs) ((uint64_t)(regs).nip) +#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0]) + +#define user_regs_native(pregs) true + +/* + * Copied from the following kernel header files : + * include/linux/auxvec.h + * arch/powerpc/include/uapi/asm/auxvec.h + * include/linux/mm_types.h + */ +#define AT_VECTOR_SIZE_BASE 20 +#define AT_VECTOR_SIZE_ARCH 6 +#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) + +typedef uint64_t auxv_t; + +/* Not used but the structure parasite_dump_thread needs a tls_t 
field */ +typedef uint64_t tls_t; + +/* + * Copied for the Linux kernel arch/powerpc/include/asm/processor.h + * + * NOTE: 32bit tasks are not supported. + */ +#define TASK_SIZE_USER64 (0x0000400000000000UL) +#define TASK_SIZE TASK_SIZE_USER64 + +static inline unsigned long task_size(void) { return TASK_SIZE; } + +#define ARCH_SI_TRAP TRAP_BRKPT + +#define __NR(syscall, compat) __NR_##syscall + +#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c new file mode 100644 index 000000000..959098b8c --- /dev/null +++ b/compel/arch/ppc64/src/lib/infect.c @@ -0,0 +1,318 @@ +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <stdint.h> +#include <errno.h> +#include <compel/plugins/std/syscall-codes.h> +#include "uapi/compel/asm/infect-types.h" +#include "errno.h" +#include "log.h" +#include "common/bug.h" +#include "infect.h" +#include "infect-priv.h" + +#ifndef NT_PPC_TM_SPR +#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */ +#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */ +#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */ +#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */ +#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */ +#endif + +/* + * Injected syscall instruction + */ +const uint32_t code_syscall[] = { + 0x44000002, /* sc */ + 0x0fe00000 /* twi 31,0,0 */ +}; + +static inline void __check_code_syscall(void) +{ + BUILD_BUG_ON(sizeof(code_syscall) != BUILTIN_SYSCALL_SIZE); + BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); +} + +/* This is the layout of the POWER7 VSX registers and the way they + * overlap with the existing FPR and VMX registers. 
+ * + * VSR doubleword 0 VSR doubleword 1 + * ---------------------------------------------------------------- + * VSR[0] | FPR[0] | | + * ---------------------------------------------------------------- + * VSR[1] | FPR[1] | | + * ---------------------------------------------------------------- + * | ... | | + * ---------------------------------------------------------------- + * VSR[30] | FPR[30] | | + * ---------------------------------------------------------------- + * VSR[31] | FPR[31] | | + * ---------------------------------------------------------------- + * VSR[32] | VR[0] | + * ---------------------------------------------------------------- + * VSR[33] | VR[1] | + * ---------------------------------------------------------------- + * | ... | + * ---------------------------------------------------------------- + * VSR[62] | VR[30] | + * ---------------------------------------------------------------- + * VSR[63] | VR[31] | + * ---------------------------------------------------------------- + * + * PTRACE_GETFPREGS returns FPR[0..31] + FPSCR + * PTRACE_GETVRREGS returns VR[0..31] + VSCR + VRSAVE + * PTRACE_GETVSRREGS returns VSR[0..31] + * + * PTRACE_GETVSRREGS and PTRACE_GETFPREGS are required since we need + * to save FPSCR too. + * + * There 32 VSX double word registers to save since the 32 first VSX double + * word registers are saved through FPR[0..32] and the remaining registers + * are saved when saving the Altivec registers VR[0..32]. + */ + +static int get_fpu_regs(pid_t pid, user_fpregs_struct_t *fp) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, (void *)&fp->fpregs) < 0) { + pr_perror("Couldn't get floating-point registers"); + return -1; + } + fp->flags |= USER_FPREGS_FL_FP; + + return 0; +} + +static int get_altivec_regs(pid_t pid, user_fpregs_struct_t *fp) +{ + if (ptrace(PTRACE_GETVRREGS, pid, 0, (void*)&fp->vrregs) < 0) { + /* PTRACE_GETVRREGS returns EIO if Altivec is not supported. + * This should not happen if msr_vec is set. 
*/ + if (errno != EIO) { + pr_perror("Couldn't get Altivec registers"); + return -1; + } + pr_debug("Altivec not supported\n"); + } + else { + pr_debug("Dumping Altivec registers\n"); + fp->flags |= USER_FPREGS_FL_ALTIVEC; + } + return 0; +} + +/* + * Since the FPR[0-31] is stored in the first double word of VSR[0-31] and + * FPR are saved through the FP state, there is no need to save the upper part + * of the first 32 VSX registers. + * Furthermore, the 32 last VSX registers are also the 32 Altivec registers + * already saved, so no need to save them. + * As a consequence, only the doubleword 1 of the 32 first VSX registers have + * to be saved (the ones returned by PTRACE_GETVSRREGS). + */ +static int get_vsx_regs(pid_t pid, user_fpregs_struct_t *fp) +{ + if (ptrace(PTRACE_GETVSRREGS, pid, 0, (void*)fp->vsxregs) < 0) { + /* + * EIO is returned in the case PTRACE_GETVSRREGS is not + * supported. + */ + if (errno != EIO) { + pr_perror("Couldn't get VSX registers"); + return -1; + } + pr_debug("VSX register's dump not supported.\n"); + } + else { + pr_debug("Dumping VSX registers\n"); + fp->flags |= USER_FPREGS_FL_VSX; + } + return 0; +} + +static int get_tm_regs(pid_t pid, user_fpregs_struct_t *fpregs) +{ + struct iovec iov; + + pr_debug("Dumping TM registers\n"); + +#define TM_REQUIRED 0 +#define TM_OPTIONAL 1 +#define PTRACE_GET_TM(s,n,c,u) do { \ + iov.iov_base = &s; \ + iov.iov_len = sizeof(s); \ + if (ptrace(PTRACE_GETREGSET, pid, c, &iov)) { \ + if (!u || errno != EIO) { \ + pr_perror("Couldn't get TM "n); \ + pr_err("Your kernel seems to not support the " \ + "new TM ptrace API (>= 4.8)\n"); \ + goto out_free; \ + } \ + pr_debug("TM "n" not supported.\n"); \ + iov.iov_base = NULL; \ + } \ +} while(0) + + /* Get special registers */ + PTRACE_GET_TM(fpregs->tm.tm_spr_regs, "SPR", NT_PPC_TM_SPR, TM_REQUIRED); + + /* Get checkpointed regular registers */ + PTRACE_GET_TM(fpregs->tm.regs, "GPR", NT_PPC_TM_CGPR, TM_REQUIRED); + + /* Get checkpointed FP 
registers */ + PTRACE_GET_TM(fpregs->tm.fpregs, "FPR", NT_PPC_TM_CFPR, TM_OPTIONAL); + if (iov.iov_base) + fpregs->tm.flags |= USER_FPREGS_FL_FP; + + /* Get checkpointed VMX (Altivec) registers */ + PTRACE_GET_TM(fpregs->tm.vrregs, "VMX", NT_PPC_TM_CVMX, TM_OPTIONAL); + if (iov.iov_base) + fpregs->tm.flags |= USER_FPREGS_FL_ALTIVEC; + + /* Get checkpointed VSX registers */ + PTRACE_GET_TM(fpregs->tm.vsxregs, "VSX", NT_PPC_TM_CVSX, TM_OPTIONAL); + if (iov.iov_base) + fpregs->tm.flags |= USER_FPREGS_FL_VSX; + + return 0; + +out_free: + return -1; /* still failing the checkpoint */ +} + +static int __get_task_regs(pid_t pid, user_regs_struct_t *regs, + user_fpregs_struct_t *fpregs) +{ + pr_info("Dumping GP/FPU registers for %d\n", pid); + + /* + * This is inspired by kernel function check_syscall_restart in + * arch/powerpc/kernel/signal.c + */ +#ifndef TRAP +#define TRAP(r) ((r).trap & ~0xF) +#endif + + if (TRAP(*regs) == 0x0C00 && regs->ccr & 0x10000000) { + /* Restart the system call */ + switch (regs->gpr[3]) { + case ERESTARTNOHAND: + case ERESTARTSYS: + case ERESTARTNOINTR: + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; + break; + case ERESTART_RESTARTBLOCK: + regs->gpr[0] = __NR_restart_syscall; + regs->nip -= 4; + break; + } + } + + /* Resetting trap since we are now coming from user space. */ + regs->trap = 0; + + fpregs->flags = 0; + /* + * Check for Transactional Memory operation in progress. + * Until we have support of TM register's state through the ptrace API, + * we can't checkpoint process with TM operation in progress (almost + * impossible) or suspended (easy to get). + */ + if (MSR_TM_ACTIVE(regs->msr)) { + pr_debug("Task %d has %s TM operation at 0x%lx\n", + pid, + (regs->msr & MSR_TMS) ? 
"a suspended" : "an active", + regs->nip); + if (get_tm_regs(pid, fpregs)) + return -1; + fpregs->flags = USER_FPREGS_FL_TM; + } + + if (get_fpu_regs(pid, fpregs)) + return -1; + + if (get_altivec_regs(pid, fpregs)) + return -1; + + if (fpregs->flags & USER_FPREGS_FL_ALTIVEC) { + /* + * Save the VSX registers if Altivec registers are supported + */ + if (get_vsx_regs(pid, fpregs)) + return -1; + } + return 0; +} + +int compel_get_task_regs(pid_t pid, user_regs_struct_t regs, save_regs_t save, void *arg) +{ + user_fpregs_struct_t fpregs; + int ret; + + ret = __get_task_regs(pid, &regs, &fpregs); + if (ret) + return ret; + + return save(arg, &regs, &fpregs); +} + +int compel_syscall(struct parasite_ctl *ctl, int nr, unsigned long *ret, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6) +{ + user_regs_struct_t regs = ctl->orig.regs; + int err; + + regs.gpr[0] = (unsigned long)nr; + regs.gpr[3] = arg1; + regs.gpr[4] = arg2; + regs.gpr[5] = arg3; + regs.gpr[6] = arg4; + regs.gpr[7] = arg5; + regs.gpr[8] = arg6; + + err = compel_execute_syscall(ctl, &regs, (char*)code_syscall); + + *ret = regs.gpr[3]; + return err; +} + +void *remote_mmap(struct parasite_ctl *ctl, + void *addr, size_t length, int prot, + int flags, int fd, off_t offset) +{ + unsigned long map = 0; + int err; + + err = compel_syscall(ctl, __NR_mmap, &map, + (unsigned long)addr, length, prot, flags, fd, offset); + if (err < 0 || (long)map < 0) + map = 0; + + return (void *)map; +} + +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) +{ + /* + * OpenPOWER ABI requires that r12 is set to the calling function address + * to compute the TOC pointer. + */ + regs->gpr[12] = new_ip; + regs->nip = new_ip; + if (stack) + regs->gpr[1] = (unsigned long) stack; + regs->trap = 0; +} + +bool arch_can_dump_task(struct parasite_ctl *ctl) +{ + /* + * TODO: We should detect 32bit task when BE support is done. 
+ */ + return true; +} diff --git a/compel/arch/x86/src/lib/include/ptrace.h b/compel/arch/x86/src/lib/include/uapi/asm/breakpoints.h index 844ea0efd..980f25d06 100644 --- a/compel/arch/x86/src/lib/include/ptrace.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/breakpoints.h @@ -1,5 +1,5 @@ -#ifndef __COMPEL_PTRACE_H__ -#define __COMPEL_PTRACE_H__ +#ifndef __COMPEL_BREAKPOINTS_H__ +#define __COMPEL_BREAKPOINTS_H__ #define ARCH_SI_TRAP SI_KERNEL extern int ptrace_set_breakpoint(pid_t pid, void *addr); extern int ptrace_flush_breakpoints(pid_t pid); diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h new file mode 100644 index 000000000..bbc6bcf22 --- /dev/null +++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h @@ -0,0 +1,159 @@ +#ifndef UAPI_COMPEL_ASM_TYPES_H__ +#define UAPI_COMPEL_ASM_TYPES_H__ + +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include "log.h" +#include "common/bug.h" +#include "common/page.h" +#include <compel/plugins/std/asm/syscall-types.h> + +#define SIGMAX 64 +#define SIGMAX_OLD 31 + +typedef struct { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + uint64_t bp; + uint64_t bx; + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + uint64_t ax; + uint64_t cx; + uint64_t dx; + uint64_t si; + uint64_t di; + uint64_t orig_ax; + uint64_t ip; + uint64_t cs; + uint64_t flags; + uint64_t sp; + uint64_t ss; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ds; + uint64_t es; + uint64_t fs; + uint64_t gs; +} user_regs_struct64; + +typedef struct { + uint32_t bx; + uint32_t cx; + uint32_t dx; + uint32_t si; + uint32_t di; + uint32_t bp; + uint32_t ax; + uint32_t ds; + uint32_t es; + uint32_t fs; + uint32_t gs; + uint32_t orig_ax; + uint32_t ip; + uint32_t cs; + uint32_t flags; + uint32_t sp; + uint32_t ss; +} user_regs_struct32; + +#ifdef CONFIG_X86_64 +/* + * To be sure that we rely on inited reg->__is_native, this member + 
* is (short int) instead of initial (bool). The right way to + * check if regs are native or compat is to use user_regs_native() macro. + * This should cost nothing, as *usually* sizeof(bool) == sizeof(short) + */ +typedef struct { + union { + user_regs_struct64 native; + user_regs_struct32 compat; + }; + short __is_native; /* use user_regs_native macro to check it */ +} user_regs_struct_t; + +#define NATIVE_MAGIC 0x0A +#define COMPAT_MAGIC 0x0C +static inline bool user_regs_native(user_regs_struct_t *pregs) +{ + return pregs->__is_native == NATIVE_MAGIC; +} + +#define get_user_reg(pregs, name) \ + ((user_regs_native(pregs)) ? \ + ((pregs)->native.name) : \ + ((pregs)->compat.name)) + +#define set_user_reg(pregs, name, val) \ + ((user_regs_native(pregs)) ? \ + ((pregs)->native.name = (val)) : \ + ((pregs)->compat.name = (val))) +#else +typedef struct { + union { + user_regs_struct32 native; + }; +} user_regs_struct_t; +#define user_regs_native(pregs) true +#define get_user_reg(pregs, name) ((pregs)->native.name) +#define set_user_reg(pregs, name, val) ((pregs)->native.name = val) +#endif + +#if 0 +typedef struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; /* Note this is not the same as + the 32bit/x87/FSAVE twd */ + unsigned short fop; + u64 rip; + u64 rdp; + u32 mxcsr; + u32 mxcsr_mask; + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + u32 padding[24]; +} user_fpregs_struct_t; +#endif + +typedef struct xsave_struct user_fpregs_struct_t; + +#ifdef CONFIG_X86_64 +# define TASK_SIZE ((1UL << 47) - PAGE_SIZE) +#else +/* + * Task size may be limited to 3G but we need a + * higher limit, because it's backward compatible. + */ +# define TASK_SIZE (0xffffe000) +#endif + +static inline unsigned long task_size(void) { return TASK_SIZE; } + +typedef uint64_t auxv_t; + +/* + * Linux preserves three TLS segments in GDT. 
+ * Offsets in GDT differ between 32-bit and 64-bit machines. + * For 64-bit x86 those GDT offsets are the same + * for native and compat tasks. + */ +#define GDT_ENTRY_TLS_MIN 12 +#define GDT_ENTRY_TLS_MAX 14 +#define GDT_ENTRY_TLS_NUM 3 +typedef struct { + user_desc_t desc[GDT_ENTRY_TLS_NUM]; +} tls_t; + +#define REG_RES(regs) get_user_reg(&regs, ax) +#define REG_IP(regs) get_user_reg(&regs, ip) +#define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax) + +#define AT_VECTOR_SIZE 44 + +#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c new file mode 100644 index 000000000..53cae1dc5 --- /dev/null +++ b/compel/arch/x86/src/lib/infect.c @@ -0,0 +1,351 @@ +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/user.h> + +#include <compel/asm/fpu.h> + +#include "asm/cpu.h" + +#include <compel/asm/processor-flags.h> +#include <compel/cpu.h> +#include "errno.h" +#include <compel/plugins/std/syscall-codes.h> +#include <compel/plugins/std/syscall.h> +#include "asm/ptrace.h" +#include "common/err.h" +#include "asm/infect-types.h" +#include "uapi/compel/ptrace.h" +#include "infect.h" +#include "infect-priv.h" +#include "log.h" + +/* + * Injected syscall instruction + */ +const char code_syscall[] = { + 0x0f, 0x05, /* syscall */ + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */ +}; + +const char code_int_80[] = { + 0xcd, 0x80, /* int $0x80 */ + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... 
*/ +}; + +/* Sizes of the injected blobs, rounded up to a multiple of sizeof(long) */ +static const int +code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long)); +static const int +code_int_80_aligned = round_up(sizeof(code_int_80), sizeof(long)); + +static inline __always_unused void __check_code_syscall(void) +{ + BUILD_BUG_ON(code_int_80_aligned != BUILTIN_SYSCALL_SIZE); + BUILD_BUG_ON(code_syscall_aligned != BUILTIN_SYSCALL_SIZE); + BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); +} + +#define get_signed_user_reg(pregs, name) \ + ((user_regs_native(pregs)) ? (int64_t)((pregs)->native.name) : \ + (int32_t)((pregs)->compat.name)) + +int compel_get_task_regs(pid_t pid, user_regs_struct_t regs, save_regs_t save, void *arg) +{ + user_fpregs_struct_t xsave = { }, *xs = NULL; + + struct iovec iov; + int ret = -1; + + pr_info("Dumping general registers for %d in %s mode\n", pid, + user_regs_native(&regs) ? "native" : "compat"); + + /* Did we come from a system call? */ + if (get_signed_user_reg(&regs, orig_ax) >= 0) { + /* Restart the system call */ + switch (get_signed_user_reg(&regs, ax)) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + set_user_reg(&regs, ax, get_user_reg(&regs, orig_ax)); + set_user_reg(&regs, ip, get_user_reg(&regs, ip) - 2); + break; + case -ERESTART_RESTARTBLOCK: + pr_warn("Will restore %d with interrupted system call\n", pid); + set_user_reg(&regs, ax, -EINTR); + break; + } + } + +#ifndef PTRACE_GETREGSET +# define PTRACE_GETREGSET 0x4204 +#endif + + if (!cpu_has_feature(X86_FEATURE_FPU)) + goto out; + + /* + * FPU fetched either via fxsave or via xsave, + * thus decode it accordingly. 
+ */ + + pr_info("Dumping GP/FPU registers for %d\n", pid); + + if (cpu_has_feature(X86_FEATURE_OSXSAVE)) { + iov.iov_base = &xsave; + iov.iov_len = sizeof(xsave); + + if (ptrace(PTRACE_GETREGSET, pid, (unsigned int)NT_X86_XSTATE, &iov) < 0) { + pr_perror("Can't obtain FPU registers for %d", pid); + goto err; + } + } else { + if (ptrace(PTRACE_GETFPREGS, pid, NULL, &xsave)) { + pr_perror("Can't obtain FPU registers for %d", pid); + goto err; + } + } + + xs = &xsave; +out: + ret = save(arg, &regs, xs); +err: + return ret; +} + +int compel_syscall(struct parasite_ctl *ctl, int nr, unsigned long *ret, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6) +{ + user_regs_struct_t regs = ctl->orig.regs; + int err; + + if (user_regs_native(&regs)) { + user_regs_struct64 *r = &regs.native; + + r->ax = (uint64_t)nr; + r->di = arg1; + r->si = arg2; + r->dx = arg3; + r->r10 = arg4; + r->r8 = arg5; + r->r9 = arg6; + + err = compel_execute_syscall(ctl, &regs, code_syscall); + } else { + user_regs_struct32 *r = &regs.compat; + + r->ax = (uint32_t)nr; + r->bx = arg1; + r->cx = arg2; + r->dx = arg3; + r->si = arg4; + r->di = arg5; + r->bp = arg6; + + err = compel_execute_syscall(ctl, &regs, code_int_80); + } + + *ret = get_user_reg(&regs, ax); + return err; +} + +void *remote_mmap(struct parasite_ctl *ctl, + void *addr, size_t length, int prot, + int flags, int fd, off_t offset) +{ + unsigned long map; + int err; + bool compat_task = !user_regs_native(&ctl->orig.regs); + + err = compel_syscall(ctl, __NR(mmap, compat_task), &map, + (unsigned long)addr, length, prot, flags, fd, offset); + if (err < 0) + return NULL; + + if (IS_ERR_VALUE(map)) { + if (map == -EACCES && (prot & PROT_WRITE) && (prot & PROT_EXEC)) + pr_warn("mmap(PROT_WRITE | PROT_EXEC) failed for %d, " + "check selinux execmem policy\n", ctl->rpid); + return NULL; + } + + return (void *)map; +} + +/* + * regs must be inited when calling this function from 
original context + */ +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) +{ + set_user_reg(regs, ip, new_ip); + if (stack) + set_user_reg(regs, sp, (unsigned long) stack); + + /* Avoid end of syscall processing */ + set_user_reg(regs, orig_ax, -1); + + /* Make sure flags are in known state */ + set_user_reg(regs, flags, get_user_reg(regs, flags) & + ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF)); +} + +#define USER32_CS 0x23 +#define USER_CS 0x33 + +static bool ldt_task_selectors(pid_t pid) +{ + unsigned long cs; + + errno = 0; + /* + * Offset of register must be from 64-bit set even for + * compatible tasks. Fix this to support native i386 tasks + */ + cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, cs), 0); + if (errno != 0) { + pr_perror("Can't get CS register for %d", pid); + return -1; + } + + return cs != USER_CS && cs != USER32_CS; +} + +static int arch_task_compatible(pid_t pid) +{ + user_regs_struct_t r; + int ret = ptrace_get_regs(pid, &r); + + if (ret) + return -1; + + return !user_regs_native(&r); +} + +bool arch_can_dump_task(struct parasite_ctl *ctl) +{ + pid_t pid = ctl->rpid; + int ret; + + ret = arch_task_compatible(pid); + if (ret < 0) + return false; + + if (ret && !(ctl->ictx.flags & INFECT_HAS_COMPAT_SIGRETURN)) { + pr_err("Can't dump task %d running in 32-bit mode\n", pid); + return false; + } + + if (ldt_task_selectors(pid)) { + pr_err("Can't dump task %d with LDT descriptors\n", pid); + return false; + } + + return true; +} + +/* Copied from the gdb header gdb/nat/x86-dregs.h */ + +/* Debug registers' indices. */ +#define DR_FIRSTADDR 0 +#define DR_LASTADDR 3 +#define DR_NADDR 4 /* The number of debug address registers. */ +#define DR_STATUS 6 /* Index of debug status register (DR6). */ +#define DR_CONTROL 7 /* Index of debug control register (DR7). */ + +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit. 
*/ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit. */ +#define DR_ENABLE_SIZE 2 /* Two enable bits per debug register. */ + +/* Locally enable the break/watchpoint in the I'th debug register. */ +#define X86_DR_LOCAL_ENABLE(i) (1 << (DR_LOCAL_ENABLE_SHIFT + DR_ENABLE_SIZE * (i))) + +int ptrace_set_breakpoint(pid_t pid, void *addr) +{ + int ret; + + /* Set a breakpoint */ + if (ptrace(PTRACE_POKEUSER, pid, + offsetof(struct user, u_debugreg[DR_FIRSTADDR]), + addr)) { + pr_perror("Unable to setup a breakpoint into %d", pid); + return -1; + } + + /* Enable the breakpoint */ + if (ptrace(PTRACE_POKEUSER, pid, + offsetof(struct user, u_debugreg[DR_CONTROL]), + X86_DR_LOCAL_ENABLE(DR_FIRSTADDR))) { + pr_perror("Unable to enable the breakpoint for %d", pid); + return -1; + } + + ret = ptrace(PTRACE_CONT, pid, NULL, NULL); + if (ret) { + pr_perror("Unable to restart the stopped tracee process %d", pid); + return -1; + } + + return 1; +} + +int ptrace_flush_breakpoints(pid_t pid) +{ + /* Disable the breakpoint */ + if (ptrace(PTRACE_POKEUSER, pid, + offsetof(struct user, u_debugreg[DR_CONTROL]), + 0)) { + pr_perror("Unable to disable the breakpoint for %d", pid); + return -1; + } + + return 0; +} + +int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs) +{ + struct iovec iov; + int ret; + + iov.iov_base = ®s->native; + iov.iov_len = sizeof(user_regs_struct64); + + ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov); + if (ret == -1) { + pr_perror("PTRACE_GETREGSET failed"); + return -1; + } + + if (iov.iov_len == sizeof(regs->native)) { + regs->__is_native = NATIVE_MAGIC; + return ret; + } + if (iov.iov_len == sizeof(regs->compat)) { + regs->__is_native = COMPAT_MAGIC; + return ret; + } + + pr_err("PTRACE_GETREGSET read %zu bytes for pid %d, but native/compat regs sizes are %zu/%zu bytes", + iov.iov_len, pid, + sizeof(regs->native), sizeof(regs->compat)); + return -1; +} + +int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs) +{ + struct 
iovec iov; + + if (user_regs_native(regs)) { + iov.iov_base = ®s->native; + iov.iov_len = sizeof(user_regs_struct64); + } else { + iov.iov_base = ®s->compat; + iov.iov_len = sizeof(user_regs_struct32); + } + return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov); +} |