
Commit 8e02ae57 authored by Ingo Molnar

Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/uprobes

Pull uprobes updates from Oleg Nesterov:

  "This hopefully completes the previous 'fix the handling of relative
   jmp/call's' series, all changes except the last 3 unrelated fixes try
   to address TODO's mentioned in the changelogs."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 3617660e 13f59c5e
arch/x86/include/asm/uprobes.h
+8 −5
@@ -41,18 +41,21 @@ struct arch_uprobe {
		u8			ixol[MAX_UINSN_BYTES];
	};

	u16				fixups;
	const struct uprobe_xol_ops	*ops;

	union {
#ifdef CONFIG_X86_64
		unsigned long			rip_rela_target_address;
#endif
		struct {
			s32	offs;
			u8	ilen;
			u8	opc1;
		}			branch;
		struct {
#ifdef CONFIG_X86_64
			long	riprel_target;
#endif
			u8	fixups;
			u8	ilen;
		} 			def;
	};
};
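
A note on the reshaped union, as a reading aid (example() below is hypothetical, not part of the patch): a probe is driven either by branch_xol_ops, which only uses ->branch, or by default_xol_ops, which only uses ->def, so the two layouts can safely overlap:

	/* Sketch, assuming the struct layout above. */
	static void example(struct arch_uprobe *a)
	{
		if (a->ops == &branch_xol_ops)	/* set by branch_setup_xol_ops() */
			pr_info("branch: opc1=%#x ilen=%u\n",
				a->branch.opc1, a->branch.ilen);
		else				/* default_xol_ops */
			pr_info("def: fixups=%#x ilen=%u\n",
				a->def.fixups, a->def.ilen);
	}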

arch/x86/kernel/process_64.c
+4 −3
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (current->mm)
		current->mm->context.ia32_compat = 1;

	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_X32;
		current->personality &= ~READ_IMPLIES_EXEC;
		/* is_compat_task() uses the presence of the x32
		   syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
arch/x86/kernel/uprobes.c
+162 −171
@@ -32,20 +32,17 @@

/* Post-execution fixups. */

/* No fixup needed */
#define UPROBE_FIX_NONE		0x0

/* Adjust IP back to vicinity of actual insn */
#define UPROBE_FIX_IP		0x1
#define UPROBE_FIX_IP		0x01

/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL	0x2
#define UPROBE_FIX_CALL		0x02

/* Instruction will modify TF, don't change it */
#define UPROBE_FIX_SETF	0x4
#define UPROBE_FIX_SETF		0x04

#define UPROBE_FIX_RIP_AX	0x8000
#define UPROBE_FIX_RIP_CX	0x4000
#define UPROBE_FIX_RIP_AX	0x08
#define UPROBE_FIX_RIP_CX	0x10

#define	UPROBE_TRAP_NR		UINT_MAX
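
After the renumbering all five fixup flags fit in one byte, which is what allows def.fixups in struct arch_uprobe to shrink from u16 to u8. An illustrative combination:

	/* Sketch: a rip-relative insn whose ip also needs adjusting. */
	u8 fixups = UPROBE_FIX_IP | UPROBE_FIX_RIP_AX;	/* 0x01 | 0x08 == 0x09 */

	if (fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX))
		;	/* scratch-register save/restore is needed */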

@@ -67,6 +64,7 @@
 * to keep gcc from statically optimizing it out, as variable_test_bit makes
 * some versions of gcc to think only *(unsigned long*) is used.
 */
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static volatile u32 good_insns_32[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
@@ -89,33 +87,12 @@ static volatile u32 good_insns_32[256 / 32] = {
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#else
#define good_insns_32	NULL
#endif

/* Using this for both 64-bit and 32-bit apps */
static volatile u32 good_2byte_insns[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};

#ifdef CONFIG_X86_64
/* Good-instruction tables for 64-bit apps */
#if defined(CONFIG_X86_64)
static volatile u32 good_insns_64[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
@@ -138,7 +115,33 @@ static volatile u32 good_insns_64[256 / 32] = {
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#else
#define good_insns_64	NULL
#endif

/* Using this for both 64-bit and 32-bit apps */
static volatile u32 good_2byte_insns[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#undef W
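
The W() helper is defined earlier in the file (the #undef above refers to it) and does not appear in this hunk. A sketch consistent with how the tables use it: each row packs 16 single-bit flags, and two rows fill one u32 entry:

	#define W(row, b0, b1, b2, b3, b4, b5, b6, b7,				   \
		       b8, b9, ba, bb, bc, bd, be, bf)				   \
		(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3)| \
		  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7)| \
		  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb)| \
		  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
		 << (row % 32))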

/*
@@ -209,16 +212,25 @@ static bool is_prefix_bad(struct insn *insn)
	return false;
}

static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
{
	insn_init(insn, auprobe->insn, false);
	u32 volatile *good_insns;

	insn_init(insn, auprobe->insn, x86_64);
	/* has the side-effect of processing the entire instruction */
	insn_get_length(insn);
	if (WARN_ON_ONCE(!insn_complete(insn)))
		return -ENOEXEC;

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
	if (x86_64)
		good_insns = good_insns_64;
	else
		good_insns = good_insns_32;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
		return 0;

	if (insn->opcode.nbytes == 2) {
@@ -230,14 +242,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
}

#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return	!config_enabled(CONFIG_IA32_EMULATION) ||
		!(mm->context.ia32_compat == TIF_IA32);
}
/*
 * If arch_uprobe->insn doesn't use rip-relative addressing, return
 * immediately.  Otherwise, rewrite the instruction so that it accesses
 * its memory operand indirectly through a scratch register.  Set
 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
 * accordingly.  (The contents of the scratch register will be saved
 * before we single-step the modified instruction, and restored
 * afterward.)
 * def->fixups and def->riprel_target accordingly. (The contents of the
 * scratch register will be saved before we single-step the modified
 * instruction, and restored afterward).
 *
 * We do this because a rip-relative instruction can access only a
 * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -252,8 +268,7 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
 *  - There's never a SIB byte.
 *  - The displacement is always 4 bytes.
 */
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
{
	u8 *cursor;
	u8 reg;
@@ -277,8 +292,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
	 * is the immediate operand.
	 */
	cursor = auprobe->insn + insn_offset_modrm(insn);
	insn_get_length(insn);

	/*
	 * Convert from rip-relative addressing to indirect addressing
	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
@@ -293,18 +306,18 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
		 * is NOT the register operand, so we use %rcx (register
		 * #1) for the scratch register.
		 */
		auprobe->fixups = UPROBE_FIX_RIP_CX;
		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
		/* Change modrm from 00 000 101 to 00 000 001. */
		*cursor = 0x1;
	} else {
		/* Use %rax (register #0) for the scratch register. */
		auprobe->fixups = UPROBE_FIX_RIP_AX;
		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
		*cursor = (reg << 3);
	}

	/* Target address = address of next instruction + (signed) offset */
	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
	auprobe->def.riprel_target = (long)insn->length + insn->displacement.value;

	/* Displacement field is gone; slide immediate field (if any) over. */
	if (insn->immediate.nbytes) {
@@ -313,37 +326,35 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
	}
}
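
A worked example of the rewrite (illustrative bytes, not taken from the patch): mov 0x100(%rip),%eax encodes as 8b 05 00 01 00 00. Its reg field is 0, i.e. %rax is the register operand, so %rcx becomes the scratch register:

	/*
	 * before:  8b 05 00 01 00 00	mov 0x100(%rip),%eax   (6 bytes)
	 * after:   8b 01		mov (%rcx),%eax
	 *
	 * def.fixups gains UPROBE_FIX_RIP_CX and def.riprel_target becomes
	 * 6 + 0x100 (insn length + displacement), per the code above.
	 */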

static inline unsigned long *
scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
}

/*
 * If we're emulating a rip-relative instruction, save the contents
 * of the scratch register and store the target address in that register.
 */
static void
pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
		autask->saved_scratch_register = regs->ax;
		regs->ax = current->utask->vaddr;
		regs->ax += auprobe->rip_rela_target_address;
	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
		autask->saved_scratch_register = regs->cx;
		regs->cx = current->utask->vaddr;
		regs->cx += auprobe->rip_rela_target_address;
	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
		struct uprobe_task *utask = current->utask;
		unsigned long *sr = scratch_reg(auprobe, regs);

		utask->autask.saved_scratch_register = *sr;
		*sr = utask->vaddr + auprobe->def.riprel_target;
	}
}

static void
handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
				long *correction)
{
	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
		struct arch_uprobe_task *autask;

		autask = &current->utask->autask;
		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
			regs->ax = autask->saved_scratch_register;
		else
			regs->cx = autask->saved_scratch_register;
	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
		struct uprobe_task *utask = current->utask;
		unsigned long *sr = scratch_reg(auprobe, regs);

		*sr = utask->autask.saved_scratch_register;
		/*
		 * The original instruction includes a displacement, and so
		 * is 4 bytes longer than what we've just single-stepped.
@@ -354,58 +365,31 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *
			*correction += 4;
	}
}
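
Around the single-step, the two hooks therefore net out as follows (sketch; sr is the scratch register chosen by scratch_reg()):

	/*
	 * riprel_pre_xol():	saved = *sr; *sr = vaddr + def.riprel_target;
	 *	... single-step the rewritten, displacement-free copy ...
	 * riprel_post_xol():	*sr = saved; and *correction += 4 when the
	 *			caller asked for it, since the original insn
	 *			is 4 bytes longer than the stepped copy.
	 */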

static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
{
	insn_init(insn, auprobe->insn, true);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;

	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
		return 0;

	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}
	return -ENOTSUPP;
}

static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
#else /* 32-bit: */
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	if (mm->context.ia32_compat)
		return validate_insn_32bits(auprobe, insn);
	return validate_insn_64bits(auprobe, insn);
	return false;
}
#else /* 32-bit: */
/*
 * No RIP-relative addressing on 32-bit
 */
static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
{
}
static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
}
static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
					long *correction)
{
}

static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
{
	return validate_insn_32bits(auprobe, insn);
}
#endif /* CONFIG_X86_64 */

struct uprobe_xol_ops {
	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
	void	(*abort)(struct arch_uprobe *, struct pt_regs *);
};

static inline int sizeof_long(void)
@@ -415,25 +399,18 @@ static inline int sizeof_long(void)

static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
	riprel_pre_xol(auprobe, regs);
	return 0;
}

/*
 * Adjust the return address pushed by a call insn executed out of line.
 */
static int adjust_ret_addr(unsigned long sp, long correction)
static int push_ret_address(struct pt_regs *regs, unsigned long ip)
{
	int rasize = sizeof_long();
	long ra;

	if (copy_from_user(&ra, (void __user *)sp, rasize))
		return -EFAULT;
	unsigned long new_sp = regs->sp - sizeof_long();

	ra += correction;
	if (copy_to_user((void __user *)sp, &ra, rasize))
	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
		return -EFAULT;

	regs->sp = new_sp;
	return 0;
}
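
The new helper mimics a hardware push. With illustrative numbers: emulating a 5-byte call probed at 0x400100 on x86_64, push_ret_address(regs, 0x400105) stores 0x400105 at regs->sp - 8 and lowers regs->sp by 8, exactly what the call itself would have done.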

@@ -442,23 +419,30 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
	struct uprobe_task *utask = current->utask;
	long correction = (long)(utask->vaddr - utask->xol_vaddr);

	handle_riprel_post_xol(auprobe, regs, &correction);
	if (auprobe->fixups & UPROBE_FIX_IP)
	riprel_post_xol(auprobe, regs, &correction);
	if (auprobe->def.fixups & UPROBE_FIX_IP) {
		regs->ip += correction;

	if (auprobe->fixups & UPROBE_FIX_CALL) {
		if (adjust_ret_addr(regs->sp, correction)) {
	} else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
		regs->sp += sizeof_long();
		if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen))
			return -ERESTART;
	}
	}
	/* popf; tell the caller to not touch TF */
	if (auprobe->def.fixups & UPROBE_FIX_SETF)
		utask->autask.saved_tf = true;

	return 0;
}

static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	riprel_post_xol(auprobe, regs, NULL);
}

static struct uprobe_xol_ops default_xol_ops = {
	.pre_xol  = default_pre_xol_op,
	.post_xol = default_post_xol_op,
	.abort	  = default_abort_op,
};

static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -520,7 +504,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
	unsigned long offs = (long)auprobe->branch.offs;

	if (branch_is_call(auprobe)) {
		unsigned long new_sp = regs->sp - sizeof_long();
		/*
		 * If it fails we execute this (mangled, see the comment in
		 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +513,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
		 *
		 * But there is corner case, see the comment in ->post_xol().
		 */
		if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
		if (push_ret_address(regs, new_ip))
			return false;
		regs->sp = new_sp;
	} else if (!check_jmp_cond(auprobe, regs)) {
		offs = 0;
	}
@@ -583,11 +565,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
{
	u8 opc1 = OPCODE1(insn);

	/* has the side-effect of processing the entire instruction */
	insn_get_length(insn);
	if (WARN_ON_ONCE(!insn_complete(insn)))
		return -ENOEXEC;
	int i;

	switch (opc1) {
	case 0xeb:	/* jmp 8 */
@@ -612,6 +590,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
			return -ENOSYS;
	}

	/*
	 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
	 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
	 * No one uses these insns, reject any branch insns with such prefix.
	 */
	for (i = 0; i < insn->prefixes.nbytes; i++) {
		if (insn->prefixes.bytes[i] == 0x66)
			return -ENOTSUPP;
	}

	auprobe->branch.opc1 = opc1;
	auprobe->branch.ilen = insn->length;
	auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +618,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
{
	struct insn insn;
	bool fix_ip = true, fix_call = false;
	u8 fix_ip_or_call = UPROBE_FIX_IP;
	int ret;

	ret = validate_insn_bits(auprobe, mm, &insn);
	ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
	if (ret)
		return ret;

@@ -642,44 +630,40 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
		return ret;

	/*
	 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
	 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
	 * is either zero or it reflects rip-related fixups.
	 * Figure out which fixups default_post_xol_op() will need to perform,
	 * and annotate def->fixups accordingly. To start with, ->fixups is
	 * either zero or it reflects rip-related fixups.
	 */
	switch (OPCODE1(&insn)) {
	case 0x9d:		/* popf */
		auprobe->fixups |= UPROBE_FIX_SETF;
		auprobe->def.fixups |= UPROBE_FIX_SETF;
		break;
	case 0xc3:		/* ret or lret -- ip is correct */
	case 0xcb:
	case 0xc2:
	case 0xca:
		fix_ip = false;
	case 0xea:		/* jmp absolute -- ip is correct */
		fix_ip_or_call = 0;
		break;
	case 0x9a:		/* call absolute - Fix return addr, not ip */
		fix_call = true;
		fix_ip = false;
		break;
	case 0xea:		/* jmp absolute -- ip is correct */
		fix_ip = false;
		fix_ip_or_call = UPROBE_FIX_CALL;
		break;
	case 0xff:
		insn_get_modrm(&insn);
		switch (MODRM_REG(&insn)) {
		case 2: case 3:			/* call or lcall, indirect */
			fix_call = true;
			fix_ip_or_call = UPROBE_FIX_CALL;
			break;
		case 4: case 5:			/* jmp or ljmp, indirect */
			fix_ip = false;
			fix_ip_or_call = 0;
			break;
		}
		/* fall through */
	default:
		handle_riprel_insn(auprobe, &insn);
		riprel_analyze(auprobe, &insn);
	}

	if (fix_ip)
		auprobe->fixups |= UPROBE_FIX_IP;
	if (fix_call)
		auprobe->fixups |= UPROBE_FIX_CALL;
	auprobe->def.ilen = insn.length;
	auprobe->def.fixups |= fix_ip_or_call;

	auprobe->ops = &default_xol_ops;
	return 0;
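
Putting the switch together, the resulting def.fixups for a few first opcodes (a sketch; relative jmp/call insns do not reach this point because branch_setup_xol_ops() claims them earlier):

	/*
	 * 0x9d popf		-> UPROBE_FIX_SETF | UPROBE_FIX_IP
	 * 0xc3 ret		-> 0 (ip is already correct after XOL)
	 * 0x9a lcall		-> UPROBE_FIX_CALL
	 * 0xff /2 call		-> UPROBE_FIX_CALL, plus a RIP flag if the
	 *			   operand is rip-relative
	 */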
@@ -694,6 +678,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	if (auprobe->ops->pre_xol) {
		int err = auprobe->ops->pre_xol(auprobe, regs);
		if (err)
			return err;
	}

	regs->ip = utask->xol_vaddr;
	utask->autask.saved_trap_nr = current->thread.trap_nr;
	current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +693,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
		set_task_blockstep(current, false);

	if (auprobe->ops->pre_xol)
		return auprobe->ops->pre_xol(auprobe, regs);
	return 0;
}

@@ -753,35 +741,38 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;
	bool send_sigtrap = utask->autask.saved_tf;
	int err = 0;

	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
	current->thread.trap_nr = utask->autask.saved_trap_nr;

	if (auprobe->ops->post_xol) {
		int err = auprobe->ops->post_xol(auprobe, regs);
		err = auprobe->ops->post_xol(auprobe, regs);
		if (err) {
			arch_uprobe_abort_xol(auprobe, regs);
			/*
			 * Restart the probed insn. ->post_xol() must ensure
			 * this is really possible if it returns -ERESTART.
			 * Restore ->ip for restart or post mortem analysis.
			 * ->post_xol() must not return -ERESTART unless this
			 * is really possible.
			 */
			regs->ip = utask->vaddr;
			if (err == -ERESTART)
				return 0;
			return err;
				err = 0;
			send_sigtrap = false;
		}
	}

	current->thread.trap_nr = utask->autask.saved_trap_nr;
	/*
	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
	 * so we can get an extra SIGTRAP if we do not clear TF. We need
	 * to examine the opcode to make it right.
	 */
	if (utask->autask.saved_tf)
	if (send_sigtrap)
		send_sig(SIGTRAP, current, 0);
	else if (!(auprobe->fixups & UPROBE_FIX_SETF))

	if (!utask->autask.saved_tf)
		regs->flags &= ~X86_EFLAGS_TF;

	return 0;
	return err;
}

/* callback routine for handling exceptions. */
@@ -815,18 +806,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,

/*
 * This function gets called when XOL instruction either gets trapped or
 * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
 * Reset the instruction pointer to its probed address for the potential
 * restart or for post mortem analysis.
 * the thread has a fatal signal. Reset the instruction pointer to its
 * probed address for the potential restart or for post mortem analysis.
 */
void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	current->thread.trap_nr = utask->autask.saved_trap_nr;
	handle_riprel_post_xol(auprobe, regs, NULL);
	instruction_pointer_set(regs, utask->vaddr);
	if (auprobe->ops->abort)
		auprobe->ops->abort(auprobe, regs);

	current->thread.trap_nr = utask->autask.saved_trap_nr;
	regs->ip = utask->vaddr;
	/* clear TF if it was set by us in arch_uprobe_pre_xol() */
	if (!utask->autask.saved_tf)
		regs->flags &= ~X86_EFLAGS_TF;
kernel/events/uprobes.c
+1 −1
@@ -127,7 +127,7 @@ struct xol_area {
 */
static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{
	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;

	if (is_register)
		flags |= VM_WRITE;
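
A plausible rationale for this one-liner: the kernel clears VM_SHARED on read-only MAP_SHARED mappings but keeps VM_MAYSHARE set, so testing VM_MAYSHARE rejects every shared mapping, writable or not.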
kernel/trace/trace_uprobe.c
+25 −21
@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
}

static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
{
	bool done;

	write_lock(&tu->filter.rwlock);
	if (event->hw.tp_target) {
		/*
		 * event->parent != NULL means copy_process(), we can avoid
		 * uprobe_apply(). current->mm must be probed and we can rely
		 * on dup_mmap() which preserves the already installed bp's.
		 *
		 * attr.enable_on_exec means that exec/mmap will install the
		 * breakpoints we need.
		 */
		list_del(&event->hw.tp_list);
		done = tu->filter.nr_systemwide ||
			event->parent || event->attr.enable_on_exec ||
			(event->hw.tp_target->flags & PF_EXITING) ||
			uprobe_filter_event(tu, event);
		list_add(&event->hw.tp_list, &tu->filter.perf_events);
	} else {
		tu->filter.nr_systemwide--;
		done = tu->filter.nr_systemwide;
		tu->filter.nr_systemwide++;
	}
	write_unlock(&tu->filter.rwlock);

	if (!done)
		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
		return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);

	return 0;
}

static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
{
	bool done;
	int err;

	write_lock(&tu->filter.rwlock);
	if (event->hw.tp_target) {
		list_del(&event->hw.tp_list);
		/*
		 * event->parent != NULL means copy_process(), we can avoid
		 * uprobe_apply(). current->mm must be probed and we can rely
		 * on dup_mmap() which preserves the already installed bp's.
		 *
		 * attr.enable_on_exec means that exec/mmap will install the
		 * breakpoints we need.
		 */
		done = tu->filter.nr_systemwide ||
			(event->hw.tp_target->flags & PF_EXITING) ||
			event->parent || event->attr.enable_on_exec ||
			uprobe_filter_event(tu, event);
		list_add(&event->hw.tp_list, &tu->filter.perf_events);
	} else {
		tu->filter.nr_systemwide--;
		done = tu->filter.nr_systemwide;
		tu->filter.nr_systemwide++;
	}
	write_unlock(&tu->filter.rwlock);

	if (!done)
		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);

	return 0;
	err = 0;
	if (!done) {
		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
		if (err)
			uprobe_perf_close(tu, event);
	}
	return err;
}
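
The two functions also swapped places in the file: uprobe_perf_open() can now fail, and its error path rolls the filter bookkeeping back by calling uprobe_perf_close(), so close must be defined first (avoiding a forward declaration).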

static bool uprobe_perf_filter(struct uprobe_consumer *uc,