Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3d521f91 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of...

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next

Pull perf updates from Ingo Molnar:
 "The tooling changes maintained by Jiri Olsa until Arnaldo is on
  vacation:

  User visible changes:
   - Add -F option for specifying output fields (Namhyung Kim)
   - Propagate exit status of a command line workload for record command
     (Namhyung Kim)
   - Use tid for finding thread (Namhyung Kim)
   - Clarify the output of perf sched map plus small sched command
     fixes (Dongsheng Yang)
   - Wire up perf_regs and unwind support for ARM64 (Jean Pihet)
   - Factor hists statistics counts processing which in turn also fixes
     several bugs in TUI report command (Namhyung Kim)
   - Add --percentage option to control absolute/relative percentage
     output (Namhyung Kim)
   - Add --list-cmds to 'kmem', 'mem', 'lock' and 'sched', for use by
     completion scripts (Ramkumar Ramachandra)

  Development/infrastructure changes and fixes:
   - Android related fixes for pager and map dso resolving (Michael
     Lentine)
   - Add libdw DWARF post unwind support for ARM (Jean Pihet)
   - Consolidate types.h for ARM and ARM64 (Jean Pihet)
   - Fix possible null pointer dereference in session.c (Masanari Iida)
   - Cleanup, remove unused variables in map_switch_event() (Dongsheng
     Yang)
   - Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)
   - Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)
   - Cleanups for perf.h header (Jiri Olsa)
   - Consolidate types.h and export.h within tools (Borislav Petkov)
   - Move u64_swap union to its single user's header, evsel.h (Borislav
     Petkov)
   - Fix for s390 to properly parse tracepoints plus test code
     (Alexander Yarygin)
   - Handle EINTR error for readn/writen (Namhyung Kim)
   - Add a test case for hists filtering (Namhyung Kim)
   - Share map_groups among threads of the same group (Arnaldo Carvalho
     de Melo, Jiri Olsa)
   - Making some code (cpu node map and report parse callchain callback)
     global to be usable by upcomming changes (Don Zickus)
   - Fix pmu object compilation error (Jiri Olsa)

  Kernel side changes:
   - intrusive uprobes fixes from Oleg Nesterov.  Since the interface is
     admin-only, and the bug only affects user-space ("any probed
     jmp/call can kill the application"), we queued these fixes via the
     development tree, as a special exception.
   - more fuzzer motivated race fixes and related refactoring and
     robustization.
   - allow PMU drivers to be built as modules.  (No actual module yet,
     because the x86 Intel uncore module wasn't ready in time for this)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  perf tools: Add automatic remapping of Android libraries
  perf tools: Add cat as fallback pager
  perf tests: Add a testcase for histogram output sorting
  perf tests: Factor out print_hists_*()
  perf tools: Introduce reset_output_field()
  perf tools: Get rid of obsolete hist_entry__sort_list
  perf hists: Reset width of output fields with header length
  perf tools: Skip elided sort entries
  perf top: Add --fields option to specify output fields
  perf report/tui: Fix a bug when --fields/sort is given
  perf tools: Add ->sort() member to struct sort_entry
  perf report: Add -F option to specify output fields
  perf tools: Call perf_hpp__init() before setting up GUI browsers
  perf tools: Consolidate management of default sort orders
  perf tools: Allow hpp fields to be sort keys
  perf ui: Get rid of callback from __hpp__fmt()
  perf tools: Consolidate output field handling to hpp format routines
  perf tools: Use hpp formats to sort final output
  perf tools: Support event grouping in hpp ->sort()
  perf tools: Use hpp formats to sort hist entries
  ...
parents 776edb59 e450f90e
Loading
Loading
Loading
Loading
+14 −2
Original line number Diff line number Diff line
@@ -33,15 +33,27 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN		0xcc
#define UPROBE_SWBP_INSN_SIZE		   1

struct uprobe_xol_ops;

struct arch_uprobe {
	u16				fixups;
	union {
		u8			insn[MAX_UINSN_BYTES];
		u8			ixol[MAX_UINSN_BYTES];
	};

	u16				fixups;
	const struct uprobe_xol_ops	*ops;

	union {
#ifdef CONFIG_X86_64
		unsigned long			rip_rela_target_address;
#endif
		struct {
			s32	offs;
			u8	ilen;
			u8	opc1;
		}				branch;
	};
};

struct arch_uprobe_task {
+1 −0
Original line number Diff line number Diff line
@@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n,

	return sched.state.unassigned;
}
EXPORT_SYMBOL_GPL(perf_assign_events);

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
+19 −3
Original line number Diff line number Diff line
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
	return val;
}

static u64 precise_store_data_hsw(u64 status)
static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse;
	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;

	dse.val = 0;
	dse.mem_op = PERF_MEM_OP_STORE;
	dse.mem_lvl = PERF_MEM_LVL_NA;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
		return dse.mem_lvl;

	if (status & 1)
		dse.mem_lvl = PERF_MEM_LVL_L1;
		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
	else
		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;

	/* Nothing else supported. Sorry. */
	return dse.val;
}
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
				data.data_src.val = load_latency_data(pebs->dse);
			else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
				data.data_src.val =
					precise_store_data_hsw(pebs->dse);
					precise_store_data_hsw(event, pebs->dse);
			else
				data.data_src.val = precise_store_data(pebs->dse);
		}
+349 −202
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@
#define OPCODE1(insn)		((insn)->opcode.bytes[0])
#define OPCODE2(insn)		((insn)->opcode.bytes[1])
#define OPCODE3(insn)		((insn)->opcode.bytes[2])
#define MODRM_REG(insn)		X86_MODRM_REG(insn->modrm.value)
#define MODRM_REG(insn)		X86_MODRM_REG((insn)->modrm.value)

#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
@@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
	return -ENOTSUPP;
}

/*
 * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
 * annotate arch_uprobe->fixups accordingly.  To start with,
 * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
 */
static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
{
	bool fix_ip = true, fix_call = false;	/* defaults */
	int reg;

	insn_get_opcode(insn);	/* should be a nop */

	switch (OPCODE1(insn)) {
	case 0x9d:
		/* popf */
		auprobe->fixups |= UPROBE_FIX_SETF;
		break;
	case 0xc3:		/* ret/lret */
	case 0xcb:
	case 0xc2:
	case 0xca:
		/* ip is correct */
		fix_ip = false;
		break;
	case 0xe8:		/* call relative - Fix return addr */
		fix_call = true;
		break;
	case 0x9a:		/* call absolute - Fix return addr, not ip */
		fix_call = true;
		fix_ip = false;
		break;
	case 0xff:
		insn_get_modrm(insn);
		reg = MODRM_REG(insn);
		if (reg == 2 || reg == 3) {
			/* call or lcall, indirect */
			/* Fix return addr; ip is correct. */
			fix_call = true;
			fix_ip = false;
		} else if (reg == 4 || reg == 5) {
			/* jmp or ljmp, indirect */
			/* ip is correct. */
			fix_ip = false;
		}
		break;
	case 0xea:		/* jmp absolute -- ip is correct */
		fix_ip = false;
		break;
	default:
		break;
	}
	if (fix_ip)
		auprobe->fixups |= UPROBE_FIX_IP;
	if (fix_call)
		auprobe->fixups |= UPROBE_FIX_CALL;
}

#ifdef CONFIG_X86_64
/*
 * If arch_uprobe->insn doesn't use rip-relative addressing, return
@@ -310,15 +253,11 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
 *  - The displacement is always 4 bytes.
 */
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
{
	u8 *cursor;
	u8 reg;

	if (mm->context.ia32_compat)
		return;

	auprobe->rip_rela_target_address = 0x0;
	if (!insn_rip_relative(insn))
		return;

@@ -372,7 +311,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins
		cursor++;
		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
	}
	return;
}

/*
 * If we're emulating a rip-relative instruction, save the contents
 * of the scratch register and store the target address in that register.
 */
static void
pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
{
	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
		autask->saved_scratch_register = regs->ax;
		regs->ax = current->utask->vaddr;
		regs->ax += auprobe->rip_rela_target_address;
	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
		autask->saved_scratch_register = regs->cx;
		regs->cx = current->utask->vaddr;
		regs->cx += auprobe->rip_rela_target_address;
	}
}

static void
handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
{
	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
		struct arch_uprobe_task *autask;

		autask = &current->utask->autask;
		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
			regs->ax = autask->saved_scratch_register;
		else
			regs->cx = autask->saved_scratch_register;

		/*
		 * The original instruction includes a displacement, and so
		 * is 4 bytes longer than what we've just single-stepped.
		 * Caller may need to apply other fixups to handle stuff
		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
		 */
		if (correction)
			*correction += 4;
	}
}

static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
@@ -401,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
	return validate_insn_64bits(auprobe, insn);
}
#else /* 32-bit: */
static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
/*
 * No RIP-relative addressing on 32-bit
 */
static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
{
}
static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
{
}
static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
					long *correction)
{
	/* No RIP-relative addressing on 32-bit */
}

static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
@@ -412,141 +402,311 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
}
#endif /* CONFIG_X86_64 */

/**
 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
 * @mm: the probed address space.
 * @arch_uprobe: the probepoint information.
 * @addr: virtual address at which to install the probepoint
 * Return 0 on success or a -ve number on error.
struct uprobe_xol_ops {
	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
};

static inline int sizeof_long(void)
{
	return is_ia32_task() ? 4 : 8;
}

static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
	return 0;
}

/*
 * Adjust the return address pushed by a call insn executed out of line.
 */
int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
static int adjust_ret_addr(unsigned long sp, long correction)
{
	int ret;
	struct insn insn;
	int rasize = sizeof_long();
	long ra;

	auprobe->fixups = 0;
	ret = validate_insn_bits(auprobe, mm, &insn);
	if (ret != 0)
		return ret;
	if (copy_from_user(&ra, (void __user *)sp, rasize))
		return -EFAULT;

	handle_riprel_insn(auprobe, mm, &insn);
	prepare_fixups(auprobe, &insn);
	ra += correction;
	if (copy_to_user((void __user *)sp, &ra, rasize))
		return -EFAULT;

	return 0;
}

#ifdef CONFIG_X86_64
/*
 * If we're emulating a rip-relative instruction, save the contents
 * of the scratch register and store the target address in that register.
 */
static void
pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
		autask->saved_scratch_register = regs->ax;
		regs->ax = current->utask->vaddr;
		regs->ax += auprobe->rip_rela_target_address;
	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
		autask->saved_scratch_register = regs->cx;
		regs->cx = current->utask->vaddr;
		regs->cx += auprobe->rip_rela_target_address;
	struct uprobe_task *utask = current->utask;
	long correction = (long)(utask->vaddr - utask->xol_vaddr);

	handle_riprel_post_xol(auprobe, regs, &correction);
	if (auprobe->fixups & UPROBE_FIX_IP)
		regs->ip += correction;

	if (auprobe->fixups & UPROBE_FIX_CALL) {
		if (adjust_ret_addr(regs->sp, correction)) {
			regs->sp += sizeof_long();
			return -ERESTART;
		}
	}
#else
static void
pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)

	return 0;
}

static struct uprobe_xol_ops default_xol_ops = {
	.pre_xol  = default_pre_xol_op,
	.post_xol = default_post_xol_op,
};

static bool branch_is_call(struct arch_uprobe *auprobe)
{
	/* No RIP-relative addressing on 32-bit */
	return auprobe->branch.opc1 == 0xe8;
}
#endif

/*
 * arch_uprobe_pre_xol - prepare to execute out of line.
 * @auprobe: the probepoint information.
 * @regs: reflects the saved user state of current task.
 */
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
#define CASE_COND					\
	COND(70, 71, XF(OF))				\
	COND(72, 73, XF(CF))				\
	COND(74, 75, XF(ZF))				\
	COND(78, 79, XF(SF))				\
	COND(7a, 7b, XF(PF))				\
	COND(76, 77, XF(CF) || XF(ZF))			\
	COND(7c, 7d, XF(SF) != XF(OF))			\
	COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF))

#define COND(op_y, op_n, expr)				\
	case 0x ## op_y: DO((expr) != 0)		\
	case 0x ## op_n: DO((expr) == 0)

#define XF(xf)	(!!(flags & X86_EFLAGS_ ## xf))

static bool is_cond_jmp_opcode(u8 opcode)
{
	struct arch_uprobe_task *autask;
	switch (opcode) {
	#define DO(expr)	\
		return true;
	CASE_COND
	#undef	DO

	autask = &current->utask->autask;
	autask->saved_trap_nr = current->thread.trap_nr;
	current->thread.trap_nr = UPROBE_TRAP_NR;
	regs->ip = current->utask->xol_vaddr;
	pre_xol_rip_insn(auprobe, regs, autask);
	default:
		return false;
	}
}

	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
	regs->flags |= X86_EFLAGS_TF;
	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
		set_task_blockstep(current, false);
static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	unsigned long flags = regs->flags;

	return 0;
	switch (auprobe->branch.opc1) {
	#define DO(expr)	\
		return expr;
	CASE_COND
	#undef	DO

	default:	/* not a conditional jmp */
		return true;
	}
}

#undef	XF
#undef	COND
#undef	CASE_COND

static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	unsigned long new_ip = regs->ip += auprobe->branch.ilen;
	unsigned long offs = (long)auprobe->branch.offs;

	if (branch_is_call(auprobe)) {
		unsigned long new_sp = regs->sp - sizeof_long();
		/*
 * This function is called by arch_uprobe_post_xol() to adjust the return
 * address pushed by a call instruction executed out of line.
		 * If it fails we execute this (mangled, see the comment in
		 * branch_clear_offset) insn out-of-line. In the likely case
		 * this should trigger the trap, and the probed application
		 * should die or restart the same insn after it handles the
		 * signal, arch_uprobe_post_xol() won't be even called.
		 *
		 * But there is corner case, see the comment in ->post_xol().
		 */
static int adjust_ret_addr(unsigned long sp, long correction)
		if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
			return false;
		regs->sp = new_sp;
	} else if (!check_jmp_cond(auprobe, regs)) {
		offs = 0;
	}

	regs->ip = new_ip + offs;
	return true;
}

static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	int rasize, ncopied;
	long ra = 0;
	BUG_ON(!branch_is_call(auprobe));
	/*
	 * We can only get here if branch_emulate_op() failed to push the ret
	 * address _and_ another thread expanded our stack before the (mangled)
	 * "call" insn was executed out-of-line. Just restore ->sp and restart.
	 * We could also restore ->ip and try to call branch_emulate_op() again.
	 */
	regs->sp += sizeof_long();
	return -ERESTART;
}

	if (is_ia32_task())
		rasize = 4;
	else
		rasize = 8;
static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
{
	/*
	 * Turn this insn into "call 1f; 1:", this is what we will execute
	 * out-of-line if ->emulate() fails. We only need this to generate
	 * a trap, so that the probed task receives the correct signal with
	 * the properly filled siginfo.
	 *
	 * But see the comment in ->post_xol(), in the unlikely case it can
	 * succeed. So we need to ensure that the new ->ip can not fall into
	 * the non-canonical area and trigger #GP.
	 *
	 * We could turn it into (say) "pushf", but then we would need to
	 * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte
	 * of ->insn[] for set_orig_insn().
	 */
	memset(auprobe->insn + insn_offset_immediate(insn),
		0, insn->immediate.nbytes);
}

	ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
	if (unlikely(ncopied))
		return -EFAULT;
static struct uprobe_xol_ops branch_xol_ops = {
	.emulate  = branch_emulate_op,
	.post_xol = branch_post_xol_op,
};

	ra += correction;
	ncopied = copy_to_user((void __user *)sp, &ra, rasize);
	if (unlikely(ncopied))
		return -EFAULT;
/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
{
	u8 opc1 = OPCODE1(insn);

	return 0;
	/* has the side-effect of processing the entire instruction */
	insn_get_length(insn);
	if (WARN_ON_ONCE(!insn_complete(insn)))
		return -ENOEXEC;

	switch (opc1) {
	case 0xeb:	/* jmp 8 */
	case 0xe9:	/* jmp 32 */
	case 0x90:	/* prefix* + nop; same as jmp with .offs = 0 */
		break;

	case 0xe8:	/* call relative */
		branch_clear_offset(auprobe, insn);
		break;

	case 0x0f:
		if (insn->opcode.nbytes != 2)
			return -ENOSYS;
		/*
		 * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches
		 * OPCODE1() of the "short" jmp which checks the same condition.
		 */
		opc1 = OPCODE2(insn) - 0x10;
	default:
		if (!is_cond_jmp_opcode(opc1))
			return -ENOSYS;
	}

#ifdef CONFIG_X86_64
static bool is_riprel_insn(struct arch_uprobe *auprobe)
{
	return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0);
	auprobe->branch.opc1 = opc1;
	auprobe->branch.ilen = insn->length;
	auprobe->branch.offs = insn->immediate.value;

	auprobe->ops = &branch_xol_ops;
	return 0;
}

static void
handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
/**
 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
 * @mm: the probed address space.
 * @arch_uprobe: the probepoint information.
 * @addr: virtual address at which to install the probepoint
 * Return 0 on success or a -ve number on error.
 */
int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
{
	if (is_riprel_insn(auprobe)) {
		struct arch_uprobe_task *autask;
	struct insn insn;
	bool fix_ip = true, fix_call = false;
	int ret;

		autask = &current->utask->autask;
		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
			regs->ax = autask->saved_scratch_register;
		else
			regs->cx = autask->saved_scratch_register;
	ret = validate_insn_bits(auprobe, mm, &insn);
	if (ret)
		return ret;

	ret = branch_setup_xol_ops(auprobe, &insn);
	if (ret != -ENOSYS)
		return ret;

	/*
		 * The original instruction includes a displacement, and so
		 * is 4 bytes longer than what we've just single-stepped.
		 * Fall through to handle stuff like "jmpq *...(%rip)" and
		 * "callq *...(%rip)".
	 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
	 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
	 * is either zero or it reflects rip-related fixups.
	 */
		if (correction)
			*correction += 4;
	switch (OPCODE1(&insn)) {
	case 0x9d:		/* popf */
		auprobe->fixups |= UPROBE_FIX_SETF;
		break;
	case 0xc3:		/* ret or lret -- ip is correct */
	case 0xcb:
	case 0xc2:
	case 0xca:
		fix_ip = false;
		break;
	case 0x9a:		/* call absolute - Fix return addr, not ip */
		fix_call = true;
		fix_ip = false;
		break;
	case 0xea:		/* jmp absolute -- ip is correct */
		fix_ip = false;
		break;
	case 0xff:
		insn_get_modrm(&insn);
		switch (MODRM_REG(&insn)) {
		case 2: case 3:			/* call or lcall, indirect */
			fix_call = true;
		case 4: case 5:			/* jmp or ljmp, indirect */
			fix_ip = false;
		}
		/* fall through */
	default:
		handle_riprel_insn(auprobe, &insn);
	}
#else
static void
handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)

	if (fix_ip)
		auprobe->fixups |= UPROBE_FIX_IP;
	if (fix_call)
		auprobe->fixups |= UPROBE_FIX_CALL;

	auprobe->ops = &default_xol_ops;
	return 0;
}

/*
 * arch_uprobe_pre_xol - prepare to execute out of line.
 * @auprobe: the probepoint information.
 * @regs: reflects the saved user state of current task.
 */
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	/* No RIP-relative addressing on 32-bit */
	struct uprobe_task *utask = current->utask;

	regs->ip = utask->xol_vaddr;
	utask->autask.saved_trap_nr = current->thread.trap_nr;
	current->thread.trap_nr = UPROBE_TRAP_NR;

	utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF);
	regs->flags |= X86_EFLAGS_TF;
	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
		set_task_blockstep(current, false);

	if (auprobe->ops->pre_xol)
		return auprobe->ops->pre_xol(auprobe, regs);
	return 0;
}
#endif

/*
 * If xol insn itself traps and generates a signal(Say,
@@ -592,22 +752,25 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
 */
int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct uprobe_task *utask;
	long correction;
	int result = 0;
	struct uprobe_task *utask = current->utask;

	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);

	utask = current->utask;
	current->thread.trap_nr = utask->autask.saved_trap_nr;
	correction = (long)(utask->vaddr - utask->xol_vaddr);
	handle_riprel_post_xol(auprobe, regs, &correction);
	if (auprobe->fixups & UPROBE_FIX_IP)
		regs->ip += correction;

	if (auprobe->fixups & UPROBE_FIX_CALL)
		result = adjust_ret_addr(regs->sp, correction);
	if (auprobe->ops->post_xol) {
		int err = auprobe->ops->post_xol(auprobe, regs);
		if (err) {
			arch_uprobe_abort_xol(auprobe, regs);
			/*
			 * Restart the probed insn. ->post_xol() must ensure
			 * this is really possible if it returns -ERESTART.
			 */
			if (err == -ERESTART)
				return 0;
			return err;
		}
	}

	current->thread.trap_nr = utask->autask.saved_trap_nr;
	/*
	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
	 * so we can get an extra SIGTRAP if we do not clear TF. We need
@@ -618,7 +781,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
		regs->flags &= ~X86_EFLAGS_TF;

	return result;
	return 0;
}

/* callback routine for handling exceptions. */
@@ -652,8 +815,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,

/*
 * This function gets called when XOL instruction either gets trapped or
 * the thread has a fatal signal, so reset the instruction pointer to its
 * probed address.
 * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
 * Reset the instruction pointer to its probed address for the potential
 * restart or for post mortem analysis.
 */
void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
@@ -668,25 +832,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
		regs->flags &= ~X86_EFLAGS_TF;
}

/*
 * Skip these instructions as per the currently known x86 ISA.
 * rep=0x66*; nop=0x90
 */
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	int i;

	for (i = 0; i < MAX_UINSN_BYTES; i++) {
		if (auprobe->insn[i] == 0x66)
			continue;

		if (auprobe->insn[i] == 0x90) {
			regs->ip += i + 1;
			return true;
		}

		break;
	}
	if (auprobe->ops->emulate)
		return auprobe->ops->emulate(auprobe, regs);
	return false;
}

@@ -701,23 +850,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
	int rasize, ncopied;
	int rasize = sizeof_long(), nleft;
	unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */

	rasize = is_ia32_task() ? 4 : 8;
	ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize);
	if (unlikely(ncopied))
	if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
		return -1;

	/* check whether address has been already hijacked */
	if (orig_ret_vaddr == trampoline_vaddr)
		return orig_ret_vaddr;

	ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
	if (likely(!ncopied))
	nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
	if (likely(!nleft))
		return orig_ret_vaddr;

	if (ncopied != rasize) {
	if (nleft != rasize) {
		pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
			"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);

+1 −0
Original line number Diff line number Diff line
@@ -172,6 +172,7 @@ struct perf_event;
struct pmu {
	struct list_head		entry;

	struct module			*module;
	struct device			*dev;
	const struct attribute_group	**attr_groups;
	const char			*name;
Loading