Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d4d67150 authored by Roland McGrath's avatar Roland McGrath
Browse files

x86 ptrace: unify syscall tracing



This unifies and cleans up the syscall tracing code on i386 and x86_64.

Using a single function for entry and exit tracing on 32-bit made the
do_syscall_trace() into some terrible spaghetti.  The logic is clear and
simple using separate syscall_trace_enter() and syscall_trace_leave()
functions as on 64-bit.

The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support
on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers
tracing either 32-bit or 64-bit tasks.  It behaves just like 32-bit.

Changing syscall_trace_enter() to return the syscall number shortens
all the assembly paths, while adding the SYSEMU feature in a simple way.

Signed-off-by: default avatarRoland McGrath <roland@redhat.com>
parent 64f09733
Loading
Loading
Loading
Loading
+9 −8
Original line number Diff line number Diff line
@@ -37,6 +37,11 @@
	movq	%rax,R8(%rsp)
	.endm

	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %eax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	.macro LOAD_ARGS32 offset
	movl \offset(%rsp),%r11d
	movl \offset+8(%rsp),%r10d
@@ -46,7 +51,6 @@
	movl \offset+48(%rsp),%edx
	movl \offset+56(%rsp),%esi
	movl \offset+64(%rsp),%edi
	movl \offset+72(%rsp),%eax
	.endm
	
	.macro CFI_STARTPROC32 simple
@@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target)
 	.previous	
	GET_THREAD_INFO(%r10)
	orl    $TS_COMPAT,TI_status(%r10)
	testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
		 TI_flags(%r10)
	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
	CFI_REMEMBER_STATE
	jnz  sysenter_tracesys
sysenter_do_call:	
	cmpl	$(IA32_NR_syscalls-1),%eax
	ja	ia32_badsys
sysenter_do_call:
	IA32_ARG_FIXUP 1
	call	*ia32_sys_call_table(,%rax,8)
	movq	%rax,RAX-ARGOFFSET(%rsp)
@@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target)
	.previous	
	GET_THREAD_INFO(%r10)
	orl   $TS_COMPAT,TI_status(%r10)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
		TI_flags(%r10)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
	CFI_REMEMBER_STATE
	jnz   cstar_tracesys
cstar_do_call:	
@@ -336,8 +338,7 @@ ENTRY(ia32_syscall)
	SAVE_ARGS 0,0,1
	GET_THREAD_INFO(%r10)
	orl   $TS_COMPAT,TI_status(%r10)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
		TI_flags(%r10)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
	jnz ia32_tracesys
ia32_do_syscall:	
	cmpl $(IA32_NR_syscalls-1),%eax
+7 −12
Original line number Diff line number Diff line
@@ -332,7 +332,7 @@ sysenter_past_esp:
	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
@@ -370,7 +370,7 @@ ENTRY(system_call)
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
@@ -510,12 +510,8 @@ END(work_pending)
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	xorl %edx,%edx
	call do_syscall_trace
	cmpl $0, %eax
	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
					# so must skip actual syscall
	movl PT_ORIG_EAX(%esp), %eax
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
@@ -524,14 +520,13 @@ END(syscall_trace_entry)
	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	testb $_TIF_WORK_SYSCALL_EXIT, %cl
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
					# schedule() instead
	movl %esp, %eax
	movl $1, %edx
	call do_syscall_trace
	call syscall_trace_leave
	jmp resume_userspace
END(syscall_exit_work)
	CFI_ENDPROC
+9 −5
Original line number Diff line number Diff line
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
		TI_flags(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
@@ -430,7 +429,12 @@ tracesys:
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
@@ -483,7 +487,7 @@ int_very_careful:
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */	
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
@@ -491,7 +495,7 @@ int_very_careful:
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest
	
int_signal:
+46 −95
Original line number Diff line number Diff line
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}

#ifdef CONFIG_X86_32

void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
{
	struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
	force_sig_info(SIGTRAP, &info, tsk);
}

/* notification of system call entry/exit
 * - triggered by current->work.syscall_trace
 */
int do_syscall_trace(struct pt_regs *regs, int entryexit)
{
	int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
	/*
	 * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
	 * interception
	 */
	int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
	int ret = 0;

	/* do the secure computing check first */
	if (!entryexit)
		secure_computing(regs->orig_ax);

	if (unlikely(current->audit_context)) {
		if (entryexit)
			audit_syscall_exit(AUDITSC_RESULT(regs->ax),
						regs->ax);
		/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
		 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
		 * not used, entry.S will call us only on syscall exit, not
		 * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
		 * calling send_sigtrap() on syscall entry.
		 *
		 * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
		 * is_singlestep is false, despite his name, so we will still do
		 * the correct thing.
		 */
		else if (is_singlestep)
			goto out;
	}

	if (!(current->ptrace & PT_PTRACED))
		goto out;

	/* If a process stops on the 1st tracepoint with SYSCALL_TRACE
	 * and then is resumed with SYSEMU_SINGLESTEP, it will come in
	 * here. We have to check this and return */
	if (is_sysemu && entryexit)
		return 0;

	/* Fake a debug trap */
	if (is_singlestep)
		send_sigtrap(current, regs, 0);

 	if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
		goto out;

	/* the 0x80 provides a way for the tracing parent to distinguish
	   between a syscall stop and SIGTRAP delivery */
	/* Note that the debugger could change the result of test_thread_flag!*/
	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));

	/*
	 * this isn't the same as continuing with a signal, but it will do
	 * for normal use.  strace only continues with a signal if the
	 * stopping signal is not SIGTRAP.  -brl
	 */
	if (current->exit_code) {
		send_sig(current->exit_code, current, 1);
		current->exit_code = 0;
	}
	ret = is_sysemu;
out:
	if (unlikely(current->audit_context) && !entryexit)
		audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
				    regs->bx, regs->cx, regs->dx, regs->si);
	if (ret == 0)
		return 0;

	regs->orig_ax = -1; /* force skip of syscall restarting */
	if (unlikely(current->audit_context))
		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
	return 1;
}

#else  /* CONFIG_X86_64 */

static void syscall_trace(struct pt_regs *regs)
{
	if (!(current->ptrace & PT_PTRACED))
		return;

#if 0
	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,71 @@ static void syscall_trace(struct pt_regs *regs)
	}
}

asmlinkage void syscall_trace_enter(struct pt_regs *regs)
#ifdef CONFIG_X86_32
# define IS_IA32	1
#elif defined CONFIG_IA32_EMULATION
# define IS_IA32	test_thread_flag(TIF_IA32)
#else
# define IS_IA32	0
#endif

/*
 * We must return the syscall number to actually look up in the table.
 * This can be -1L to skip running any syscall at all.
 */
asmregparm long syscall_trace_enter(struct pt_regs *regs)
{
	long ret = 0;

	/* do the secure computing check first */
	secure_computing(regs->orig_ax);

	if (test_thread_flag(TIF_SYSCALL_TRACE)
	    && (current->ptrace & PT_PTRACED))
	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
		ret = -1L;

	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
		syscall_trace(regs);

	if (unlikely(current->audit_context)) {
		if (test_thread_flag(TIF_IA32)) {
		if (IS_IA32)
			audit_syscall_entry(AUDIT_ARCH_I386,
					    regs->orig_ax,
					    regs->bx, regs->cx,
					    regs->dx, regs->si);
		} else {
#ifdef CONFIG_X86_64
		else
			audit_syscall_entry(AUDIT_ARCH_X86_64,
					    regs->orig_ax,
					    regs->di, regs->si,
					    regs->dx, regs->r10);
#endif
	}
	}

	return ret ?: regs->orig_ax;
}

asmlinkage void syscall_trace_leave(struct pt_regs *regs)
asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
	if (unlikely(current->audit_context))
		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);

	if ((test_thread_flag(TIF_SYSCALL_TRACE)
	     || test_thread_flag(TIF_SINGLESTEP))
	    && (current->ptrace & PT_PTRACED))
	if (test_thread_flag(TIF_SYSCALL_TRACE))
		syscall_trace(regs);
}

#endif	/* CONFIG_X86_32 */
	/*
	 * If TIF_SYSCALL_EMU is set, we only get here because of
	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
	 * We already reported this syscall instruction in
	 * syscall_trace_enter(), so don't do any more now.
	 */
	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
		return;

	/*
	 * If we are single-stepping, synthesize a trap to follow the
	 * system call instruction.
	 */
	if (test_thread_flag(TIF_SINGLESTEP) &&
	    (current->ptrace & PT_PTRACED))
		send_sigtrap(current, regs, 0);
}
+4 −2
Original line number Diff line number Diff line
@@ -104,7 +104,7 @@
	.endif
	.endm

	.macro LOAD_ARGS offset
	.macro LOAD_ARGS offset, skiprax=0
	movq \offset(%rsp),    %r11
	movq \offset+8(%rsp),  %r10
	movq \offset+16(%rsp), %r9
@@ -113,7 +113,10 @@
	movq \offset+48(%rsp), %rdx
	movq \offset+56(%rsp), %rsi
	movq \offset+64(%rsp), %rdi
	.if \skiprax
	.else
	movq \offset+72(%rsp), %rax
	.endif
	.endm

#define REST_SKIP	6*8
@@ -165,4 +168,3 @@
	.macro icebp
	.byte 0xf1
	.endm
Loading