Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f02a38d8 authored by Linus Torvalds
Browse files

Merge branches 'perf-fixes-for-linus' and 'x86-fixes-for-linus' of...

Merge branches 'perf-fixes-for-linus' and 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  jump label: Add work around to i386 gcc asm goto bug
  x86, ftrace: Use safe noops, drop trap test
  jump_label: Fix unaligned traps on sparc.
  jump label: Make arch_jump_label_text_poke_early() optional
  jump label: Fix error with preempt disable holding mutex
  oprofile: Remove deprecated use of flush_scheduled_work()
  oprofile: Fix the hang while taking the cpu offline
  jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex
  jump label: Fix module __init section race

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: Check irq_remapped instead of remapping_enabled in destroy_irq()
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -42,6 +42,20 @@ config KPROBES
	  for kernel debugging, non-intrusive instrumentation and testing.
	  If in doubt, say "N".

config JUMP_LABEL
       bool "Optimize trace point call sites"
       depends on HAVE_ARCH_JUMP_LABEL
       help
         If it is detected that the compiler has support for "asm goto",
	 the kernel will compile trace point locations with just a
	 nop instruction. When trace points are enabled, the nop will
	 be converted to a jump to the trace function. This technique
	 lowers overhead and stress on the branch prediction of the
	 processor.

	 On i386, options added to the compiler flags may increase
	 the size of the kernel slightly.

config OPTPROBES
	def_bool y
	depends on KPROBES && HAVE_OPTPROBES
+1 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
			 "nop\n\t"				\
			 "nop\n\t"				\
			 ".pushsection __jump_table,  \"a\"\n\t"\
			 ".align 4\n\t"				\
			 ".word 1b, %l[" #label "], %c0\n\t"	\
			 ".popsection \n\t"			\
			 : :  "i" (key) :  : label);\
+12 −1
Original line number Diff line number Diff line
@@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
# tracer assumptions. For i686, generic, core2 this is set by the
# compiler anyway
cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif

# Work around to a bug with asm goto with first implementations of it
# in gcc causing gcc to mess up the push and pop of the stack in some
# uses of asm goto.
ifeq ($(CONFIG_JUMP_LABEL), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif

cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)

# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
+15 −54
Original line number Diff line number Diff line
@@ -644,65 +644,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
#ifdef CONFIG_X86_64
unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
#else
unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
#endif

void __init arch_init_ideal_nop5(void)
{
	extern const unsigned char ftrace_test_p6nop[];
	extern const unsigned char ftrace_test_nop5[];
	extern const unsigned char ftrace_test_jmp[];
	int faulted = 0;

	/*
	 * There is no good nop for all x86 archs.
	 * We will default to using the P6_NOP5, but first we
	 * will test to make sure that the nop will actually
	 * work on this CPU. If it faults, we will then
	 * go to a lesser efficient 5 byte nop. If that fails
	 * we then just use a jmp as our nop. This isn't the most
	 * efficient nop, but we can not use a multi part nop
	 * since we would then risk being preempted in the middle
	 * of that nop, and if we enabled tracing then, it might
	 * cause a system crash.
	 * There is no good nop for all x86 archs.  This selection
	 * algorithm should be unified with the one in find_nop_table(),
	 * but this should be good enough for now.
	 *
	 * TODO: check the cpuid to determine the best nop.
	 * For cases other than the ones below, use the safe (as in
	 * always functional) defaults above.
	 */
	asm volatile (
		"ftrace_test_jmp:"
		"jmp ftrace_test_p6nop\n"
		"nop\n"
		"nop\n"
		"nop\n"  /* 2 byte jmp + 3 bytes */
		"ftrace_test_p6nop:"
		P6_NOP5
		"jmp 1f\n"
		"ftrace_test_nop5:"
		".byte 0x66,0x66,0x66,0x66,0x90\n"
		"1:"
		".section .fixup, \"ax\"\n"
		"2:	movl $1, %0\n"
		"	jmp ftrace_test_nop5\n"
		"3:	movl $2, %0\n"
		"	jmp 1b\n"
		".previous\n"
		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
		_ASM_EXTABLE(ftrace_test_nop5, 3b)
		: "=r"(faulted) : "0" (faulted));

	switch (faulted) {
	case 0:
		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
		break;
	case 1:
		pr_info("converting mcount calls to 66 66 66 66 90\n");
		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
		break;
	case 2:
		pr_info("converting mcount calls to jmp . + 5\n");
		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
		break;
	}

#ifdef CONFIG_X86_64
	/* Don't use these on 32 bits due to broken virtualizers */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		memcpy(ideal_nop5, p6_nops[5], 5);
#endif
}
#endif
+1 −1
Original line number Diff line number Diff line
@@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq)

	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);

	if (intr_remapping_enabled)
	if (irq_remapped(cfg))
		free_irte(irq);
	raw_spin_lock_irqsave(&vector_lock, flags);
	__clear_irq_vector(irq, cfg);
Loading