Loading arch/Kconfig +1 −17 Original line number Diff line number Diff line Loading @@ -3,11 +3,9 @@ # config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE depends on TRACING_SUPPORT select TRACING select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help Loading @@ -17,20 +15,6 @@ config OPROFILE If unsure, say N. config OPROFILE_IBS bool "OProfile AMD IBS support (EXPERIMENTAL)" default n depends on OPROFILE && SMP && X86 help Instruction-Based Sampling (IBS) is a new profiling technique that provides rich, precise program performance information. IBS is introduced by AMD Family10h processors (AMD Opteron Quad-Core processor "Barcelona") to overcome the limitations of conventional performance counter sampling. If unsure, say N. config OPROFILE_EVENT_MULTIPLEX bool "OProfile multiplexing support (EXPERIMENTAL)" default n Loading arch/x86/oprofile/nmi_int.c +9 −8 Original line number Diff line number Diff line Loading @@ -159,7 +159,7 @@ static int nmi_setup_mux(void) for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).multiplex = kmalloc(multiplex_size, GFP_KERNEL); kzalloc(multiplex_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).multiplex) return 0; } Loading @@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) if (counter_config[i].enabled) { multiplex[i].saved = -(u64)counter_config[i].count; } else { multiplex[i].addr = 0; multiplex[i].saved = 0; } } Loading @@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; struct op_msr *multiplex = msrs->multiplex; int i; for (i = 0; i < model->num_counters; ++i) { int virt = op_x86_phys_to_virt(i); if (multiplex[virt].addr) rdmsrl(multiplex[virt].addr, multiplex[virt].saved); if (counters[i].addr) rdmsrl(counters[i].addr, multiplex[virt].saved); } } static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; struct op_msr *multiplex = msrs->multiplex; int i; for (i = 0; i < model->num_counters; ++i) { int virt = op_x86_phys_to_virt(i); if (multiplex[virt].addr) wrmsrl(multiplex[virt].addr, multiplex[virt].saved); if (counters[i].addr) wrmsrl(counters[i].addr, multiplex[virt].saved); } } Loading Loading @@ -303,11 +304,11 @@ static int allocate_msrs(void) int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).counters) return 0; per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).controls) return 0; Loading arch/x86/oprofile/op_model_amd.c +158 −86 Original line number Diff line number Diff line Loading @@ -22,6 +22,9 @@ #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> #include <asm/apic.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include "op_x86_model.h" #include "op_counter.h" Loading @@ -43,8 +46,6 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) Loading @@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 static int has_ibs; /* AMD Family10h and later */ static u32 ibs_caps; struct op_ibs_config { unsigned long op_enabled; Loading @@ -71,24 +72,52 @@ struct op_ibs_config { }; static struct op_ibs_config ibs_config; static u64 ibs_op_ctl; #endif /* * IBS cpuid feature detection */ #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX #define IBS_CPUID_FEATURES 0x8000001b /* * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but * bit 0 is used to indicate the existence of IBS. */ #define IBS_CAPS_AVAIL (1LL<<0) #define IBS_CAPS_RDWROPCNT (1LL<<3) #define IBS_CAPS_OPCNT (1LL<<4) /* * IBS randomization macros */ #define IBS_RANDOM_BITS 12 #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) static void op_mux_fill_in_addresses(struct op_msrs * const msrs) static u32 get_ibs_caps(void) { int i; u32 ibs_caps; unsigned int max_level; for (i = 0; i < NUM_VIRT_COUNTERS; i++) { int hw_counter = op_x86_virt_to_phys(i); if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->multiplex[i].addr = 0; } if (!boot_cpu_has(X86_FEATURE_IBS)) return 0; /* check IBS cpuid feature flags */ max_level = cpuid_eax(0x80000000); if (max_level < IBS_CPUID_FEATURES) return IBS_CAPS_AVAIL; ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); if (!(ibs_caps & IBS_CAPS_AVAIL)) /* cpuid flags not valid */ return IBS_CAPS_AVAIL; return ibs_caps; } #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { Loading @@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!counter_config[virt].enabled) if (!reset_value[virt]) continue; rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; Loading @@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, } } #else static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } #endif /* functions for op_amd_spec */ Loading @@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) for (i = 0; i < NUM_COUNTERS; i++) { if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; else msrs->controls[i].addr = 0; } op_mux_fill_in_addresses(msrs); } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, Loading @@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* setup reset_value */ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { if (counter_config[i].enabled) if (counter_config[i].enabled && msrs->counters[op_x86_virt_to_phys(i)].addr) reset_value[i] = counter_config[i].count; else reset_value[i] = 0; Loading @@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) if (unlikely(!msrs->controls[i].addr)) { if (counter_config[i].enabled && !smp_processor_id()) /* * counter is reserved, this is on all * cpus, so report only for cpu #0 */ op_x86_warn_reserved(i); continue; } rdmsrl(msrs->controls[i].addr, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } Loading @@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!counter_config[virt].enabled) continue; if (!msrs->counters[i].addr) if (!reset_value[virt]) continue; /* setup counter registers */ Loading @@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } } #ifdef CONFIG_OPROFILE_IBS /* * 16-bit Linear Feedback Shift Register (LFSR) * * 16 14 13 11 * Feedback polynomial = X + X + X + X + 1 */ static unsigned int lfsr_random(void) { static unsigned int lfsr_value = 0xF00D; unsigned int bit; /* Compute next bit to shift in */ bit = ((lfsr_value >> 0) ^ (lfsr_value >> 2) ^ (lfsr_value >> 3) ^ (lfsr_value >> 5)) & 0x0001; /* Advance to next register value */ lfsr_value = (lfsr_value >> 1) | (bit << 15); return lfsr_value; } /* * IBS software randomization * * The IBS periodic op counter is randomized in software. The lower 12 * bits of the 20 bit counter are randomized. IbsOpCurCnt is * initialized with a 12 bit random value. */ static inline u64 op_amd_randomize_ibs_op(u64 val) { unsigned int random = lfsr_random(); if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) /* * Work around if the hw can not write to IbsOpCurCnt * * Randomize the lower 8 bits of the 16 bit * IbsOpMaxCnt [15:0] value in the range of -128 to * +127 by adding/subtracting an offset to the * maximum count (IbsOpMaxCnt). * * To avoid over or underflows and protect upper bits * starting at bit 16, the initial value for * IbsOpMaxCnt must fit in the range from 0x0081 to * 0xff80. */ val += (s8)(random >> 4); else val |= (u64)(random & IBS_RANDOM_MASK) << 32; return val; } static inline void op_amd_handle_ibs(struct pt_regs * const regs, Loading @@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, u64 val, ctl; struct op_entry entry; if (!has_ibs) if (!ibs_caps) return; if (ibs_config.fetch_enabled) { Loading Loading @@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, oprofile_write_commit(&entry); /* reenable the IRQ */ ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; ctl |= IBS_OP_ENABLE; ctl = op_amd_randomize_ibs_op(ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } Loading @@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs, static inline void op_amd_start_ibs(void) { u64 val; if (has_ibs && ibs_config.fetch_enabled) { if (!ibs_caps) return; if (ibs_config.fetch_enabled) { val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (has_ibs && ibs_config.op_enabled) { val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; val |= IBS_OP_ENABLE; if (ibs_config.op_enabled) { ibs_op_ctl = ibs_config.max_cnt_op >> 4; if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { /* * IbsOpCurCnt not supported. See * op_amd_randomize_ibs_op() for details. */ ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); } else { /* * The start value is randomized with a * positive offset, we need to compensate it * with the half of the randomized range. Also * avoid underflows. */ ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, 0xFFFFULL); } if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) ibs_op_ctl |= IBS_OP_CNT_CTL; ibs_op_ctl |= IBS_OP_ENABLE; val = op_amd_randomize_ibs_op(ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, val); } } static void op_amd_stop_ibs(void) { if (has_ibs && ibs_config.fetch_enabled) if (!ibs_caps) return; if (ibs_config.fetch_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); if (has_ibs && ibs_config.op_enabled) if (ibs_config.op_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSOPCTL, 0); } #else static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } #endif static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { Loading Loading @@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } #ifdef CONFIG_OPROFILE_IBS static u8 ibs_eilvt_off; static inline void apic_init_ibs_nmi_per_cpu(void *arg) Loading Loading @@ -405,45 +498,36 @@ static int init_ibs_nmi(void) return 1; } #ifdef CONFIG_NUMA /* Sanity check */ /* Works only for 64bit with proper numa implementation. */ if (nodes != num_possible_nodes()) { printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " "found: %d, expected %d", nodes, num_possible_nodes()); return 1; } #endif return 0; } /* uninitialize the APIC for the IBS interrupts if needed */ static void clear_ibs_nmi(void) { if (has_ibs) if (ibs_caps) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } /* initialize the APIC for the IBS interrupts if available */ static void ibs_init(void) { has_ibs = boot_cpu_has(X86_FEATURE_IBS); ibs_caps = get_ibs_caps(); if (!has_ibs) if (!ibs_caps) return; if (init_ibs_nmi()) { has_ibs = 0; ibs_caps = 0; return; } printk(KERN_INFO "oprofile: AMD IBS detected\n"); printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", (unsigned)ibs_caps); } static void ibs_exit(void) { if (!has_ibs) if (!ibs_caps) return; clear_ibs_nmi(); Loading @@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) if (ret) return ret; if (!has_ibs) if (!ibs_caps) return ret; /* model specific files */ Loading @@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ibs_config.fetch_enabled = 0; ibs_config.max_cnt_op = 250000; ibs_config.op_enabled = 0; ibs_config.dispatched_ops = 1; ibs_config.dispatched_ops = 0; dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); oprofilefs_create_ulong(sb, dir, "enable", Loading @@ -488,6 +572,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) &ibs_config.op_enabled); oprofilefs_create_ulong(sb, dir, "max_count", &ibs_config.max_cnt_op); if (ibs_caps & IBS_CAPS_OPCNT) oprofilefs_create_ulong(sb, dir, "dispatched_ops", &ibs_config.dispatched_ops); Loading @@ -507,19 +592,6 @@ static void op_amd_exit(void) ibs_exit(); } #else /* no IBS support */ static int op_amd_init(struct oprofile_operations *ops) { return 0; } static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, Loading arch/x86/oprofile/op_model_p4.c +0 −6 Original line number Diff line number Diff line Loading @@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) setup_num_counters(); stag = get_stagger(); /* initialize some registers */ for (i = 0; i < num_counters; ++i) msrs->counters[i].addr = 0; for (i = 0; i < num_controls; ++i) msrs->controls[i].addr = 0; /* the counter & cccr registers we pay attention to */ for (i = 0; i < num_counters; ++i) { addr = p4_counters[VIRT_CTR(stag, i)].counter_address; Loading arch/x86/oprofile/op_model_ppro.c +11 −6 Original line number Diff line number Diff line Loading @@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) for (i = 0; i < num_counters; i++) { if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; else msrs->counters[i].addr = 0; } for (i = 0; i < num_counters; i++) { if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; else msrs->controls[i].addr = 0; } } Loading @@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, int i; if (!reset_value) { reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; Loading @@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0; i < num_counters; ++i) { if (unlikely(!msrs->controls[i].addr)) if (unlikely(!msrs->controls[i].addr)) { if (counter_config[i].enabled && !smp_processor_id()) /* * counter is reserved, this is on all * cpus, so report only for cpu #0 */ op_x86_warn_reserved(i); continue; } rdmsrl(msrs->controls[i].addr, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } Loading Loading
arch/Kconfig +1 −17 Original line number Diff line number Diff line Loading @@ -3,11 +3,9 @@ # config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE depends on TRACING_SUPPORT select TRACING select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help Loading @@ -17,20 +15,6 @@ config OPROFILE If unsure, say N. config OPROFILE_IBS bool "OProfile AMD IBS support (EXPERIMENTAL)" default n depends on OPROFILE && SMP && X86 help Instruction-Based Sampling (IBS) is a new profiling technique that provides rich, precise program performance information. IBS is introduced by AMD Family10h processors (AMD Opteron Quad-Core processor "Barcelona") to overcome the limitations of conventional performance counter sampling. If unsure, say N. config OPROFILE_EVENT_MULTIPLEX bool "OProfile multiplexing support (EXPERIMENTAL)" default n Loading
arch/x86/oprofile/nmi_int.c +9 −8 Original line number Diff line number Diff line Loading @@ -159,7 +159,7 @@ static int nmi_setup_mux(void) for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).multiplex = kmalloc(multiplex_size, GFP_KERNEL); kzalloc(multiplex_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).multiplex) return 0; } Loading @@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) if (counter_config[i].enabled) { multiplex[i].saved = -(u64)counter_config[i].count; } else { multiplex[i].addr = 0; multiplex[i].saved = 0; } } Loading @@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; struct op_msr *multiplex = msrs->multiplex; int i; for (i = 0; i < model->num_counters; ++i) { int virt = op_x86_phys_to_virt(i); if (multiplex[virt].addr) rdmsrl(multiplex[virt].addr, multiplex[virt].saved); if (counters[i].addr) rdmsrl(counters[i].addr, multiplex[virt].saved); } } static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; struct op_msr *multiplex = msrs->multiplex; int i; for (i = 0; i < model->num_counters; ++i) { int virt = op_x86_phys_to_virt(i); if (multiplex[virt].addr) wrmsrl(multiplex[virt].addr, multiplex[virt].saved); if (counters[i].addr) wrmsrl(counters[i].addr, multiplex[virt].saved); } } Loading Loading @@ -303,11 +304,11 @@ static int allocate_msrs(void) int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).counters) return 0; per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).controls) return 0; Loading
arch/x86/oprofile/op_model_amd.c +158 −86 Original line number Diff line number Diff line Loading @@ -22,6 +22,9 @@ #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> #include <asm/apic.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include "op_x86_model.h" #include "op_counter.h" Loading @@ -43,8 +46,6 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) Loading @@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 static int has_ibs; /* AMD Family10h and later */ static u32 ibs_caps; struct op_ibs_config { unsigned long op_enabled; Loading @@ -71,24 +72,52 @@ struct op_ibs_config { }; static struct op_ibs_config ibs_config; static u64 ibs_op_ctl; #endif /* * IBS cpuid feature detection */ #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX #define IBS_CPUID_FEATURES 0x8000001b /* * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but * bit 0 is used to indicate the existence of IBS. */ #define IBS_CAPS_AVAIL (1LL<<0) #define IBS_CAPS_RDWROPCNT (1LL<<3) #define IBS_CAPS_OPCNT (1LL<<4) /* * IBS randomization macros */ #define IBS_RANDOM_BITS 12 #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) static void op_mux_fill_in_addresses(struct op_msrs * const msrs) static u32 get_ibs_caps(void) { int i; u32 ibs_caps; unsigned int max_level; for (i = 0; i < NUM_VIRT_COUNTERS; i++) { int hw_counter = op_x86_virt_to_phys(i); if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->multiplex[i].addr = 0; } if (!boot_cpu_has(X86_FEATURE_IBS)) return 0; /* check IBS cpuid feature flags */ max_level = cpuid_eax(0x80000000); if (max_level < IBS_CPUID_FEATURES) return IBS_CAPS_AVAIL; ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); if (!(ibs_caps & IBS_CAPS_AVAIL)) /* cpuid flags not valid */ return IBS_CAPS_AVAIL; return ibs_caps; } #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { Loading @@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!counter_config[virt].enabled) if (!reset_value[virt]) continue; rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; Loading @@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, } } #else static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } #endif /* functions for op_amd_spec */ Loading @@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) for (i = 0; i < NUM_COUNTERS; i++) { if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; else msrs->controls[i].addr = 0; } op_mux_fill_in_addresses(msrs); } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, Loading @@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* setup reset_value */ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { if (counter_config[i].enabled) if (counter_config[i].enabled && msrs->counters[op_x86_virt_to_phys(i)].addr) reset_value[i] = counter_config[i].count; else reset_value[i] = 0; Loading @@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) if (unlikely(!msrs->controls[i].addr)) { if (counter_config[i].enabled && !smp_processor_id()) /* * counter is reserved, this is on all * cpus, so report only for cpu #0 */ op_x86_warn_reserved(i); continue; } rdmsrl(msrs->controls[i].addr, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } Loading @@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); if (!counter_config[virt].enabled) continue; if (!msrs->counters[i].addr) if (!reset_value[virt]) continue; /* setup counter registers */ Loading @@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } } #ifdef CONFIG_OPROFILE_IBS /* * 16-bit Linear Feedback Shift Register (LFSR) * * 16 14 13 11 * Feedback polynomial = X + X + X + X + 1 */ static unsigned int lfsr_random(void) { static unsigned int lfsr_value = 0xF00D; unsigned int bit; /* Compute next bit to shift in */ bit = ((lfsr_value >> 0) ^ (lfsr_value >> 2) ^ (lfsr_value >> 3) ^ (lfsr_value >> 5)) & 0x0001; /* Advance to next register value */ lfsr_value = (lfsr_value >> 1) | (bit << 15); return lfsr_value; } /* * IBS software randomization * * The IBS periodic op counter is randomized in software. The lower 12 * bits of the 20 bit counter are randomized. IbsOpCurCnt is * initialized with a 12 bit random value. */ static inline u64 op_amd_randomize_ibs_op(u64 val) { unsigned int random = lfsr_random(); if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) /* * Work around if the hw can not write to IbsOpCurCnt * * Randomize the lower 8 bits of the 16 bit * IbsOpMaxCnt [15:0] value in the range of -128 to * +127 by adding/subtracting an offset to the * maximum count (IbsOpMaxCnt). * * To avoid over or underflows and protect upper bits * starting at bit 16, the initial value for * IbsOpMaxCnt must fit in the range from 0x0081 to * 0xff80. */ val += (s8)(random >> 4); else val |= (u64)(random & IBS_RANDOM_MASK) << 32; return val; } static inline void op_amd_handle_ibs(struct pt_regs * const regs, Loading @@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, u64 val, ctl; struct op_entry entry; if (!has_ibs) if (!ibs_caps) return; if (ibs_config.fetch_enabled) { Loading Loading @@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, oprofile_write_commit(&entry); /* reenable the IRQ */ ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; ctl |= IBS_OP_ENABLE; ctl = op_amd_randomize_ibs_op(ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } Loading @@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs, static inline void op_amd_start_ibs(void) { u64 val; if (has_ibs && ibs_config.fetch_enabled) { if (!ibs_caps) return; if (ibs_config.fetch_enabled) { val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (has_ibs && ibs_config.op_enabled) { val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; val |= IBS_OP_ENABLE; if (ibs_config.op_enabled) { ibs_op_ctl = ibs_config.max_cnt_op >> 4; if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { /* * IbsOpCurCnt not supported. See * op_amd_randomize_ibs_op() for details. */ ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); } else { /* * The start value is randomized with a * positive offset, we need to compensate it * with the half of the randomized range. Also * avoid underflows. */ ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, 0xFFFFULL); } if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) ibs_op_ctl |= IBS_OP_CNT_CTL; ibs_op_ctl |= IBS_OP_ENABLE; val = op_amd_randomize_ibs_op(ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, val); } } static void op_amd_stop_ibs(void) { if (has_ibs && ibs_config.fetch_enabled) if (!ibs_caps) return; if (ibs_config.fetch_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); if (has_ibs && ibs_config.op_enabled) if (ibs_config.op_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSOPCTL, 0); } #else static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } #endif static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { Loading Loading @@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } #ifdef CONFIG_OPROFILE_IBS static u8 ibs_eilvt_off; static inline void apic_init_ibs_nmi_per_cpu(void *arg) Loading Loading @@ -405,45 +498,36 @@ static int init_ibs_nmi(void) return 1; } #ifdef CONFIG_NUMA /* Sanity check */ /* Works only for 64bit with proper numa implementation. */ if (nodes != num_possible_nodes()) { printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " "found: %d, expected %d", nodes, num_possible_nodes()); return 1; } #endif return 0; } /* uninitialize the APIC for the IBS interrupts if needed */ static void clear_ibs_nmi(void) { if (has_ibs) if (ibs_caps) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } /* initialize the APIC for the IBS interrupts if available */ static void ibs_init(void) { has_ibs = boot_cpu_has(X86_FEATURE_IBS); ibs_caps = get_ibs_caps(); if (!has_ibs) if (!ibs_caps) return; if (init_ibs_nmi()) { has_ibs = 0; ibs_caps = 0; return; } printk(KERN_INFO "oprofile: AMD IBS detected\n"); printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", (unsigned)ibs_caps); } static void ibs_exit(void) { if (!has_ibs) if (!ibs_caps) return; clear_ibs_nmi(); Loading @@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) if (ret) return ret; if (!has_ibs) if (!ibs_caps) return ret; /* model specific files */ Loading @@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ibs_config.fetch_enabled = 0; ibs_config.max_cnt_op = 250000; ibs_config.op_enabled = 0; ibs_config.dispatched_ops = 1; ibs_config.dispatched_ops = 0; dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); oprofilefs_create_ulong(sb, dir, "enable", Loading @@ -488,6 +572,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) &ibs_config.op_enabled); oprofilefs_create_ulong(sb, dir, "max_count", &ibs_config.max_cnt_op); if (ibs_caps & IBS_CAPS_OPCNT) oprofilefs_create_ulong(sb, dir, "dispatched_ops", &ibs_config.dispatched_ops); Loading @@ -507,19 +592,6 @@ static void op_amd_exit(void) ibs_exit(); } #else /* no IBS support */ static int op_amd_init(struct oprofile_operations *ops) { return 0; } static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, Loading
arch/x86/oprofile/op_model_p4.c +0 −6 Original line number Diff line number Diff line Loading @@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) setup_num_counters(); stag = get_stagger(); /* initialize some registers */ for (i = 0; i < num_counters; ++i) msrs->counters[i].addr = 0; for (i = 0; i < num_controls; ++i) msrs->controls[i].addr = 0; /* the counter & cccr registers we pay attention to */ for (i = 0; i < num_counters; ++i) { addr = p4_counters[VIRT_CTR(stag, i)].counter_address; Loading
arch/x86/oprofile/op_model_ppro.c +11 −6 Original line number Diff line number Diff line Loading @@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) for (i = 0; i < num_counters; i++) { if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; else msrs->counters[i].addr = 0; } for (i = 0; i < num_counters; i++) { if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; else msrs->controls[i].addr = 0; } } Loading @@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, int i; if (!reset_value) { reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; Loading @@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0; i < num_counters; ++i) { if (unlikely(!msrs->controls[i].addr)) if (unlikely(!msrs->controls[i].addr)) { if (counter_config[i].enabled && !smp_processor_id()) /* * counter is reserved, this is on all * cpus, so report only for cpu #0 */ op_x86_warn_reserved(i); continue; } rdmsrl(msrs->controls[i].addr, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } Loading