Loading arch/x86/kernel/cpu/perf_counter.c +235 −20 Original line number Diff line number Diff line Loading @@ -65,6 +65,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; /* * Not sure about some of these */ static const u64 p6_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, }; static u64 p6_pmu_event_map(int event) { return p6_perfmon_event_map[event]; } /* * Counter setting that is specified not to count anything. * We use this to effectively disable a counter. * * L2_RQSTS with 0 MESI unit mask. */ #define P6_NOP_COUNTER 0x0000002EULL static u64 p6_pmu_raw_event(u64 event) { #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL #define P6_EVNTSEL_INV_MASK 0x00800000ULL #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL #define P6_EVNTSEL_MASK \ (P6_EVNTSEL_EVENT_MASK | \ P6_EVNTSEL_UNIT_MASK | \ P6_EVNTSEL_EDGE_MASK | \ P6_EVNTSEL_INV_MASK | \ P6_EVNTSEL_COUNTER_MASK) return event & P6_EVNTSEL_MASK; } /* * Intel PerfMon v3. Used on Core2 and later. 
*/ Loading Loading @@ -666,6 +712,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_attr *attr = &counter->attr; struct hw_perf_counter *hwc = &counter->hw; u64 config; int err; if (!x86_pmu_initialized()) Loading Loading @@ -718,14 +765,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (attr->config >= x86_pmu.max_events) return -EINVAL; /* * The generic map: */ hwc->config |= x86_pmu.event_map(attr->config); config = x86_pmu.event_map(attr->config); if (config == 0) return -ENOENT; if (config == -1LL) return -EINVAL; hwc->config |= config; return 0; } static void p6_pmu_disable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); u64 val; if (!cpuc->enabled) return; cpuc->enabled = 0; barrier(); /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_P6_EVNTSEL0, val); } static void intel_pmu_disable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); Loading Loading @@ -767,6 +840,23 @@ void hw_perf_disable(void) return x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); unsigned long val; if (cpuc->enabled) return; cpuc->enabled = 1; barrier(); /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_P6_EVNTSEL0, val); } static void intel_pmu_enable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); Loading @@ -784,13 +874,13 @@ static void amd_pmu_enable_all(void) barrier(); for (idx = 0; idx < x86_pmu.num_counters; idx++) { struct perf_counter *counter = cpuc->counters[idx]; u64 val; if (!test_bit(idx, cpuc->active_mask)) continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) continue; val = counter->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } Loading Loading @@ -819,16 +909,13 @@ static 
inline void intel_pmu_ack_status(u64 ack) static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { int err; err = checking_wrmsrl(hwc->config_base + idx, (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { int err; err = checking_wrmsrl(hwc->config_base + idx, hwc->config); (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } static inline void Loading @@ -836,13 +923,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; int err; mask = 0xfULL << (idx * 4); rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; err = checking_wrmsrl(hwc->config_base, ctrl_val); (void)checking_wrmsrl(hwc->config_base, ctrl_val); } static inline void p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); u64 val = P6_NOP_COUNTER; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL0_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } static inline void Loading Loading @@ -943,6 +1041,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); u64 val; val = hwc->config; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL0_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { Loading @@ -959,8 +1070,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) if (cpuc->enabled) x86_pmu_enable_counter(hwc, idx); else x86_pmu_disable_counter(hwc, idx); } static int Loading Loading @@ -1176,6 +1285,49 @@ static void 
intel_pmu_reset(void) local_irq_restore(flags); } static int p6_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; counter = cpuc->counters[idx]; hwc = &counter->hw; val = x86_perf_counter_update(counter, hwc, idx); if (val & (1ULL << (x86_pmu.counter_bits - 1))) continue; /* * counter overflow */ handled = 1; data.period = counter->hw.last_period; if (!x86_perf_counter_set_period(counter, hwc, idx)) continue; if (perf_counter_overflow(counter, 1, &data)) p6_pmu_disable_counter(hwc, idx); } if (handled) inc_irq_stat(apic_perf_irqs); return handled; } /* * This handler is triggered by the local APIC, so the APIC IRQ handling Loading @@ -1185,14 +1337,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; int bit, cpu, loops; int bit, loops; u64 ack, status; data.regs = regs; data.addr = 0; cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); status = intel_pmu_get_status(); Loading Loading @@ -1249,14 +1400,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; int cpu, idx, handled = 0; int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) Loading Loading @@ -1353,6 +1503,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; static struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = 
p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_counter, .disable = p6_pmu_disable_counter, .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, .raw_event = p6_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), .max_period = (1ULL << 31) - 1, .version = 0, .num_counters = 2, /* * Counters have 40 bits implemented. However they are designed such * that bits [32-39] are sign extensions of bit 31. As such the * effective width of a counter for P6-like PMU is 32 bits only. * * See IA-32 Intel Architecture Software developer manual Vol 3B */ .counter_bits = 32, .counter_mask = (1ULL << 32) - 1, }; static struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, Loading Loading @@ -1392,6 +1568,39 @@ static struct x86_pmu amd_pmu = { .max_period = (1ULL << 47) - 1, }; static int p6_pmu_init(void) { int high, low; switch (boot_cpu_data.x86_model) { case 1: case 3: /* Pentium Pro */ case 5: case 6: /* Pentium II */ case 7: case 8: case 11: /* Pentium III */ break; case 9: case 13: /* Pentium M */ break; default: pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); return -ENODEV; } if (!cpu_has_apic) { pr_info("no Local APIC, try rebooting with lapic"); return -ENODEV; } x86_pmu = p6_pmu; return 0; } static int intel_pmu_init(void) { union cpuid10_edx edx; Loading @@ -1400,8 +1609,14 @@ static int intel_pmu_init(void) unsigned int ebx; int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { /* check for P6 processor family */ if (boot_cpu_data.x86 == 6) { return p6_pmu_init(); } else { return -ENODEV; } } /* * Check whether the Architectural PerfMon supports Loading kernel/perf_counter.c +17 −19 Original line number Diff line number Diff line Loading @@ -146,6 +146,14 @@ static void put_ctx(struct perf_counter_context *ctx) } } static void unclone_ctx(struct 
perf_counter_context *ctx) { if (ctx->parent_ctx) { put_ctx(ctx->parent_ctx); ctx->parent_ctx = NULL; } } /* * Get the perf_counter_context for a task and lock it. * This has to cope with the fact that until it is locked, Loading Loading @@ -1463,10 +1471,8 @@ static void perf_counter_enable_on_exec(struct task_struct *task) /* * Unclone this context if we enabled any counter. */ if (enabled && ctx->parent_ctx) { put_ctx(ctx->parent_ctx); ctx->parent_ctx = NULL; } if (enabled) unclone_ctx(ctx); spin_unlock(&ctx->lock); Loading Loading @@ -1526,7 +1532,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx, static struct perf_counter_context *find_get_context(pid_t pid, int cpu) { struct perf_counter_context *parent_ctx; struct perf_counter_context *ctx; struct perf_cpu_context *cpuctx; struct task_struct *task; Loading Loading @@ -1586,11 +1591,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) retry: ctx = perf_lock_task_context(task, &flags); if (ctx) { parent_ctx = ctx->parent_ctx; if (parent_ctx) { put_ctx(parent_ctx); ctx->parent_ctx = NULL; /* no longer a clone */ } unclone_ctx(ctx); spin_unlock_irqrestore(&ctx->lock, flags); } Loading Loading @@ -4262,15 +4263,12 @@ void perf_counter_exit_task(struct task_struct *child) */ spin_lock(&child_ctx->lock); child->perf_counter_ctxp = NULL; if (child_ctx->parent_ctx) { /* * This context is a clone; unclone it so it can't get * If this context is a clone, unclone it so it can't get * swapped to another process while we're removing all * the counters from it. */ put_ctx(child_ctx->parent_ctx); child_ctx->parent_ctx = NULL; } unclone_ctx(child_ctx); spin_unlock(&child_ctx->lock); local_irq_restore(flags); Loading tools/perf/Documentation/perf-report.txt +15 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,9 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. 
-n:: --show-nr-samples:: Show the number of samples for each symbol. -C:: --comms=:: Only consider symbols in these comms. CSV that understands Loading @@ -33,6 +36,18 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. -w:: --column-widths=:: Force each column width to the provided list, for large terminal readability. -t:: --field-separator=:: Use a special separator character and don't pad with spaces, replacing all occurrences of this separator in symbol names (and other output) with a '.' character, which is therefore the only invalid separator. SEE ALSO -------- linkperf:perf-stat[1] tools/perf/builtin-report.c +180 −47 Original line number Diff line number Diff line Loading @@ -33,8 +33,10 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; static char *dso_list_str, *comm_list_str, *sym_list_str; static char *dso_list_str, *comm_list_str, *sym_list_str, *col_width_list_str; static struct strlist *dso_list, *comm_list, *sym_list; static char *field_sep; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; Loading @@ -49,6 +51,7 @@ static int verbose; static int modules; static int full_paths; static int show_nr_samples; static unsigned long page_size; static unsigned long mmap_window = 32; Loading Loading @@ -129,6 +132,33 @@ typedef union event_union { struct read_event read; } event_t; static int repsep_fprintf(FILE *fp, const char *fmt, ...) 
{ int n; va_list ap; va_start(ap, fmt); if (!field_sep) n = vfprintf(fp, fmt, ap); else { char *bf = NULL; n = vasprintf(&bf, fmt, ap); if (n > 0) { char *sep = bf; while (1) { sep = strchr(sep, *field_sep); if (sep == NULL) break; *sep = '.'; } } fputs(bf, fp); free(bf); } va_end(ap); return n; } static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; Loading Loading @@ -360,12 +390,28 @@ static struct thread *thread__new(pid_t pid) return self; } static unsigned int dsos__col_width, comms__col_width, threads__col_width; static int thread__set_comm(struct thread *self, const char *comm) { if (self->comm) free(self->comm); self->comm = strdup(comm); return self->comm ? 0 : -ENOMEM; if (!self->comm) return -ENOMEM; if (!col_width_list_str && !field_sep && (!comm_list || strlist__has_entry(comm_list, comm))) { unsigned int slen = strlen(comm); if (slen > comms__col_width) { comms__col_width = slen; threads__col_width = slen + 6; } } return 0; } static size_t thread__fprintf(struct thread *self, FILE *fp) Loading Loading @@ -536,7 +582,9 @@ struct sort_entry { int64_t (*cmp)(struct hist_entry *, struct hist_entry *); int64_t (*collapse)(struct hist_entry *, struct hist_entry *); size_t (*print)(FILE *fp, struct hist_entry *); size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); unsigned int *width; bool elide; }; static int64_t cmp_null(void *l, void *r) Loading @@ -558,15 +606,17 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__thread_print(FILE *fp, struct hist_entry *self) sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) { return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); return repsep_fprintf(fp, "%*s:%5d", width - 6, self->thread->comm ?: "", self->thread->pid); } static struct sort_entry sort_thread = { .header = "Command: Pid", .cmp = sort__thread_cmp, .print = sort__thread_print, .width = &threads__col_width, }; /* --sort comm 
*/ Loading @@ -590,9 +640,9 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) } static size_t sort__comm_print(FILE *fp, struct hist_entry *self) sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) { return fprintf(fp, "%16s", self->thread->comm); return repsep_fprintf(fp, "%*s", width, self->thread->comm); } static struct sort_entry sort_comm = { Loading @@ -600,6 +650,7 @@ static struct sort_entry sort_comm = { .cmp = sort__comm_cmp, .collapse = sort__comm_collapse, .print = sort__comm_print, .width = &comms__col_width, }; /* --sort dso */ Loading @@ -617,18 +668,19 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__dso_print(FILE *fp, struct hist_entry *self) sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) { if (self->dso) return fprintf(fp, "%-25s", self->dso->name); return repsep_fprintf(fp, "%-*s", width, self->dso->name); return fprintf(fp, "%016llx ", (u64)self->ip); return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); } static struct sort_entry sort_dso = { .header = "Shared Object", .cmp = sort__dso_cmp, .print = sort__dso_print, .width = &dsos__col_width, }; /* --sort symbol */ Loading @@ -648,22 +700,22 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__sym_print(FILE *fp, struct hist_entry *self) sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) { size_t ret = 0; if (verbose) ret += fprintf(fp, "%#018llx ", (u64)self->ip); ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); ret += repsep_fprintf(fp, "[%c] ", self->level); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : self->dso == hypervisor_dso ? 
'h' : '.', self->sym->name); ret += repsep_fprintf(fp, "%s", self->sym->name); if (self->sym->module) ret += fprintf(fp, "\t[%s]", self->sym->module->name); ret += repsep_fprintf(fp, "\t[%s]", self->sym->module->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); } return ret; Loading @@ -690,19 +742,19 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__parent_print(FILE *fp, struct hist_entry *self) sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) { size_t ret = 0; ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]"); return ret; return repsep_fprintf(fp, "%-*s", width, self->parent ? self->parent->name : "[other]"); } static unsigned int parent_symbol__col_width; static struct sort_entry sort_parent = { .header = "Parent symbol", .cmp = sort__parent_cmp, .print = sort__parent_print, .width = &parent_symbol__col_width, }; static int sort__need_collapse = 0; Loading Loading @@ -967,17 +1019,25 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) return 0; if (total_samples) ret = percent_color_fprintf(fp, " %6.2f%%", ret = percent_color_fprintf(fp, field_sep ? "%.2f" : " %6.2f%%", (self->count * 100.0) / total_samples); else ret = fprintf(fp, "%12Ld ", self->count); ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count); if (show_nr_samples) { if (field_sep) fprintf(fp, "%c%lld", *field_sep, self->count); else fprintf(fp, "%11lld", self->count); } list_for_each_entry(se, &hist_entry__sort_list, list) { if (exclude_other && (se == &sort_parent)) if (se->elide) continue; fprintf(fp, " "); ret += se->print(fp, self); fprintf(fp, "%s", field_sep ?: " "); ret += se->print(fp, self, se->width ? 
*se->width : 0); } ret += fprintf(fp, "\n"); Loading @@ -992,6 +1052,18 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) * */ static void dso__calc_col_width(struct dso *self) { if (!col_width_list_str && !field_sep && (!dso_list || strlist__has_entry(dso_list, self->name))) { unsigned int slen = strlen(self->name); if (slen > dsos__col_width) dsos__col_width = slen; } self->slen_calculated = 1; } static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, struct dso **dsop, u64 *ipp) Loading @@ -1011,6 +1083,14 @@ resolve_symbol(struct thread *thread, struct map **mapp, map = thread__find_map(thread, ip); if (map != NULL) { /* * We have to do this here as we may have a dso * with no symbol hit that has a name longer than * the ones with symbols sampled. */ if (!sort_dso.elide && !map->dso->slen_calculated) dso__calc_col_width(map->dso); if (mapp) *mapp = map; got_map: Loading Loading @@ -1282,35 +1362,67 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) struct sort_entry *se; struct rb_node *nd; size_t ret = 0; unsigned int width; char *col_width = col_width_list_str; fprintf(fp, "\n"); fprintf(fp, "#\n"); fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); if (show_nr_samples) { if (field_sep) fprintf(fp, "%cSamples", *field_sep); else fputs(" Samples ", fp); } list_for_each_entry(se, &hist_entry__sort_list, list) { if (exclude_other && (se == &sort_parent)) if (se->elide) continue; fprintf(fp, " %s", se->header); if (field_sep) { fprintf(fp, "%c%s", *field_sep, se->header); continue; } width = strlen(se->header); if (se->width) { if (col_width_list_str) { if (col_width) { *se->width = atoi(col_width); col_width = strchr(col_width, ','); if (col_width) ++col_width; } } width = *se->width = max(*se->width, width); } fprintf(fp, " %*s", width, se->header); } fprintf(fp, "\n"); if (field_sep) goto 
print_entries; fprintf(fp, "# ........"); if (show_nr_samples) fprintf(fp, " .........."); list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; if (exclude_other && (se == &sort_parent)) if (se->elide) continue; fprintf(fp, " "); for (i = 0; i < strlen(se->header); i++) if (se->width) width = *se->width; else width = strlen(se->header); for (i = 0; i < width; i++) fprintf(fp, "."); } fprintf(fp, "\n"); fprintf(fp, "#\n"); print_entries: for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node); ret += hist_entry__fprintf(fp, pos, total_samples); Loading Loading @@ -1883,6 +1995,8 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, Loading @@ -1900,6 +2014,12 @@ static const struct option options[] = { "only consider symbols in these comms"), OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", "only consider these symbols"), OPT_STRING('w', "column-widths", &col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), OPT_STRING('t', "field-separator", &field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' 
is reserved."), OPT_END() }; Loading @@ -1919,7 +2039,8 @@ static void setup_sorting(void) } static void setup_list(struct strlist **list, const char *list_str, const char *list_name) struct sort_entry *se, const char *list_name, FILE *fp) { if (list_str) { *list = strlist__new(true, list_str); Loading @@ -1928,6 +2049,11 @@ static void setup_list(struct strlist **list, const char *list_str, list_name); exit(129); } if (strlist__nr_entries(*list) == 1) { fprintf(fp, "# %s: %s\n", list_name, strlist__entry(*list, 0)->s); se->elide = true; } } } Loading @@ -1941,9 +2067,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) setup_sorting(); if (parent_pattern != default_parent_pattern) if (parent_pattern != default_parent_pattern) { sort_dimension__add("parent"); else sort_parent.elide = 1; } else exclude_other = 0; /* Loading @@ -1952,11 +2079,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(report_usage, options); setup_list(&dso_list, dso_list_str, "dso"); setup_list(&comm_list, comm_list_str, "comm"); setup_list(&sym_list, sym_list_str, "symbol"); setup_pager(); setup_list(&dso_list, dso_list_str, &sort_dso, "dso", stdout); setup_list(&comm_list, comm_list_str, &sort_comm, "comm", stdout); setup_list(&sym_list, sym_list_str, &sort_sym, "symbol", stdout); if (field_sep && *field_sep == '.') { fputs("'.' 
is the only non valid --field-separator argument\n", stderr); exit(129); } return __cmd_report(); } tools/perf/perf.h +7 −1 Original line number Diff line number Diff line #ifndef _PERF_PERF_H #define _PERF_PERF_H #if defined(__x86_64__) || defined(__i386__) #if defined(__i386__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #endif #if defined(__x86_64__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); Loading Loading
arch/x86/kernel/cpu/perf_counter.c +235 −20 Original line number Diff line number Diff line Loading @@ -65,6 +65,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; /* * Not sure about some of these */ static const u64 p6_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, }; static u64 p6_pmu_event_map(int event) { return p6_perfmon_event_map[event]; } /* * Counter setting that is specified not to count anything. * We use this to effectively disable a counter. * * L2_RQSTS with 0 MESI unit mask. */ #define P6_NOP_COUNTER 0x0000002EULL static u64 p6_pmu_raw_event(u64 event) { #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL #define P6_EVNTSEL_INV_MASK 0x00800000ULL #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL #define P6_EVNTSEL_MASK \ (P6_EVNTSEL_EVENT_MASK | \ P6_EVNTSEL_UNIT_MASK | \ P6_EVNTSEL_EDGE_MASK | \ P6_EVNTSEL_INV_MASK | \ P6_EVNTSEL_COUNTER_MASK) return event & P6_EVNTSEL_MASK; } /* * Intel PerfMon v3. Used on Core2 and later. 
*/ Loading Loading @@ -666,6 +712,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_attr *attr = &counter->attr; struct hw_perf_counter *hwc = &counter->hw; u64 config; int err; if (!x86_pmu_initialized()) Loading Loading @@ -718,14 +765,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (attr->config >= x86_pmu.max_events) return -EINVAL; /* * The generic map: */ hwc->config |= x86_pmu.event_map(attr->config); config = x86_pmu.event_map(attr->config); if (config == 0) return -ENOENT; if (config == -1LL) return -EINVAL; hwc->config |= config; return 0; } static void p6_pmu_disable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); u64 val; if (!cpuc->enabled) return; cpuc->enabled = 0; barrier(); /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_P6_EVNTSEL0, val); } static void intel_pmu_disable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); Loading Loading @@ -767,6 +840,23 @@ void hw_perf_disable(void) return x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); unsigned long val; if (cpuc->enabled) return; cpuc->enabled = 1; barrier(); /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_P6_EVNTSEL0, val); } static void intel_pmu_enable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); Loading @@ -784,13 +874,13 @@ static void amd_pmu_enable_all(void) barrier(); for (idx = 0; idx < x86_pmu.num_counters; idx++) { struct perf_counter *counter = cpuc->counters[idx]; u64 val; if (!test_bit(idx, cpuc->active_mask)) continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) continue; val = counter->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } Loading Loading @@ -819,16 +909,13 @@ static 
inline void intel_pmu_ack_status(u64 ack) static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { int err; err = checking_wrmsrl(hwc->config_base + idx, (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { int err; err = checking_wrmsrl(hwc->config_base + idx, hwc->config); (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } static inline void Loading @@ -836,13 +923,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; int err; mask = 0xfULL << (idx * 4); rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; err = checking_wrmsrl(hwc->config_base, ctrl_val); (void)checking_wrmsrl(hwc->config_base, ctrl_val); } static inline void p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); u64 val = P6_NOP_COUNTER; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL0_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } static inline void Loading Loading @@ -943,6 +1041,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); u64 val; val = hwc->config; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL0_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { Loading @@ -959,8 +1070,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) if (cpuc->enabled) x86_pmu_enable_counter(hwc, idx); else x86_pmu_disable_counter(hwc, idx); } static int Loading Loading @@ -1176,6 +1285,49 @@ static void 
intel_pmu_reset(void) local_irq_restore(flags); } static int p6_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; counter = cpuc->counters[idx]; hwc = &counter->hw; val = x86_perf_counter_update(counter, hwc, idx); if (val & (1ULL << (x86_pmu.counter_bits - 1))) continue; /* * counter overflow */ handled = 1; data.period = counter->hw.last_period; if (!x86_perf_counter_set_period(counter, hwc, idx)) continue; if (perf_counter_overflow(counter, 1, &data)) p6_pmu_disable_counter(hwc, idx); } if (handled) inc_irq_stat(apic_perf_irqs); return handled; } /* * This handler is triggered by the local APIC, so the APIC IRQ handling Loading @@ -1185,14 +1337,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; int bit, cpu, loops; int bit, loops; u64 ack, status; data.regs = regs; data.addr = 0; cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); status = intel_pmu_get_status(); Loading Loading @@ -1249,14 +1400,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; int cpu, idx, handled = 0; int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) Loading Loading @@ -1353,6 +1503,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; static struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = 
p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_counter, .disable = p6_pmu_disable_counter, .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, .raw_event = p6_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), .max_period = (1ULL << 31) - 1, .version = 0, .num_counters = 2, /* * Counters have 40 bits implemented. However they are designed such * that bits [32-39] are sign extensions of bit 31. As such the * effective width of a counter for P6-like PMU is 32 bits only. * * See IA-32 Intel Architecture Software developer manual Vol 3B */ .counter_bits = 32, .counter_mask = (1ULL << 32) - 1, }; static struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, Loading Loading @@ -1392,6 +1568,39 @@ static struct x86_pmu amd_pmu = { .max_period = (1ULL << 47) - 1, }; static int p6_pmu_init(void) { int high, low; switch (boot_cpu_data.x86_model) { case 1: case 3: /* Pentium Pro */ case 5: case 6: /* Pentium II */ case 7: case 8: case 11: /* Pentium III */ break; case 9: case 13: /* Pentium M */ break; default: pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); return -ENODEV; } if (!cpu_has_apic) { pr_info("no Local APIC, try rebooting with lapic"); return -ENODEV; } x86_pmu = p6_pmu; return 0; } static int intel_pmu_init(void) { union cpuid10_edx edx; Loading @@ -1400,8 +1609,14 @@ static int intel_pmu_init(void) unsigned int ebx; int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { /* check for P6 processor family */ if (boot_cpu_data.x86 == 6) { return p6_pmu_init(); } else { return -ENODEV; } } /* * Check whether the Architectural PerfMon supports Loading
kernel/perf_counter.c +17 −19 Original line number Diff line number Diff line Loading @@ -146,6 +146,14 @@ static void put_ctx(struct perf_counter_context *ctx) } } /* * Detach ctx from the context it was cloned from: when ctx->parent_ctx is * set, drop that reference with put_ctx() and clear the pointer. Does * nothing when ctx has no parent. * NOTE(review): every call site visible here releases ctx->lock right after * the call, so the lock is presumably expected to be held - confirm. */ static void unclone_ctx(struct perf_counter_context *ctx) { if (ctx->parent_ctx) { put_ctx(ctx->parent_ctx); ctx->parent_ctx = NULL; } } /* * Get the perf_counter_context for a task and lock it. * This has to cope with the fact that until it is locked, Loading @@ -1463,10 +1471,8 @@ static void perf_counter_enable_on_exec(struct task_struct *task) /* * Unclone this context if we enabled any counter. */ if (enabled && ctx->parent_ctx) { put_ctx(ctx->parent_ctx); ctx->parent_ctx = NULL; } if (enabled) unclone_ctx(ctx); spin_unlock(&ctx->lock); Loading Loading @@ -1526,7 +1532,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx, static struct perf_counter_context *find_get_context(pid_t pid, int cpu) { struct perf_counter_context *parent_ctx; struct perf_counter_context *ctx; struct perf_cpu_context *cpuctx; struct task_struct *task; Loading Loading @@ -1586,11 +1591,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) retry: ctx = perf_lock_task_context(task, &flags); if (ctx) { parent_ctx = ctx->parent_ctx; if (parent_ctx) { put_ctx(parent_ctx); ctx->parent_ctx = NULL; /* no longer a clone */ } unclone_ctx(ctx); spin_unlock_irqrestore(&ctx->lock, flags); } Loading Loading @@ -4262,15 +4263,12 @@ void perf_counter_exit_task(struct task_struct *child) */ spin_lock(&child_ctx->lock); child->perf_counter_ctxp = NULL; if (child_ctx->parent_ctx) { /* * This context is a clone; unclone it so it can't get * If this context is a clone, unclone it so it can't get * swapped to another process while we're removing all * the counters from it. */ put_ctx(child_ctx->parent_ctx); child_ctx->parent_ctx = NULL; } unclone_ctx(child_ctx); spin_unlock(&child_ctx->lock); local_irq_restore(flags); Loading
tools/perf/Documentation/perf-report.txt +15 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,9 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. -n:: --show-nr-samples:: Show the number of samples for each symbol. -C:: --comms=:: Only consider symbols in these comms. CSV that understands Loading @@ -33,6 +36,18 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. -w:: --column-widths=:: Force each column width to the provided list, for large terminal readability. -t:: --field-separator=:: Use a special separator character and don't pad with spaces, replacing all occurrences of this separator in symbol names (and other output) with a '.' character, which is therefore the only invalid separator. SEE ALSO -------- linkperf:perf-stat[1]
tools/perf/builtin-report.c +180 −47 Original line number Diff line number Diff line Loading @@ -33,8 +33,10 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; static char *dso_list_str, *comm_list_str, *sym_list_str; static char *dso_list_str, *comm_list_str, *sym_list_str, *col_width_list_str; static struct strlist *dso_list, *comm_list, *sym_list; static char *field_sep; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; Loading @@ -49,6 +51,7 @@ static int verbose; static int modules; static int full_paths; static int show_nr_samples; static unsigned long page_size; static unsigned long mmap_window = 32; Loading Loading @@ -129,6 +132,33 @@ typedef union event_union { struct read_event read; } event_t; /* * fprintf() wrapper for report columns. * With no field separator set this is a plain vfprintf(). Otherwise the text * is formatted into a heap buffer with vasprintf(), every occurrence of the * first separator character is overwritten with a dot, and the buffer is * written with fputs() and freed. * Returns the vfprintf()/vasprintf() result. * NOTE(review): if vasprintf() fails, bf is still passed to fputs(); an empty * separator string would also make strchr() match the terminating NUL and * overwrite it - both cases look unguarded, confirm. */ static int repsep_fprintf(FILE *fp, const char *fmt, ...) { int n; va_list ap; va_start(ap, fmt); if (!field_sep) n = vfprintf(fp, fmt, ap); else { char *bf = NULL; n = vasprintf(&bf, fmt, ap); if (n > 0) { char *sep = bf; while (1) { sep = strchr(sep, *field_sep); if (sep == NULL) break; *sep = '.'; } } fputs(bf, fp); free(bf); } va_end(ap); return n; } static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; Loading Loading @@ -360,12 +390,28 @@ static struct thread *thread__new(pid_t pid) return self; } static unsigned int dsos__col_width, comms__col_width, threads__col_width; /* * Replace the thread command name with a copy of comm. * Returns 0, or -ENOMEM when strdup() fails. * Unless column widths or a field separator were given, a comm that passes * the comm filter also widens comms__col_width to its length and * threads__col_width to that length plus 6. */ static int thread__set_comm(struct thread *self, const char *comm) { if (self->comm) free(self->comm); self->comm = strdup(comm); return self->comm ? 
0 : -ENOMEM; if (!self->comm) return -ENOMEM; if (!col_width_list_str && !field_sep && (!comm_list || strlist__has_entry(comm_list, comm))) { unsigned int slen = strlen(comm); if (slen > comms__col_width) { comms__col_width = slen; threads__col_width = slen + 6; } } return 0; } static size_t thread__fprintf(struct thread *self, FILE *fp) Loading Loading @@ -536,7 +582,9 @@ struct sort_entry { int64_t (*cmp)(struct hist_entry *, struct hist_entry *); int64_t (*collapse)(struct hist_entry *, struct hist_entry *); size_t (*print)(FILE *fp, struct hist_entry *); size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); unsigned int *width; bool elide; }; static int64_t cmp_null(void *l, void *r) Loading @@ -558,15 +606,17 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__thread_print(FILE *fp, struct hist_entry *self) sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) { return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); return repsep_fprintf(fp, "%*s:%5d", width - 6, self->thread->comm ?: "", self->thread->pid); } static struct sort_entry sort_thread = { .header = "Command: Pid", .cmp = sort__thread_cmp, .print = sort__thread_print, .width = &threads__col_width, }; /* --sort comm */ Loading @@ -590,9 +640,9 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) } static size_t sort__comm_print(FILE *fp, struct hist_entry *self) sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) { return fprintf(fp, "%16s", self->thread->comm); return repsep_fprintf(fp, "%*s", width, self->thread->comm); } static struct sort_entry sort_comm = { Loading @@ -600,6 +650,7 @@ static struct sort_entry sort_comm = { .cmp = sort__comm_cmp, .collapse = sort__comm_collapse, .print = sort__comm_print, .width = &comms__col_width, }; /* --sort dso */ Loading @@ -617,18 +668,19 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t 
sort__dso_print(FILE *fp, struct hist_entry *self) sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) { if (self->dso) return fprintf(fp, "%-25s", self->dso->name); return repsep_fprintf(fp, "%-*s", width, self->dso->name); return fprintf(fp, "%016llx ", (u64)self->ip); return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); } static struct sort_entry sort_dso = { .header = "Shared Object", .cmp = sort__dso_cmp, .print = sort__dso_print, .width = &dsos__col_width, }; /* --sort symbol */ Loading @@ -648,22 +700,22 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__sym_print(FILE *fp, struct hist_entry *self) sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) { size_t ret = 0; if (verbose) ret += fprintf(fp, "%#018llx ", (u64)self->ip); ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); ret += repsep_fprintf(fp, "[%c] ", self->level); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); ret += repsep_fprintf(fp, "%s", self->sym->name); if (self->sym->module) ret += fprintf(fp, "\t[%s]", self->sym->module->name); ret += repsep_fprintf(fp, "\t[%s]", self->sym->module->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); } return ret; Loading @@ -690,19 +742,19 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t sort__parent_print(FILE *fp, struct hist_entry *self) sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) { size_t ret = 0; ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]"); return ret; return repsep_fprintf(fp, "%-*s", width, self->parent ? 
self->parent->name : "[other]"); } static unsigned int parent_symbol__col_width; static struct sort_entry sort_parent = { .header = "Parent symbol", .cmp = sort__parent_cmp, .print = sort__parent_print, .width = &parent_symbol__col_width, }; static int sort__need_collapse = 0; Loading Loading @@ -967,17 +1019,25 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) return 0; if (total_samples) ret = percent_color_fprintf(fp, " %6.2f%%", ret = percent_color_fprintf(fp, field_sep ? "%.2f" : " %6.2f%%", (self->count * 100.0) / total_samples); else ret = fprintf(fp, "%12Ld ", self->count); ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count); /* NOTE(review): the sample-count column output is not added to ret */ if (show_nr_samples) { if (field_sep) fprintf(fp, "%c%lld", *field_sep, self->count); else fprintf(fp, "%11lld", self->count); } list_for_each_entry(se, &hist_entry__sort_list, list) { if (exclude_other && (se == &sort_parent)) if (se->elide) continue; fprintf(fp, " "); ret += se->print(fp, self); fprintf(fp, "%s", field_sep ?: " "); ret += se->print(fp, self, se->width ? *se->width : 0); } ret += fprintf(fp, "\n"); Loading @@ -992,6 +1052,18 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) * */ /* * Widen dsos__col_width to the length of this dso name, unless column widths * or a field separator were given or the dso is excluded by the dso list. * Always marks the dso as done through slen_calculated. */ static void dso__calc_col_width(struct dso *self) { if (!col_width_list_str && !field_sep && (!dso_list || strlist__has_entry(dso_list, self->name))) { unsigned int slen = strlen(self->name); if (slen > dsos__col_width) dsos__col_width = slen; } self->slen_calculated = 1; } static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, struct dso **dsop, u64 *ipp) Loading @@ -1011,6 +1083,14 @@ resolve_symbol(struct thread *thread, struct map **mapp, map = thread__find_map(thread, ip); if (map != NULL) { /* * We have to do this here as we may have a dso * with no symbol hit that has a name longer than * the ones with symbols sampled. 
*/ if (!sort_dso.elide && !map->dso->slen_calculated) dso__calc_col_width(map->dso); if (mapp) *mapp = map; got_map: Loading Loading @@ -1282,35 +1362,67 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) struct sort_entry *se; struct rb_node *nd; size_t ret = 0; unsigned int width; char *col_width = col_width_list_str; fprintf(fp, "\n"); fprintf(fp, "#\n"); fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); if (show_nr_samples) { if (field_sep) fprintf(fp, "%cSamples", *field_sep); else fputs(" Samples ", fp); } list_for_each_entry(se, &hist_entry__sort_list, list) { if (exclude_other && (se == &sort_parent)) if (se->elide) continue; fprintf(fp, " %s", se->header); if (field_sep) { fprintf(fp, "%c%s", *field_sep, se->header); continue; } width = strlen(se->header); if (se->width) { if (col_width_list_str) { if (col_width) { /* next value from the width list; atoi() gives no error indication */ *se->width = atoi(col_width); col_width = strchr(col_width, ','); if (col_width) ++col_width; } } width = *se->width = max(*se->width, width); } fprintf(fp, " %*s", width, se->header); } fprintf(fp, "\n"); if (field_sep) goto print_entries; fprintf(fp, "# ........"); if (show_nr_samples) fprintf(fp, " .........."); list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; if (exclude_other && (se == &sort_parent)) if (se->elide) continue; fprintf(fp, " "); for (i = 0; i < strlen(se->header); i++) if (se->width) width = *se->width; else width = strlen(se->header); for (i = 0; i < width; i++) fprintf(fp, "."); } fprintf(fp, "\n"); fprintf(fp, "#\n"); print_entries: for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node); ret += hist_entry__fprintf(fp, pos, total_samples); Loading Loading @@ -1883,6 +1995,8 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols 
- WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, Loading @@ -1900,6 +2014,12 @@ static const struct option options[] = { "only consider symbols in these comms"), OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", "only consider these symbols"), OPT_STRING('w', "column-widths", &col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), OPT_STRING('t', "field-separator", &field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), OPT_END() }; Loading @@ -1919,7 +2039,8 @@ static void setup_sorting(void) } static void setup_list(struct strlist **list, const char *list_str, const char *list_name) struct sort_entry *se, const char *list_name, FILE *fp) { if (list_str) { *list = strlist__new(true, list_str); Loading @@ -1928,6 +2049,11 @@ static void setup_list(struct strlist **list, const char *list_str, list_name); exit(129); } if (strlist__nr_entries(*list) == 1) { fprintf(fp, "# %s: %s\n", list_name, strlist__entry(*list, 0)->s); se->elide = true; } } } Loading @@ -1941,9 +2067,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) setup_sorting(); if (parent_pattern != default_parent_pattern) if (parent_pattern != default_parent_pattern) { sort_dimension__add("parent"); else sort_parent.elide = 1; } else exclude_other = 0; /* Loading @@ -1952,11 +2079,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(report_usage, options); setup_list(&dso_list, dso_list_str, "dso"); setup_list(&comm_list, comm_list_str, "comm"); setup_list(&sym_list, sym_list_str, "symbol"); setup_pager(); setup_list(&dso_list, dso_list_str, &sort_dso, "dso", stdout); 
setup_list(&comm_list, comm_list_str, &sort_comm, "comm", stdout); setup_list(&sym_list, sym_list_str, &sort_sym, "symbol", stdout); if (field_sep && *field_sep == '.') { fputs("'.' is the only non valid --field-separator argument\n", stderr); exit(129); } return __cmd_report(); }
tools/perf/perf.h +7 −1 Original line number Diff line number Diff line #ifndef _PERF_PERF_H #define _PERF_PERF_H #if defined(__x86_64__) || defined(__i386__) #if defined(__i386__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #endif #if defined(__x86_64__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); Loading