Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8c292f11 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf changes from Ingo Molnar:
 "Main changes:

  Kernel side changes:

   - Add SNB/IVB/HSW client uncore memory controller support (Stephane
     Eranian)

   - Fix various x86/P4 PMU driver bugs (Don Zickus)

  Tooling, user visible changes:

   - Add several futex 'perf bench' microbenchmarks (Davidlohr Bueso)

   - Speed up thread map generation (Don Zickus)

   - Introduce 'perf kvm --list-cmds' command line option for use by
     scripts (Ramkumar Ramachandra)

   - Print the evsel name in the annotate stdio output, prep to fix
     support outputting annotation for multiple events, not just for the
     first one (Arnaldo Carvalho de Melo)

   - Allow setting preferred callchain method in .perfconfig (Jiri Olsa)

   - Show in what binaries/modules 'perf probe's are set (Masami
     Hiramatsu)

   - Support distro-style debuginfo for uprobe in 'perf probe' (Masami
     Hiramatsu)

  Tooling, internal changes and fixes:

   - Use tid in mmap/mmap2 events to find maps (Don Zickus)

   - Record the reason for filtering an address_location (Namhyung Kim)

   - Apply all filters to an addr_location (Namhyung Kim)

   - Merge al->filtered with hist_entry->filtered in report/hists
     (Namhyung Kim)

   - Fix memory leak when synthesizing thread records (Namhyung Kim)

   - Use ui__has_annotation() in 'report' (Namhyung Kim)

   - hists browser refactorings to reuse code accross UIs (Namhyung Kim)

   - Add support for the new DWARF unwinder library in elfutils (Jiri
     Olsa)

   - Fix build race in the generation of bison files (Jiri Olsa)

   - Further streamline the feature detection display, trimming it a bit
     to show just the libraries detected, using VF=1 gets a more verbose
     output, showing the less interesting feature checks as well (Jiri
     Olsa).

   - Check compatible symtab type before loading dso (Namhyung Kim)

   - Check return value of filename__read_debuglink() (Stephane Eranian)

   - Move some hashing and fs related code from tools/perf/util/ to
     tools/lib/ so that it can be used by more tools/ living utilities
     (Borislav Petkov)

   - Prepare DWARF unwinding code for using an elfutils alternative
     unwinding library (Jiri Olsa)

   - Fix DWARF unwind max_stack processing (Jiri Olsa)

   - Add dwarf unwind 'perf test' entry (Jiri Olsa)

   - 'perf probe' improvements including memory leak fixes, sharing the
     intlist class with other tools, uprobes/kprobes code sharing and
     use of ref_reloc_sym (Masami Hiramatsu)

   - Shorten sample symbol resolving by adding cpumode to struct
     addr_location (Arnaldo Carvalho de Melo)

   - Fix synthesizing mmaps for threads (Don Zickus)

   - Fix invalid output on event group stdio report (Namhyung Kim)

   - Fixup header alignment in 'perf sched latency' output (Ramkumar
     Ramachandra)

   - Fix off-by-one error in 'perf timechart record' argv handling
     (Ramkumar Ramachandra)

  Tooling, cleanups:

   - Remove unused thread__find_map function (Jiri Olsa)

   - Remove unused simple_strtoul() function (Ramkumar Ramachandra)

  Tooling, documentation updates:

   - Update function names in debug messages (Ramkumar Ramachandra)

   - Update some code references in design.txt (Ramkumar Ramachandra)

   - Clarify load-latency information in the 'perf mem' docs (Andi
     Kleen)

   - Clarify x86 register naming in 'perf probe' docs (Andi Kleen)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (96 commits)
  perf tools: Remove unused simple_strtoul() function
  perf tools: Update some code references in design.txt
  perf evsel: Update function names in debug messages
  perf tools: Remove thread__find_map function
  perf annotate: Print the evsel name in the stdio output
  perf report: Use ui__has_annotation()
  perf tools: Fix memory leak when synthesizing thread records
  perf tools: Use tid in mmap/mmap2 events to find maps
  perf report: Merge al->filtered with hist_entry->filtered
  perf symbols: Apply all filters to an addr_location
  perf symbols: Record the reason for filtering an address_location
  perf sched: Fixup header alignment in 'latency' output
  perf timechart: Fix off-by-one error in 'record' argv handling
  perf machine: Factor machine__find_thread to take tid argument
  perf tools: Speed up thread map generation
  perf kvm: introduce --list-cmds for use by scripts
  perf ui hists: Pass evsel to hpp->header/width functions explicitly
  perf symbols: Introduce thread__find_cpumode_addr_location
  perf session: Change header.misc dump from decimal to hex
  perf ui/tui: Reuse generic __hpp__fmt() code
  ...
parents d31605dc 538592ff
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
#ifndef _ASM_X86_NMI_H
#define _ASM_X86_NMI_H

#include <linux/irq_work.h>
#include <linux/pm.h>
#include <asm/irq.h>
#include <asm/io.h>
@@ -38,6 +39,8 @@ typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
struct nmiaction {
	struct list_head	list;
	nmi_handler_t		handler;
	u64			max_duration;
	struct irq_work		irq_work;
	unsigned long		flags;
	const char		*name;
};
+33 −14
Original line number Diff line number Diff line
@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu)
		 * hw_perf_group_sched_in() or x86_pmu_enable()
		 *
		 * step1: save events moving to new counters
		 * step2: reprogram moved events into new counters
		 */
		for (i = 0; i < n_running; i++) {
			event = cpuc->event_list[i];
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu)
			x86_pmu_stop(event, PERF_EF_UPDATE);
		}

		/*
		 * step2: reprogram moved events into new counters
		 */
		for (i = 0; i < cpuc->n_events; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
	/*
	 * If group events scheduling transaction was started,
	 * skip the schedulability test here, it will be performed
	 * at commit time (->commit_txn) as a whole
	 * at commit time (->commit_txn) as a whole.
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		goto done_collect;
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags)
	memcpy(cpuc->assign, assign, n*sizeof(int));

done_collect:
	/*
	 * Commit the collect_events() state. See x86_pmu_del() and
	 * x86_pmu_*_txn().
	 */
	cpuc->n_events = n;
	cpuc->n_added += n - n0;
	cpuc->n_txn += n - n0;
@@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags)
	 * If we're called during a txn, we don't need to do anything.
	 * The events never got scheduled and ->cancel_txn will truncate
	 * the event_list.
	 *
	 * XXX assumes any ->del() called during a TXN will only be on
	 * an event added during that same TXN.
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		return;

	/*
	 * Not a TXN, therefore cleanup properly.
	 */
	x86_pmu_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event_list[i]) {
		if (event == cpuc->event_list[i])
			break;
	}

	if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
		return;

	/* If we have a newly added event; make sure to decrease n_added. */
	if (i >= cpuc->n_events - cpuc->n_added)
		--cpuc->n_added;

	if (x86_pmu.put_event_constraints)
		x86_pmu.put_event_constraints(cpuc, event);

	/* Delete the array entry. */
	while (++i < cpuc->n_events)
		cpuc->event_list[i-1] = cpuc->event_list[i];

	--cpuc->n_events;
			break;
		}
	}

	perf_event_update_userpage(event);
}

@@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
{
	__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
	/*
	 * Truncate the collected events.
	 * Truncate collected array by the number of events added in this
	 * transaction. See x86_pmu_add() and x86_pmu_*_txn().
	 */
	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
@@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
 * Commit group events scheduling transaction
 * Perform the group schedulability test as a whole
 * Return 0 if success
 *
 * Does not cancel the transaction on failure; expects the caller to do this.
 */
static int x86_pmu_commit_txn(struct pmu *pmu)
{
+5 −3
Original line number Diff line number Diff line
@@ -130,9 +130,11 @@ struct cpu_hw_events {
	unsigned long		running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;

	int			n_events;
	int			n_added;
	int			n_txn;
	int			n_events; /* the # of events in the below arrays */
	int			n_added;  /* the # last events in the below arrays;
					     they've never been enabled yet */
	int			n_txn;    /* the # last events in the below arrays;
					     added in the current transaction */
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+473 −71
Original line number Diff line number Diff line
@@ -66,6 +66,47 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");

static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
static void uncore_pmu_event_read(struct perf_event *event);

static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	struct intel_uncore_box *box;

	box = *per_cpu_ptr(pmu->box, cpu);
	if (box)
		return box;

	raw_spin_lock(&uncore_box_lock);
	list_for_each_entry(box, &pmu->box_list, list) {
		if (box->phys_id == topology_physical_package_id(cpu)) {
			atomic_inc(&box->refcnt);
			*per_cpu_ptr(pmu->box, cpu) = box;
			break;
		}
	}
	raw_spin_unlock(&uncore_box_lock);

	return *per_cpu_ptr(pmu->box, cpu);
}

static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	/*
	 * perf core schedules event on the basis of cpu, uncore events are
	 * collected by one of the cpus inside a physical package.
	 */
	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;
@@ -1639,6 +1680,349 @@ static struct intel_uncore_type *snb_msr_uncores[] = {
	&snb_uncore_cbox,
	NULL,
};

enum {
	SNB_PCI_UNCORE_IMC,
};

static struct uncore_event_desc snb_uncore_imc_events[] = {
	INTEL_UNCORE_EVENT_DESC(data_reads,  "event=0x01"),
	INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
	INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),

	INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
	INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
	INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),

	{ /* end: all zeroes */ },
};

#define SNB_UNCORE_PCI_IMC_EVENT_MASK		0xff
#define SNB_UNCORE_PCI_IMC_BAR_OFFSET		0x48

/* page size multiple covering all config regs */
#define SNB_UNCORE_PCI_IMC_MAP_SIZE		0x6000

#define SNB_UNCORE_PCI_IMC_DATA_READS		0x1
#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE	0x5050
#define SNB_UNCORE_PCI_IMC_DATA_WRITES		0x2
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE	0x5054
#define SNB_UNCORE_PCI_IMC_CTR_BASE		SNB_UNCORE_PCI_IMC_DATA_READS_BASE

static struct attribute *snb_uncore_imc_formats_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group snb_uncore_imc_format_group = {
	.name = "format",
	.attrs = snb_uncore_imc_formats_attr,
};

static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
{
	struct pci_dev *pdev = box->pci_dev;
	int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
	resource_size_t addr;
	u32 pci_dword;

	pci_read_config_dword(pdev, where, &pci_dword);
	addr = pci_dword;

#ifdef CONFIG_PHYS_ADDR_T_64BIT
	pci_read_config_dword(pdev, where + 4, &pci_dword);
	addr |= ((resource_size_t)pci_dword << 32);
#endif

	addr &= ~(PAGE_SIZE - 1);

	box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
	box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}

static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}

static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
{}

static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{}

static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{}

static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
}

/*
 * custom event_init() function because we define our own fixed, free
 * running counters, so we do not want to conflict with generic uncore
 * logic. Also simplifies processing
 */
static int snb_uncore_imc_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
	int idx, base;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/* unsupported modes and filters */
	if (event->attr.exclude_user   ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv     ||
	    event->attr.exclude_idle   ||
	    event->attr.exclude_host   ||
	    event->attr.exclude_guest  ||
	    event->attr.sample_period) /* no sampling */
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;

	/* check only supported bits are set */
	if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
		return -EINVAL;

	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;

	event->cpu = box->cpu;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;
	/*
	 * check event is known (whitelist, determines counter)
	 */
	switch (cfg) {
	case SNB_UNCORE_PCI_IMC_DATA_READS:
		base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
		idx = UNCORE_PMC_IDX_FIXED;
		break;
	case SNB_UNCORE_PCI_IMC_DATA_WRITES:
		base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
		idx = UNCORE_PMC_IDX_FIXED + 1;
		break;
	default:
		return -EINVAL;
	}

	/* must be done before validate_group */
	event->hw.event_base = base;
	event->hw.config = cfg;
	event->hw.idx = idx;

	/* no group validation needed, we have free running counters */

	return 0;
}

static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
{
	return 0;
}

static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	u64 count;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;
	box->n_active++;

	list_add_tail(&event->active_entry, &box->active_list);

	count = snb_uncore_imc_read_counter(box, event);
	local64_set(&event->hw.prev_count, count);

	if (box->n_active == 1)
		uncore_pmu_start_hrtimer(box);
}

static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		box->n_active--;

		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		list_del(&event->active_entry);

		if (box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of a event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (!box)
		return -ENODEV;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	snb_uncore_imc_event_start(event, 0);

	box->n_events++;

	return 0;
}

static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			--box->n_events;
			break;
		}
	}
}

static int snb_pci2phy_map_init(int devid)
{
	struct pci_dev *dev = NULL;
	int bus;

	dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
	if (!dev)
		return -ENOTTY;

	bus = dev->bus->number;

	pcibus_to_physid[bus] = 0;

	pci_dev_put(dev);

	return 0;
}

static struct pmu snb_uncore_imc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= snb_uncore_imc_event_init,
	.add		= snb_uncore_imc_event_add,
	.del		= snb_uncore_imc_event_del,
	.start		= snb_uncore_imc_event_start,
	.stop		= snb_uncore_imc_event_stop,
	.read		= uncore_pmu_event_read,
};

static struct intel_uncore_ops snb_uncore_imc_ops = {
	.init_box	= snb_uncore_imc_init_box,
	.enable_box	= snb_uncore_imc_enable_box,
	.disable_box	= snb_uncore_imc_disable_box,
	.disable_event	= snb_uncore_imc_disable_event,
	.enable_event	= snb_uncore_imc_enable_event,
	.hw_config	= snb_uncore_imc_hw_config,
	.read_counter	= snb_uncore_imc_read_counter,
};

static struct intel_uncore_type snb_uncore_imc = {
	.name		= "imc",
	.num_counters   = 2,
	.num_boxes	= 1,
	.fixed_ctr_bits	= 32,
	.fixed_ctr	= SNB_UNCORE_PCI_IMC_CTR_BASE,
	.event_descs	= snb_uncore_imc_events,
	.format_group	= &snb_uncore_imc_format_group,
	.perf_ctr	= SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
	.event_mask	= SNB_UNCORE_PCI_IMC_EVENT_MASK,
	.ops		= &snb_uncore_imc_ops,
	.pmu		= &snb_uncore_imc_pmu,
};

static struct intel_uncore_type *snb_pci_uncores[] = {
	[SNB_PCI_UNCORE_IMC]	= &snb_uncore_imc,
	NULL,
};

static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* end: all zeroes */ },
};

static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* end: all zeroes */ },
};

static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* end: all zeroes */ },
};

static struct pci_driver snb_uncore_pci_driver = {
	.name		= "snb_uncore",
	.id_table	= snb_uncore_pci_ids,
};

static struct pci_driver ivb_uncore_pci_driver = {
	.name		= "ivb_uncore",
	.id_table	= ivb_uncore_pci_ids,
};

static struct pci_driver hsw_uncore_pci_driver = {
	.name		= "hsw_uncore",
	.id_table	= hsw_uncore_pci_ids,
};

/* end of Sandy Bridge uncore support */

/* Nehalem uncore support */
@@ -2789,6 +3173,7 @@ static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_e
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

@@ -2801,19 +3186,27 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	__hrtimer_start_range_ns(&box->hrtimer,
			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
			ns_to_ktime(box->hrtimer_duration), 0,
			HRTIMER_MODE_REL_PINNED, 0);
}

@@ -2847,43 +3240,12 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
	box->cpu = -1;
	box->phys_id = -1;

	return box;
}
	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	struct intel_uncore_box *box;
	INIT_LIST_HEAD(&box->active_list);

	box = *per_cpu_ptr(pmu->box, cpu);
	if (box)
	return box;

	raw_spin_lock(&uncore_box_lock);
	list_for_each_entry(box, &pmu->box_list, list) {
		if (box->phys_id == topology_physical_package_id(cpu)) {
			atomic_inc(&box->refcnt);
			*per_cpu_ptr(pmu->box, cpu) = box;
			break;
		}
	}
	raw_spin_unlock(&uncore_box_lock);

	return *per_cpu_ptr(pmu->box, cpu);
}

static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	/*
	 * perf core schedules event on the basis of cpu, uncore events are
	 * collected by one of the cpus inside a physical package.
	 */
	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

static int
@@ -3279,6 +3641,7 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
@@ -3289,6 +3652,10 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
	}

	if (pmu->type->num_boxes == 1) {
		if (strlen(pmu->type->name) > 0)
@@ -3502,6 +3869,28 @@ static int __init uncore_pci_init(void)
		pci_uncores = ivt_pci_uncores;
		uncore_pci_driver = &ivt_uncore_pci_driver;
		break;
	case 42: /* Sandy Bridge */
		ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC);
		if (ret)
			return ret;
		pci_uncores = snb_pci_uncores;
		uncore_pci_driver = &snb_uncore_pci_driver;
		break;
	case 58: /* Ivy Bridge */
		ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC);
		if (ret)
			return ret;
		pci_uncores = snb_pci_uncores;
		uncore_pci_driver = &ivb_uncore_pci_driver;
		break;
	case 60: /* Haswell */
	case 69: /* Haswell Celeron */
		ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC);
		if (ret)
			return ret;
		pci_uncores = snb_pci_uncores;
		uncore_pci_driver = &hsw_uncore_pci_driver;
		break;
	default:
		return 0;
	}
@@ -3773,7 +4162,7 @@ static void __init uncore_cpu_setup(void *dummy)

static int __init uncore_cpu_init(void)
{
	int ret, cpu, max_cores;
	int ret, max_cores;

	max_cores = boot_cpu_data.x86_max_cores;
	switch (boot_cpu_data.x86_model) {
@@ -3817,29 +4206,6 @@ static int __init uncore_cpu_init(void)
	if (ret)
		return ret;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		int i, phys_id = topology_physical_package_id(cpu);

		for_each_cpu(i, &uncore_cpu_mask) {
			if (phys_id == topology_physical_package_id(i)) {
				phys_id = -1;
				break;
			}
		}
		if (phys_id < 0)
			continue;

		uncore_cpu_prepare(cpu, phys_id);
		uncore_event_init_cpu(cpu);
	}
	on_each_cpu(uncore_cpu_setup, NULL, 1);

	register_cpu_notifier(&uncore_cpu_nb);

	put_online_cpus();

	return 0;
}

@@ -3868,6 +4234,41 @@ static int __init uncore_pmus_register(void)
	return 0;
}

static void __init uncore_cpumask_init(void)
{
	int cpu;

	/*
	 * ony invoke once from msr or pci init code
	 */
	if (!cpumask_empty(&uncore_cpu_mask))
		return;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		int i, phys_id = topology_physical_package_id(cpu);

		for_each_cpu(i, &uncore_cpu_mask) {
			if (phys_id == topology_physical_package_id(i)) {
				phys_id = -1;
				break;
			}
		}
		if (phys_id < 0)
			continue;

		uncore_cpu_prepare(cpu, phys_id);
		uncore_event_init_cpu(cpu);
	}
	on_each_cpu(uncore_cpu_setup, NULL, 1);

	register_cpu_notifier(&uncore_cpu_nb);

	put_online_cpus();
}


static int __init intel_uncore_init(void)
{
	int ret;
@@ -3886,6 +4287,7 @@ static int __init intel_uncore_init(void)
		uncore_pci_exit();
		goto fail;
	}
	uncore_cpumask_init();

	uncore_pmus_register();
	return 0;
+5 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@

#define UNCORE_PMU_NAME_LEN		32
#define UNCORE_PMU_HRTIMER_INTERVAL	(60LL * NSEC_PER_SEC)
#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)

#define UNCORE_FIXED_EVENT		0xff
#define UNCORE_PMC_IDX_MAX_GENERIC	8
@@ -440,6 +441,7 @@ struct intel_uncore_type {
	struct intel_uncore_ops *ops;
	struct uncore_event_desc *event_descs;
	const struct attribute_group *attr_groups[4];
	struct pmu *pmu; /* for custom pmu ops */
};

#define pmu_group attr_groups[0]
@@ -488,8 +490,11 @@ struct intel_uncore_box {
	u64 tags[UNCORE_PMC_IDX_MAX];
	struct pci_dev *pci_dev;
	struct intel_uncore_pmu *pmu;
	u64 hrtimer_duration; /* hrtimer timeout for this box */
	struct hrtimer hrtimer;
	struct list_head list;
	struct list_head active_list;
	void *io_addr;
	struct intel_uncore_extra_reg shared_regs[0];
};

Loading