Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 20f34165 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki
Browse files

Merge tag 'ras_for_4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into acpi-apei

Pull GHES changes for 4.2 from Borislav Petkov:

"GHES: Seriously speedup and cleanup NMI handler (Jiri Kosina and Borislav Petkov)

This is the result of us seeing this during boot

[   24.332560] INFO: NMI handler (ghes_notify_nmi) took too long to run: 3.265 msecs
[   24.332567] INFO: NMI handler (ghes_notify_nmi) took too long to run: 5.946 msecs
[   24.332568] INFO: NMI handler (ghes_notify_nmi) took too long to run: 5.948 msecs

and a report of people running perf stat and the machine softlocking.
"hest_disable" was helping in this case, which made us look at that
particular NMI handler. It was grabbing a lock each time it is run and
on each CPU. But this is not needed as the GHES sources are global and
they need only a single reader.

This patchset does that and cleans up the handler in the process."

* tag 'ras_for_4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  GHES: Make NMI handler have a single reader
  GHES: Elliminate double-loop in the NMI handler
  GHES: Panic right after detection
  GHES: Carve out the panic functionality
  GHES: Carve out error queueing in a separate function
parents 5ebe6afa 6fe9e7c2
Loading
Loading
Loading
Loading
+55 −53
Original line number Diff line number Diff line
@@ -729,10 +729,10 @@ static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
 * mutual exclusion.
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader.
 */
static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);

@@ -797,73 +797,75 @@ static void ghes_print_queued_estatus(void)
	}
}

/* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	u32 len, node_len;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic_status *estatus;

	/* Already queued/reported recently? Then there is nothing to do. */
	if (ghes_estatus_cached(ghes->estatus))
		return;

	len = cper_estatus_len(ghes->estatus);
	node_len = GHES_ESTATUS_NODE_LEN(len);

	/* Best effort: in NMI context a failed allocation drops the record. */
	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!estatus_node)
		return;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
	memcpy(estatus, ghes->estatus, len);
	/* Queue for the IRQ worker; llist_add is NMI-safe. */
	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}

/* Report everything queued so far plus the fatal record, then panic. */
static void __ghes_panic(struct ghes *ghes)
{
	oops_begin();
	ghes_print_queued_estatus();
	__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

	/* reboot to log the error! */
	if (panic_timeout == 0)
		panic_timeout = ghes_panic_timeout;
	panic("Fatal hardware error!");
}

static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	struct ghes *ghes;
	int sev, ret = NMI_DONE;

	/*
	 * The GHES sources are global, so a single concurrent reader is
	 * enough: if another CPU is already in here, bail out immediately
	 * instead of serializing on a lock (see ghes_in_nmi comment).
	 */
	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return ret;

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			ghes_clear_estatus(ghes);
			continue;
		}

		/* Panic right after detection for fatal errors. */
		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev >= GHES_SEV_PANIC)
			__ghes_panic(ghes);

		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;

		/* Queue the estatus for processing in IRQ context. */
		__process_error(ghes);
		ghes_clear_estatus(ghes);

		ret = NMI_HANDLED;
	}

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	irq_work_queue(&ghes_proc_irq_work);
#endif

	atomic_dec(&ghes_in_nmi);
	return ret;
}