
Commit 1d44e828 authored by Jack Steiner, committed by Ingo Molnar

x86, UV: Fix NMI handler for UV platforms



This fixes problems seen on UV systems when handling NMIs from the
node controller.

I traced the "dazed..." messages I saw earlier to a bug in the
BMC on our platform: it was sending NMIs without properly setting
the register that indicates the source of the NMI.

So rather than _assuming_ any unhandled NMI came from the UV system
maintenance console (SMC), add a check to verify that the SMC actually
sent the NMI.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: gorcunov@gmail.com
Cc: dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 693d92a1
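
In outline, the detection logic being added works like the annotated sketch below, paraphrased from the uv_handle_nmi() hunk in the diff that follows. Every identifier is one the patch itself defines or uses; the comments are editorial, and the excerpt is for reading, not a drop-in replacement.

	/* Lock-free first look: did the BMC set the pending bit
	 * (bit 63 of UVH_SCRATCH5) before raising this NMI? */
	int bid = uv_numa_blade_id();
	unsigned long real_uv_nmi =
		uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK;

	if (unlikely(real_uv_nmi)) {
		/* Re-check under the per-blade lock so exactly one cpu
		 * claims the NMI, bumps the blade-wide count, and clears
		 * the pending bit through the MMR's "clear" alias. */
		spin_lock(&uv_blade_info[bid].nmi_lock);
		real_uv_nmi = uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK;
		if (real_uv_nmi) {
			uv_blade_info[bid].nmi_count++;
			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
		}
		spin_unlock(&uv_blade_info[bid].nmi_lock);
	}

	/* Each cpu then compares its private last-seen count with the
	 * blade-wide count; unchanged means this unknown NMI was not
	 * from the SMC (or was already noticed), so decline it. */
	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
		return NOTIFY_DONE;
	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;

The unlocked read keeps the common case (an unknown NMI that did not come from the SMC) off the lock; the locked re-read resolves the race when several cpus on the same blade take the NMI at once.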
+2 −0
@@ -398,6 +398,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
+15 −1
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */

 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };

+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
+
 #endif /* __ASM_UV_MMRS_X86_H__ */
+43 −5
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* The BMC sets a bit in this MMR to non-zero before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);

 DEFINE_PER_CPU(int, x2apic_extra_bits);

@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
 	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;

 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
 		return NOTIFY_OK;

+	/*
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);

@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }

 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
+	.notifier_call	= uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };

 void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());

 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
+
 	for (blade = 0; blade < uv_num_possible_blades(); blade++)
 		uv_blade_info[blade].memory_nid = -1;

@@ -747,6 +784,7 @@ void __init uv_system_init(void)
 			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
 			max_pnode = max(pnode, max_pnode);
 			blade++;
 		}
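
A note on the kmalloc() → kzalloc() switch in uv_system_init(): the handler compares each cpu's cpu_last_nmi_count (per-cpu data, which starts at zero) against the blade's nmi_count, so the blade structure must start zeroed as well; with kmalloc(), nmi_count would begin as uninitialized garbage and every cpu could report a phantom NMI on its first pass through the handler. The following standalone userspace model of that handshake illustrates the counting scheme; it is a hypothetical simulation whose names merely mirror the kernel code, not kernel code itself.

#include <stdio.h>

/* Blade-wide state: zero-initialized, as kzalloc() guarantees. */
struct blade_info {
	unsigned long nmi_count;
};
static struct blade_info blade;

/* Stand-in for the per-cpu cpu_last_nmi_count variable (two "cpus"). */
static unsigned long cpu_last_nmi_count[2];

/* One cpu running the dedup part of the handler: act only if the
 * blade count has moved past this cpu's last-seen value. */
static int cpu_handles_nmi(int cpu)
{
	if (cpu_last_nmi_count[cpu] == blade.nmi_count)
		return 0;		/* nothing new for this cpu */
	cpu_last_nmi_count[cpu] = blade.nmi_count;
	return 1;			/* here the kernel would dump_stack() */
}

int main(void)
{
	/* The first cpu to see the MMR bit set bumps the blade count. */
	blade.nmi_count++;

	printf("cpu0 first pass:  %d\n", cpu_handles_nmi(0));	/* 1: new NMI */
	printf("cpu0 second pass: %d\n", cpu_handles_nmi(0));	/* 0: deduped */
	printf("cpu1 first pass:  %d\n", cpu_handles_nmi(1));	/* 1: once per cpu */
	return 0;
}

Run on any host with a C compiler; the printed values (1, 0, 1) show each cpu reacting exactly once per blade-wide NMI.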