Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a450e8f5 authored by Gavin Shan's avatar Gavin Shan Committed by Benjamin Herrenschmidt
Browse files

powerpc/eeh: Dump PHB diag-data early



On PowerNV platform, PHB diag-data is dumped after stopping device
drivers. In case of recursive EEH errors, the kernel is usually
crashed before dumping PHB diag-data for the second EEH error. It's
hard to locate the root cause of the second EEH error without PHB
diag-data.

The patch adds one more EEH option "eeh=early_log", which helps
dumping PHB diag-data immediately once frozen PE is detected, in
order to get the PHB diag-data for the second EEH error.

Signed-off-by: default avatarGavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent b1d76a7d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ struct device_node;
#define EEH_PROBE_MODE_DEV	0x04	/* From PCI device	*/
#define EEH_PROBE_MODE_DEVTREE	0x08	/* From device tree	*/
#define EEH_ENABLE_IO_FOR_LOG	0x10	/* Enable IO for log	*/
#define EEH_EARLY_DUMP_LOG	0x20	/* Dump log immediately	*/

/*
 * Delay for PE reset, all in ms
+2 −0
Original line number Diff line number Diff line
@@ -143,6 +143,8 @@ static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
+12 −1
Original line number Diff line number Diff line
@@ -353,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
	} else if (!(pe->state & EEH_PE_ISOLATED)) {
		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		ioda_eeh_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
@@ -451,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)

		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		ioda_eeh_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
@@ -730,6 +736,7 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
			    char *drv_log, unsigned long len)
{
	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);

	return 0;
@@ -1086,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
		    !((*pe)->state & EEH_PE_ISOLATED)) {
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
			ioda_eeh_phb_diag(*pe);

			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				pnv_pci_dump_phb_diag_data((*pe)->phb,
							   (*pe)->data);
		}

		/*