Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 357b2f3d authored by Gavin Shan's avatar Gavin Shan Committed by Benjamin Herrenschmidt
Browse files

powerpc/eeh: Dump PE location code



As Ben suggested, it's meaningful to dump PE's location code
for site engineers when hitting EEH errors. The patch introduces
function eeh_pe_loc_get() to retireve the location code from
dev-tree so that we can output it when hitting EEH errors.

If primary PE bus is root bus, the PHB's dev-node would be tried
prior to root port's dev-node. Otherwise, the upstream bridge's
dev-node of the primary PE bus will be check for the location code
directly.

Signed-off-by: default avatarGavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent d4e58e59
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -254,6 +254,7 @@ void *eeh_pe_traverse(struct eeh_pe *root,
void *eeh_pe_dev_traverse(struct eeh_pe *root,
		eeh_traverse_func fn, void *flag);
void eeh_pe_restore_bars(struct eeh_pe *pe);
const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);

void *eeh_dev_init(struct device_node *dn, void *data);
+8 −5
Original line number Diff line number Diff line
@@ -330,8 +330,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected\n",
		phb_pe->phb->global_number);
	pr_err("EEH: PHB#%x failure detected, location: %s\n",
		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

@@ -362,7 +362,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
	unsigned long flags;
	struct device_node *dn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location;

@@ -481,8 +481,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
		pe->addr, pe->phb->global_number);
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);
+60 −0
Original line number Diff line number Diff line
@@ -791,6 +791,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
	eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
}

/**
 * eeh_pe_loc_get - Retrieve location code binding to the given PE
 * @pe: EEH PE
 *
 * Retrieve the location code of the given PE. If the primary PE bus
 * is root bus, we will grab location code from PHB device tree node
 * or root port. Otherwise, the upstream bridge's device tree node
 * of the primary PE bus will be checked for the location code.
 */
const char *eeh_pe_loc_get(struct eeh_pe *pe)
{
	struct pci_controller *hose;
	struct pci_bus *bus = eeh_pe_bus_get(pe);
	struct pci_dev *pdev;
	struct device_node *dn;
	const char *loc;

	if (!bus)
		return "N/A";

	/* PHB PE or root PE ? */
	if (pci_is_root_bus(bus)) {
		hose = pci_bus_to_host(bus);
		loc = of_get_property(hose->dn,
				"ibm,loc-code", NULL);
		if (loc)
			return loc;
		loc = of_get_property(hose->dn,
				"ibm,io-base-loc-code", NULL);
		if (loc)
			return loc;

		pdev = pci_get_slot(bus, 0x0);
	} else {
		pdev = bus->self;
	}

	if (!pdev) {
		loc = "N/A";
		goto out;
	}

	dn = pci_device_to_OF_node(pdev);
	if (!dn) {
		loc = "N/A";
		goto out;
	}

	loc = of_get_property(dn, "ibm,loc-code", NULL);
	if (!loc)
		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
	if (!loc)
		loc = "N/A";

out:
	if (pci_is_root_bus(bus) && pdev)
		pci_dev_put(pdev);
	return loc;
}

/**
 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
 * @pe: EEH PE
+15 −6
Original line number Diff line number Diff line
@@ -774,19 +774,24 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
		case OPAL_EEH_PHB_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
				*pe = phb_pe;
				pr_err("EEH: dead PHB#%x detected\n",
					hose->global_number);
				pr_err("EEH: dead PHB#%x detected, "
				       "location: %s\n",
				       hose->global_number,
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_DEAD_PHB;
			} else if (be16_to_cpu(severity) ==
						OPAL_EEH_SEV_PHB_FENCED) {
				*pe = phb_pe;
				pr_err("EEH: fenced PHB#%x detected\n",
					hose->global_number);
				pr_err("EEH: Fenced PHB#%x detected, "
				       "location: %s\n",
				       hose->global_number,
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FENCED_PHB;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: PHB#%x informative error "
					"detected\n",
					hose->global_number);
					"detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				ioda_eeh_phb_diag(hose);
				ret = EEH_NEXT_ERR_NONE;
			}
@@ -802,6 +807,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
				/* Try best to clear it */
				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
					hose->global_number, frozen_pe_no);
				pr_info("EEH: PHB location: %s\n",
					eeh_pe_loc_get(phb_pe));
				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				ret = EEH_NEXT_ERR_NONE;
@@ -810,6 +817,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
			} else {
				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
					(*pe)->addr, (*pe)->phb->global_number);
				pr_err("EEH: PE location: %s, PHB location: %s\n",
					eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FROZEN_PE;
			}