Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 05ba75f8 authored by Gavin Shan's avatar Gavin Shan Committed by Michael Ellerman
Browse files

powerpc/eeh: Fix stale cached primary bus



When PE is created, its primary bus is cached to pe->bus. At later
point, the cached primary bus is returned from eeh_pe_bus_get().
However, we could get stale cached primary bus and run into kernel
crash in one case: full hotplug as part of fenced PHB error recovery
releases all PCI busses under the PHB at unplugging time and recreate
them at plugging time. pe->bus is still dereferencing the PCI bus
that was released.

This adds another PE flag (EEH_PE_PRI_BUS) to represent the validity
of pe->bus. pe->bus is updated when its first child EEH device is
online and the flag is set. Before unplugging in full hotplug for
error recovery, the flag is cleared.

Fixes: 8cdb2833 ("powerpc/eeh: Trace PCI bus from PE")
Cc: stable@vger.kernel.org #v3.11+
Reported-by: default avatarAndrew Donnellan <andrew.donnellan@au1.ibm.com>
Reported-by: default avatarPradipta Ghosh <pradghos@in.ibm.com>
Signed-off-by: default avatarGavin Shan <gwshan@linux.vnet.ibm.com>
Tested-by: default avatarAndrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent 126df08c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@ struct pci_dn;
#define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
#define EEH_PE_CFG_RESTRICTED	(1 << 9)	/* Block config on error */
#define EEH_PE_REMOVED		(1 << 10)	/* Removed permanently	*/
#define EEH_PE_PRI_BUS		(1 << 11)	/* Cached primary bus   */

struct eeh_pe {
	int type;			/* PE type: PHB/Bus/Device	*/
+3 −0
Original line number Diff line number Diff line
@@ -564,6 +564,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
	 */
	eeh_pe_state_mark(pe, EEH_PE_KEEP);
	if (bus) {
		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
		pci_lock_rescan_remove();
		pcibios_remove_pci_devices(bus);
		pci_unlock_rescan_remove();
@@ -803,6 +804,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
	 * the their PCI config any more.
	 */
	if (frozen_bus) {
		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);

		pci_lock_rescan_remove();
@@ -886,6 +888,7 @@ static void eeh_handle_special_event(void)
					continue;

				/* Notify all devices to be down */
				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
				bus = eeh_pe_bus_get(phb_pe);
				eeh_pe_dev_traverse(pe,
					eeh_report_failure, NULL);
+1 −1
Original line number Diff line number Diff line
@@ -928,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
		bus = pe->phb->bus;
	} else if (pe->type & EEH_PE_BUS ||
		   pe->type & EEH_PE_DEVICE) {
		if (pe->bus) {
		if (pe->state & EEH_PE_PRI_BUS) {
			bus = pe->bus;
			goto out;
		}
+4 −1
Original line number Diff line number Diff line
@@ -444,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
	 * PCI devices of the PE are expected to be removed prior
	 * to PE reset.
	 */
	if (!edev->pe->bus)
	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
		edev->pe->bus = pci_find_bus(hose->global_number,
					     pdn->busno);
		if (edev->pe->bus)
			edev->pe->state |= EEH_PE_PRI_BUS;
	}

	/*
	 * Enable EEH explicitly so that we will do EEH check