Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 49564a80 authored by Luca Coelho's avatar Luca Coelho
Browse files

iwlwifi: pcie: remove non-responsive device



If we fail to grab NIC access because the device is not responding
(i.e. CSR_GP_CNTRL returns 0xFFFFFFFF), remove the device from the PCI
bus, to avoid any further damage, and to let the user space rescan.

In order to inform the userspace that a rescan is needed, we send a
kobject uevent with "INACCESSIBLE".

This functionality is disabled by default, but can be enabled via a
new module parameter called "remove_when_gone".  In the future we may
change this module parameter to include 3 modes instead: do nothing,
auto-rescan, or send uevent.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Rajat Jain <rajatja@google.com>
parent de460ddd
Loading
Loading
Loading
Loading
+6 −0
Original line number Original line Diff line number Diff line
@@ -1850,3 +1850,9 @@ MODULE_PARM_DESC(d0i3_timeout, "Timeout to D0i3 entry when idle (ms)");


module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");
MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");

/*
 * "remove_when_gone" module parameter: boolean backed by
 * iwlwifi_mod_params.remove_when_gone, registered with permissions 0444.
 * Per its description, it asks the driver to remove the device from the
 * PCIe bus once it is deemed inaccessible; the documented default is false.
 */
module_param_named(remove_when_gone,
		   iwlwifi_mod_params.remove_when_gone, bool,
		   0444);
MODULE_PARM_DESC(remove_when_gone,
		 "Remove dev from PCIe bus if it is deemed inaccessible (default: false)");
+2 −0
Original line number Original line Diff line number Diff line
@@ -122,6 +122,7 @@ enum iwl_uapsd_disable {
 * @lar_disable: disable LAR (regulatory), default = 0
 * @lar_disable: disable LAR (regulatory), default = 0
 * @fw_monitor: allow to use firmware monitor
 * @fw_monitor: allow to use firmware monitor
 * @disable_11ac: disable VHT capabilities, default = false.
 * @disable_11ac: disable VHT capabilities, default = false.
 * @remove_when_gone: remove an inaccessible device from the PCIe bus.
 */
 */
struct iwl_mod_params {
struct iwl_mod_params {
	int swcrypto;
	int swcrypto;
@@ -143,6 +144,7 @@ struct iwl_mod_params {
	bool lar_disable;
	bool lar_disable;
	bool fw_monitor;
	bool fw_monitor;
	bool disable_11ac;
	bool disable_11ac;
	bool remove_when_gone;
};
};


#endif /* #__iwl_modparams_h__ */
#endif /* #__iwl_modparams_h__ */
+5 −0
Original line number Original line Diff line number Diff line
@@ -383,6 +383,8 @@ struct iwl_self_init_dram {
 * @hw_init_mask: initial unmasked hw causes
 * @hw_init_mask: initial unmasked hw causes
 * @fh_mask: current unmasked fh causes
 * @fh_mask: current unmasked fh causes
 * @hw_mask: current unmasked hw causes
 * @hw_mask: current unmasked hw causes
 * @in_rescan: true if we have triggered a device rescan
 * @scheduled_for_removal: true if we have scheduled a device removal
 */
 */
struct iwl_trans_pcie {
struct iwl_trans_pcie {
	struct iwl_rxq *rxq;
	struct iwl_rxq *rxq;
@@ -464,6 +466,9 @@ struct iwl_trans_pcie {
	u32 fh_mask;
	u32 fh_mask;
	u32 hw_mask;
	u32 hw_mask;
	cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
	cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
	u16 tx_cmd_queue_size;
	bool in_rescan;
	bool scheduled_for_removal;
};
};


static inline struct iwl_trans_pcie *
static inline struct iwl_trans_pcie *
+71 −3
Original line number Original line Diff line number Diff line
@@ -75,6 +75,7 @@
#include <linux/gfp.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/vmalloc.h>
#include <linux/pm_runtime.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>


#include "iwl-drv.h"
#include "iwl-drv.h"
#include "iwl-trans.h"
#include "iwl-trans.h"
@@ -1935,6 +1936,29 @@ static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state)
		clear_bit(STATUS_TPOWER_PMI, &trans->status);
		clear_bit(STATUS_TPOWER_PMI, &trans->status);
}
}


/**
 * struct iwl_trans_pcie_removal - context for a deferred device removal
 * @pdev: the PCI device to be removed from the bus
 * @work: work item; the work handler recovers this structure from it
 *	via container_of()
 */
struct iwl_trans_pcie_removal {
	struct pci_dev *pdev;
	struct work_struct work;
};

/**
 * iwl_trans_pcie_removal_wk - work handler that removes a non-responsive device
 * @wk: work item embedded in a &struct iwl_trans_pcie_removal
 *
 * Sends a KOBJ_CHANGE uevent carrying "EVENT=INACCESSIBLE" for the device,
 * then stops and removes the device from the PCI bus while holding the PCI
 * rescan/remove lock.
 *
 * On completion this handler releases everything that was handed to it along
 * with the work item: the PCI device reference, the removal context itself
 * and one reference on this module.
 */
static void iwl_trans_pcie_removal_wk(struct work_struct *wk)
{
	struct iwl_trans_pcie_removal *removal =
		container_of(wk, struct iwl_trans_pcie_removal, work);
	struct pci_dev *pdev = removal->pdev;
	char *prop[] = {"EVENT=INACCESSIBLE", NULL};

	dev_err(&pdev->dev, "Device gone - attempting removal\n");
	kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, prop);

	pci_lock_rescan_remove();
	pci_stop_and_remove_bus_device(pdev);
	/*
	 * Drop our device reference only after the last use of pdev, so
	 * the structure is guaranteed to stay alive during the removal.
	 */
	pci_dev_put(pdev);
	pci_unlock_rescan_remove();

	kfree(removal);
	/* must be last: nothing of this module may run after the put */
	module_put(THIS_MODULE);
}

static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
					   unsigned long *flags)
					   unsigned long *flags)
{
{
@@ -1977,11 +2001,55 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
			   (BIT(trans->cfg->csr->flag_mac_clock_ready) |
			   (BIT(trans->cfg->csr->flag_mac_clock_ready) |
			    CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
			    CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
	if (unlikely(ret < 0)) {
	if (unlikely(ret < 0)) {
		iwl_trans_pcie_dump_regs(trans);
		u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);
		iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);

		WARN_ONCE(1,
		WARN_ONCE(1,
			  "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
			  "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
			  iwl_read32(trans, CSR_GP_CNTRL));
			  cntrl);

		iwl_trans_pcie_dump_regs(trans);

		if (iwlwifi_mod_params.remove_when_gone && cntrl == ~0U) {
			struct iwl_trans_pcie_removal *removal;

			if (trans_pcie->scheduled_for_removal)
				goto err;

			IWL_ERR(trans, "Device gone - scheduling removal!\n");

			/*
			 * get a module reference to avoid doing this
			 * while unloading anyway and to avoid
			 * scheduling a work with code that's being
			 * removed.
			 */
			if (!try_module_get(THIS_MODULE)) {
				IWL_ERR(trans,
					"Module is being unloaded - abort\n");
				goto err;
			}

			removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
			if (!removal) {
				module_put(THIS_MODULE);
				goto err;
			}
			/*
			 * we don't need to clear this flag, because
			 * the trans will be freed and reallocated.
			*/
			trans_pcie->scheduled_for_removal = true;

			removal->pdev = to_pci_dev(trans->dev);
			INIT_WORK(&removal->work, iwl_trans_pcie_removal_wk);
			pci_dev_get(removal->pdev);
			schedule_work(&removal->work);
		} else {
			iwl_write32(trans, CSR_RESET,
				    CSR_RESET_REG_FLAG_FORCE_NMI);
		}

err:
		spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags);
		spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags);
		return false;
		return false;
	}
	}