Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e1c00e10 authored by Majd Dibbiny's avatar Majd Dibbiny Committed by David S. Miller
Browse files

net/mlx4_core: New init and exit flow for mlx4_core



In the new flow, we separate the pci initialization and teardown
from the initialization and teardown of the other resources.

__mlx4_init_one handles the pci resources initialization. It then
calls mlx4_load_one to initialize the remainder of the resources.

When removing a device, mlx4_remove_one is invoked. However, now
mlx4_remove_one calls mlx4_unload_one to free all the resources except the pci
resources. When mlx4_unload_one returns, mlx4_remove_one then frees the
pci resources.

The above separation will allow us to implement 'reset flow' in the future.
It will also enable more EQs for VFs and is a pre-step to the modern API to
enable/disable SRIOV.

Also added nvfs; an integer array of size MLX4_MAX_PORTS + 1; to the mlx4_dev
struct. This new field is used to avoid parsing the num_vfs module parameter
each time the mlx4_restart_one is called.

Signed-off-by: default avatarMajd Dibbiny <majd@mellanox.com>
Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent bbb07af4
Loading
Loading
Loading
Loading
+219 −174
Original line number Diff line number Diff line
@@ -2259,116 +2259,18 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
	iounmap(owner);
}

static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
			 int total_vfs, int *nvfs, struct mlx4_priv *priv)
{
	struct mlx4_priv *priv;
	struct mlx4_dev *dev;
	unsigned sum = 0;
	int err;
	int port;
	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
	unsigned total_vfs = 0;
	int sriov_initialized = 0;
	unsigned int i;
	int i;
	int existing_vfs = 0;

	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
		return err;
	}

	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
	 * per port, we must limit the number of VFs to 63 (since their are
	 * 128 MACs)
	 */
	for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
	     total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
		nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
		if (nvfs[i] < 0) {
			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
			return -EINVAL;
		}
	}
	for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
	     i++) {
		prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
		if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
			return -EINVAL;
		}
	}
	if (total_vfs >= MLX4_MAX_NUM_VF) {
		dev_err(&pdev->dev,
			"Requested more VF's (%d) than allowed (%d)\n",
			total_vfs, MLX4_MAX_NUM_VF - 1);
		return -EINVAL;
	}

	for (i = 0; i < MLX4_MAX_PORTS; i++) {
		if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
			dev_err(&pdev->dev,
				"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
				nvfs[i] + nvfs[2], i + 1,
				MLX4_MAX_NUM_VF_P_PORT - 1);
			return -EINVAL;
		}
	}


	/*
	 * Check for BARs.
	 */
	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
			pci_dev_data, pci_resource_flags(pdev, 0));
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
	dev = &priv->dev;

	dev       = pci_get_drvdata(pdev);
	priv      = mlx4_priv(dev);
	dev->pdev = pdev;
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);

@@ -2382,28 +2284,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)

	dev->rev_id = pdev->revision;
	dev->numa_node = dev_to_node(&pdev->dev);

	/* Detect if this device is a virtual function */
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as pf, we normally skip vfs unless explicitly
		 * requested to probe them. */
		if (total_vfs) {
			unsigned vfs_offset = 0;
			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
				     vfs_offset + nvfs[i] < extended_func_num(pdev);
			     vfs_offset += nvfs[i], i++)
				;
			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
				err = -ENODEV;
				goto err_free_dev;
			}
			if ((extended_func_num(pdev) - vfs_offset)
			    > prb_vf[i]) {
				mlx4_warn(dev, "Skipping virtual function:%d\n",
					  extended_func_num(pdev));
				err = -ENODEV;
				goto err_free_dev;
			}
		}
		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
		dev->flags |= MLX4_FLAG_SLAVE;
	} else {
@@ -2413,11 +2296,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
		err = mlx4_get_ownership(dev);
		if (err) {
			if (err < 0)
				goto err_free_dev;
				return err;
			else {
				mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
				err = -EINVAL;
				goto err_free_dev;
				return -EINVAL;
			}
		}

@@ -2429,7 +2311,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
				GFP_KERNEL);
			if (NULL == dev->dev_vfs) {
				mlx4_err(dev, "Failed to allocate memory for VFs\n");
				err = 0;
				err = -ENOMEM;
				goto err_free_own;
			} else {
				atomic_inc(&pf_loading);
				existing_vfs = pci_num_vf(pdev);
@@ -2445,13 +2328,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
					mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
						 err);
					atomic_dec(&pf_loading);
					err = 0;
				} else {
					mlx4_warn(dev, "Running in master mode\n");
					dev->flags |= MLX4_FLAG_SRIOV |
						MLX4_FLAG_MASTER;
					dev->num_vfs = total_vfs;
					sriov_initialized = 1;
				}
			}
		}
@@ -2467,7 +2348,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
		err = mlx4_reset(dev);
		if (err) {
			mlx4_err(dev, "Failed to reset HCA, aborting\n");
			goto err_rel_own;
			goto err_sriov;
		}
	}

@@ -2517,14 +2398,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
	/* In master functions, the communication channel must be initialized
	 * after obtaining its address from fw */
	if (mlx4_is_master(dev)) {
		unsigned sum = 0;
		err = mlx4_multi_func_init(dev);
		if (err) {
			mlx4_err(dev, "Failed to init master mfunc interface, aborting\n");
			goto err_close;
		}
		if (sriov_initialized) {
		int ib_ports = 0;

		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
			ib_ports++;

@@ -2533,17 +2408,35 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
			mlx4_err(dev,
				 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
			err = -EINVAL;
				goto err_master_mfunc;
			goto err_close;
		}
		if (dev->caps.num_ports < 2 &&
		    num_vfs_argc > 1) {
			err = -EINVAL;
			mlx4_err(dev,
				 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
				 dev->caps.num_ports);
			goto err_close;
		}
			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]); i++) {
		memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs));

		for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) {
			unsigned j;
				for (j = 0; j < nvfs[i]; ++sum, ++j) {
					dev->dev_vfs[sum].min_port =
						i < 2 ? i + 1 : 1;

			for (j = 0; j < dev->nvfs[i]; ++sum, ++j) {
				dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
				dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
					dev->caps.num_ports;
			}
		}

		/* In master functions, the communication channel
		 * must be initialized after obtaining its address from fw
		 */
		err = mlx4_multi_func_init(dev);
		if (err) {
			mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
			goto err_close;
		}
	}

@@ -2565,7 +2458,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_steering(dev);
		if (err)
			goto err_free_eq;
			goto err_disable_msix;
	}

	err = mlx4_setup_hca(dev);
@@ -2625,6 +2518,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
	if (!mlx4_is_slave(dev))
		mlx4_clear_steering(dev);

err_disable_msix:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

err_free_eq:
	mlx4_free_eq_table(dev);

@@ -2641,9 +2538,6 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
	}

err_close:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	mlx4_close_hca(dev);

err_mfunc:
@@ -2657,17 +2551,151 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
	if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
		pci_disable_sriov(pdev);

err_rel_own:
	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

	if (mlx4_is_master(dev) && dev->num_vfs)
		atomic_dec(&pf_loading);

	kfree(priv->dev.dev_vfs);

err_free_dev:
	kfree(priv);
err_free_own:
	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

	return err;
}

static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
			   struct mlx4_priv *priv)
{
	int err;
	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
	unsigned total_vfs = 0;
	unsigned int i;

	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
		return err;
	}

	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
	 * per port, we must limit the number of VFs to 63 (since their are
	 * 128 MACs)
	 */
	for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
	     total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
		nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
		if (nvfs[i] < 0) {
			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
			err = -EINVAL;
			goto err_disable_pdev;
		}
	}
	for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
	     i++) {
		prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
		if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
			err = -EINVAL;
			goto err_disable_pdev;
		}
	}
	if (total_vfs >= MLX4_MAX_NUM_VF) {
		dev_err(&pdev->dev,
			"Requested more VF's (%d) than allowed (%d)\n",
			total_vfs, MLX4_MAX_NUM_VF - 1);
		err = -EINVAL;
		goto err_disable_pdev;
	}

	for (i = 0; i < MLX4_MAX_PORTS; i++) {
		if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
			dev_err(&pdev->dev,
				"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
				nvfs[i] + nvfs[2], i + 1,
				MLX4_MAX_NUM_VF_P_PORT - 1);
			err = -EINVAL;
			goto err_disable_pdev;
		}
	}

	/* Check for BARs. */
	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
			pci_dev_data, pci_resource_flags(pdev, 0));
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
	/* Detect if this device is a virtual function */
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as pf, we normally skip vfs unless explicitly
		 * requested to probe them.
		 */
		if (total_vfs) {
			unsigned vfs_offset = 0;

			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
			     vfs_offset + nvfs[i] < extended_func_num(pdev);
			     vfs_offset += nvfs[i], i++)
				;
			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
				err = -ENODEV;
				goto err_release_regions;
			}
			if ((extended_func_num(pdev) - vfs_offset)
			    > prb_vf[i]) {
				dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
					 extended_func_num(pdev));
				err = -ENODEV;
				goto err_release_regions;
			}
		}
	}

	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
	if (err)
		goto err_release_regions;
	return 0;

err_release_regions:
	pci_release_regions(pdev);
@@ -2682,6 +2710,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct mlx4_priv *priv;
	struct mlx4_dev *dev;
	int ret;

	printk_once(KERN_INFO "%s", mlx4_version);

@@ -2690,13 +2719,18 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
		return -ENOMEM;

	dev       = &priv->dev;
	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);
	priv->pci_dev_data = id->driver_data;

	return __mlx4_init_one(pdev, id->driver_data);
	ret =  __mlx4_init_one(pdev, id->driver_data, priv);
	if (ret)
		kfree(priv);

	return ret;
}

static void __mlx4_remove_one(struct pci_dev *pdev)
static void mlx4_unload_one(struct pci_dev *pdev)
{
	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2775,8 +2809,6 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
	kfree(dev->caps.qp1_proxy);
	kfree(dev->dev_vfs);

	pci_release_regions(pdev);
	pci_disable_device(pdev);
	memset(priv, 0, sizeof(*priv));
	priv->pci_dev_data = pci_dev_data;
	priv->removed = 1;
@@ -2787,7 +2819,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);

	__mlx4_remove_one(pdev);
	mlx4_unload_one(pdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	kfree(priv);
	pci_set_drvdata(pdev, NULL);
}
@@ -2796,11 +2830,22 @@ int mlx4_restart_one(struct pci_dev *pdev)
{
	struct mlx4_dev	 *dev  = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	int		  pci_dev_data;
	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	int pci_dev_data, err, total_vfs;

	pci_dev_data = priv->pci_dev_data;
	__mlx4_remove_one(pdev);
	return __mlx4_init_one(pdev, pci_dev_data);
	total_vfs = dev->num_vfs;
	memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs));

	mlx4_unload_one(pdev);
	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
	if (err) {
		mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
			 __func__, pci_name(pdev), err);
		return err;
	}

	return err;
}

static const struct pci_device_id mlx4_pci_table[] = {
@@ -2854,7 +2899,7 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	__mlx4_remove_one(pdev);
	mlx4_unload_one(pdev);

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -2866,7 +2911,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
	struct mlx4_priv *priv = mlx4_priv(dev);
	int               ret;

	ret = __mlx4_init_one(pdev, priv->pci_dev_data);
	ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv);

	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
@@ -2880,7 +2925,7 @@ static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
	.shutdown	= __mlx4_remove_one,
	.shutdown	= mlx4_unload_one,
	.remove		= mlx4_remove_one,
	.err_handler    = &mlx4_err_handler,
};
+1 −0
Original line number Diff line number Diff line
@@ -707,6 +707,7 @@ struct mlx4_dev {
	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
	struct mlx4_vf_dev     *dev_vfs;
	int                     nvfs[MLX4_MAX_PORTS + 1];
};

struct mlx4_eqe {