
Commit bc579ae5 authored by David S. Miller

Merge branch 'mlx4-next'



Or Gerlitz says:

====================
mlx4: Fix and enhance the device reset flow

This series from Yishai Hadas fixes the device reset flow and adds SRIOV support.

Reset flows are required whenever a device experiences errors, is unresponsive,
or is not in a deterministic state. In such cases, the driver is expected to
reset the HW and continue operation. When SRIOV is enabled, these requirements
apply both to PF and VF devices.

Currently, the mlx4 reset flow doesn't work properly: when a fatal error is
detected on the FW internal buffer, the chip is not reset and stays in its
bad state. There are also cases that should be treated as fatal, such as a
non-responsive FW or errors returned by closing commands, which are not handled today.
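To make the failure mode concrete, here is a minimal user-space C sketch of the intended behaviour; it is not mlx4 code, and every name in it (toy_dev, catas_buf, reset_chip, poll_catas) is invented for illustration. The point it models is simply that detecting a fatal value in the FW error buffer must be followed by marking the device failed and resetting the chip, not just by recording the error.

/* Toy model only -- all names are hypothetical, none of this is mlx4 code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_dev {
	uint32_t catas_buf;	/* nonzero: FW reports an internal fatal error */
	bool	 in_error;	/* device marked unusable for new commands    */
};

static void reset_chip(struct toy_dev *dev)
{
	printf("chip reset\n");
	dev->catas_buf = 0;
}

/* Periodic check of the FW error buffer. The pre-series behaviour stopped
 * after setting in_error; the fix is to follow up with a real chip reset. */
static void poll_catas(struct toy_dev *dev)
{
	if (!dev->catas_buf)
		return;
	dev->in_error = true;	/* fail pending/new commands */
	reset_chip(dev);	/* previously missing: HW stayed in its bad state */
}

int main(void)
{
	struct toy_dev dev = { .catas_buf = 0x1 };

	poll_catas(&dev);
	return dev.in_error ? 0 : 1;
}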

The AER mechanism should also be fixed:
- It should use mlx4_load_one instead of __mlx4_init_one, which is done
  upon HCA probing.
- It must be aligned with the concurrent catas flow: mark the device as being in
  an error state, reset the chip, etc.
- Port types should be restored to the original values they had before the error occurred.
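As background for the AER items above: PCI error recovery runs through the driver's error_detected / slot_reset / resume callbacks, whose return codes tell the PCI core whether to reset the slot, consider the device recovered, or detach it. The user-space sketch below models only that decision flow with invented names (toy_ers_result, toy_error_detected, toy_slot_reset); it is not the mlx4 handler, but it shows where the full reload, the shared error state with the catas flow, and the port-type restore fit, and why a permanent failure just returns the DISCONNECT result and leaves cleanup to the later remove path.

/* User-space model of the AER decision flow -- every name here is invented. */
#include <stdbool.h>
#include <stdio.h>

enum toy_ers_result { TOY_NEED_RESET, TOY_RECOVERED, TOY_DISCONNECT };

struct toy_dev {
	bool in_error;		/* shared with the catas flow            */
	int  saved_port_types;	/* restored after a successful recovery  */
	int  port_types;
};

/* error_detected(): mark the device failed; on a permanent failure ask the
 * core to detach (cleanup then happens in the remove path, so no explicit
 * disable call is needed here). */
static enum toy_ers_result toy_error_detected(struct toy_dev *dev, bool permanent)
{
	dev->in_error = true;
	return permanent ? TOY_DISCONNECT : TOY_NEED_RESET;
}

/* slot_reset(): bring the device back with a full reload (the mlx4_load_one
 * analogue), not just the probe-time init, and restore the port types the
 * user had configured before the error. */
static enum toy_ers_result toy_slot_reset(struct toy_dev *dev)
{
	bool ok = true;		/* stands in for reloading FW/SW state */

	if (!ok)
		return TOY_DISCONNECT;
	dev->port_types = dev->saved_port_types;
	dev->in_error = false;
	return TOY_RECOVERED;
}

int main(void)
{
	struct toy_dev dev = { .saved_port_types = 2, .port_types = 0 };

	if (toy_error_detected(&dev, false) == TOY_NEED_RESET &&
	    toy_slot_reset(&dev) == TOY_RECOVERED)
		printf("recovered, port types = %d\n", dev.port_types);
	return 0;
}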

In addition, the SRIOV use-case isn't supported.

In the above cases, when the device state becomes fatal, we must act as follows:
1) Reset the chip and mark the HW device state as in fatal error.
2) Wake up any pending commands, preventing new ones from coming in.
3) Restart the software stack.
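A compact user-space sketch of those three steps follows; the locking scheme and all names (toy_dev, toy_enter_fatal, toy_issue_cmd) are invented for illustration and are not the driver's actual command interface.

/* Toy model of the fatal-error sequence above; no mlx4 names are real here. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_dev {
	pthread_mutex_t lock;
	pthread_cond_t	cmd_done;	/* sleepers waiting for command completion */
	bool		fatal;
};

static void toy_enter_fatal(struct toy_dev *dev)
{
	/* 1) reset the HW and mark the device state as fatal */
	pthread_mutex_lock(&dev->lock);
	dev->fatal = true;
	printf("chip reset, device marked fatal\n");

	/* 2) wake every pending command; they see 'fatal' and fail out,
	 *    and new commands are refused for the same reason */
	pthread_cond_broadcast(&dev->cmd_done);
	pthread_mutex_unlock(&dev->lock);

	/* 3) restart the software stack (here just a message) */
	printf("restarting software stack\n");
}

static int toy_issue_cmd(struct toy_dev *dev)
{
	int err = 0;

	pthread_mutex_lock(&dev->lock);
	if (dev->fatal)
		err = -1;	/* reject new commands after the fatal event */
	pthread_mutex_unlock(&dev->lock);
	return err;
}

int main(void)
{
	struct toy_dev dev = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cmd_done = PTHREAD_COND_INITIALIZER,
	};

	toy_enter_fatal(&dev);
	printf("new command -> %d\n", toy_issue_cmd(&dev));
	return 0;
}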

We also address SRIOV mode as follows: in case the PF detects a fatal error,
it lets the VFs know about it, then both the PF and the VFs are restarted asynchronously.
However, in case only a VF hits a fatal error or is forced to be reset, only that
VF's resources are reset and then its software stack is restarted.
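The same can be sketched for the SRIOV split, again with purely illustrative names: a PF fatal event is propagated to the VFs before both sides restart, while a VF-only event restarts just that VF.

/* Toy model of the SRIOV restart split; all names are made up. */
#include <stdbool.h>
#include <stdio.h>

#define TOY_NUM_VFS 2

struct toy_func {
	const char *name;
	bool	    fatal;
};

static void toy_restart(struct toy_func *f)
{
	printf("%s: restarting software stack\n", f->name);
	f->fatal = false;
}

/* PF path: notify the VFs, then everyone restarts asynchronously. */
static void toy_pf_fatal(struct toy_func *pf, struct toy_func *vfs, int nvfs)
{
	int i;

	pf->fatal = true;
	for (i = 0; i < nvfs; i++)
		vfs[i].fatal = true;	/* e.g. signalled over the comm channel */

	toy_restart(pf);
	for (i = 0; i < nvfs; i++)
		toy_restart(&vfs[i]);
}

/* VF-only path: reset and restart just that VF. */
static void toy_vf_fatal(struct toy_func *vf)
{
	vf->fatal = true;
	toy_restart(vf);
}

int main(void)
{
	struct toy_func pf = { .name = "PF" };
	struct toy_func vfs[TOY_NUM_VFS] = { { .name = "VF0" }, { .name = "VF1" } };

	toy_pf_fatal(&pf, vfs, TOY_NUM_VFS);
	toy_vf_fatal(&vfs[1]);
	return 0;
}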

Changes from V0:

- No need to call pci_disable_device upon a permanent PCI error. This will
  be done as part of mlx4_remove_one, which is called later once we
  return PCI_ERS_RESULT_DISCONNECT from the PCI error handler.

- The initial toggle value should use only the T bit and not the whole byte value.
  Not doing so sometimes broke SRIOV, since the junk value was seen by the VF as a
  non-ready comm channel.
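For illustration of the toggle point: the VF decides comm-channel readiness from a single toggle (T) bit, so seeding the expected value from the whole status byte can carry junk in the other bits that the VF then misreads. The snippet below only demonstrates the masking idea; the bit position and the names are invented and do not reflect the real comm-channel layout.

/* Illustration of masking only the toggle bit; the bit position and names
 * are invented, this is not the mlx4 comm-channel layout. */
#include <stdint.h>
#include <stdio.h>

#define TOY_TOGGLE_BIT	0x01	/* hypothetical T bit in the status byte */

int main(void)
{
	uint8_t status = 0xA7;	/* junk in the upper bits after reset */

	uint8_t wrong = status;			 /* whole byte: 0xA7 */
	uint8_t right = status & TOY_TOGGLE_BIT; /* just the T bit: 0x01 */

	printf("seed from whole byte: 0x%02x, from T bit only: 0x%02x\n",
	       wrong, right);
	return 0;
}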
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7aee42c6 0cd93027
+1 −1
@@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
 			continue;

 		slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
-		if (slave_id >= dev->dev->num_vfs + 1)
+		if (slave_id >= dev->dev->persist->num_vfs + 1)
 			return;
 		tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
 		form_cache_ag = get_cached_alias_guid(dev, port_num,
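Every hunk in this merge repeats the pattern visible above: fields such as pdev and num_vfs are now reached through dev->persist, i.e. they have been moved into a structure that is meant to stay valid across an internal device reset while the rest of the device state is torn down and rebuilt. The sketch below illustrates that split with invented struct names; it is not the mlx4 definition.

/* Sketch of the "persistent" split shown in the diffs; names are illustrative. */
#include <stdio.h>
#include <string.h>

struct toy_pci_dev {
	char name[16];
};

/* State that must survive an internal reset: the PCI device handle,
 * the VF count, and anything else callers keep pointers to. */
struct toy_dev_persistent {
	struct toy_pci_dev *pdev;
	int		    num_vfs;
};

/* Per-instance state that is thrown away and rebuilt on reset. */
struct toy_dev {
	struct toy_dev_persistent *persist;
	int			   caps;
};

static void toy_internal_reset(struct toy_dev *dev)
{
	dev->caps = 0;		/* volatile state is rebuilt...              */
	/* ...while dev->persist (and dev->persist->pdev) stays valid. */
}

int main(void)
{
	struct toy_pci_dev pdev;
	struct toy_dev_persistent persist = { .pdev = &pdev, .num_vfs = 2 };
	struct toy_dev dev = { .persist = &persist, .caps = 42 };

	strcpy(pdev.name, "0000:06:00.0");
	toy_internal_reset(&dev);
	printf("%s still reachable, num_vfs=%d\n",
	       dev.persist->pdev->name, dev.persist->num_vfs);
	return 0;
}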
+2 −1
@@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
 	ctx->ib_dev = &dev->ib_dev;

 	for (i = 0;
-	     i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+	     i < min(dev->dev->caps.sqp_demux,
+	     (u16)(dev->dev->persist->num_vfs + 1));
 	     i++) {
 		struct mlx4_active_ports actv_ports =
 			mlx4_get_active_ports(dev->dev, i);
+10 −7
@@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,

 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
-	props->vendor_part_id	   = dev->dev->pdev->device;
+	props->vendor_part_id	   = dev->dev->persist->pdev->device;
 	props->hw_ver		   = be32_to_cpup((__be32 *) (out_mad->data + 32));
 	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);

@@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
-	return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
+	return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
 }

 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
@@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
 	int i;

 	if (mlx4_is_master(ibdev->dev)) {
-		for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
+		for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
+		     ++slave) {
 			for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
 				for (i = 0;
 				     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
@@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
 		for (j = 0; j < eq_per_port; j++) {
 			snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
-				 i, j, dev->pdev->bus->name);
+				 i, j, dev->persist->pdev->bus->name);
 			/* Set IRQ for specific name (per ring) */
 			if (mlx4_assign_eq(dev, name, NULL,
 					   &ibdev->eq_table[eq])) {
@@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

 	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
 	if (!ibdev) {
-		dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
+		dev_err(&dev->persist->pdev->dev,
+			"Device struct alloc failed\n");
 		return NULL;
 	}

@@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->num_ports		= num_ports;
 	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
-	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
+	ibdev->ib_dev.dma_device	= &dev->persist->pdev->dev;

 	if (dev->caps.userspace_caps)
 		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 				sizeof(long),
 				GFP_KERNEL);
 		if (!ibdev->ib_uc_qpns_bitmap) {
-			dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+			dev_err(&dev->persist->pdev->dev,
+				"bit map alloc failed\n");
 			goto err_steer_qp_release;
 		}

+4 −2
@@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
 	if (!mfrpl->ibfrpl.page_list)
 		goto err_free;

-	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
+						     pdev->dev,
 						     size, &mfrpl->map,
 						     GFP_KERNEL);
 	if (!mfrpl->mapped_page_list)
@@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
 	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
 	int size = page_list->max_page_list_len * sizeof (u64);

-	dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
+	dma_free_coherent(&dev->dev->persist->pdev->dev, size,
+			  mfrpl->mapped_page_list,
 			  mfrpl->map);
 	kfree(mfrpl->ibfrpl.page_list);
 	kfree(mfrpl);
+3 −3
@@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
 	char base_name[9];

 	/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
-	strlcpy(name, pci_name(dev->dev->pdev), max);
+	strlcpy(name, pci_name(dev->dev->persist->pdev), max);
 	strncpy(base_name, name, 8); /*till xxxx:yy:*/
 	base_name[8] = '\0';
 	/* with no ARI only 3 last bits are used so when the fn is higher than 8
@@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device)
 	if (!mlx4_is_master(device->dev))
 		return 0;

-	for (i = 0; i <= device->dev->num_vfs; ++i)
+	for (i = 0; i <= device->dev->persist->num_vfs; ++i)
 		register_one_pkey_tree(device, i);

 	return 0;
@@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
 	if (!mlx4_is_master(device->dev))
 		return;

-	for (slave = device->dev->num_vfs; slave >= 0; --slave) {
+	for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) {
 		list_for_each_entry_safe(p, t,
 					 &device->pkeys.pkey_port_list[slave],
 					 entry) {