Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9f49a5b5 authored by Jason Gunthorpe's avatar Jason Gunthorpe
Browse files

RDMA/netdev: Use priv_destructor for netdev cleanup



Now that the unregister_netdev flow for IPoIB no longer relies on external
code we can now introduce the use of priv_destructor and
needs_free_netdev.

The rdma_netdev flow is switched to use the netdev common priv_destructor
instead of the special free_rdma_netdev and the IPOIB ULP adjusted:
 - priv_destructor needs to switch to point to the ULP's destructor
   which will then call the rdma_ndev's in the right order
 - We need to be careful around the error unwind of register_netdev
   as it sometimes calls priv_destructor on failure
 - ULPs need to use ndo_init/uninit to ensure proper ordering
   of failures around register_netdev

Switching to priv_destructor is a necessary pre-requisite to using
the rtnl new_link mechanism.

The VNIC user for rdma_netdev should also be revised, but that is left for
another patch.

Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarDenis Drozdov <denisd@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
parent eaeb3984
Loading
Loading
Loading
Loading
+0 −10
Original line number Diff line number Diff line
@@ -5157,11 +5157,6 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
	return num_counters;
}

static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
{
	return mlx5_rdma_netdev_free(netdev);
}

static struct net_device*
mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
			  u8 port_num,
@@ -5171,17 +5166,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
			  void (*setup)(struct net_device *))
{
	struct net_device *netdev;
	struct rdma_netdev *rn;

	if (type != RDMA_NETDEV_IPOIB)
		return ERR_PTR(-EOPNOTSUPP);

	netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
					name, setup);
	if (likely(!IS_ERR_OR_NULL(netdev))) {
		rn = netdev_priv(netdev);
		rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
	}
	return netdev;
}

+2 −0
Original line number Diff line number Diff line
@@ -323,6 +323,7 @@ struct ipoib_dev_priv {
	spinlock_t lock;

	struct net_device *dev;
	void (*next_priv_destructor)(struct net_device *dev);

	struct napi_struct send_napi;
	struct napi_struct recv_napi;
@@ -481,6 +482,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
	kref_put(&ah->ref, ipoib_free_ah);
}
int ipoib_open(struct net_device *dev);
void ipoib_intf_free(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);

+64 −37
Original line number Diff line number Diff line
@@ -2062,6 +2062,13 @@ void ipoib_setup_common(struct net_device *dev)
	netif_keep_dst(dev);

	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);

	/*
	 * unregister_netdev always frees the netdev, we use this mode
	 * consistently to unify all the various unregister paths, including
	 * those connected to rtnl_link_ops which require it.
	 */
	dev->needs_free_netdev = true;
}

static void ipoib_build_priv(struct net_device *dev)
@@ -2116,9 +2123,7 @@ static struct net_device
	rn->send = ipoib_send;
	rn->attach_mcast = ipoib_mcast_attach;
	rn->detach_mcast = ipoib_mcast_detach;
	rn->free_rdma_netdev = free_netdev;
	rn->hca = hca;

	dev->netdev_ops = &ipoib_netdev_default_pf;

	return dev;
@@ -2173,6 +2178,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,

	rn = netdev_priv(dev);
	rn->clnt_priv = priv;

	/*
	 * Only the child register_netdev flows can handle priv_destructor
	 * being set, so we force it to NULL here and handle manually until it
	 * is safe to turn on.
	 */
	priv->next_priv_destructor = dev->priv_destructor;
	dev->priv_destructor = NULL;

	ipoib_build_priv(dev);

	return priv;
@@ -2181,6 +2195,27 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
	return NULL;
}

void ipoib_intf_free(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);

	dev->priv_destructor = priv->next_priv_destructor;
	if (dev->priv_destructor)
		dev->priv_destructor(dev);

	/*
	 * There are some error flows around register_netdev failing that may
	 * attempt to call priv_destructor twice, prevent that from happening.
	 */
	dev->priv_destructor = NULL;

	/* unregister/destroy is very complicated. Make bugs more obvious. */
	rn->clnt_priv = NULL;

	kfree(priv);
}

static ssize_t show_pkey(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
@@ -2341,7 +2376,7 @@ static struct net_device *ipoib_add_port(const char *format,
					 struct ib_device *hca, u8 port)
{
	struct ipoib_dev_priv *priv;
	struct rdma_netdev *rn;
	struct net_device *ndev;
	int result;

	priv = ipoib_intf_alloc(hca, port, format);
@@ -2349,6 +2384,7 @@ static struct net_device *ipoib_add_port(const char *format,
		pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
		return ERR_PTR(-ENOMEM);
	}
	ndev = priv->dev;

	INIT_IB_EVENT_HANDLER(&priv->event_handler,
			      priv->ca, ipoib_event);
@@ -2357,38 +2393,43 @@ static struct net_device *ipoib_add_port(const char *format,
	/* call event handler to ensure pkey in sync */
	queue_work(ipoib_workqueue, &priv->flush_heavy);

	result = register_netdev(priv->dev);
	result = register_netdev(ndev);
	if (result) {
		pr_warn("%s: couldn't register ipoib port %d; error %d\n",
			hca->name, port, result);
		ipoib_parent_unregister_pre(priv->dev);
		goto device_init_failed;

		ipoib_parent_unregister_pre(ndev);
		ipoib_intf_free(ndev);
		free_netdev(ndev);

		return ERR_PTR(result);
	}

	result = -ENOMEM;
	if (ipoib_cm_add_mode_attr(priv->dev))
	/*
	 * We cannot set priv_destructor before register_netdev because we
	 * need priv to be always valid during the error flow to execute
	 * ipoib_parent_unregister_pre(). Instead handle it manually and only
	 * enter priv_destructor mode once we are completely registered.
	 */
	ndev->priv_destructor = ipoib_intf_free;

	if (ipoib_cm_add_mode_attr(ndev))
		goto sysfs_failed;
	if (ipoib_add_pkey_attr(priv->dev))
	if (ipoib_add_pkey_attr(ndev))
		goto sysfs_failed;
	if (ipoib_add_umcast_attr(priv->dev))
	if (ipoib_add_umcast_attr(ndev))
		goto sysfs_failed;
	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
	if (device_create_file(&ndev->dev, &dev_attr_create_child))
		goto sysfs_failed;
	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
	if (device_create_file(&ndev->dev, &dev_attr_delete_child))
		goto sysfs_failed;

	return priv->dev;
	return ndev;

sysfs_failed:
	ipoib_parent_unregister_pre(priv->dev);
	unregister_netdev(priv->dev);

device_init_failed:
	rn = netdev_priv(priv->dev);
	rn->free_rdma_netdev(priv->dev);
	kfree(priv);

	return ERR_PTR(result);
	ipoib_parent_unregister_pre(ndev);
	unregister_netdev(ndev);
	return ERR_PTR(-ENOMEM);
}

static void ipoib_add_one(struct ib_device *device)
@@ -2426,33 +2467,19 @@ static void ipoib_add_one(struct ib_device *device)

static void ipoib_remove_one(struct ib_device *device, void *client_data)
{
	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
	struct ipoib_dev_priv *priv, *tmp;
	struct list_head *dev_list = client_data;

	if (!dev_list)
		return;

	list_for_each_entry_safe(priv, tmp, dev_list, list) {
		struct rdma_netdev *parent_rn = netdev_priv(priv->dev);

		ipoib_parent_unregister_pre(priv->dev);

		/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
		mutex_lock(&priv->sysfs_mutex);
		unregister_netdev(priv->dev);
		mutex_unlock(&priv->sysfs_mutex);

		parent_rn->free_rdma_netdev(priv->dev);

		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
			struct rdma_netdev *child_rn;

			child_rn = netdev_priv(cpriv->dev);
			child_rn->free_rdma_netdev(cpriv->dev);
			kfree(cpriv);
		}

		kfree(priv);
	}

	kfree(dev_list);
+39 −29
Original line number Diff line number Diff line
@@ -50,31 +50,53 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
}
static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);

/*
 * NOTE: If this function fails then the priv->dev will remain valid, however
 * priv can have been freed and must not be touched by caller in the error
 * case.
 *
 * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
 * free the net_device (just as rtnl_newlink does) otherwise the net_device
 * will be freed when the rtnl is unlocked.
 */
int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
		     u16 pkey, int type)
{
	struct net_device *ndev = priv->dev;
	int result;

	ASSERT_RTNL();

	priv->parent = ppriv->dev;
	priv->pkey = pkey;
	priv->child_type = type;

	result = register_netdevice(priv->dev);
	/* We do not need to touch priv if register_netdevice fails */
	ndev->priv_destructor = ipoib_intf_free;

	result = register_netdevice(ndev);
	if (result) {
		ipoib_warn(priv, "failed to initialize; error %i", result);

		/*
		 * register_netdevice sometimes calls priv_destructor,
		 * sometimes not. Make sure it was done.
		 */
		if (ndev->priv_destructor)
			ndev->priv_destructor(ndev);
		return result;
	}

	/* RTNL childs don't need proprietary sysfs entries */
	if (type == IPOIB_LEGACY_CHILD) {
		if (ipoib_cm_add_mode_attr(priv->dev))
		if (ipoib_cm_add_mode_attr(ndev))
			goto sysfs_failed;
		if (ipoib_add_pkey_attr(priv->dev))
		if (ipoib_add_pkey_attr(ndev))
			goto sysfs_failed;
		if (ipoib_add_umcast_attr(priv->dev))
		if (ipoib_add_umcast_attr(ndev))
			goto sysfs_failed;

		if (device_create_file(&priv->dev->dev, &dev_attr_parent))
		if (device_create_file(&ndev->dev, &dev_attr_parent))
			goto sysfs_failed;
	}

@@ -91,6 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
{
	struct ipoib_dev_priv *ppriv, *priv;
	char intf_name[IFNAMSIZ];
	struct net_device *ndev;
	struct ipoib_dev_priv *tpriv;
	int result;

@@ -122,12 +145,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
		return restart_syscall();
	}

	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
	if (!priv) {
		result = -ENOMEM;
		goto out;
	}

	/*
	 * First ensure this isn't a duplicate. We check the parent device and
	 * then all of the legacy child interfaces to make sure the Pkey
@@ -146,21 +163,23 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
		}
	}

	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
	if (!priv) {
		result = -ENOMEM;
		goto out;
	}
	ndev = priv->dev;

	result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);

	if (result && ndev->reg_state == NETREG_UNINITIALIZED)
		free_netdev(ndev);

out:
	up_write(&ppriv->vlan_rwsem);
	rtnl_unlock();
	mutex_unlock(&ppriv->sysfs_mutex);

	if (result && priv) {
		struct rdma_netdev *rn;

		rn = netdev_priv(priv->dev);
		rn->free_rdma_netdev(priv->dev);
		kfree(priv);
	}

	return result;
}

@@ -212,14 +231,5 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
	rtnl_unlock();
	mutex_unlock(&ppriv->sysfs_mutex);

	if (dev) {
		struct rdma_netdev *rn;

		rn = netdev_priv(dev);
		rn->free_rdma_netdev(priv->dev);
		kfree(priv);
		return 0;
	}

	return -ENODEV;
	return (dev) ? 0 : -ENODEV;
}
+19 −18
Original line number Diff line number Diff line
@@ -580,6 +580,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
	return 0;
}

static void mlx5_rdma_netdev_free(struct net_device *netdev)
{
	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
	struct mlx5i_priv *ipriv = priv->ppriv;
	const struct mlx5e_profile *profile = priv->profile;

	mlx5e_detach_netdev(priv);
	profile->cleanup(priv);
	destroy_workqueue(priv->wq);

	if (!ipriv->sub_interface) {
		mlx5i_pkey_qpn_ht_cleanup(netdev);
		mlx5e_destroy_mdev_resources(priv->mdev);
	}
}

struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
					  struct ib_device *ibdev,
					  const char *name,
@@ -653,6 +669,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
	rn->detach_mcast = mlx5i_detach_mcast;
	rn->set_id = mlx5i_set_pkey_index;

	netdev->priv_destructor = mlx5_rdma_netdev_free;
	netdev->needs_free_netdev = 1;

	return netdev;

destroy_ht:
@@ -665,21 +684,3 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
	return NULL;
}
EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);

void mlx5_rdma_netdev_free(struct net_device *netdev)
{
	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
	struct mlx5i_priv *ipriv = priv->ppriv;
	const struct mlx5e_profile *profile = priv->profile;

	mlx5e_detach_netdev(priv);
	profile->cleanup(priv);
	destroy_workqueue(priv->wq);

	if (!ipriv->sub_interface) {
		mlx5i_pkey_qpn_ht_cleanup(netdev);
		mlx5e_destroy_mdev_resources(priv->mdev);
	}
	free_netdev(netdev);
}
EXPORT_SYMBOL(mlx5_rdma_netdev_free);
Loading