Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53e82697 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'hv_netvsc-fix-races-during-shutdown-and-changes'



Stephen Hemminger says:

====================
hv_netvsc: fix races during shutdown and changes

This set of patches fixes issues identified by Vitaly Kuznetsov and
Mohammed Gamal related to state changes in Hyper-v network driver.

A lot of the issues are because setting up the netvsc device requires
a second step (in work queue) to get all the sub-channels running.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 68e2ffde 7b2ee50c
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -212,7 +212,6 @@ void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);

void rndis_set_subchannel(struct work_struct *w);
bool rndis_filter_opened(const struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+26 −26
Original line number Diff line number Diff line
@@ -90,6 +90,11 @@ static void free_netvsc_device(struct rcu_head *head)
		= container_of(head, struct netvsc_device, rcu);
	int i;

	kfree(nvdev->extension);
	vfree(nvdev->recv_buf);
	vfree(nvdev->send_buf);
	kfree(nvdev->send_section_map);

	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
		vfree(nvdev->chan_table[i].mrc.slots);

@@ -211,12 +216,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
		vfree(net_device->recv_buf);
		net_device->recv_buf = NULL;
	}

	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(device->channel,
					   net_device->send_buf_gpadl_handle);
@@ -231,12 +230,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
		}
		net_device->send_buf_gpadl_handle = 0;
	}
	if (net_device->send_buf) {
		/* Free up the send buffer */
		vfree(net_device->send_buf);
		net_device->send_buf = NULL;
	}
	kfree(net_device->send_section_map);
}

int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
@@ -562,27 +555,30 @@ void netvsc_device_remove(struct hv_device *device)
		= rtnl_dereference(net_device_ctx->nvdev);
	int i;

	cancel_work_sync(&net_device->subchan_work);

	netvsc_revoke_buf(device, net_device);

	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

	/* And disassociate NAPI context from device */
	for (i = 0; i < net_device->num_chn; i++)
		netif_napi_del(&net_device->chan_table[i].napi);

	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
	netdev_dbg(ndev, "net device safe to remove\n");

	/* older versions require that buffer be revoked before close */
	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
		netvsc_teardown_gpadl(device, net_device);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

	if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
		netvsc_teardown_gpadl(device, net_device);

	/* And dissassociate NAPI context from device */
	for (i = 0; i < net_device->num_chn; i++)
		netif_napi_del(&net_device->chan_table[i].napi);

	/* Release all resources */
	free_netvsc_device_rcu(net_device);
}
@@ -645,16 +641,20 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
	queue_sends =
		atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);

	if (net_device->destroy && queue_sends == 0)
	if (unlikely(net_device->destroy)) {
		if (queue_sends == 0)
			wake_up(&net_device->wait_drain);
	} else {
		struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);

	if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
		if (netif_tx_queue_stopped(txq) &&
		    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
		     queue_sends < 1)) {
		netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
			netif_tx_wake_queue(txq);
			ndev_ctx->eth_stats.wake_queue++;
		}
	}
}

static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct vmbus_channel *incoming_channel,
+155 −123
Original line number Diff line number Diff line
@@ -47,6 +47,9 @@
#include "hyperv_net.h"

#define RING_SIZE_MIN	64
#define RETRY_US_LO	5000
#define RETRY_US_HI	10000
#define RETRY_MAX	2000	/* >10 sec */

#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
@@ -123,10 +126,8 @@ static int netvsc_open(struct net_device *net)
	}

	rdev = nvdev->extension;
	if (!rdev->link_state) {
	if (!rdev->link_state)
		netif_carrier_on(net);
		netif_tx_wake_all_queues(net);
	}

	if (vf_netdev) {
		/* Setting synthetic device up transparently sets
@@ -142,36 +143,25 @@ static int netvsc_open(struct net_device *net)
	return 0;
}

static int netvsc_close(struct net_device *net)
static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct net_device *vf_netdev
		= rtnl_dereference(net_device_ctx->vf_netdev);
	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
	int ret = 0;
	u32 aread, i, msec = 10, retry = 0, retry_max = 20;
	struct vmbus_channel *chn;

	netif_tx_disable(net);

	/* No need to close rndis filter if it is removed already */
	if (!nvdev)
		goto out;

	ret = rndis_filter_close(nvdev);
	if (ret != 0) {
		netdev_err(net, "unable to close device (ret %d).\n", ret);
		return ret;
	}
	unsigned int retry = 0;
	int i;

	/* Ensure pending bytes in ring are read */
	while (true) {
		aread = 0;
	for (;;) {
		u32 aread = 0;

		for (i = 0; i < nvdev->num_chn; i++) {
			chn = nvdev->chan_table[i].channel;
			struct vmbus_channel *chn
				= nvdev->chan_table[i].channel;

			if (!chn)
				continue;

			/* make sure receive not running now */
			napi_synchronize(&nvdev->chan_table[i].napi);

			aread = hv_get_bytes_to_read(&chn->inbound);
			if (aread)
				break;
@@ -181,22 +171,40 @@ static int netvsc_close(struct net_device *net)
				break;
		}

		retry++;
		if (retry > retry_max || aread == 0)
			break;
		if (aread == 0)
			return 0;

		msleep(msec);
		if (++retry > RETRY_MAX)
			return -ETIMEDOUT;

		if (msec < 1000)
			msec *= 2;
		usleep_range(RETRY_US_LO, RETRY_US_HI);
	}
}

	if (aread) {
		netdev_err(net, "Ring buffer not empty after closing rndis\n");
		ret = -ETIMEDOUT;
static int netvsc_close(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct net_device *vf_netdev
		= rtnl_dereference(net_device_ctx->vf_netdev);
	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
	int ret;

	netif_tx_disable(net);

	/* No need to close rndis filter if it is removed already */
	if (!nvdev)
		return 0;

	ret = rndis_filter_close(nvdev);
	if (ret != 0) {
		netdev_err(net, "unable to close device (ret %d).\n", ret);
		return ret;
	}

out:
	ret = netvsc_wait_until_empty(nvdev);
	if (ret)
		netdev_err(net, "Ring buffer not empty after closing rndis\n");

	if (vf_netdev)
		dev_close(vf_netdev);

@@ -845,16 +853,81 @@ static void netvsc_get_channels(struct net_device *net,
	}
}

static int netvsc_detach(struct net_device *ndev,
			 struct netvsc_device *nvdev)
{
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct hv_device *hdev = ndev_ctx->device_ctx;
	int ret;

	/* Don't try continuing to try and setup sub channels */
	if (cancel_work_sync(&nvdev->subchan_work))
		nvdev->num_chn = 1;

	/* If device was up (receiving) then shutdown */
	if (netif_running(ndev)) {
		netif_tx_disable(ndev);

		ret = rndis_filter_close(nvdev);
		if (ret) {
			netdev_err(ndev,
				   "unable to close device (ret %d).\n", ret);
			return ret;
		}

		ret = netvsc_wait_until_empty(nvdev);
		if (ret) {
			netdev_err(ndev,
				   "Ring buffer not empty after closing rndis\n");
			return ret;
		}
	}

	netif_device_detach(ndev);

	rndis_filter_device_remove(hdev, nvdev);

	return 0;
}

static int netvsc_attach(struct net_device *ndev,
			 struct netvsc_device_info *dev_info)
{
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct hv_device *hdev = ndev_ctx->device_ctx;
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;
	int ret;

	nvdev = rndis_filter_device_add(hdev, dev_info);
	if (IS_ERR(nvdev))
		return PTR_ERR(nvdev);

	/* Note: enable and attach happen when sub-channels setup */

	netif_carrier_off(ndev);

	if (netif_running(ndev)) {
		ret = rndis_filter_open(nvdev);
		if (ret)
			return ret;

		rdev = nvdev->extension;
		if (!rdev->link_state)
			netif_carrier_on(ndev);
	}

	return 0;
}

static int netvsc_set_channels(struct net_device *net,
			       struct ethtool_channels *channels)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *dev = net_device_ctx->device_ctx;
	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
	unsigned int orig, count = channels->combined_count;
	struct netvsc_device_info device_info;
	bool was_opened;
	int ret = 0;
	int ret;

	/* We do not support separate count for rx, tx, or other */
	if (count == 0 ||
@@ -871,9 +944,6 @@ static int netvsc_set_channels(struct net_device *net,
		return -EINVAL;

	orig = nvdev->num_chn;
	was_opened = rndis_filter_opened(nvdev);
	if (was_opened)
		rndis_filter_close(nvdev);

	memset(&device_info, 0, sizeof(device_info));
	device_info.num_chn = count;
@@ -882,27 +952,16 @@ static int netvsc_set_channels(struct net_device *net,
	device_info.recv_sections = nvdev->recv_section_cnt;
	device_info.recv_section_size = nvdev->recv_section_size;

	rndis_filter_device_remove(dev, nvdev);
	ret = netvsc_detach(net, nvdev);
	if (ret)
		return ret;

	nvdev = rndis_filter_device_add(dev, &device_info);
	if (IS_ERR(nvdev)) {
		ret = PTR_ERR(nvdev);
	ret = netvsc_attach(net, &device_info);
	if (ret) {
		device_info.num_chn = orig;
		nvdev = rndis_filter_device_add(dev, &device_info);

		if (IS_ERR(nvdev)) {
			netdev_err(net, "restoring channel setting failed: %ld\n",
				   PTR_ERR(nvdev));
			return ret;
		if (netvsc_attach(net, &device_info))
			netdev_err(net, "restoring channel setting failed\n");
	}
	}

	if (was_opened)
		rndis_filter_open(nvdev);

	/* We may have missed link change notifications */
	net_device_ctx->last_reconfig = 0;
	schedule_delayed_work(&net_device_ctx->dwork, 0);

	return ret;
}
@@ -969,10 +1028,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
	struct hv_device *hdev = ndevctx->device_ctx;
	int orig_mtu = ndev->mtu;
	struct netvsc_device_info device_info;
	bool was_opened;
	int ret = 0;

	if (!nvdev || nvdev->destroy)
@@ -985,11 +1042,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
			return ret;
	}

	netif_device_detach(ndev);
	was_opened = rndis_filter_opened(nvdev);
	if (was_opened)
		rndis_filter_close(nvdev);

	memset(&device_info, 0, sizeof(device_info));
	device_info.num_chn = nvdev->num_chn;
	device_info.send_sections = nvdev->send_section_cnt;
@@ -997,36 +1049,28 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
	device_info.recv_sections = nvdev->recv_section_cnt;
	device_info.recv_section_size = nvdev->recv_section_size;

	rndis_filter_device_remove(hdev, nvdev);
	ret = netvsc_detach(ndev, nvdev);
	if (ret)
		goto rollback_vf;

	ndev->mtu = mtu;

	nvdev = rndis_filter_device_add(hdev, &device_info);
	if (IS_ERR(nvdev)) {
		ret = PTR_ERR(nvdev);
	ret = netvsc_attach(ndev, &device_info);
	if (ret)
		goto rollback;

	return 0;

rollback:
	/* Attempt rollback to original MTU */
	ndev->mtu = orig_mtu;
		nvdev = rndis_filter_device_add(hdev, &device_info);

	if (netvsc_attach(ndev, &device_info))
		netdev_err(ndev, "restoring mtu failed\n");
rollback_vf:
	if (vf_netdev)
		dev_set_mtu(vf_netdev, orig_mtu);

		if (IS_ERR(nvdev)) {
			netdev_err(ndev, "restoring mtu failed: %ld\n",
				   PTR_ERR(nvdev));
			return ret;
		}
	}

	if (was_opened)
		rndis_filter_open(nvdev);

	netif_device_attach(ndev);

	/* We may have missed link change notifications */
	schedule_delayed_work(&ndevctx->dwork, 0);

	return ret;
}

@@ -1531,11 +1575,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
	struct hv_device *hdev = ndevctx->device_ctx;
	struct netvsc_device_info device_info;
	struct ethtool_ringparam orig;
	u32 new_tx, new_rx;
	bool was_opened;
	int ret = 0;

	if (!nvdev || nvdev->destroy)
@@ -1560,34 +1602,18 @@ static int netvsc_set_ringparam(struct net_device *ndev,
	device_info.recv_sections = new_rx;
	device_info.recv_section_size = nvdev->recv_section_size;

	netif_device_detach(ndev);
	was_opened = rndis_filter_opened(nvdev);
	if (was_opened)
		rndis_filter_close(nvdev);

	rndis_filter_device_remove(hdev, nvdev);

	nvdev = rndis_filter_device_add(hdev, &device_info);
	if (IS_ERR(nvdev)) {
		ret = PTR_ERR(nvdev);
	ret = netvsc_detach(ndev, nvdev);
	if (ret)
		return ret;

	ret = netvsc_attach(ndev, &device_info);
	if (ret) {
		device_info.send_sections = orig.tx_pending;
		device_info.recv_sections = orig.rx_pending;
		nvdev = rndis_filter_device_add(hdev, &device_info);
		if (IS_ERR(nvdev)) {
			netdev_err(ndev, "restoring ringparam failed: %ld\n",
				   PTR_ERR(nvdev));
			return ret;
		}
	}

	if (was_opened)
		rndis_filter_open(nvdev);
	netif_device_attach(ndev);

	/* We may have missed link change notifications */
	ndevctx->last_reconfig = 0;
	schedule_delayed_work(&ndevctx->dwork, 0);
		if (netvsc_attach(ndev, &device_info))
			netdev_err(ndev, "restoring ringparam failed");
	}

	return ret;
}
@@ -2072,8 +2098,8 @@ static int netvsc_probe(struct hv_device *dev,
static int netvsc_remove(struct hv_device *dev)
{
	struct net_device_context *ndev_ctx;
	struct net_device *vf_netdev;
	struct net_device *net;
	struct net_device *vf_netdev, *net;
	struct netvsc_device *nvdev;

	net = hv_get_drvdata(dev);
	if (net == NULL) {
@@ -2083,10 +2109,14 @@ static int netvsc_remove(struct hv_device *dev)

	ndev_ctx = netdev_priv(net);

	netif_device_detach(net);

	cancel_delayed_work_sync(&ndev_ctx->dwork);

	rcu_read_lock();
	nvdev = rcu_dereference(ndev_ctx->nvdev);

	if  (nvdev)
		cancel_work_sync(&nvdev->subchan_work);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed. Also blocks mtu and channel changes.
@@ -2096,11 +2126,13 @@ static int netvsc_remove(struct hv_device *dev)
	if (vf_netdev)
		netvsc_unregister_vf(vf_netdev);

	if (nvdev)
		rndis_filter_device_remove(dev, nvdev);

	unregister_netdevice(net);

	rndis_filter_device_remove(dev,
				   rtnl_dereference(ndev_ctx->nvdev));
	rtnl_unlock();
	rcu_read_unlock();

	hv_set_drvdata(dev, NULL);

+23 −33
Original line number Diff line number Diff line
@@ -264,13 +264,23 @@ static void rndis_set_link_state(struct rndis_device *rdev,
	}
}

static void rndis_filter_receive_response(struct rndis_device *dev,
				       struct rndis_message *resp)
static void rndis_filter_receive_response(struct net_device *ndev,
					  struct netvsc_device *nvdev,
					  const struct rndis_message *resp)
{
	struct rndis_device *dev = nvdev->extension;
	struct rndis_request *request = NULL;
	bool found = false;
	unsigned long flags;
	struct net_device *ndev = dev->ndev;

	/* This should never happen, it means control message
	 * response received after device removed.
	 */
	if (dev->state == RNDIS_DEV_UNINITIALIZED) {
		netdev_err(ndev,
			   "got rndis message uninitialized\n");
		return;
	}

	spin_lock_irqsave(&dev->request_lock, flags);
	list_for_each_entry(request, &dev->req_list, list_ent) {
@@ -352,7 +362,6 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)

static int rndis_filter_receive_data(struct net_device *ndev,
				     struct netvsc_device *nvdev,
				     struct rndis_device *dev,
				     struct rndis_message *msg,
				     struct vmbus_channel *channel,
				     void *data, u32 data_buflen)
@@ -372,7 +381,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
	 * should be the data packet size plus the trailer padding size
	 */
	if (unlikely(data_buflen < rndis_pkt->data_len)) {
		netdev_err(dev->ndev, "rndis message buffer "
		netdev_err(ndev, "rndis message buffer "
			   "overflow detected (got %u, min %u)"
			   "...dropping this message!\n",
			   data_buflen, rndis_pkt->data_len);
@@ -400,35 +409,20 @@ int rndis_filter_receive(struct net_device *ndev,
			 void *data, u32 buflen)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct rndis_device *rndis_dev = net_dev->extension;
	struct rndis_message *rndis_msg = data;

	/* Make sure the rndis device state is initialized */
	if (unlikely(!rndis_dev)) {
		netif_dbg(net_device_ctx, rx_err, ndev,
			  "got rndis message but no rndis device!\n");
		return NVSP_STAT_FAIL;
	}

	if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
		netif_dbg(net_device_ctx, rx_err, ndev,
			  "got rndis message uninitialized\n");
		return NVSP_STAT_FAIL;
	}

	if (netif_msg_rx_status(net_device_ctx))
		dump_rndis_message(ndev, rndis_msg);

	switch (rndis_msg->ndis_msg_type) {
	case RNDIS_MSG_PACKET:
		return rndis_filter_receive_data(ndev, net_dev,
						 rndis_dev, rndis_msg,
		return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
						 channel, data, buflen);
	case RNDIS_MSG_INIT_C:
	case RNDIS_MSG_QUERY_C:
	case RNDIS_MSG_SET_C:
		/* completion msgs */
		rndis_filter_receive_response(rndis_dev, rndis_msg);
		rndis_filter_receive_response(ndev, net_dev, rndis_msg);
		break;

	case RNDIS_MSG_INDICATE:
@@ -1124,6 +1118,7 @@ void rndis_set_subchannel(struct work_struct *w)
	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
		ndev_ctx->tx_table[i] = i % nvdev->num_chn;

	netif_device_attach(ndev);
	rtnl_unlock();
	return;

@@ -1134,6 +1129,8 @@ void rndis_set_subchannel(struct work_struct *w)

	nvdev->max_chn = 1;
	nvdev->num_chn = 1;

	netif_device_attach(ndev);
unlock:
	rtnl_unlock();
}
@@ -1336,6 +1333,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
		net_device->num_chn = 1;
	}

	/* No sub channels, device is ready */
	if (net_device->num_chn == 1)
		netif_device_attach(net);

	return net_device;

err_dev_remv:
@@ -1348,16 +1349,12 @@ void rndis_filter_device_remove(struct hv_device *dev,
{
	struct rndis_device *rndis_dev = net_dev->extension;

	/* Don't try and setup sub channels if about to halt */
	cancel_work_sync(&net_dev->subchan_work);

	/* Halt and release the rndis device */
	rndis_filter_halt_device(rndis_dev);

	net_dev->extension = NULL;

	netvsc_device_remove(dev);
	kfree(rndis_dev);
}

int rndis_filter_open(struct netvsc_device *nvdev)
@@ -1375,10 +1372,3 @@ int rndis_filter_close(struct netvsc_device *nvdev)

	return rndis_filter_close_device(nvdev->extension);
}

bool rndis_filter_opened(const struct netvsc_device *nvdev)
{
	const struct rndis_device *dev = nvdev->extension;

	return dev->state == RNDIS_DEV_DATAINITIALIZED;
}