Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 81fa846a authored by David S. Miller
Browse files

Merge branch 'netvsc-NAPI'



Stephen Hemminger says:

====================
NAPI support for Hyper-V

These patches enable NAPI, GRO and napi_alloc_skb for Hyper-V netvsc
driver.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1a4691b2 e91e7dd7
Loading
Loading
Loading
Loading
+93 −1
Original line number Diff line number Diff line
@@ -32,6 +32,8 @@

#include "hyperv_vmbus.h"

#define VMBUS_PKT_TRAILER	8

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled. Here are the details of this protocol:
@@ -336,6 +338,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
	return 0;
}

/*
 * Copy the read_index currently stored in the ring buffer header into
 * rbi->cached_read_index.
 *
 * hv_pkt_iter_first() calls this before walking packets so that the
 * state needed by a later hv_signal_on_read() is in place.
 */
static inline void
init_cached_read_index(struct hv_ring_buffer_info *rbi)
{
	rbi->cached_read_index = rbi->ring_buffer->read_index;
}

int hv_ringbuffer_read(struct vmbus_channel *channel,
		       void *buffer, u32 buflen, u32 *buffer_actual_len,
		       u64 *requestid, bool raw)
@@ -366,7 +374,8 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
		return ret;
	}

	init_cached_read_index(channel);
	init_cached_read_index(inring_info);

	next_read_location = hv_get_next_read_location(inring_info);
	next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
						    sizeof(desc),
@@ -410,3 +419,86 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,

	return ret;
}

/*
 * Count the bytes lying between the iterator position
 * (priv_read_index) and the ring buffer's current write_index.
 *
 * Same idea as hv_get_bytes_to_read, except that the private read
 * index is the starting point.
 */
static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
{
	u32 iter_pos = rbi->priv_read_index;
	u32 wr_pos = READ_ONCE(rbi->ring_buffer->write_index);

	/*
	 * Write index is numerically behind the iterator: add the span
	 * from the iterator up to ring_datasize to the span from offset
	 * zero up to the write index.
	 */
	if (wr_pos < iter_pos)
		return (rbi->ring_datasize - iter_pos) + wr_pos;

	return wr_pos - iter_pos;
}

/*
 * Begin iterating over the packets of a channel's inbound ring buffer.
 *
 * Returns a pointer to the descriptor located at priv_read_index, or
 * NULL when fewer than sizeof(struct vmpacket_descriptor) bytes are
 * available; in the NULL case the caller has nothing further to do.
 */
struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *inring = &channel->inbound;
	struct vmpacket_descriptor *first = NULL;

	/* record the read index now; hv_signal_on_read() relies on it later */
	init_cached_read_index(inring);

	if (hv_pkt_iter_avail(inring) >= sizeof(struct vmpacket_descriptor))
		first = hv_get_ring_buffer(inring) + inring->priv_read_index;

	return first;
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_first);

/*
 * Step the iterator past @desc and return the following packet.
 *
 * priv_read_index is moved forward by the packet length (len8 counts
 * 8-byte units) plus VMBUS_PKT_TRAILER, and reduced by ring_datasize
 * when it reaches or passes it. Returns NULL when fewer than
 * sizeof(struct vmpacket_descriptor) bytes remain after the new
 * position.
 */
struct vmpacket_descriptor *
__hv_pkt_iter_next(struct vmbus_channel *channel,
		   const struct vmpacket_descriptor *desc)
{
	struct hv_ring_buffer_info *inring = &channel->inbound;
	u32 pkt_bytes = desc->len8 << 3;
	u32 ring_bytes = inring->ring_datasize;

	/* skip this packet and its trailer, wrapping at the ring size */
	inring->priv_read_index += pkt_bytes + VMBUS_PKT_TRAILER;
	if (inring->priv_read_index >= ring_bytes)
		inring->priv_read_index -= ring_bytes;

	/* too little data left to hold another descriptor */
	if (hv_pkt_iter_avail(inring) < sizeof(struct vmpacket_descriptor))
		return NULL;

	return hv_get_ring_buffer(inring) + inring->priv_read_index;
}
EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);

/*
 * Update host ring buffer after iterating over packets.
 *
 * Copies the private iterator position (priv_read_index) into the
 * ring buffer's read_index and then calls hv_signal_on_read() for
 * @channel. Intended to be called once after a walk started with
 * hv_pkt_iter_first().
 */
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_rmb();
	/* publish how far the iterator got */
	rbi->ring_buffer->read_index = rbi->priv_read_index;

	hv_signal_on_read(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
+2 −0
Original line number Diff line number Diff line
@@ -196,6 +196,7 @@ int netvsc_recv_callback(struct net_device *net,
			 const struct ndis_tcp_ip_checksum_info *csum_info,
			 const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
int rndis_filter_device_add(struct hv_device *dev,
@@ -720,6 +721,7 @@ struct net_device_context {
/* Per channel data */
struct netvsc_channel {
	struct vmbus_channel *channel;
	struct napi_struct napi;
	struct multi_send_data msd;
	struct multi_recv_comp mrc;
	atomic_t queue_sends;
+107 −65
Original line number Diff line number Diff line
@@ -556,6 +556,7 @@ void netvsc_device_remove(struct hv_device *device)
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct netvsc_device *net_device = net_device_ctx->nvdev;
	int i;

	netvsc_disconnect_vsp(device);

@@ -570,6 +571,9 @@ void netvsc_device_remove(struct hv_device *device)
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
		napi_disable(&net_device->chan_table[0].napi);

	/* Release all resources */
	free_netvsc_device(net_device);
}
@@ -600,9 +604,9 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
static void netvsc_send_tx_complete(struct netvsc_device *net_device,
				    struct vmbus_channel *incoming_channel,
				    struct hv_device *device,
				    struct vmpacket_descriptor *packet)
				    const struct vmpacket_descriptor *desc)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
	struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct vmbus_channel *channel = device->channel;
@@ -647,14 +651,11 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct vmbus_channel *incoming_channel,
				   struct hv_device *device,
				   struct vmpacket_descriptor *packet)
				   const struct vmpacket_descriptor *desc)
{
	struct nvsp_message *nvsp_packet;
	struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
	struct net_device *ndev = hv_get_drvdata(device);

	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
					      (packet->offset8 << 3));

	switch (nvsp_packet->hdr.msg_type) {
	case NVSP_MSG_TYPE_INIT_COMPLETE:
	case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
@@ -668,7 +669,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,

	case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
		netvsc_send_tx_complete(net_device, incoming_channel,
					device, packet);
					device, desc);
		break;

	default:
@@ -1066,28 +1067,29 @@ static inline struct recv_comp_data *get_recv_comp_slot(
	return rcd;
}

static void netvsc_receive(struct net_device *ndev,
static int netvsc_receive(struct net_device *ndev,
		   struct netvsc_device *net_device,
		   struct net_device_context *net_device_ctx,
		   struct hv_device *device,
		   struct vmbus_channel *channel,
		   struct vmtransfer_page_packet_header *vmxferpage_packet,
		   const struct vmpacket_descriptor *desc,
		   struct nvsp_message *nvsp)
{
	const struct vmtransfer_page_packet_header *vmxferpage_packet
		= container_of(desc, const struct vmtransfer_page_packet_header, d);
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	char *recv_buf = net_device->recv_buf;
	u32 status = NVSP_STAT_SUCCESS;
	int i;
	int count = 0;
	int ret;
	struct recv_comp_data *rcd;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;

	/* Make sure this is a valid nvsp packet */
	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Unknown nvsp packet type received %u\n",
			  nvsp->hdr.msg_type);
		return;
		return 0;
	}

	if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
@@ -1095,7 +1097,7 @@ static void netvsc_receive(struct net_device *ndev,
			  "Invalid xfer page set id - expecting %x got %x\n",
			  NETVSC_RECEIVE_BUFFER_ID,
			  vmxferpage_packet->xfer_pageset_id);
		return;
		return 0;
	}

	count = vmxferpage_packet->range_cnt;
@@ -1111,26 +1113,26 @@ static void netvsc_receive(struct net_device *ndev,
					      channel, data, buflen);
	}

	if (!net_device->chan_table[q_idx].mrc.buf) {
	if (net_device->chan_table[q_idx].mrc.buf) {
		struct recv_comp_data *rcd;

		rcd = get_recv_comp_slot(net_device, channel, q_idx);
		if (rcd) {
			rcd->tid = vmxferpage_packet->d.trans_id;
			rcd->status = status;
		} else {
			netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
				   q_idx, vmxferpage_packet->d.trans_id);
		}
	} else {
		ret = netvsc_send_recv_completion(channel,
						  vmxferpage_packet->d.trans_id,
						  status);
		if (ret)
			netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
				   q_idx, vmxferpage_packet->d.trans_id, ret);
		return;
	}

	rcd = get_recv_comp_slot(net_device, channel, q_idx);

	if (!rcd) {
		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
			   q_idx, vmxferpage_packet->d.trans_id);
		return;
	}

	rcd->tid = vmxferpage_packet->d.trans_id;
	rcd->status = status;
	return count;
}

static void netvsc_send_table(struct hv_device *hdev,
@@ -1180,17 +1182,15 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
	}
}

static void netvsc_process_raw_pkt(struct hv_device *device,
static int netvsc_process_raw_pkt(struct hv_device *device,
				  struct vmbus_channel *channel,
				  struct netvsc_device *net_device,
				  struct net_device *ndev,
				  u64 request_id,
				   struct vmpacket_descriptor *desc)
				  const struct vmpacket_descriptor *desc)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct nvsp_message *nvmsg
		= (struct nvsp_message *)((unsigned long)desc
					  + (desc->offset8 << 3));
	struct nvsp_message *nvmsg = hv_pkt_data(desc);

	switch (desc->type) {
	case VM_PKT_COMP:
@@ -1198,10 +1198,8 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
		break;

	case VM_PKT_DATA_USING_XFER_PAGES:
		netvsc_receive(ndev, net_device, net_device_ctx,
			       device, channel,
			       (struct vmtransfer_page_packet_header *)desc,
			       nvmsg);
		return netvsc_receive(ndev, net_device, net_device_ctx,
				      device, channel, desc, nvmsg);
		break;

	case VM_PKT_DATA_INBAND:
@@ -1213,22 +1211,63 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
			   desc->type, request_id);
		break;
	}

	return 0;
}

/*
 * Find the hv_device a channel belongs to: the device_obj of its
 * primary_channel when one is set, otherwise the channel's own
 * device_obj.
 */
static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
	struct vmbus_channel *owner = channel->primary_channel;

	if (!owner)
		owner = channel;

	return owner->device_obj;
}

/*
 * NAPI poll handler for one netvsc channel.
 *
 * Walks the inbound ring with the hv_pkt_iter_* helpers, passes every
 * descriptor to netvsc_process_raw_pkt() and sums its return values.
 * The walk stops once that sum reaches @budget; the value handed back
 * to NAPI never exceeds @budget.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
	struct netvsc_channel *nvchan
		= container_of(napi, struct netvsc_channel, napi);
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_device *device = netvsc_channel_to_device(channel);
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct net_device *ndev = hv_get_drvdata(device);
	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
	const struct vmpacket_descriptor *desc;
	int done = 0;

	for (desc = hv_pkt_iter_first(channel); desc; ) {
		done += netvsc_process_raw_pkt(device, channel, net_device,
					       ndev, desc->trans_id, desc);

		/* move past the packet just handled before testing the budget */
		desc = __hv_pkt_iter_next(channel, desc);

		/* budget used up: report a full budget and stop walking */
		if (done >= budget) {
			done = budget;
			break;
		}
	}
	hv_pkt_iter_close(channel);

	/*
	 * Under budget: finish this NAPI round. If that succeeds and
	 * hv_end_read() returns non-zero, schedule another poll.
	 */
	if (done < budget) {
		if (napi_complete_done(napi, done) &&
		    hv_end_read(&channel->inbound) != 0)
			napi_reschedule(napi);
	}

	netvsc_chk_recv_comp(net_device, channel, q_idx);
	return done;
}

void netvsc_channel_cb(void *context)
{
	struct vmbus_channel *channel = context;
	struct hv_device *device = netvsc_channel_to_device(channel);
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct hv_device *device;
	struct netvsc_device *net_device;
	struct vmpacket_descriptor *desc;
	struct net_device *ndev;
	bool need_to_commit = false;

	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

	ndev = hv_get_drvdata(device);
	if (unlikely(!ndev))
@@ -1239,21 +1278,9 @@ void netvsc_channel_cb(void *context)
	    netvsc_channel_idle(net_device, q_idx))
		return;

	/* commit_rd_index() -> hv_signal_on_read() needs this. */
	init_cached_read_index(channel);

	while ((desc = get_next_pkt_raw(channel)) != NULL) {
		netvsc_process_raw_pkt(device, channel, net_device,
				       ndev, desc->trans_id, desc);

		put_pkt_raw(channel, desc);
		need_to_commit = true;
	}

	if (need_to_commit)
		commit_rd_index(channel);

	netvsc_chk_recv_comp(net_device, channel, q_idx);
	/* disable interrupts from host */
	hv_begin_read(&channel->inbound);
	napi_schedule(&net_device->chan_table[q_idx].napi);
}

/*
@@ -1275,6 +1302,11 @@ int netvsc_device_add(struct hv_device *device,

	net_device->ring_size = ring_size;

	/* Because the device uses NAPI, all the interrupt batching and
	 * control is done via Net softirq, not the channel handling
	 */
	set_channel_read_mode(device->channel, HV_CALL_ISR);

	/* Open the channel */
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
@@ -1292,8 +1324,16 @@ int netvsc_device_add(struct hv_device *device,
	 * chn_table with the default channel to use it before subchannels are
	 * opened.
	 */
	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
		net_device->chan_table[i].channel = device->channel;
	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		struct netvsc_channel *nvchan = &net_device->chan_table[i];

		nvchan->channel = device->channel;
		netif_napi_add(ndev, &nvchan->napi,
			       netvsc_poll, NAPI_POLL_WEIGHT);
	}

	/* Enable NAPI handler for init callbacks */
	napi_enable(&net_device->chan_table[0].napi);

	/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
	 * populated.
@@ -1313,6 +1353,8 @@ int netvsc_device_add(struct hv_device *device,
	return ret;

close:
	napi_disable(&net_device->chan_table[0].napi);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

+8 −11
Original line number Diff line number Diff line
@@ -589,13 +589,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
}

static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
					     struct napi_struct *napi,
					     const struct ndis_tcp_ip_checksum_info *csum_info,
					     const struct ndis_pkt_8021q_info *vlan,
					     void *data, u32 buflen)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb_ip_align(net, buflen);
	skb = napi_alloc_skb(napi, buflen);
	if (!skb)
		return skb;

@@ -642,11 +643,11 @@ int netvsc_recv_callback(struct net_device *net,
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct netvsc_device *net_device = net_device_ctx->nvdev;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
	struct net_device *vf_netdev;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;


	if (net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;
@@ -664,7 +665,8 @@ int netvsc_recv_callback(struct net_device *net,
		net = vf_netdev;

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len);
	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
				    csum_info, vlan, data, len);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		rcu_read_unlock();
@@ -679,7 +681,7 @@ int netvsc_recv_callback(struct net_device *net,
	 * on the synthetic device because modifying the VF device
	 * statistics will not work correctly.
	 */
	rx_stats = &net_device->chan_table[q_idx].rx_stats;
	rx_stats = &nvchan->rx_stats;
	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += len;
@@ -690,12 +692,7 @@ int netvsc_recv_callback(struct net_device *net,
		++rx_stats->multicast;
	u64_stats_update_end(&rx_stats->syncp);

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_receive_skb(skb);
	napi_gro_receive(&nvchan->napi, skb);
	rcu_read_unlock();

	return 0;
+2 −0
Original line number Diff line number Diff line
@@ -1012,6 +1012,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
	if (ret == 0)
		nvscdev->chan_table[chn_index].channel = new_sc;

	napi_enable(&nvscdev->chan_table[chn_index].napi);

	spin_lock_irqsave(&nvscdev->sc_lock, flags);
	nvscdev->num_sc_offered--;
	spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
Loading