Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5b54dac8 authored by Haiyang Zhang's avatar Haiyang Zhang Committed by David S. Miller
Browse files

hyperv: Add support for virtual Receive Side Scaling (vRSS)



This feature allows multiple channels to be used by each virtual NIC.
It is available on Hyper-V host 2012 R2.

Signed-off-by: default avatarHaiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: default avatarK. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 86fd14ad
Loading
Loading
Loading
Loading
+109 −1
Original line number Original line Diff line number Diff line
@@ -28,6 +28,96 @@
#include <linux/hyperv.h>
#include <linux/hyperv.h>
#include <linux/rndis.h>
#include <linux/rndis.h>


/* RSS related */
#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203  /* query only */
#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204  /* query and set */

#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89

#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2

struct ndis_obj_header {
	u8 type;
	u8 rev;
	u16 size;
} __packed;

/* ndis_recv_scale_cap/cap_flag */
#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR       0x02000000
#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC       0x04000000
#define NDIS_RSS_CAPS_USING_MSI_X                 0x08000000
#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS      0x10000000
#define NDIS_RSS_CAPS_SUPPORTS_MSI_X              0x20000000
#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4          0x00000100
#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6          0x00000200
#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX       0x00000400

struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
	struct ndis_obj_header hdr;
	u32 cap_flag;
	u32 num_int_msg;
	u32 num_recv_que;
	u16 num_indirect_tabent;
} __packed;


/* ndis_recv_scale_param flags */
#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED     0x0001
#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED    0x0002
#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED       0x0004
#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED     0x0008
#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS            0x0010

/* Hash info bits */
#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
#define NDIS_HASH_IPV4          0x00000100
#define NDIS_HASH_TCP_IPV4      0x00000200
#define NDIS_HASH_IPV6          0x00000400
#define NDIS_HASH_IPV6_EX       0x00000800
#define NDIS_HASH_TCP_IPV6      0x00001000
#define NDIS_HASH_TCP_IPV6_EX   0x00002000

#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40

#define ITAB_NUM 128
#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
extern u8 netvsc_hash_key[];

struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
	struct ndis_obj_header hdr;

	/* Qualifies the rest of the information */
	u16 flag;

	/* The base CPU number to do receive processing. not used */
	u16 base_cpu_number;

	/* This describes the hash function and type being enabled */
	u32 hashinfo;

	/* The size of indirection table array */
	u16 indirect_tabsize;

	/* The offset of the indirection table from the beginning of this
	 * structure
	 */
	u32 indirect_taboffset;

	/* The size of the hash secret key */
	u16 hashkey_size;

	/* The offset of the secret key from the beginning of this structure */
	u32 kashkey_offset;

	u32 processor_masks_offset;
	u32 num_processor_masks;
	u32 processor_masks_entry_size;
};

/* Fwd declaration */
/* Fwd declaration */
struct hv_netvsc_packet;
struct hv_netvsc_packet;
struct ndis_tcp_ip_checksum_info;
struct ndis_tcp_ip_checksum_info;
@@ -39,6 +129,8 @@ struct xferpage_packet {


	/* # of netvsc packets this xfer packet contains */
	/* # of netvsc packets this xfer packet contains */
	u32 count;
	u32 count;

	struct vmbus_channel *channel;
};
};


/*
/*
@@ -54,6 +146,9 @@ struct hv_netvsc_packet {
	bool is_data_pkt;
	bool is_data_pkt;
	u16 vlan_tci;
	u16 vlan_tci;


	u16 q_idx;
	struct vmbus_channel *channel;

	/*
	/*
	 * Valid only for receives when we break a xfer page packet
	 * Valid only for receives when we break a xfer page packet
	 * into multiple netvsc packets
	 * into multiple netvsc packets
@@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
int netvsc_recv_callback(struct hv_device *device_obj,
int netvsc_recv_callback(struct hv_device *device_obj,
			struct hv_netvsc_packet *packet,
			struct hv_netvsc_packet *packet,
			struct ndis_tcp_ip_checksum_info *csum_info);
			struct ndis_tcp_ip_checksum_info *csum_info);
void netvsc_channel_cb(void *context);
int rndis_filter_open(struct hv_device *dev);
int rndis_filter_open(struct hv_device *dev);
int rndis_filter_close(struct hv_device *dev);
int rndis_filter_close(struct hv_device *dev);
int rndis_filter_device_add(struct hv_device *dev,
int rndis_filter_device_add(struct hv_device *dev,
@@ -522,6 +618,8 @@ struct nvsp_message {


#define NETVSC_PACKET_SIZE                      2048
#define NETVSC_PACKET_SIZE                      2048


#define VRSS_SEND_TAB_SIZE 16

/* Per netvsc channel-specific */
/* Per netvsc channel-specific */
struct netvsc_device {
struct netvsc_device {
	struct hv_device *dev;
	struct hv_device *dev;
@@ -555,10 +653,20 @@ struct netvsc_device {


	struct net_device *ndev;
	struct net_device *ndev;


	struct vmbus_channel *chn_table[NR_CPUS];
	u32 send_table[VRSS_SEND_TAB_SIZE];
	u32 num_chn;
	atomic_t queue_sends[NR_CPUS];

	/* Holds rndis device info */
	/* Holds rndis device info */
	void *extension;
	void *extension;
	/* The recive buffer for this device */

	int ring_size;

	/* The primary channel callback buffer */
	unsigned char cb_buffer[NETVSC_PACKET_SIZE];
	unsigned char cb_buffer[NETVSC_PACKET_SIZE];
	/* The sub channel callback buffer */
	unsigned char *sub_cb_buf;
};
};


/* NdisInitialize message */
/* NdisInitialize message */
+109 −27
Original line number Original line Diff line number Diff line
@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device)
		kfree(netvsc_packet);
		kfree(netvsc_packet);
	}
	}


	if (net_device->sub_cb_buf)
		vfree(net_device->sub_cb_buf);

	kfree(net_device);
	kfree(net_device);
	return 0;
	return 0;
}
}
@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
	    (nvsp_packet->hdr.msg_type ==
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
		/* Copy the response back */
		/* Copy the response back */
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		       sizeof(struct nvsp_message));
		       sizeof(struct nvsp_message));
@@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
	} else if (nvsp_packet->hdr.msg_type ==
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
		int num_outstanding_sends;
		int num_outstanding_sends;
		u16 q_idx = 0;
		struct vmbus_channel *channel = device->channel;
		int queue_sends;


		/* Get the send context */
		/* Get the send context */
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
			packet->trans_id;
			packet->trans_id;


		/* Notify the layer above us */
		/* Notify the layer above us */
		if (nvsc_packet)
		if (nvsc_packet) {
			q_idx = nvsc_packet->q_idx;
			channel = nvsc_packet->channel;
			nvsc_packet->completion.send.send_completion(
			nvsc_packet->completion.send.send_completion(
				nvsc_packet->completion.send.
				nvsc_packet->completion.send.
				send_completion_ctx);
				send_completion_ctx);
		}


		num_outstanding_sends =
		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
			atomic_dec_return(&net_device->num_outstanding_sends);
		queue_sends = atomic_dec_return(&net_device->
						queue_sends[q_idx]);


		if (net_device->destroy && num_outstanding_sends == 0)
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);
			wake_up(&net_device->wait_drain);


		if (netif_queue_stopped(ndev) && !net_device->start_remove &&
		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
			(hv_ringbuf_avail_percent(&device->channel->outbound)
		    !net_device->start_remove &&
			> RING_AVAIL_PERCENT_HIWATER ||
		    (hv_ringbuf_avail_percent(&channel->outbound) >
			num_outstanding_sends < 1))
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
				netif_wake_queue(ndev);
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
	} else {
	} else {
		netdev_err(ndev, "Unknown send completion packet type- "
		netdev_err(ndev, "Unknown send completion packet type- "
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device,
	int ret = 0;
	int ret = 0;
	struct nvsp_message sendMessage;
	struct nvsp_message sendMessage;
	struct net_device *ndev;
	struct net_device *ndev;
	struct vmbus_channel *out_channel = NULL;
	u64 req_id;
	u64 req_id;


	net_device = get_outbound_net_device(device);
	net_device = get_outbound_net_device(device);
@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device,
	else
	else
		req_id = 0;
		req_id = 0;


	out_channel = net_device->chn_table[packet->q_idx];
	if (out_channel == NULL)
		out_channel = device->channel;
	packet->channel = out_channel;

	if (packet->page_buf_cnt) {
	if (packet->page_buf_cnt) {
		ret = vmbus_sendpacket_pagebuffer(device->channel,
		ret = vmbus_sendpacket_pagebuffer(out_channel,
						  packet->page_buf,
						  packet->page_buf,
						  packet->page_buf_cnt,
						  packet->page_buf_cnt,
						  &sendMessage,
						  &sendMessage,
						  sizeof(struct nvsp_message),
						  sizeof(struct nvsp_message),
						  req_id);
						  req_id);
	} else {
	} else {
		ret = vmbus_sendpacket(device->channel, &sendMessage,
		ret = vmbus_sendpacket(out_channel, &sendMessage,
				sizeof(struct nvsp_message),
				sizeof(struct nvsp_message),
				req_id,
				req_id,
				VM_PKT_DATA_INBAND,
				VM_PKT_DATA_INBAND,
@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device,


	if (ret == 0) {
	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
		atomic_inc(&net_device->num_outstanding_sends);
		if (hv_ringbuf_avail_percent(&device->channel->outbound) <
		atomic_inc(&net_device->queue_sends[packet->q_idx]);

		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
			RING_AVAIL_PERCENT_LOWATER) {
			RING_AVAIL_PERCENT_LOWATER) {
			netif_stop_queue(ndev);
			netif_tx_stop_queue(netdev_get_tx_queue(
					    ndev, packet->q_idx));

			if (atomic_read(&net_device->
			if (atomic_read(&net_device->
				num_outstanding_sends) < 1)
				queue_sends[packet->q_idx]) < 1)
				netif_wake_queue(ndev);
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, packet->q_idx));
		}
		}
	} else if (ret == -EAGAIN) {
	} else if (ret == -EAGAIN) {
		netif_stop_queue(ndev);
		netif_tx_stop_queue(netdev_get_tx_queue(
		if (atomic_read(&net_device->num_outstanding_sends) < 1) {
				    ndev, packet->q_idx));
			netif_wake_queue(ndev);
		if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
			netif_tx_wake_queue(netdev_get_tx_queue(
					    ndev, packet->q_idx));
			ret = -ENOSPC;
			ret = -ENOSPC;
		}
		}
	} else {
	} else {
@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device,
}
}


static void netvsc_send_recv_completion(struct hv_device *device,
static void netvsc_send_recv_completion(struct hv_device *device,
					struct vmbus_channel *channel,
					struct netvsc_device *net_device,
					struct netvsc_device *net_device,
					u64 transaction_id, u32 status)
					u64 transaction_id, u32 status)
{
{
@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,


retry_send_cmplt:
retry_send_cmplt:
	/* Send the completion */
	/* Send the completion */
	ret = vmbus_sendpacket(device->channel, &recvcompMessage,
	ret = vmbus_sendpacket(channel, &recvcompMessage,
			       sizeof(struct nvsp_message), transaction_id,
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
			       VM_PKT_COMP, 0);
	if (ret == 0) {
	if (ret == 0) {
@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context)
{
{
	struct hv_netvsc_packet *packet = context;
	struct hv_netvsc_packet *packet = context;
	struct hv_device *device = packet->device;
	struct hv_device *device = packet->device;
	struct vmbus_channel *channel;
	struct netvsc_device *net_device;
	struct netvsc_device *net_device;
	u64 transaction_id = 0;
	u64 transaction_id = 0;
	bool fsend_receive_comp = false;
	bool fsend_receive_comp = false;
@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context)
	 */
	 */
	if (packet->xfer_page_pkt->count == 0) {
	if (packet->xfer_page_pkt->count == 0) {
		fsend_receive_comp = true;
		fsend_receive_comp = true;
		channel = packet->xfer_page_pkt->channel;
		transaction_id = packet->completion.recv.recv_completion_tid;
		transaction_id = packet->completion.recv.recv_completion_tid;
		status = packet->xfer_page_pkt->status;
		status = packet->xfer_page_pkt->status;
		list_add_tail(&packet->xfer_page_pkt->list_ent,
		list_add_tail(&packet->xfer_page_pkt->list_ent,
@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context)


	/* Send a receive completion for the xfer page packet */
	/* Send a receive completion for the xfer page packet */
	if (fsend_receive_comp)
	if (fsend_receive_comp)
		netvsc_send_recv_completion(device, net_device, transaction_id,
		netvsc_send_recv_completion(device, channel, net_device,
					status);
					    transaction_id, status);


}
}


static void netvsc_receive(struct netvsc_device *net_device,
static void netvsc_receive(struct netvsc_device *net_device,
			struct vmbus_channel *channel,
			struct hv_device *device,
			struct hv_device *device,
			struct vmpacket_descriptor *packet)
			struct vmpacket_descriptor *packet)
{
{
@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
				       flags);
				       flags);


		netvsc_send_recv_completion(device, net_device,
		netvsc_send_recv_completion(device, channel, net_device,
					    vmxferpage_packet->d.trans_id,
					    vmxferpage_packet->d.trans_id,
					    NVSP_STAT_FAIL);
					    NVSP_STAT_FAIL);


@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
	xferpage_packet = (struct xferpage_packet *)listHead.next;
	xferpage_packet = (struct xferpage_packet *)listHead.next;
	list_del(&xferpage_packet->list_ent);
	list_del(&xferpage_packet->list_ent);
	xferpage_packet->status = NVSP_STAT_SUCCESS;
	xferpage_packet->status = NVSP_STAT_SUCCESS;
	xferpage_packet->channel = channel;


	/* This is how much we can satisfy */
	/* This is how much we can satisfy */
	xferpage_packet->count = count - 1;
	xferpage_packet->count = count - 1;
@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device,


}
}


static void netvsc_channel_cb(void *context)

static void netvsc_send_table(struct hv_device *hdev,
			      struct vmpacket_descriptor *vmpkt)
{
	struct netvsc_device *nvscdev;
	struct net_device *ndev;
	struct nvsp_message *nvmsg;
	int i;
	u32 count, *tab;

	nvscdev = get_outbound_net_device(hdev);
	if (!nvscdev)
		return;
	ndev = nvscdev->ndev;

	nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
					(vmpkt->offset8 << 3));

	if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
		return;

	count = nvmsg->msg.v5_msg.send_table.count;
	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
		      nvmsg->msg.v5_msg.send_table.offset);

	for (i = 0; i < count; i++)
		nvscdev->send_table[i] = tab[i];
}

void netvsc_channel_cb(void *context)
{
{
	int ret;
	int ret;
	struct hv_device *device = context;
	struct vmbus_channel *channel = (struct vmbus_channel *)context;
	struct hv_device *device;
	struct netvsc_device *net_device;
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u32 bytes_recvd;
	u64 request_id;
	u64 request_id;
@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context)
	int bufferlen = NETVSC_PACKET_SIZE;
	int bufferlen = NETVSC_PACKET_SIZE;
	struct net_device *ndev;
	struct net_device *ndev;


	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

	net_device = get_inbound_net_device(device);
	net_device = get_inbound_net_device(device);
	if (!net_device)
	if (!net_device)
		return;
		return;
	ndev = net_device->ndev;
	ndev = net_device->ndev;
	buffer = net_device->cb_buffer;
	buffer = get_per_channel_state(channel);


	do {
	do {
		ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
					   &bytes_recvd, &request_id);
					   &bytes_recvd, &request_id);
		if (ret == 0) {
		if (ret == 0) {
			if (bytes_recvd > 0) {
			if (bytes_recvd > 0) {
@@ -831,10 +903,14 @@ static void netvsc_channel_cb(void *context)
					break;
					break;


				case VM_PKT_DATA_USING_XFER_PAGES:
				case VM_PKT_DATA_USING_XFER_PAGES:
					netvsc_receive(net_device,
					netvsc_receive(net_device, channel,
						       device, desc);
						       device, desc);
					break;
					break;


				case VM_PKT_DATA_INBAND:
					netvsc_send_table(device, desc);
					break;

				default:
				default:
					netdev_err(ndev,
					netdev_err(ndev,
						   "unhandled packet type %d, "
						   "unhandled packet type %d, "
@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
		goto cleanup;
		goto cleanup;
	}
	}


	net_device->ring_size = ring_size;

	/*
	/*
	 * Coming into this function, struct net_device * is
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * registered as the driver private data.
@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
	}
	}
	init_completion(&net_device->channel_init_wait);
	init_completion(&net_device->channel_init_wait);


	set_per_channel_state(device->channel, net_device->cb_buffer);

	/* Open the channel */
	/* Open the channel */
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
			 ring_size * PAGE_SIZE, NULL, 0,
			 netvsc_channel_cb, device);
			 netvsc_channel_cb, device->channel);


	if (ret != 0) {
	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
	/* Channel is opened */
	/* Channel is opened */
	pr_info("hv_netvsc channel opened successfully\n");
	pr_info("hv_netvsc channel opened successfully\n");


	net_device->chn_table[0] = device->channel;

	/* Connect with the NetVsp */
	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
	if (ret != 0) {
+100 −3
Original line number Original line Diff line number Diff line
@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net)
		return ret;
		return ret;
	}
	}


	netif_start_queue(net);
	netif_tx_start_all_queues(net);


	nvdev = hv_get_drvdata(device_obj);
	nvdev = hv_get_drvdata(device_obj);
	rdev = nvdev->extension;
	rdev = nvdev->extension;
@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
	return ppi;
	return ppi;
}
}


union sub_key {
	u64 k;
	struct {
		u8 pad[3];
		u8 kb;
		u32 ka;
	};
};

/* Toeplitz hash function
 * data: network byte order
 * return: host byte order
 */
static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
{
	union sub_key subk;
	int k_next = 4;
	u8 dt;
	int i, j;
	u32 ret = 0;

	subk.k = 0;
	subk.ka = ntohl(*(u32 *)key);

	for (i = 0; i < dlen; i++) {
		subk.kb = key[k_next];
		k_next = (k_next + 1) % klen;
		dt = data[i];
		for (j = 0; j < 8; j++) {
			if (dt & 0x80)
				ret ^= subk.ka;
			dt <<= 1;
			subk.k <<= 1;
		}
	}

	return ret;
}

static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
{
	struct iphdr *iphdr;
	int data_len;
	bool ret = false;

	if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
		return false;

	iphdr = ip_hdr(skb);

	if (iphdr->version == 4) {
		if (iphdr->protocol == IPPROTO_TCP)
			data_len = 12;
		else
			data_len = 8;
		*hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
				  (u8 *)&iphdr->saddr, data_len);
		ret = true;
	}

	return ret;
}

static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
			void *accel_priv, select_queue_fallback_t fallback)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct hv_device *hdev =  net_device_ctx->device_ctx;
	struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
	u32 hash;
	u16 q_idx = 0;

	if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
		return 0;

	if (netvsc_set_hash(&hash, skb))
		q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
			ndev->real_num_tx_queues;

	return q_idx;
}

static void netvsc_xmit_completion(void *context)
static void netvsc_xmit_completion(void *context)
{
{
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
@@ -333,6 +415,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)


	packet->vlan_tci = skb->vlan_tci;
	packet->vlan_tci = skb->vlan_tci;


	packet->q_idx = skb_get_queue_mapping(skb);

	packet->is_data_pkt = true;
	packet->is_data_pkt = true;
	packet->total_data_buflen = skb->len;
	packet->total_data_buflen = skb->len;


@@ -554,6 +638,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       packet->vlan_tci);
				       packet->vlan_tci);


	skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
			    offermsg.offer.sub_channel_index %
			    net->real_num_rx_queues);

	net->stats.rx_packets++;
	net->stats.rx_packets++;
	net->stats.rx_bytes += packet->total_data_buflen;
	net->stats.rx_bytes += packet->total_data_buflen;


@@ -602,7 +690,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
	hv_set_drvdata(hdev, ndev);
	hv_set_drvdata(hdev, ndev);
	device_info.ring_size = ring_size;
	device_info.ring_size = ring_size;
	rndis_filter_device_add(hdev, &device_info);
	rndis_filter_device_add(hdev, &device_info);
	netif_wake_queue(ndev);
	netif_tx_wake_all_queues(ndev);


	return 0;
	return 0;
}
}
@@ -648,6 +736,7 @@ static const struct net_device_ops device_ops = {
	.ndo_change_mtu =		netvsc_change_mtu,
	.ndo_change_mtu =		netvsc_change_mtu,
	.ndo_validate_addr =		eth_validate_addr,
	.ndo_validate_addr =		eth_validate_addr,
	.ndo_set_mac_address =		netvsc_set_mac_addr,
	.ndo_set_mac_address =		netvsc_set_mac_addr,
	.ndo_select_queue =		netvsc_select_queue,
};
};


/*
/*
@@ -694,9 +783,11 @@ static int netvsc_probe(struct hv_device *dev,
	struct net_device *net = NULL;
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	struct netvsc_device_info device_info;
	struct netvsc_device *nvdev;
	int ret;
	int ret;


	net = alloc_etherdev(sizeof(struct net_device_context));
	net = alloc_etherdev_mq(sizeof(struct net_device_context),
				num_online_cpus());
	if (!net)
	if (!net)
		return -ENOMEM;
		return -ENOMEM;


@@ -729,6 +820,12 @@ static int netvsc_probe(struct hv_device *dev,
	}
	}
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);


	nvdev = hv_get_drvdata(dev);
	netif_set_real_num_tx_queues(net, nvdev->num_chn);
	netif_set_real_num_rx_queues(net, nvdev->num_chn);
	dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
		 net->real_num_tx_queues, net->real_num_rx_queues);

	ret = register_netdev(net);
	ret = register_netdev(net);
	if (ret != 0) {
	if (ret != 0) {
		pr_err("Unable to register netdev.\n");
		pr_err("Unable to register netdev.\n");
+186 −3
Original line number Original line Diff line number Diff line
@@ -31,7 +31,7 @@
#include "hyperv_net.h"
#include "hyperv_net.h"




#define RNDIS_EXT_LEN 100
#define RNDIS_EXT_LEN PAGE_SIZE
struct rndis_request {
struct rndis_request {
	struct list_head list_ent;
	struct list_head list_ent;
	struct completion  wait_event;
	struct completion  wait_event;
@@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
	rndis_msg->ndis_msg_type = msg_type;
	rndis_msg->ndis_msg_type = msg_type;
	rndis_msg->msg_len = msg_len;
	rndis_msg->msg_len = msg_len;


	request->pkt.q_idx = 0;

	/*
	/*
	 * Set the request id. This field is always after the rndis header for
	 * Set the request id. This field is always after the rndis header for
	 * request/response packet types so we just used the SetRequest as a
	 * request/response packet types so we just used the SetRequest as a
@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
	query->info_buflen = 0;
	query->info_buflen = 0;
	query->dev_vc_handle = 0;
	query->dev_vc_handle = 0;


	if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
		struct ndis_recv_scale_cap *cap;

		request->request_msg.msg_len +=
			sizeof(struct ndis_recv_scale_cap);
		query->info_buflen = sizeof(struct ndis_recv_scale_cap);
		cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
						     query->info_buf_offset);
		cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
		cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
		cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
	}

	ret = rndis_filter_send_request(dev, request);
	ret = rndis_filter_send_request(dev, request);
	if (ret != 0)
	if (ret != 0)
		goto cleanup;
		goto cleanup;
@@ -695,6 +710,89 @@ int rndis_filter_set_offload_params(struct hv_device *hdev,
	return ret;
	return ret;
}
}


u8 netvsc_hash_key[HASH_KEYLEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
{
	struct net_device *ndev = rdev->net_dev->ndev;
	struct rndis_request *request;
	struct rndis_set_request *set;
	struct rndis_set_complete *set_complete;
	u32 extlen = sizeof(struct ndis_recv_scale_param) +
		     4*ITAB_NUM + HASH_KEYLEN;
	struct ndis_recv_scale_param *rssp;
	u32 *itab;
	u8 *keyp;
	int i, t, ret;

	request = get_rndis_request(
			rdev, RNDIS_MSG_SET,
			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
	if (!request)
		return -ENOMEM;

	set = &request->request_msg.msg.set_req;
	set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
	set->info_buflen = extlen;
	set->info_buf_offset = sizeof(struct rndis_set_request);
	set->dev_vc_handle = 0;

	rssp = (struct ndis_recv_scale_param *)(set + 1);
	rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
	rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
	rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
	rssp->flag = 0;
	rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
			 NDIS_HASH_TCP_IPV4;
	rssp->indirect_tabsize = 4*ITAB_NUM;
	rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
	rssp->hashkey_size = HASH_KEYLEN;
	rssp->kashkey_offset = rssp->indirect_taboffset +
			       rssp->indirect_tabsize;

	/* Set indirection table entries */
	itab = (u32 *)(rssp + 1);
	for (i = 0; i < ITAB_NUM; i++)
		itab[i] = i % num_queue;

	/* Set hask key values */
	keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
	for (i = 0; i < HASH_KEYLEN; i++)
		keyp[i] = netvsc_hash_key[i];


	ret = rndis_filter_send_request(rdev, request);
	if (ret != 0)
		goto cleanup;

	t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
	if (t == 0) {
		netdev_err(ndev, "timeout before we got a set response...\n");
		/* can't put_rndis_request, since we may still receive a
		 * send-completion.
		 */
		return -ETIMEDOUT;
	} else {
		set_complete = &request->response_msg.msg.set_complete;
		if (set_complete->status != RNDIS_STATUS_SUCCESS) {
			netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
				   set_complete->status);
			ret = -EINVAL;
		}
	}

cleanup:
	put_rndis_request(rdev, request);
	return ret;
}


static int rndis_filter_query_device_link_status(struct rndis_device *dev)
static int rndis_filter_query_device_link_status(struct rndis_device *dev)
{
{
	u32 size = sizeof(u32);
	u32 size = sizeof(u32);
@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev)
	return ret;
	return ret;
}
}


static void netvsc_sc_open(struct vmbus_channel *new_sc)
{
	struct netvsc_device *nvscdev;
	u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
	int ret;

	nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);

	if (chn_index >= nvscdev->num_chn)
		return;

	set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
			      NETVSC_PACKET_SIZE);

	ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
			 nvscdev->ring_size * PAGE_SIZE, NULL, 0,
			 netvsc_channel_cb, new_sc);

	if (ret == 0)
		nvscdev->chn_table[chn_index] = new_sc;
}

int rndis_filter_device_add(struct hv_device *dev,
int rndis_filter_device_add(struct hv_device *dev,
				  void *additional_info)
				  void *additional_info)
{
{
@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev,
	struct rndis_device *rndis_device;
	struct rndis_device *rndis_device;
	struct netvsc_device_info *device_info = additional_info;
	struct netvsc_device_info *device_info = additional_info;
	struct ndis_offload_params offloads;
	struct ndis_offload_params offloads;
	struct nvsp_message *init_packet;
	int t;
	struct ndis_recv_scale_cap rsscap;
	u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);


	rndis_device = get_rndis_device();
	rndis_device = get_rndis_device();
	if (!rndis_device)
	if (!rndis_device)
@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev,


	/* Initialize the rndis device */
	/* Initialize the rndis device */
	net_device = hv_get_drvdata(dev);
	net_device = hv_get_drvdata(dev);
	net_device->num_chn = 1;


	net_device->extension = rndis_device;
	net_device->extension = rndis_device;
	rndis_device->net_dev = net_device;
	rndis_device->net_dev = net_device;
@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev,
	if (ret)
	if (ret)
		goto err_dev_remv;
		goto err_dev_remv;



	rndis_filter_query_device_link_status(rndis_device);
	rndis_filter_query_device_link_status(rndis_device);


	device_info->link_state = rndis_device->link_state;
	device_info->link_state = rndis_device->link_state;
@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev,
		 rndis_device->hw_mac_adr,
		 rndis_device->hw_mac_adr,
		 device_info->link_state ? "down" : "up");
		 device_info->link_state ? "down" : "up");


	return ret;
	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
		return 0;

	/* vRSS setup */
	memset(&rsscap, 0, rsscap_size);
	ret = rndis_filter_query_device(rndis_device,
					OID_GEN_RECEIVE_SCALE_CAPABILITIES,
					&rsscap, &rsscap_size);
	if (ret || rsscap.num_recv_que < 2)
		goto out;

	net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
			       num_online_cpus() : rsscap.num_recv_que;
	if (net_device->num_chn == 1)
		goto out;

	net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
					 NETVSC_PACKET_SIZE);
	if (!net_device->sub_cb_buf) {
		net_device->num_chn = 1;
		dev_info(&dev->device, "No memory for subchannels.\n");
		goto out;
	}

	vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);

	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
	init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
	init_packet->msg.v5_msg.subchn_req.num_subchannels =
						net_device->num_chn - 1;
	ret = vmbus_sendpacket(dev->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret)
		goto out;
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto out;
	}
	if (init_packet->msg.v5_msg.subchn_comp.status !=
	    NVSP_STAT_SUCCESS) {
		ret = -ENODEV;
		goto out;
	}
	net_device->num_chn = 1 +
		init_packet->msg.v5_msg.subchn_comp.num_subchannels;

	vmbus_are_subchannels_present(dev->channel);

	ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);

out:
	if (ret)
		net_device->num_chn = 1;
	return 0; /* return 0 because primary channel can be used alone */


err_dev_remv:
err_dev_remv:
	rndis_filter_device_remove(dev);
	rndis_filter_device_remove(dev);