Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 79f33912 authored by Nimrod Andy's avatar Nimrod Andy Committed by David S. Miller
Browse files

net: fec: Add software TSO support



Add software TSO support for FEC.
This feature allows to improve outbound throughput performance.

Tested on imx6dl sabresd board, running iperf tcp tests shows:
- 16.2% improvement comparing with FEC SG patch
- 82% improvement comparing with NO SG & TSO patch

$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   181 MBytes   506 Mbits/sec

During the testing, CPU loading is 30%.
Since imx6dl FEC Bandwidth is limited to SOC system bus bandwidth, the
performance with SW TSO is a milestone.

CC: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Laight <David.Laight@ACULAB.COM>
CC: Li Frank <B20596@freescale.com>
Signed-off-by: default avatarFugang Duan <B38611@freescale.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 6e909283
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -299,6 +299,12 @@ struct fec_enet_private {
	unsigned short bufdesc_size;
	unsigned short tx_ring_size;
	unsigned short rx_ring_size;
	unsigned short tx_stop_threshold;
	unsigned short tx_wake_threshold;

	/* Software TSO */
	char *tso_hdrs;
	dma_addr_t tso_hdrs_dma;

	struct	platform_device *pdev;

+232 −23
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/tso.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
@@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_PAUSE_FLAG_AUTONEG	0x1
#define FEC_PAUSE_FLAG_ENABLE	0x2

#define TSO_HEADER_SIZE		128
/* Max number of allowed TCP segments for software TSO */
#define FEC_MAX_TSO_SEGS	100
#define FEC_MAX_SKB_DESCS	(FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)

#define IS_TSO_HEADER(txq, addr) \
	((addr >= txq->tso_hdrs_dma) && \
	(addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))

static int mii_cnt;

static inline
@@ -438,8 +448,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
	unsigned short buflen;
	unsigned int estatus = 0;
	unsigned int index;
	int entries_free;
	int ret;

	entries_free = fec_enet_get_free_txdesc_num(fep);
	if (entries_free < MAX_SKB_FRAGS + 1) {
		dev_kfree_skb_any(skb);
		if (net_ratelimit())
			netdev_err(ndev, "NOT enough BD for SG!\n");
		return NETDEV_TX_OK;
	}

	/* Protocol checksum off-load for TCP and UDP. */
	if (fec_enet_clear_csum(skb, ndev)) {
		dev_kfree_skb_any(skb);
@@ -534,35 +553,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
	return 0;
}

static netdev_tx_t
fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
static int
fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
			struct bufdesc *bdp, int index, char *data,
			int size, bool last_tcp, bool is_last)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct bufdesc *bdp;
	const struct platform_device_id *id_entry =
				platform_get_device_id(fep->pdev);
	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
	unsigned short status;
	int entries_free;
	int ret;
	unsigned int estatus = 0;

	/* Fill in a Tx ring entry */
	bdp = fep->cur_tx;
	status = bdp->cbd_sc;
	status &= ~BD_ENET_TX_STATS;

	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
	bdp->cbd_datlen = size;

	if (((unsigned long) data) & FEC_ALIGNMENT ||
		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
		memcpy(fep->tx_bounce[index], data, size);
		data = fep->tx_bounce[index];

		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
			swap_buffer(data, size);
	}

	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
					size, DMA_TO_DEVICE);
	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
		dev_kfree_skb_any(skb);
		if (net_ratelimit())
			netdev_err(ndev, "Tx DMA memory map failed\n");
		return NETDEV_TX_BUSY;
	}

	if (fep->bufdesc_ex) {
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
		ebdp->cbd_bdu = 0;
		ebdp->cbd_esc = estatus;
	}

	/* Handle the last BD specially */
	if (last_tcp)
		status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
	if (is_last) {
		status |= BD_ENET_TX_INTR;
		if (fep->bufdesc_ex)
			ebdp->cbd_esc |= BD_ENET_TX_INT;
	}

	bdp->cbd_sc = status;

	return 0;
}

static int
fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
			struct bufdesc *bdp, int index)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	const struct platform_device_id *id_entry =
				platform_get_device_id(fep->pdev);
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
	void *bufaddr;
	unsigned long dmabuf;
	unsigned short status;
	unsigned int estatus = 0;

	status = bdp->cbd_sc;
	status &= ~BD_ENET_TX_STATS;
	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);

	if (status & BD_ENET_TX_READY) {
		/* Ooops.  All transmit buffers are full.  Bail out.
		 * This should not happen, since ndev->tbusy should be set.
		 */
	bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
	dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
		memcpy(fep->tx_bounce[index], skb->data, hdr_len);
		bufaddr = fep->tx_bounce[index];

		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
			swap_buffer(bufaddr, hdr_len);

		dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
					hdr_len, DMA_TO_DEVICE);
		if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
			dev_kfree_skb_any(skb);
			if (net_ratelimit())
			netdev_err(ndev, "tx queue full!\n");
				netdev_err(ndev, "Tx DMA memory map failed\n");
			return NETDEV_TX_BUSY;
		}
	}

	bdp->cbd_bufaddr = dmabuf;
	bdp->cbd_datlen = hdr_len;

	if (fep->bufdesc_ex) {
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
		ebdp->cbd_bdu = 0;
		ebdp->cbd_esc = estatus;
	}

	bdp->cbd_sc = status;

	return 0;
}

static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	int total_len, data_left;
	struct bufdesc *bdp = fep->cur_tx;
	struct tso_t tso;
	unsigned int index = 0;
	int ret;

	if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
		dev_kfree_skb_any(skb);
		if (net_ratelimit())
			netdev_err(ndev, "NOT enough BD for TSO!\n");
		return NETDEV_TX_OK;
	}

	/* Protocol checksum off-load for TCP and UDP. */
	if (fec_enet_clear_csum(skb, ndev)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* Initialize the TSO handler, and prepare the first payload */
	tso_start(skb, &tso);

	total_len = skb->len - hdr_len;
	while (total_len > 0) {
		char *hdr;

		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
		total_len -= data_left;

		/* prepare packet headers: MAC + IP + TCP */
		hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
		ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
		if (ret)
			goto err_release;

		while (data_left > 0) {
			int size;

			size = min_t(int, tso.size, data_left);
			bdp = fec_enet_get_nextdesc(bdp, fep);
			index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
			ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
							size, size == data_left,
							total_len == 0);
			if (ret)
				goto err_release;

			data_left -= size;
			tso_build_data(skb, &tso, size);
		}

		bdp = fec_enet_get_nextdesc(bdp, fep);
	}

	/* Save skb pointer */
	fep->tx_skbuff[index] = skb;

	fec_enet_submit_work(bdp, fep);

	skb_tx_timestamp(skb);
	fep->cur_tx = bdp;

	/* Trigger transmission start */
	writel(0, fep->hwp + FEC_X_DES_ACTIVE);

	return 0;

err_release:
	/* TODO: Release all used data descriptors for TSO */
	return ret;
}

static netdev_tx_t
fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	int entries_free;
	int ret;

	if (skb_is_gso(skb))
		ret = fec_enet_txq_submit_tso(skb, ndev);
	else
		ret = fec_enet_txq_submit_skb(skb, ndev);
	if (ret)
		return ret;

	entries_free = fec_enet_get_free_txdesc_num(fep);
	if (entries_free < MAX_SKB_FRAGS + 1)
	if (entries_free <= fep->tx_stop_threshold)
		netif_stop_queue(ndev);

	return NETDEV_TX_OK;
@@ -883,7 +1077,7 @@ fec_enet_tx(struct net_device *ndev)
	unsigned short status;
	struct	sk_buff	*skb;
	int	index = 0;
	int	entries;
	int	entries_free;

	fep = netdev_priv(ndev);
	bdp = fep->dirty_tx;
@@ -900,8 +1094,9 @@ fec_enet_tx(struct net_device *ndev)
		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);

		skb = fep->tx_skbuff[index];
		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
				DMA_TO_DEVICE);
		if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
					bdp->cbd_datlen, DMA_TO_DEVICE);
		bdp->cbd_bufaddr = 0;
		if (!skb) {
			bdp = fec_enet_get_nextdesc(bdp, fep);
@@ -962,10 +1157,12 @@ fec_enet_tx(struct net_device *ndev)

		/* Since we have freed up a buffer, the ring is no longer full
		 */
		entries = fec_enet_get_free_txdesc_num(fep);
		if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
		if (netif_queue_stopped(ndev)) {
			entries_free = fec_enet_get_free_txdesc_num(fep);
			if (entries_free >= fep->tx_wake_threshold)
				netif_wake_queue(ndev);
		}
	}
	return;
}

@@ -2166,6 +2363,9 @@ static int fec_enet_init(struct net_device *ndev)
	fep->tx_ring_size = TX_RING_SIZE;
	fep->rx_ring_size = RX_RING_SIZE;

	fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
	fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;

	if (fep->bufdesc_ex)
		fep->bufdesc_size = sizeof(struct bufdesc_ex);
	else
@@ -2179,6 +2379,13 @@ static int fec_enet_init(struct net_device *ndev)
	if (!cbd_base)
		return -ENOMEM;

	fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
						&fep->tso_hdrs_dma, GFP_KERNEL);
	if (!fep->tso_hdrs) {
		dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
		return -ENOMEM;
	}

	memset(cbd_base, 0, PAGE_SIZE);

	fep->netdev = ndev;
@@ -2209,9 +2416,11 @@ static int fec_enet_init(struct net_device *ndev)
		ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;

	if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
		ndev->gso_max_segs = FEC_MAX_TSO_SEGS;

		/* enable hw accelerator */
		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
				| NETIF_F_RXCSUM | NETIF_F_SG);
				| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
	}