Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 186cd4d4 authored by Antoine Tenart's avatar Antoine Tenart Committed by David S. Miller
Browse files

net: mvpp2: software tso support



The patch uses the tso API to implement the tso functionality in Marvell
PPv2 driver.

Using iperf and 10G ports, using TSO shows a significant performance
improvement by a factor 2 to reach around 9.5Gbps in TX; as well as a
significant CPU usage drop (from 25% to 15%).

Signed-off-by: default avatarAntoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 85affd7e
Loading
Loading
Loading
Loading
+157 −14
Original line number Original line Diff line number Diff line
@@ -35,6 +35,7 @@
#include <uapi/linux/ppp_defs.h>
#include <uapi/linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ipv6.h>
#include <net/tso.h>


/* RX Fifo Registers */
/* RX Fifo Registers */
#define MVPP2_RX_DATA_FIFO_SIZE_REG(port)	(0x00 + 4 * (port))
#define MVPP2_RX_DATA_FIFO_SIZE_REG(port)	(0x00 + 4 * (port))
@@ -1010,6 +1011,10 @@ struct mvpp2_txq_pcpu {


	/* Index of the TX DMA descriptor to be cleaned up */
	/* Index of the TX DMA descriptor to be cleaned up */
	int txq_get_index;
	int txq_get_index;

	/* DMA buffer for TSO headers */
	char *tso_headers;
	dma_addr_t tso_headers_dma;
};
};


struct mvpp2_tx_queue {
struct mvpp2_tx_queue {
@@ -5494,6 +5499,14 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
		txq_pcpu->reserved_num = 0;
		txq_pcpu->reserved_num = 0;
		txq_pcpu->txq_put_index = 0;
		txq_pcpu->txq_put_index = 0;
		txq_pcpu->txq_get_index = 0;
		txq_pcpu->txq_get_index = 0;

		txq_pcpu->tso_headers =
			dma_alloc_coherent(port->dev->dev.parent,
					   MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
					   &txq_pcpu->tso_headers_dma,
					   GFP_KERNEL);
		if (!txq_pcpu->tso_headers)
			goto cleanup;
	}
	}


	return 0;
	return 0;
@@ -5501,6 +5514,11 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
	for_each_present_cpu(cpu) {
	for_each_present_cpu(cpu) {
		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
		kfree(txq_pcpu->buffs);
		kfree(txq_pcpu->buffs);

		dma_free_coherent(port->dev->dev.parent,
				  MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
				  txq_pcpu->tso_headers,
				  txq_pcpu->tso_headers_dma);
	}
	}


	dma_free_coherent(port->dev->dev.parent,
	dma_free_coherent(port->dev->dev.parent,
@@ -5520,6 +5538,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
	for_each_present_cpu(cpu) {
	for_each_present_cpu(cpu) {
		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
		kfree(txq_pcpu->buffs);
		kfree(txq_pcpu->buffs);

		dma_free_coherent(port->dev->dev.parent,
				  MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
				  txq_pcpu->tso_headers,
				  txq_pcpu->tso_headers_dma);
	}
	}


	if (txq->descs)
	if (txq->descs)
@@ -6049,6 +6072,123 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
	return -ENOMEM;
	return -ENOMEM;
}
}


static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
				     struct net_device *dev,
				     struct mvpp2_tx_queue *txq,
				     struct mvpp2_tx_queue *aggr_txq,
				     struct mvpp2_txq_pcpu *txq_pcpu,
				     int hdr_sz)
{
	struct mvpp2_port *port = netdev_priv(dev);
	struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
	dma_addr_t addr;

	mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
	mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);

	addr = txq_pcpu->tso_headers_dma +
	       txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
	mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
	mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);

	mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
					    MVPP2_TXD_F_DESC |
					    MVPP2_TXD_PADDING_DISABLE);
	mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
}

static inline int mvpp2_tso_put_data(struct sk_buff *skb,
				     struct net_device *dev, struct tso_t *tso,
				     struct mvpp2_tx_queue *txq,
				     struct mvpp2_tx_queue *aggr_txq,
				     struct mvpp2_txq_pcpu *txq_pcpu,
				     int sz, bool left, bool last)
{
	struct mvpp2_port *port = netdev_priv(dev);
	struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
	dma_addr_t buf_dma_addr;

	mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
	mvpp2_txdesc_size_set(port, tx_desc, sz);

	buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
				      DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
		mvpp2_txq_desc_put(txq);
		return -ENOMEM;
	}

	mvpp2_txdesc_offset_set(port, tx_desc,
				buf_dma_addr & MVPP2_TX_DESC_ALIGN);
	mvpp2_txdesc_dma_addr_set(port, tx_desc,
				  buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);

	if (!left) {
		mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
		if (last) {
			mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
			return 0;
		}
	} else {
		mvpp2_txdesc_cmd_set(port, tx_desc, 0);
	}

	mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
	return 0;
}

static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
			struct mvpp2_tx_queue *txq,
			struct mvpp2_tx_queue *aggr_txq,
			struct mvpp2_txq_pcpu *txq_pcpu)
{
	struct mvpp2_port *port = netdev_priv(dev);
	struct tso_t tso;
	int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
	int i, len, descs = 0;

	/* Check number of available descriptors */
	if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
				      tso_count_descs(skb)) ||
	    mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
					     tso_count_descs(skb)))
		return 0;

	tso_start(skb, &tso);
	len = skb->len - hdr_sz;
	while (len > 0) {
		int left = min_t(int, skb_shinfo(skb)->gso_size, len);
		char *hdr = txq_pcpu->tso_headers +
			    txq_pcpu->txq_put_index * TSO_HEADER_SIZE;

		len -= left;
		descs++;

		tso_build_hdr(skb, hdr, &tso, left, len == 0);
		mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);

		while (left > 0) {
			int sz = min_t(int, tso.size, left);
			left -= sz;
			descs++;

			if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
					       txq_pcpu, sz, left, len == 0))
				goto release;
			tso_build_data(skb, &tso, sz);
		}
	}

	return descs;

release:
	for (i = descs - 1; i >= 0; i--) {
		struct mvpp2_tx_desc *tx_desc = txq->descs + i;
		tx_desc_unmap_put(port, txq, tx_desc);
	}
	return 0;
}

/* Main tx processing */
/* Main tx processing */
static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
{
{
@@ -6066,6 +6206,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
	txq_pcpu = this_cpu_ptr(txq->pcpu);
	txq_pcpu = this_cpu_ptr(txq->pcpu);
	aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
	aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];


	if (skb_is_gso(skb)) {
		frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
		goto out;
	}
	frags = skb_shinfo(skb)->nr_frags + 1;
	frags = skb_shinfo(skb)->nr_frags + 1;


	/* Check number of available descriptors */
	/* Check number of available descriptors */
@@ -6115,6 +6259,11 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
		}
		}
	}
	}


out:
	if (frags > 0) {
		struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);

		txq_pcpu->reserved_num -= frags;
		txq_pcpu->reserved_num -= frags;
		txq_pcpu->count += frags;
		txq_pcpu->count += frags;
		aggr_txq->count += frags;
		aggr_txq->count += frags;
@@ -6123,14 +6272,8 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
		wmb();
		wmb();
		mvpp2_aggr_txq_pend_desc_add(port, frags);
		mvpp2_aggr_txq_pend_desc_add(port, frags);


	if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
		if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);

			netif_tx_stop_queue(nq);
			netif_tx_stop_queue(nq);
	}
out:
	if (frags > 0) {
		struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);


		u64_stats_update_begin(&stats->syncp);
		u64_stats_update_begin(&stats->syncp);
		stats->tx_packets++;
		stats->tx_packets++;
@@ -7255,7 +7398,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
		}
		}
	}
	}


	features = NETIF_F_SG | NETIF_F_IP_CSUM;
	features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
	dev->features = features | NETIF_F_RXCSUM;
	dev->features = features | NETIF_F_RXCSUM;
	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
	dev->vlan_features |= features;
	dev->vlan_features |= features;