
Commit e9117e50 authored by Bert Kenward, committed by David S. Miller

sfc: Firmware-Assisted TSO version 2

Add support for FATSOv2 to the driver. FATSOv2 offloads far more of the
task of TCP segmentation to the firmware, such that we now just pass a
single super-packet to the NIC. This means TSO has a great deal in common
with a normal DMA transmit, apart from adding a couple of option
descriptors. NIC-specific checks have been moved off the fast path and
into initialisation where possible.

This also moves FATSOv1/SWTSO to a new file (tx_tso.c). The end of
transmit and some error handling is now outside TSO, since it is common
with other code.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e17705c4
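
The key structural change is that NIC-specific TSO handling is now chosen
once per queue at initialisation, via the handle_tso hook that
efx_ef10_tx_init() installs below. A minimal sketch of how a transmit path
can then dispatch with no per-packet capability checks (the wrapper is
hypothetical, not the committed tx.c code; only handle_tso, tso_version and
the handler signature come from this diff):

static int efx_enqueue_skb_sketch(struct efx_tx_queue *tx_queue,
				  struct sk_buff *skb)
{
	bool data_mapped = false;
	int rc;

	if (skb_is_gso(skb) && tx_queue->handle_tso) {
		/* For FATSOv2 this just adds two option descriptors;
		 * the super-packet then follows as a normal DMA send.
		 */
		rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
		if (rc)
			return rc;
	}

	/* ... common DMA mapping and descriptor push continue here ... */
	return 0;
}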
drivers/net/ethernet/sfc/Makefile  +1 −1
sfc-y			+= efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \
			   rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \
			   tenxpress.o txc43128_phy.o falcon_boards.o \
-			   mcdi.o mcdi_port.o mcdi_mon.o ptp.o
+			   mcdi.o mcdi_port.o mcdi_mon.o ptp.o tx_tso.o
sfc-$(CONFIG_SFC_MTD)	+= mtd.o
sfc-$(CONFIG_SFC_SRIOV)	+= sriov.o siena_sriov.o ef10_sriov.o

drivers/net/ethernet/sfc/ef10.c  +120 −3
@@ -2086,6 +2086,78 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
			ER_DZ_TX_DESC_UPD, tx_queue->queue);
}

/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
 */
static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
				struct sk_buff *skb,
				bool *data_mapped)
{
	struct efx_tx_buffer *buffer;
	struct tcphdr *tcp;
	struct iphdr *ip;

	u16 ipv4_id;
	u32 seqnum;
	u32 mss;

	EFX_BUG_ON_PARANOID(tx_queue->tso_version != 2);

	mss = skb_shinfo(skb)->gso_size;

	if (unlikely(mss < 4)) {
		WARN_ONCE(1, "MSS of %u is too small for TSO v2\n", mss);
		return -EINVAL;
	}

	ip = ip_hdr(skb);
	if (ip->version == 4) {
		/* Modify IPv4 header if needed. */
		ip->tot_len = 0;
		ip->check = 0;
		ipv4_id = ip->id;
	} else {
		/* Modify IPv6 header if needed. */
		struct ipv6hdr *ipv6 = ipv6_hdr(skb);

		ipv6->payload_len = 0;
		ipv4_id = 0;
	}

	tcp = tcp_hdr(skb);
	seqnum = ntohl(tcp->seq);

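	/* First option descriptor (FATSO2A): gives the firmware the
	 * starting IP ID and TCP sequence number to increment from.
	 */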
	buffer = efx_tx_queue_get_insert_buffer(tx_queue);

	buffer->flags = EFX_TX_BUF_OPTION;
	buffer->len = 0;
	buffer->unmap_len = 0;
	EFX_POPULATE_QWORD_5(buffer->option,
			ESF_DZ_TX_DESC_IS_OPT, 1,
			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
			ESF_DZ_TX_TSO_OPTION_TYPE,
			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
			ESF_DZ_TX_TSO_IP_ID, ipv4_id,
			ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
			);
	++tx_queue->insert_count;

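	/* Second option descriptor (FATSO2B): the MSS used to size
	 * each segment.
	 */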
	buffer = efx_tx_queue_get_insert_buffer(tx_queue);

	buffer->flags = EFX_TX_BUF_OPTION;
	buffer->len = 0;
	buffer->unmap_len = 0;
	EFX_POPULATE_QWORD_4(buffer->option,
			ESF_DZ_TX_DESC_IS_OPT, 1,
			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
			ESF_DZ_TX_TSO_OPTION_TYPE,
			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
			ESF_DZ_TX_TSO_TCP_MSS, mss
			);
	++tx_queue->insert_count;

	return 0;
}

static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
{
	MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
@@ -2095,6 +2167,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
	struct efx_channel *channel = tx_queue->channel;
	struct efx_nic *efx = tx_queue->efx;
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	bool tso_v2 = false;
	size_t inlen;
	dma_addr_t dma_addr;
	efx_qword_t *txd;
@@ -2102,13 +2175,33 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
	int i;
	BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);

	/* TSOv2 is a limited resource that can only be configured on a limited
	 * number of queues. TSO without checksum offload is not really a thing,
	 * so we only enable it for those queues.
	 *
	 * TODO: handle failure to allocate this in the case where we've used
	 * all the queues.
	 */
	if (csum_offload && (nic_data->datapath_caps2 &
			(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) {
		tso_v2 = true;
		netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
				channel->channel);
	}

	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
-	MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS,
+	MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS,
+			      /* This flag was removed from mcdi_pcol.h for
+			       * the non-_EXT version of INIT_TXQ.  However,
+			       * firmware still honours it.
+			       */
+			      INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
			      INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
			      INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);

	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);

@@ -2146,7 +2239,10 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
	tx_queue->write_count = 1;

-	if (nic_data->datapath_caps &
+	if (tso_v2) {
+		tx_queue->handle_tso = efx_ef10_tx_tso_desc;
+		tx_queue->tso_version = 2;
+	} else if (nic_data->datapath_caps &
			(1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
		tx_queue->tso_version = 1;
	}
@@ -2202,6 +2298,25 @@ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
			ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue);
}

#define EFX_EF10_MAX_TX_DESCRIPTOR_LEN 0x3fff

static unsigned int efx_ef10_tx_limit_len(struct efx_tx_queue *tx_queue,
					  dma_addr_t dma_addr, unsigned int len)
{
	if (len > EFX_EF10_MAX_TX_DESCRIPTOR_LEN) {
		/* If we need to break across multiple descriptors we should
		 * stop at a page boundary. This assumes the length limit is
		 * greater than the page size.
		 */
		dma_addr_t end = dma_addr + EFX_EF10_MAX_TX_DESCRIPTOR_LEN;

		BUILD_BUG_ON(EFX_EF10_MAX_TX_DESCRIPTOR_LEN < EFX_PAGE_SIZE);
		len = (end & (~(EFX_PAGE_SIZE - 1))) - dma_addr;
	}

	return len;
}
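
/* Worked example of the clamp above, assuming EFX_PAGE_SIZE == 4096
 * (values are illustrative):
 *   dma_addr = 0x12345ff0, len = 0x6000  (> 0x3fff)
 *   end      = 0x12345ff0 + 0x3fff      = 0x12349fef
 *   len      = (end & ~0xfff) - dma_addr = 0x12349000 - 0x12345ff0
 *            = 0x3010
 * so the shortened descriptor ends exactly on a page boundary and the
 * remainder can start cleanly in the next descriptor.
 */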

static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
{
	unsigned int old_write_count = tx_queue->write_count;
@@ -5469,6 +5584,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
	.tx_init = efx_ef10_tx_init,
	.tx_remove = efx_ef10_tx_remove,
	.tx_write = efx_ef10_tx_write,
	.tx_limit_len = efx_ef10_tx_limit_len,
	.rx_push_rss_config = efx_ef10_vf_rx_push_rss_config,
	.rx_probe = efx_ef10_rx_probe,
	.rx_init = efx_ef10_rx_init,
@@ -5575,6 +5691,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
	.tx_init = efx_ef10_tx_init,
	.tx_remove = efx_ef10_tx_remove,
	.tx_write = efx_ef10_tx_write,
	.tx_limit_len = efx_ef10_tx_limit_len,
	.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
	.rx_probe = efx_ef10_rx_probe,
	.rx_init = efx_ef10_rx_init,
drivers/net/ethernet/sfc/ethtool.c  +1 −0
@@ -71,6 +71,7 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
	EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
	EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
	EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
	EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
	EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
drivers/net/ethernet/sfc/falcon.c  +2 −0
@@ -2750,6 +2750,7 @@ const struct efx_nic_type falcon_a1_nic_type = {
	.tx_init = efx_farch_tx_init,
	.tx_remove = efx_farch_tx_remove,
	.tx_write = efx_farch_tx_write,
	.tx_limit_len = efx_farch_tx_limit_len,
	.rx_push_rss_config = dummy_rx_push_rss_config,
	.rx_probe = efx_farch_rx_probe,
	.rx_init = efx_farch_rx_init,
@@ -2849,6 +2850,7 @@ const struct efx_nic_type falcon_b0_nic_type = {
	.tx_init = efx_farch_tx_init,
	.tx_remove = efx_farch_tx_remove,
	.tx_write = efx_farch_tx_write,
	.tx_limit_len = efx_farch_tx_limit_len,
	.rx_push_rss_config = falcon_b0_rx_push_rss_config,
	.rx_probe = efx_farch_rx_probe,
	.rx_init = efx_farch_rx_init,
drivers/net/ethernet/sfc/farch.c  +15 −0
@@ -356,6 +356,21 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
	}
}

unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
				    dma_addr_t dma_addr, unsigned int len)
{
	/* Don't cross 4K boundaries with descriptors. */
	unsigned int limit = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;

	len = min(limit, len);

	if (EFX_WORKAROUND_5391(tx_queue->efx) && (dma_addr & 0xf))
		len = min_t(unsigned int, len, 512 - (dma_addr & 0xf));

	return len;
}
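
/* Worked example of the boundary arithmetic above, assuming
 * EFX_PAGE_SIZE == 4096 (values are illustrative):
 *   dma_addr = 0x1000ff0
 *   limit    = (~0x1000ff0 & 0xfff) + 1 = 0xf + 1 = 0x10
 * i.e. at most 16 bytes remain before the 4K boundary at 0x1001000,
 * so a larger len is clamped to 16.
 */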


/* Allocate hardware resources for a TX queue */
int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
{