
Commit 3d03641c authored by Eric Dumazet, committed by David S. Miller

net/mlx4_en: Avoid a cache line miss in TX completion for single frag skb's

Add map0_dma/map0_byte_count into mlx4_en_tx_info to avoid a cache
line miss in TX completion for frames having one DMA element. (We avoid
reading back the TX descriptor.)
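As an illustration of the access pattern this buys, here is a minimal userspace sketch with mock types and names (not the driver's real definitions): TX completion already has to touch tx_info to fetch skb and nr_txbb, so caching the first mapping's address and length in that same structure lets a single-frag unmap complete without reading the descriptor ring back, which the CPU has typically not touched since the NIC consumed it.

	/* Hypothetical mock-up, for illustration only; dma_addr_t and the
	 * struct below are stand-ins, not the kernel definitions. */
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t dma_addr_t;

	struct mock_tx_info {			/* mirrors the new mlx4_en_tx_info fields */
		void		*skb;
		dma_addr_t	map0_dma;	/* cached copy of the first mapping's address */
		uint32_t	map0_byte_count; /* cached copy of its length */
		uint8_t		nr_maps;	/* linear part + page fragments */
	};

	/* Fast path: unmap the first (often the only) mapping straight from
	 * tx_info instead of reading addr/byte_count back out of the TX
	 * descriptor ring -- that read-back is the cache line miss avoided. */
	static void mock_complete_single_frag(const struct mock_tx_info *ti)
	{
		printf("dma_unmap(dma=%#llx, len=%u)\n",
		       (unsigned long long)ti->map0_dma, ti->map0_byte_count);
	}

	int main(void)
	{
		struct mock_tx_info ti = {
			.map0_dma = 0x1000, .map0_byte_count = 1514, .nr_maps = 1,
		};

		mock_complete_single_frag(&ti);
		return 0;
	}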

Note this could be extended to 2/3 DMA elements later, as we have free
room in mlx4_en_tx_info.

Also, mlx4_en_free_tx_desc() no longer accesses skb_shinfo(). We use a
new nr_maps field in mlx4_en_tx_info to avoid 2 or 3 cache misses.
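Sketching the reworked completion loop the same way (userspace mocks, not the driver's API): the element count now comes from tx_info->nr_maps, so skb_shinfo(skb), which lives past the packet data in a cache line the completion path otherwise never needs, is not dereferenced at all; only maps 1..nr_maps-1 still read the descriptor.

	/* Hypothetical mock-up, for illustration only. */
	#include <endian.h>		/* be64toh/be32toh, htobe64/htobe32 (glibc) */
	#include <stdint.h>
	#include <stdio.h>

	struct mock_data_seg {		/* stand-in for mlx4_wqe_data_seg */
		uint32_t byte_count;	/* stored big-endian, as the HW expects */
		uint32_t lkey;
		uint64_t addr;		/* stored big-endian */
	};

	static void mock_unmap_all(uint64_t map0_dma, uint32_t map0_len,
				   int nr_maps, const struct mock_data_seg *data)
	{
		int i;

		/* Map 0 is served entirely from the cached tx_info copy. */
		printf("dma_unmap(dma=%#llx, len=%u)\n",
		       (unsigned long long)map0_dma, map0_len);

		/* Maps 1..nr_maps-1 still read the descriptor, but the loop
		 * bound is the cached nr_maps, not skb_shinfo(skb)->nr_frags. */
		for (i = 1; i < nr_maps; i++) {
			data++;
			printf("dma_unmap(dma=%#llx, len=%u)\n",
			       (unsigned long long)be64toh(data->addr),
			       be32toh(data->byte_count));
		}
	}

	int main(void)
	{
		struct mock_data_seg segs[2] = {
			{ htobe32(1024), 0, htobe64(0x1000) },
			{ htobe32(512),  0, htobe64(0x2000) },
		};

		mock_unmap_all(0x1000, 1024, 2, segs);
		return 0;
	}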

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent dc9b06d1
drivers/net/ethernet/mellanox/mlx4/en_tx.c  +46 −37
@@ -259,38 +259,40 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				struct mlx4_en_tx_ring *ring,
 				int index, u8 owner, u64 timestamp)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
 	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
-	struct sk_buff *skb = tx_info->skb;
-	struct skb_frag_struct *frag;
 	void *end = ring->buf + ring->buf_size;
-	int frags = skb_shinfo(skb)->nr_frags;
+	struct sk_buff *skb = tx_info->skb;
+	int nr_maps = tx_info->nr_maps;
 	int i;
-	struct skb_shared_hwtstamps hwts;
 
-	if (timestamp) {
-		mlx4_en_fill_hwtstamps(mdev, &hwts, timestamp);
+	if (unlikely(timestamp)) {
+		struct skb_shared_hwtstamps hwts;
+
+		mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
 		skb_tstamp_tx(skb, &hwts);
 	}
 
 	/* Optimize the common case when there are no wraparounds */
 	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
 		if (!tx_info->inl) {
-			if (tx_info->linear) {
+			if (tx_info->linear)
 				dma_unmap_single(priv->ddev,
-					(dma_addr_t) be64_to_cpu(data->addr),
-					 be32_to_cpu(data->byte_count),
-					 PCI_DMA_TODEVICE);
-				++data;
-			}
-
-			for (i = 0; i < frags; i++) {
-				frag = &skb_shinfo(skb)->frags[i];
-				dma_unmap_page(priv->ddev,
-					(dma_addr_t) be64_to_cpu(data[i].addr),
-					skb_frag_size(frag), PCI_DMA_TODEVICE);
+						tx_info->map0_dma,
+						tx_info->map0_byte_count,
+						PCI_DMA_TODEVICE);
+			else
+				dma_unmap_page(priv->ddev,
+					       tx_info->map0_dma,
+					       tx_info->map0_byte_count,
+					       PCI_DMA_TODEVICE);
+			for (i = 1; i < nr_maps; i++) {
+				data++;
+				dma_unmap_page(priv->ddev,
+					(dma_addr_t)be64_to_cpu(data->addr),
+					be32_to_cpu(data->byte_count),
+					PCI_DMA_TODEVICE);
 			}
 		}
 	} else {
@@ -299,23 +301,25 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				data = ring->buf + ((void *)data - end);
 			}
 
-			if (tx_info->linear) {
+			if (tx_info->linear)
 				dma_unmap_single(priv->ddev,
-					(dma_addr_t) be64_to_cpu(data->addr),
-					 be32_to_cpu(data->byte_count),
-					 PCI_DMA_TODEVICE);
-				++data;
-			}
-
-			for (i = 0; i < frags; i++) {
+						tx_info->map0_dma,
+						tx_info->map0_byte_count,
+						PCI_DMA_TODEVICE);
+			else
+				dma_unmap_page(priv->ddev,
+					       tx_info->map0_dma,
+					       tx_info->map0_byte_count,
+					       PCI_DMA_TODEVICE);
+			for (i = 1; i < nr_maps; i++) {
+				data++;
 				/* Check for wraparound before unmapping */
 				if ((void *) data >= end)
 					data = ring->buf;
-				frag = &skb_shinfo(skb)->frags[i];
 				dma_unmap_page(priv->ddev,
-					(dma_addr_t) be64_to_cpu(data->addr),
-					 skb_frag_size(frag), PCI_DMA_TODEVICE);
-				++data;
+					(dma_addr_t)be64_to_cpu(data->addr),
+					be32_to_cpu(data->byte_count),
+					PCI_DMA_TODEVICE);
 			}
 		}
 	}
@@ -751,19 +755,22 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_info->linear = (lso_header_size < skb_headlen(skb) &&
 			   !is_inline(ring->inline_thold, skb, NULL)) ? 1 : 0;
 
-	data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
+	tx_info->nr_maps = skb_shinfo(skb)->nr_frags + tx_info->linear;
+	data += tx_info->nr_maps - 1;
 
 	if (is_inline(ring->inline_thold, skb, &fragptr)) {
 		tx_info->inl = 1;
 	} else {
+		dma_addr_t dma = 0;
+		u32 byte_count = 0;
+
 		/* Map fragments if any */
 		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
 			const struct skb_frag_struct *frag;
-			dma_addr_t dma;
 
 			frag = &skb_shinfo(skb)->frags[i];
+			byte_count = skb_frag_size(frag);
 			dma = skb_frag_dma_map(ddev, frag,
-					       0, skb_frag_size(frag),
+					       0, byte_count,
 					       DMA_TO_DEVICE);
 			if (dma_mapping_error(ddev, dma))
 				goto tx_drop_unmap;
@@ -771,14 +778,13 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 			data->addr = cpu_to_be64(dma);
 			data->lkey = ring->mr_key;
 			wmb();
-			data->byte_count = cpu_to_be32(skb_frag_size(frag));
+			data->byte_count = cpu_to_be32(byte_count);
 			--data;
 		}
 
 		/* Map linear part if needed */
 		if (tx_info->linear) {
-			u32 byte_count = skb_headlen(skb) - lso_header_size;
-			dma_addr_t dma;
+			byte_count = skb_headlen(skb) - lso_header_size;
 
 			dma = dma_map_single(ddev, skb->data +
 					     lso_header_size, byte_count,
@@ -792,6 +798,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 			data->byte_count = cpu_to_be32(byte_count);
 		}
 		tx_info->inl = 0;
+		/* tx completion can avoid cache line miss for common cases */
+		tx_info->map0_dma = dma;
+		tx_info->map0_byte_count = byte_count;
 	}
 
 	/*
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  +3 −0
@@ -216,12 +216,15 @@ enum cq_type {
 
 struct mlx4_en_tx_info {
 	struct sk_buff *skb;
+	dma_addr_t	map0_dma;
+	u32		map0_byte_count;
 	u32		nr_txbb;
 	u32		nr_bytes;
 	u8		linear;
 	u8		data_offset;
 	u8		inl;
 	u8		ts_requested;
+	u8		nr_maps;
 } ____cacheline_aligned_in_smp;