Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 117b07e6 authored by David S. Miller
Browse files

Merge branch 'mlx4-XDP-performance-improvements'



Tariq Toukan says:

====================
mlx4 XDP performance improvements

This patchset contains data-path improvements, mainly for XDP_DROP
and XDP_TX cases.

Main patches:
* Patch 2 by Saeed allows enabling optimized A0 RX steering (in HW) when
  setting a single RX ring.
  With this configuration, HW packet-rate dramatically improves,
  reaching 28.1 Mpps in XDP_DROP case for both IPv4 (37% gain)
  and IPv6 (53% gain).
* Patch 6 enhances the XDP xmit function. Among other changes, now we
  ring one doorbell per NAPI. Patch gives 17% gain in XDP_TX case.
* Patch 7 obsoletes the NAPI of XDP_TX completion queue and integrates its
  poll into the respective RX NAPI. Patch gives 15% gain in XDP_TX case.

Series generated against net-next commit:
f7aec129 rxrpc: Cache the congestion window setting
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1492a3a7 4c07c132
Loading
Loading
Loading
Loading
+18 −7
Original line number Diff line number Diff line
@@ -146,16 +146,25 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
	if (err)
		goto free_eq;

	cq->mcq.comp  = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq;
	cq->mcq.event = mlx4_en_cq_event;

	if (cq->type != RX)
	switch (cq->type) {
	case TX:
		cq->mcq.comp = mlx4_en_tx_irq;
		netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
				  NAPI_POLL_WEIGHT);
	else
		napi_enable(&cq->napi);
		break;
	case RX:
		cq->mcq.comp = mlx4_en_rx_irq;
		netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);

		napi_enable(&cq->napi);
		break;
	case TX_XDP:
		/* nothing regarding napi, it's shared with rx ring */
		cq->xdp_busy = false;
		break;
	}

	return 0;

@@ -184,8 +193,10 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)

void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
	if (cq->type != TX_XDP) {
		napi_disable(&cq->napi);
		netif_napi_del(&cq->napi);
	}

	mlx4_cq_free(priv->mdev->dev, &cq->mcq);
}
+3 −3
Original line number Diff line number Diff line
@@ -125,9 +125,9 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
		priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;

	mutex_lock(&priv->mdev->state_lock);
	if (priv->mdev->dev->caps.flags2 &
	    MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB &&
	    priv->rss_map.indir_qp.qpn) {
	if ((priv->mdev->dev->caps.flags2 &
	     MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB) &&
	    priv->rss_map.indir_qp && priv->rss_map.indir_qp->qpn) {
		int i;
		int err = 0;
		int loopback = !!(features & NETIF_F_LOOPBACK);
+12 −8
Original line number Diff line number Diff line
@@ -596,6 +596,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
		return err;
	}

	en_info(priv, "Steering Mode %d\n", dev->caps.steering_mode);

	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
		int base_qpn = mlx4_get_base_qpn(dev, priv->port);
		*qpn = base_qpn + index;
@@ -1010,7 +1012,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
				memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
				mc_list[5] = priv->port;
				err = mlx4_multicast_detach(mdev->dev,
							    &priv->rss_map.indir_qp,
							    priv->rss_map.indir_qp,
							    mc_list,
							    MLX4_PROT_ETH,
							    mclist->reg_id);
@@ -1032,7 +1034,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
				/* needed for B0 steering support */
				mc_list[5] = priv->port;
				err = mlx4_multicast_attach(mdev->dev,
							    &priv->rss_map.indir_qp,
							    priv->rss_map.indir_qp,
							    mc_list,
							    priv->port, 0,
							    MLX4_PROT_ETH,
@@ -1677,13 +1679,15 @@ int mlx4_en_start_port(struct net_device *dev)
			if (t != TX_XDP) {
				tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
				tx_ring->recycle_ring = NULL;
			} else {
				mlx4_en_init_recycle_ring(priv, i);
			}

				/* Arm CQ for TX completions */
				mlx4_en_arm_cq(priv, cq);

			} else {
				mlx4_en_init_recycle_ring(priv, i);
				/* XDP TX CQ should never be armed */
			}

			/* Set initial ownership of all Tx TXBBs to SW (1) */
			for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
				*((u32 *)(tx_ring->buf + j)) = 0xffffffff;
@@ -1742,7 +1746,7 @@ int mlx4_en_start_port(struct net_device *dev)
	/* Attach rx QP to broadcast address */
	eth_broadcast_addr(&mc_list[10]);
	mc_list[5] = priv->port; /* needed for B0 steering support */
	if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
	if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list,
				  priv->port, 0, MLX4_PROT_ETH,
				  &priv->broadcast_id))
		mlx4_warn(mdev, "Failed Attaching Broadcast\n");
@@ -1866,12 +1870,12 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
	/* Detach All multicasts */
	eth_broadcast_addr(&mc_list[10]);
	mc_list[5] = priv->port; /* needed for B0 steering support */
	mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
	mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list,
			      MLX4_PROT_ETH, priv->broadcast_id);
	list_for_each_entry(mclist, &priv->curr_list, list) {
		memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
		mc_list[5] = priv->port;
		mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
		mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp,
				      mc_list, MLX4_PROT_ETH, mclist->reg_id);
		if (mclist->tunnel_reg_id)
			mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id);
+96 −49
Original line number Diff line number Diff line
@@ -134,10 +134,11 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index,
				   gfp_t gfp)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct mlx4_en_rx_desc *rx_desc = ring->buf +
		(index << ring->log_stride);
	struct mlx4_en_rx_alloc *frags = ring->rx_info +
					(index << priv->log_rx_info);
	if (ring->page_cache.index > 0) {
	if (likely(ring->page_cache.index > 0)) {
		/* XDP uses a single page per frame */
		if (!frags->page) {
			ring->page_cache.index--;
@@ -178,6 +179,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
	}
}

/* Function not in fast-path */
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
@@ -539,14 +541,14 @@ static void validate_loopback(struct mlx4_en_priv *priv, void *va)
	priv->loopback_ok = 1;
}

static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_ring *ring)
{
	u32 missing = ring->actual_size - (ring->prod - ring->cons);

	/* Try to batch allocations, but not too much. */
	if (missing < 8)
		return false;
		return;
	do {
		if (mlx4_en_prepare_rx_desc(priv, ring,
					    ring->prod & ring->size_mask,
@@ -554,9 +556,9 @@ static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
					    __GFP_MEMALLOC))
			break;
		ring->prod++;
	} while (--missing);
	} while (likely(--missing));

	return true;
	mlx4_en_update_rx_prod_db(ring);
}

/* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -637,21 +639,14 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
	struct mlx4_en_rx_alloc *frags;
	int factor = priv->cqe_factor;
	struct mlx4_en_rx_ring *ring;
	struct bpf_prog *xdp_prog;
	int doorbell_pending;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int cq_ring = cq->ring;
	bool doorbell_pending;
	struct mlx4_cqe *cqe;
	int polled = 0;
	int ip_summed;
	int factor = priv->cqe_factor;
	u64 timestamp;
	bool l2_tunnel;
	int index;

	if (unlikely(!priv->port_up))
		return 0;
@@ -659,6 +654,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
	if (unlikely(budget <= 0))
		return polled;

	ring = priv->rx_ring[cq_ring];

	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
	rcu_read_lock();
	xdp_prog = rcu_dereference(ring->xdp_prog);
@@ -673,10 +670,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {
		struct mlx4_en_rx_alloc *frags;
		enum pkt_hash_types hash_type;
		struct sk_buff *skb;
		unsigned int length;
		int ip_summed;
		void *va;
		int nr;

		frags = ring->rx_info + (index << priv->log_rx_info);
		va = page_address(frags[0].page) + frags[0].page_offset;
		prefetchw(va);
		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
@@ -768,7 +772,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
				break;
			case XDP_TX:
				if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
							length, cq->ring,
							length, cq_ring,
							&doorbell_pending))) {
					frags[0].page = NULL;
					goto next;
@@ -790,24 +794,27 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
		ring->packets++;

		skb = napi_get_frags(&cq->napi);
		if (!skb)
		if (unlikely(!skb))
			goto next;

		if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
			timestamp = mlx4_en_get_cqe_ts(cqe);
			mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
			u64 timestamp = mlx4_en_get_cqe_ts(cqe);

			mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb),
					       timestamp);
		}
		skb_record_rx_queue(skb, cq->ring);
		skb_record_rx_queue(skb, cq_ring);

		if (likely(dev->features & NETIF_F_RXCSUM)) {
			if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
						      MLX4_CQE_STATUS_UDP)) {
				if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
				    cqe->checksum == cpu_to_be16(0xffff)) {
					ip_summed = CHECKSUM_UNNECESSARY;
					l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
					bool l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
						(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));

					ip_summed = CHECKSUM_UNNECESSARY;
					hash_type = PKT_HASH_TYPE_L4;
					if (l2_tunnel)
						skb->csum_level = 1;
					ring->csum_ok++;
@@ -822,6 +829,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
						goto csum_none;
					} else {
						ip_summed = CHECKSUM_COMPLETE;
						hash_type = PKT_HASH_TYPE_L3;
						ring->csum_complete++;
					}
				} else {
@@ -831,16 +839,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
		} else {
csum_none:
			ip_summed = CHECKSUM_NONE;
			hash_type = PKT_HASH_TYPE_L3;
			ring->csum_none++;
		}
		skb->ip_summed = ip_summed;
		if (dev->features & NETIF_F_RXHASH)
			skb_set_hash(skb,
				     be32_to_cpu(cqe->immed_rss_invalid),
				     (ip_summed == CHECKSUM_UNNECESSARY) ?
					PKT_HASH_TYPE_L4 :
					PKT_HASH_TYPE_L3);

				     hash_type);

		if ((cqe->vlan_my_qpn &
		     cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) &&
@@ -867,15 +873,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
		if (++polled == budget)
		if (unlikely(++polled == budget))
			break;
	}

	rcu_read_unlock();

	if (polled) {
		if (doorbell_pending)
			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
	if (likely(polled)) {
		if (doorbell_pending) {
			priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true;
			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]);
		}

		mlx4_cq_set_ci(&cq->mcq);
		wmb(); /* ensure HW sees CQ consumer before we post new buffers */
@@ -883,8 +891,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
	}
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);

	if (mlx4_en_refill_rx_buffers(priv, ring))
		mlx4_en_update_rx_prod_db(ring);
	mlx4_en_refill_rx_buffers(priv, ring);

	return polled;
}
@@ -907,16 +914,30 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_cq *xdp_tx_cq = NULL;
	bool clean_complete = true;
	int done;

	if (priv->tx_ring_num[TX_XDP]) {
		xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
		if (xdp_tx_cq->xdp_busy) {
			clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
							       budget);
			xdp_tx_cq->xdp_busy = !clean_complete;
		}
	}

	done = mlx4_en_process_rx_cq(dev, cq, budget);

	/* If we used up all the quota - we're probably not done yet... */
	if (done == budget) {
	if (done == budget || !clean_complete) {
		const struct cpumask *aff;
		struct irq_data *idata;
		int cpu_curr;

		/* in case we got here because of !clean_complete */
		done = budget;

		INC_PERF_COUNTER(priv->pstats.napi_quota);

		cpu_curr = smp_processor_id();
@@ -936,7 +957,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
			done--;
	}
	/* Done for now */
	if (napi_complete_done(napi, done))
	if (likely(napi_complete_done(napi, done)))
		mlx4_en_arm_cq(priv, cq);
	return done;
}
@@ -1099,11 +1120,14 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
	int i, qpn;
	int err = 0;
	int good_qps = 0;
	u8 flags;

	en_dbg(DRV, priv, "Configuring rss steering\n");

	flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0;
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn, 0);
				    &rss_map->base_qpn, flags);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
@@ -1120,13 +1144,28 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
		++good_qps;
	}

	if (priv->rx_ring_num == 1) {
		rss_map->indir_qp = &rss_map->qps[0];
		priv->base_qpn = rss_map->indir_qp->qpn;
		en_info(priv, "Optimized Non-RSS steering\n");
		return 0;
	}

	rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL);
	if (!rss_map->indir_qp) {
		err = -ENOMEM;
		goto rss_err;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp,
			    GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto rss_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;

	rss_map->indir_qp->event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0]->cqn, -1, &context);

@@ -1164,8 +1203,9 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
		err = -EINVAL;
		goto indir_err;
	}

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
			       rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

@@ -1173,9 +1213,11 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
		       MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, rss_map->indir_qp);
	kfree(rss_map->indir_qp);
	rss_map->indir_qp = NULL;
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
@@ -1193,10 +1235,15 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	if (priv->rx_ring_num > 1) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
			       MLX4_QP_STATE_RST, NULL, 0, 0,
			       rss_map->indir_qp);
		mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
		mlx4_qp_free(mdev->dev, rss_map->indir_qp);
		kfree(rss_map->indir_qp);
		rss_map->indir_qp = NULL;
	}

	for (i = 0; i < priv->rx_ring_num; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
+148 −157

File changed.

Preview size limit exceeded, changes collapsed.

Loading