Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cc26a490 authored by Tariq Toukan's avatar Tariq Toukan Committed by David S. Miller
Browse files

net/mlx4_en: Improve transmit CQ polling



Several small performance improvements in TX CQ polling,
including:
- Compiler branch predictor hints.
- Minimize variables scope.
- More proper check of cq type.
- Use boolean instead of int for a binary indication.

Performance tests:
Tested on ConnectX3Pro, Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

Packet-rate tests for both regular stack and XDP use cases:
No noticeable gain, no degradation.

Signed-off-by: default avatarTariq Toukan <tariqt@mellanox.com>
Reviewed-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 9bcee89a
Loading
Loading
Loading
Loading
+7 −6
Original line number Original line Diff line number Diff line
@@ -402,8 +402,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
	struct mlx4_cqe *cqe;
	struct mlx4_cqe *cqe;
	u16 index;
	u16 index, ring_index, stamp_index;
	u16 new_index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	u32 cons_index = mcq->cons_index;
@@ -418,7 +417,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
	u32 last_nr_txbb;
	u32 last_nr_txbb;
	u32 ring_cons;
	u32 ring_cons;


	if (!priv->port_up)
	if (unlikely(!priv->port_up))
		return true;
		return true;


	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
@@ -433,6 +432,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
	/* Process all completed CQEs */
	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size) && (done < budget)) {
			cons_index & size) && (done < budget)) {
		u16 new_index;

		/*
		/*
		 * make sure we read the CQE after we read the
		 * make sure we read the CQE after we read the
		 * ownership bit
		 * ownership bit
@@ -479,7 +480,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	}
	}



	/*
	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 * the ring consumer.
@@ -492,7 +492,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;


	if (ring->free_tx_desc == mlx4_en_recycle_tx_desc)
	if (cq->type == TX_XDP)
		return done < budget;
		return done < budget;


	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
@@ -504,6 +504,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
		netif_tx_wake_queue(ring->tx_queue);
		netif_tx_wake_queue(ring->tx_queue);
		ring->wake_queue++;
		ring->wake_queue++;
	}
	}

	return done < budget;
	return done < budget;
}
}


@@ -524,7 +525,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int clean_complete;
	bool clean_complete;


	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
	if (!clean_complete)
	if (!clean_complete)