Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f0ab34f0, authored by Yevgeny Petrilin, committed by David S. Miller
Browse files

net/mlx4_en: using non collapsed CQ on TX



Move to the regular (non-collapsed) Completion Queue implementation.
The completion for each transmitted packet is written to a new CQ entry.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0d9fdaa9
Loading
Loading
Loading
Loading
+2 −5
Original line number Diff line number Diff line
@@ -51,10 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
	int err;

	cq->size = entries;
	if (mode == RX)
	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
	else
		cq->buf_size = sizeof(struct mlx4_cqe);

	cq->ring = ring;
	cq->is_tx = mode;
@@ -120,7 +117,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
		cq->size = priv->rx_ring[cq->ring].actual_size;

	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
			    cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
			    cq->wqres.db.dma, &cq->mcq, cq->vector, 0);
	if (err)
		return err;

+34 −33
Original line number Diff line number Diff line
@@ -307,59 +307,60 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
	return cnt;
}


static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe = cq->buf;
	struct mlx4_cqe *cqe;
	u16 index;
	u16 new_index;
	u16 new_index, ring_index;
	u32 txbbs_skipped = 0;
	u32 cq_last_sav;

	/* index always points to the first TXBB of the last polled descriptor */
	index = ring->cons & ring->size_mask;
	new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
	if (index == new_index)
		return;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;

	if (!priv->port_up)
		return;

	index = cons_index & size_mask;
	cqe = &buf[index];
	ring_index = ring->cons & size_mask;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size)) {
		/*
	 * We use a two-stage loop:
	 * - the first samples the HW-updated CQE
	 * - the second frees TXBBs until the last sample
	 * This lets us amortize CQE cache misses, while still polling the CQ
	 * until is quiescent.
		 * make sure we read the CQE after we read the
		 * ownership bit
		 */
	cq_last_sav = mcq->cons_index;
	do {
		do {
		rmb();

		/* Skip over last polled CQE */
			index = (index + ring->last_nr_txbb) & ring->size_mask;
			txbbs_skipped += ring->last_nr_txbb;
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

			/* Poll next CQE */
		do {
			txbbs_skipped += ring->last_nr_txbb;
			ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
			/* free next descriptor */
			ring->last_nr_txbb = mlx4_en_free_tx_desc(
						priv, ring, index,
					priv, ring, ring_index,
					!!((ring->cons + txbbs_skipped) &
							ring->size));
			++mcq->cons_index;
		} while (ring_index != new_index);

		} while (index != new_index);
		++cons_index;
		index = cons_index & size_mask;
		cqe = &buf[index];
	}

		new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
	} while (index != new_index);
	AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
			 (u32) (mcq->cons_index - cq_last_sav));

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();
	ring->cons += txbbs_skipped;