Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1f5b1e47, authored by Tariq Toukan, committed by Saeed Mahameed
Browse files

net/mlx5e: Optimize poll ICOSQ completion queue



UMR operations are more frequent and important.
Check them first, and add a compiler branch predictor hint.

According to current design, ICOSQ CQ can contain at most one
pending CQE per napi. Poll function is optimized accordingly.

Performance:
Single-stream packet-rate tested with pktgen.
Packets are dropped in tc level to zoom into driver data-path.
Larger gain is expected for larger packet sizes, as BW is higher
and UMR posts are more frequent.

---------------------------------------------
packet size | before    | after     | gain  |
64B         | 4,092,370 | 4,113,306 |  0.5% |
1024B       | 3,421,435 | 3,633,819 |  6.2% |

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent a2fa1fe5
Loading
Loading
Loading
Loading
+33 −29
Original line number Diff line number Diff line
@@ -49,10 +49,40 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
	return cqe;
}

/*
 * Handle one completion taken from the ICOSQ completion queue.
 *
 * @cq:   completion queue the CQE was read from; the CQE is popped from
 *        its work queue here.
 * @sq:   ICOSQ that owns @cq; supplies the WQE info table and the
 *        channel's RQ.
 * @cqe:  the completion entry to process.
 * @sqcc: running SQ consumer counter; advanced by the number of WQEBBs
 *        recorded for the completed WQE. It is advanced even when the
 *        CQE or the WQE info turns out to be invalid.
 *
 * A UMR completion posts the RX multi-packet WQE on the channel's RQ.
 * A NOP completion needs no further work. Anything else is reported
 * once through WARN_ONCE().
 */
static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
					     struct mlx5e_icosq *sq,
					     struct mlx5_cqe64 *cqe,
					     u16 *sqcc)
{
	struct mlx5e_sq_wqe_info *wi;
	struct mlx5e_rq *rq;
	u16 wqe_idx;

	/* The CQE's WQE counter, masked by sz_m1, indexes the WQE info table. */
	wqe_idx = be16_to_cpu(cqe->wqe_counter) & sq->wq.sz_m1;
	wi = &sq->db.ico_wqe[wqe_idx];
	rq = &sq->channel->rq;

	/* Start fetching the RQ early; the expected UMR path below uses it. */
	prefetch(rq);

	mlx5_cqwq_pop(&cq->wq);
	*sqcc += wi->num_wqebbs;

	/* The CQE opcode lives in the upper nibble of op_own. */
	if (WARN_ONCE((cqe->op_own >> 4) != MLX5_CQE_REQ,
		      "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n",
		      cqe->op_own))
		return;

	/* UMR is the expected case, so it is tested first. */
	if (likely(wi->opcode == MLX5_OPCODE_UMR))
		mlx5e_post_rx_mpwqe(rq);
	else
		WARN_ONCE(wi->opcode != MLX5_OPCODE_NOP,
			  "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n",
			  wi->opcode);
}

static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
	struct mlx5_wq_cyc *wq;
	struct mlx5_cqe64 *cqe;
	u16 sqcc;

@@ -63,39 +93,13 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
	if (likely(!cqe))
		return;

	wq = &sq->wq;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	do {
		u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
		struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];

		mlx5_cqwq_pop(&cq->wq);
		sqcc += icowi->num_wqebbs;

		if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
			WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n",
				  cqe->op_own);
			break;
		}

		switch (icowi->opcode) {
		case MLX5_OPCODE_NOP:
			break;
		case MLX5_OPCODE_UMR:
			mlx5e_post_rx_mpwqe(&sq->channel->rq);
			break;
		default:
			WARN_ONCE(true,
				  "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n",
				  icowi->opcode);
		}

	} while ((cqe = mlx5e_get_cqe(cq)));
	/* by design, there's only a single cqe */
	mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc);

	mlx5_cqwq_update_db_record(&cq->wq);