
Commit 3a2f7033 authored by Tariq Toukan, committed by Saeed Mahameed

net/mlx5: Use order-0 allocations for all WQ types



Complete the transition of all WQ types to use fragmented
order-0 coherent memory instead of high-order allocations.

CQ-WQ already uses order-0.
Here we do the same for cyclic and linked-list WQs.

This allows the driver to load cleanly on systems with highly
fragmented coherent memory.

Performance tests:
ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
Packet rate of 64B packets, single transmit ring, ring size 8K.

No degradation was observed.
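
For context on the fragmented scheme: the WQ ring is assembled from order-0
(single-page) fragments, the producer counter maps both to a global ring index
and to a fragment-relative index, and a multi-WQEBB WQE that would straddle a
fragment boundary is preceded by NOP fill up to the fragment edge. Below is a
minimal standalone sketch of that logic; the sizes and helper names (ctr2ix,
ctr2fragix, post_wqe) are hypothetical, not the driver's API.

#include <stdint.h>
#include <stdio.h>

/* Sketch of fragment-relative indexing for a cyclic WQ built from
 * order-0 fragments. Names and sizes are hypothetical.
 */
#define LOG_WQ_SZ   10                     /* 1024 WQEBBs in the ring     */
#define LOG_FRAG_SZ  6                     /* 64 WQEBBs per page fragment */
#define WQ_SZ       (1u << LOG_WQ_SZ)
#define FRAG_SZ     (1u << LOG_FRAG_SZ)

static uint16_t ctr2ix(uint16_t ctr)     { return ctr & (WQ_SZ - 1); }
static uint16_t ctr2fragix(uint16_t ctr) { return ctr & (FRAG_SZ - 1); }

/* Post a WQE of num_wqebbs basic blocks; pad with NOPs first if it
 * would otherwise span two fragments (two non-contiguous pages).
 */
static void post_wqe(uint16_t *pc, uint8_t num_wqebbs)
{
	uint16_t frag_pi = ctr2fragix(*pc);

	if (frag_pi + num_wqebbs > FRAG_SZ) {
		uint16_t nnops = FRAG_SZ - frag_pi;

		printf("pc=%u: fill %u NOPs up to the fragment edge\n",
		       (unsigned)ctr2ix(*pc), (unsigned)nnops);
		*pc += nnops;              /* each NOP consumes one WQEBB */
	}
	printf("pc=%u: post WQE of %u WQEBBs\n",
	       (unsigned)ctr2ix(*pc), (unsigned)num_wqebbs);
	*pc += num_wqebbs;
}

int main(void)
{
	uint16_t pc = 62;                  /* 2 WQEBBs left in fragment 0 */

	post_wqe(&pc, 4);                  /* NOP fill, then post at 64   */
	return 0;
}

The same pattern appears in the hunks below as mlx5_wq_cyc_ctr2fragix() and the
mlx5e_fill_sq_frag_edge()/mlx5e_fill_icosq_frag_edge() helpers.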

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent 549322f2
+1 −1
@@ -314,7 +314,7 @@ struct mlx5e_cq {

	/* control */
	struct mlx5_core_dev      *mdev;
-	struct mlx5_frag_wq_ctrl   wq_ctrl;
+	struct mlx5_wq_ctrl        wq_ctrl;
} ____cacheline_aligned_in_smp;

struct mlx5e_tx_wqe_info {
+8 −7
@@ -646,7 +646,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
						MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq,  dbr_addr,		rq->wq_ctrl.db.dma);

-	mlx5_fill_page_array(&rq->wq_ctrl.buf,
+	mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
@@ -1096,7 +1096,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
					  MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);

-	mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+	mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
+				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(mdev, in, inlen, sqn);

@@ -1538,7 +1539,7 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,

static void mlx5e_free_cq(struct mlx5e_cq *cq)
{
-	mlx5_cqwq_destroy(&cq->wq_ctrl);
+	mlx5_wq_destroy(&cq->wq_ctrl);
}

static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1554,7 +1555,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
+		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;
@@ -1563,7 +1564,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)

	memcpy(cqc, param->cqc, sizeof(param->cqc));

-	mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));

	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
@@ -1571,7 +1572,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
	MLX5_SET(cqc,   cqc, c_eqn,         eqn);
	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
-	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
+	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
					    MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);

+9 −8
@@ -383,16 +383,16 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
	return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
}

-static inline void mlx5e_fill_icosq_edge(struct mlx5e_icosq *sq,
-					 struct mlx5_wq_cyc *wq,
-					 u16 pi)
+static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
+					      struct mlx5_wq_cyc *wq,
+					      u16 pi, u16 frag_pi)
{
	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-	u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;

	edge_wi = wi + nnops;

-	/* fill sq edge with nops to avoid wqe wrapping two pages */
+	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
	for (; wi < edge_wi; wi++) {
		wi->opcode = MLX5_OPCODE_NOP;
		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
@@ -407,14 +407,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi;
+	u16 pi, frag_pi;
	int err;
	int i;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);

-	if (unlikely(pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_size(wq))) {
-		mlx5e_fill_icosq_edge(sq, wq, pi);
+	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
+		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

+13 −11
@@ -296,16 +296,16 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
	return -ENOMEM;
}

-static inline void mlx5e_fill_sq_edge(struct mlx5e_txqsq *sq,
-				      struct mlx5_wq_cyc *wq,
-				      u16 pi)
+static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
+					   struct mlx5_wq_cyc *wq,
+					   u16 pi, u16 frag_pi)
{
	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-	u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;

	edge_wi = wi + nnops;

-	/* fill sq edge with nops to avoid wqe wrap around */
+	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
	for (; wi < edge_wi; wi++) {
		wi->skb        = NULL;
		wi->num_wqebbs = 1;
@@ -358,8 +358,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
	unsigned char *skb_data = skb->data;
	unsigned int skb_len = skb->len;
	u16 ds_cnt, ds_cnt_inl = 0;
+	u16 headlen, ihs, frag_pi;
	u8 num_wqebbs, opcode;
-	u16 headlen, ihs;
	u32 num_bytes;
	int num_dma;
	__be16 mss;
@@ -395,8 +395,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
	}

	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	if (unlikely(pi + num_wqebbs > mlx5_wq_cyc_get_size(wq))) {
-		mlx5e_fill_sq_edge(sq, wq, pi);
+	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
+	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
	}

@@ -642,9 +643,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,

	unsigned char *skb_data = skb->data;
	unsigned int skb_len = skb->len;
+	u16 headlen, ihs, pi, frag_pi;
	u16 ds_cnt, ds_cnt_inl = 0;
	u8 num_wqebbs, opcode;
-	u16 headlen, ihs, pi;
	u32 num_bytes;
	int num_dma;
	__be16 mss;
@@ -680,8 +681,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
	}

	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	if (unlikely(pi + num_wqebbs > mlx5_wq_cyc_get_size(wq))) {
-		mlx5e_fill_sq_edge(sq, wq, pi);
+	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
+	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
		mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
	}

+7 −7
@@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages;
+		sizeof(u64) * conn->cq.wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
@@ -469,12 +469,12 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
-	MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift -
+	MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
			   MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
-	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas);
+	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);

	err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
	kvfree(in);
@@ -500,7 +500,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
	goto out;

err_cqwq:
-	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+	mlx5_wq_destroy(&conn->cq.wq_ctrl);
out:
	return err;
}
@@ -510,7 +510,7 @@ static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
	tasklet_disable(&conn->cq.tasklet);
	tasklet_kill(&conn->cq.tasklet);
	mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
-	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+	mlx5_wq_destroy(&conn->cq.wq_ctrl);
}

static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
@@ -591,7 +591,7 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);

-	mlx5_fill_page_array(&conn->qp.wq_ctrl.buf,
+	mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));

	err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);