Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 417608c2 authored by Eli Cohen's avatar Eli Cohen Committed by Roland Dreier
Browse files

IB/mlx4: Remove limitation on LSO header size



Current code has a limitation: an LSO header is not allowed to cross a
64 byte boundary.  This patch removes this limitation by setting the
WQE RR for large headers thus allowing LSO headers of any size.  The
extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64
to 128 bytes, assuming this is reasonable upper limit for header
length.  Also, this patch will cause IB_DEVICE_UD_TSO to be set only
for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version
2.6.000 and higher.

Signed-off-by: default avatarEli Cohen <eli@mellanox.co.il>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent ecdc428e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
	if (dev->dev->caps.max_gso_sz)
	if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
		props->device_cap_flags |= IB_DEVICE_UD_TSO;
	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+12 −12
Original line number Diff line number Diff line
@@ -54,7 +54,8 @@ enum {
	/*
	 * Largest possible UD header: send with GRH and immediate data.
	 */
	MLX4_IB_UD_HEADER_SIZE		= 72
	MLX4_IB_UD_HEADER_SIZE		= 72,
	MLX4_IB_LSO_HEADER_SPARE	= 128,
};

struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
};

enum {
	MLX4_IB_MIN_SQ_STRIDE = 6
	MLX4_IB_MIN_SQ_STRIDE	= 6,
	MLX4_IB_CACHE_LINE_SIZE	= 64,
};

static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
	case IB_QPT_UD:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg) +
			((flags & MLX4_IB_QP_LSO) ? 64 : 0);
			((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
	case IB_QPT_UC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
@@ -1466,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)

static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
			 __be32 *lso_hdr_sz)
			 __be32 *lso_hdr_sz, __be32 *blh)
{
	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);

	/*
	 * This is a temporary limitation and will be removed in
	 * a forthcoming FW release:
	 */
	if (unlikely(halign > 64))
		return -EINVAL;
	if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
		*blh = cpu_to_be32(1 << 6);

	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1521,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
	__be32 dummy;
	__be32 *lso_wqe;
	__be32 uninitialized_var(lso_hdr_sz);
	__be32 blh;
	int i;

	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1529,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		lso_wqe = &dummy;
		blh = 0;

		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			err = -ENOMEM;
@@ -1615,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;

			if (wr->opcode == IB_WR_LSO) {
				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
				if (unlikely(err)) {
					*bad_wr = wr;
					goto out;
@@ -1686,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		}

		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;

		stamp = ind + qp->sq_spare_wqes;
		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
+1 −0
Original line number Diff line number Diff line
@@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
		[ 9] = "Q_Key violation counter",
		[10] = "VMM",
		[12] = "DPDP",
		[15] = "Big LSO headers",
		[16] = "MW support",
		[17] = "APM support",
		[18] = "Atomic ops support",
+1 −0
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@ enum {
	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
	MLX4_DEV_CAP_FLAG_BLH		= 1 << 15,
	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,