Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b3f4256f authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  IB/mlx4: Make sure inline data segments don't cross a 64 byte boundary
  IB/mlx4: Handle FW command interface rev 3
  IB/mlx4: Handle buffer wraparound in __mlx4_ib_cq_clean()
  IB/mlx4: Get rid of max_inline_data calculation
  IB/mlx4: Handle new FW requirement for send request prefetching
  IB/mlx4: Fix warning in rounding up queue sizes
  IB/mlx4: Fix handling of wq->tail for send completions
parents 044f620a e61ef241
Loading
Loading
Loading
Loading
+12 −7
Original line number Diff line number Diff line
@@ -354,8 +354,8 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wq->tail += wqe_ctr - (u16) wq->tail;
		wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
		wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if ((*cur_qp)->ibqp.srq) {
		srq = to_msrq((*cur_qp)->ibqp.srq);
@@ -364,7 +364,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
		mlx4_ib_free_srq_wqe(srq, wqe_ctr);
	} else {
		wq	  = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

@@ -478,7 +478,8 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
	u32 prod_index;
	int nfreed = 0;
	struct mlx4_cqe *cqe;
	struct mlx4_cqe *cqe, *dest;
	u8 owner_bit;

	/*
	 * First we need to find the current producer index, so we
@@ -501,9 +502,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
			++nfreed;
		} else if (nfreed)
			memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
			       cqe, sizeof *cqe);
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
			memcpy(dest, cqe, sizeof *cqe);
			dest->owner_sr_opcode = owner_bit |
				(dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
+11 −5
Original line number Diff line number Diff line
@@ -125,7 +125,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
	props->max_pkeys	   = dev->dev->caps.pkey_table_len;
	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -168,9 +168,9 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
	props->state		= out_mad->data[32] & 0xf;
	props->phys_state	= out_mad->data[33] >> 4;
	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
	props->gid_tbl_len	= to_mdev(ibdev)->dev->caps.gid_table_len;
	props->gid_tbl_len	= to_mdev(ibdev)->dev->caps.gid_table_len[port];
	props->max_msg_sz	= 0x80000000;
	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len;
	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
	props->active_width	= out_mad->data[31] & 0xf;
@@ -280,8 +280,14 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
		return PTR_ERR(mailbox);

	memset(mailbox->buf, 0, 256);

	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
		*(u8 *) mailbox->buf	     = !!reset_qkey_viols << 6;
		((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
	} else {
		((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
	}

	err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
		       MLX4_CMD_TIME_CLASS_B);
+4 −1
Original line number Diff line number Diff line
@@ -95,7 +95,8 @@ struct mlx4_ib_mr {
struct mlx4_ib_wq {
	u64		       *wrid;
	spinlock_t		lock;
	int			max;
	int			wqe_cnt;
	int			max_post;
	int			max_gs;
	int			offset;
	int			wqe_shift;
@@ -113,6 +114,7 @@ struct mlx4_ib_qp {

	u32			doorbell_qpn;
	__be32			sq_signal_bits;
	int			sq_spare_wqes;
	struct mlx4_ib_wq	sq;

	struct ib_umem	       *umem;
@@ -123,6 +125,7 @@ struct mlx4_ib_qp {
	u8			alt_port;
	u8			atomic_rd_en;
	u8			resp_depth;
	u8			sq_no_prefetch;
	u8			state;
};

+128 −68
Original line number Diff line number Diff line
@@ -109,6 +109,20 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	u32 *wqe = get_send_wqe(qp, n);
	int i;

	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
		wqe[i] = 0xffffffff;
}

static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
	struct ib_event event;
@@ -178,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type)
	case IB_QPT_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			ALIGN(MLX4_IB_UD_HEADER_SIZE +
			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
					   MLX4_INLINE_ALIGN) *
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg)) +
			ALIGN(4 +
@@ -201,18 +217,18 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
		if (cap->max_recv_wr)
			return -EINVAL;

		qp->rq.max = qp->rq.max_gs = 0;
		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
	} else {
		/* HW requires >= 1 RQ entry with >= 1 gather entry */
		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
			return -EINVAL;

		qp->rq.max	 = roundup_pow_of_two(max(1, cap->max_recv_wr));
		qp->rq.max_gs	 = roundup_pow_of_two(max(1, cap->max_recv_sge));
		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
	}

	cap->max_recv_wr  = qp->rq.max;
	cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
	cap->max_recv_sge = qp->rq.max_gs;

	return 0;
@@ -236,8 +252,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
		return -EINVAL;

	qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;

	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
							sizeof (struct mlx4_wqe_data_seg),
							cap->max_inline_data +
@@ -246,20 +260,27 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
	qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
		sizeof (struct mlx4_wqe_data_seg);

	qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
		(qp->sq.max << qp->sq.wqe_shift);
	/*
	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
	 * allow HW to prefetch.
	 */
	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
	qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	cap->max_send_wr     = qp->sq.max;
	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
	cap->max_send_sge = qp->sq.max_gs;
	cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
		sizeof (struct mlx4_wqe_inline_seg);
	/* We don't support inline sends for kernel QPs (yet) */
	cap->max_inline_data = 0;

	return 0;
}
@@ -267,11 +288,11 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
static int set_user_sq_size(struct mlx4_ib_qp *qp,
			    struct mlx4_ib_create_qp *ucmd)
{
	qp->sq.max       = 1 << ucmd->log_sq_bb_count;
	qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
	qp->sq.wqe_shift = ucmd->log_sq_stride;

	qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
		(qp->sq.max << qp->sq.wqe_shift);
	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);

	return 0;
}
@@ -307,6 +328,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
			goto err;
		}

		qp->sq_no_prefetch = ucmd.sq_no_prefetch;

		err = set_user_sq_size(qp, &ucmd);
		if (err)
			goto err;
@@ -334,6 +357,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
				goto err_mtt;
		}
	} else {
		qp->sq_no_prefetch = 0;

		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
		if (err)
			goto err;
@@ -360,16 +385,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
		if (err)
			goto err_mtt;

		qp->sq.wrid  = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
		qp->rq.wrid  = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);

		if (!qp->sq.wrid || !qp->rq.wrid) {
			err = -ENOMEM;
			goto err_wrid;
		}

		/* We don't support inline sends for kernel QPs (yet) */
		init_attr->cap.max_inline_data = 0;
	}

	err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
@@ -583,24 +605,6 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
	return 0;
}

static void init_port(struct mlx4_ib_dev *dev, int port)
{
	struct mlx4_init_port_param param;
	int err;

	memset(&param, 0, sizeof param);

	param.port_width_cap = dev->dev->caps.port_width_cap;
	param.vl_cap	     = dev->dev->caps.vl_cap;
	param.mtu	     = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap);
	param.max_gid	     = dev->dev->caps.gid_table_len;
	param.max_pkey	     = dev->dev->caps.pkey_table_len;

	err = mlx4_INIT_PORT(dev->dev, &param, port);
	if (err)
		printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err);
}

static int to_mlx4_st(enum ib_qp_type type)
{
	switch (type) {
@@ -674,9 +678,9 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
	path->counter_index = 0xff;

	if (ah->ah_flags & IB_AH_GRH) {
		if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) {
		if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
			printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
			       ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1);
			       ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
			return -1;
		}

@@ -743,14 +747,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
		context->mtu_msgmax = (attr->path_mtu << 5) | 31;
	}

	if (qp->rq.max)
		context->rq_size_stride = ilog2(qp->rq.max) << 3;
	if (qp->rq.wqe_cnt)
		context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
	context->rq_size_stride |= qp->rq.wqe_shift - 4;

	if (qp->sq.max)
		context->sq_size_stride = ilog2(qp->sq.max) << 3;
	if (qp->sq.wqe_cnt)
		context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
	context->sq_size_stride |= qp->sq.wqe_shift - 4;

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		context->sq_size_stride |= !!qp->sq_no_prefetch << 7;

	if (qp->ibqp.uobject)
		context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
	else
@@ -789,13 +796,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len)
			return -EINVAL;

		if (attr->alt_port_num == 0 ||
		    attr->alt_port_num > dev->dev->caps.num_ports)
			return -EINVAL;

		if (attr->alt_pkey_index >=
		    dev->dev->caps.pkey_table_len[attr->alt_port_num])
			return -EINVAL;

		if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
				  attr->alt_port_num))
			return -EINVAL;
@@ -884,16 +892,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,

	/*
	 * Before passing a kernel QP to the HW, make sure that the
	 * ownership bits of the send queue are set so that the
	 * hardware doesn't start processing stale work requests.
	 * ownership bits of the send queue are set and the SQ
	 * headroom is stamped so that the hardware doesn't start
	 * processing stale work requests.
	 */
	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		struct mlx4_wqe_ctrl_seg *ctrl;
		int i;

		for (i = 0; i < qp->sq.max; ++i) {
		for (i = 0; i < qp->sq.wqe_cnt; ++i) {
			ctrl = get_send_wqe(qp, i);
			ctrl->owner_opcode = cpu_to_be32(1 << 31);

			stamp_send_wqe(qp, i);
		}
	}

@@ -923,7 +934,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
	 */
	if (is_qp0(dev, qp)) {
		if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
			init_port(dev, qp->port);
			if (mlx4_INIT_PORT(dev->dev, qp->port))
				printk(KERN_WARNING "INIT_PORT failed for port %d\n",
				       qp->port);

		if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
		    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
@@ -986,13 +999,14 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
		goto out;

	if ((attr_mask & IB_QP_PKEY_INDEX) &&
	     attr->pkey_index >= dev->dev->caps.pkey_table_len) {
	if ((attr_mask & IB_QP_PORT) &&
	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
		goto out;
	}

	if ((attr_mask & IB_QP_PORT) &&
	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
	if (attr_mask & IB_QP_PKEY_INDEX) {
		int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
		if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
			goto out;
	}

@@ -1037,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
	u16 pkey;
	int send_size;
	int header_size;
	int spc;
	int i;

	send_size = 0;
@@ -1112,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
		printk("\n");
	}

	/*
	 * Inline data segments may not cross a 64 byte boundary.  If
	 * our UD header is bigger than the space available up to the
	 * next 64 byte boundary in the WQE, use two inline data
	 * segments to hold the UD header.
	 */
	spc = MLX4_INLINE_ALIGN -
		((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
	if (header_size <= spc) {
		inl->byte_count = cpu_to_be32(1 << 31 | header_size);
		memcpy(inl + 1, sqp->header_buf, header_size);
		i = 1;
	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		memcpy(inl + 1, sqp->header_buf, spc);

		inl = (void *) (inl + 1) + spc;
		memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
		/*
		 * Need a barrier here to make sure all the data is
		 * visible before the byte_count field is set.
		 * Otherwise the HCA prefetcher could grab the 64-byte
		 * chunk with this inline segment and get a valid (!=
		 * 0xffffffff) byte count but stale data, and end up
		 * generating a packet with bad headers.
		 *
		 * The first inline segment's byte_count field doesn't
		 * need a barrier, because it comes after a
		 * control/MLX segment and therefore is at an offset
		 * of 16 mod 64.
		 */
		wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
		i = 2;
	}

	return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
	return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
}

static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
@@ -1124,7 +1172,7 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
	struct mlx4_ib_cq *cq;

	cur = wq->head - wq->tail;
	if (likely(cur + nreq < wq->max))
	if (likely(cur + nreq < wq->max_post))
		return 0;

	cq = to_mcq(ib_cq);
@@ -1132,7 +1180,7 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
	cur = wq->head - wq->tail;
	spin_unlock(&cq->lock);

	return cur + nreq >= wq->max;
	return cur + nreq >= wq->max_post;
}

int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1165,8 +1213,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
		qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->srcrb_flags =
			(wr->send_flags & IB_SEND_SIGNALED ?
@@ -1301,7 +1349,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		}

		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
			(ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0);
			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}
@@ -1324,6 +1381,9 @@ out:
		 * and reach the HCA out of order.
		 */
		mmiowb();

		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);
@@ -1344,7 +1404,7 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,

	spin_lock_irqsave(&qp->rq.lock, flags);

	ind = qp->rq.head & (qp->rq.max - 1);
	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
@@ -1375,7 +1435,7 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.max - 1);
		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
+5 −4
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@
 * Increment this value if any changes that break userspace ABI
 * compatibility are made.
 */
#define MLX4_IB_UVERBS_ABI_VERSION	2
#define MLX4_IB_UVERBS_ABI_VERSION	3

/*
 * Make sure that all structs defined in this file remain laid out so
@@ -89,7 +89,8 @@ struct mlx4_ib_create_qp {
	__u64	db_addr;
	__u8	log_sq_bb_count;
	__u8	log_sq_stride;
        __u8	reserved[6];
	__u8	sq_no_prefetch;
	__u8	reserved[5];
};

#endif /* MLX4_IB_USER_H */
Loading