Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7bcb134 authored by Lijun Ou's avatar Lijun Ou Committed by Jason Gunthorpe
Browse files

RDMA/hns: Add SRQ support for hip08 kernel mode



This patch implements the SRQ(Share Receive Queue) verbs
and update the poll cq verbs to deal with SRQ complentions.

Signed-off-by: default avatarLijun Ou <oulijun@huawei.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 5c1f167a
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -120,6 +120,10 @@ enum {
	HNS_ROCE_CMD_SQD2RTS_QP		= 0x20,
	HNS_ROCE_CMD_2RST_QP		= 0x21,
	HNS_ROCE_CMD_QUERY_QP		= 0x22,
	HNS_ROCE_CMD_SW2HW_SRQ		= 0x70,
	HNS_ROCE_CMD_MODIFY_SRQC	= 0x72,
	HNS_ROCE_CMD_QUERY_SRQC		= 0x73,
	HNS_ROCE_CMD_HW2SW_SRQ		= 0x74,
};

int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
+57 −1
Original line number Diff line number Diff line
@@ -111,6 +111,9 @@
#define PAGES_SHIFT_24				24
#define PAGES_SHIFT_32				32

#define HNS_ROCE_IDX_QUE_ENTRY_SZ		4
#define SRQ_DB_REG				0x230

enum {
	HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
	HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -436,9 +439,37 @@ struct hns_roce_cq {
	struct completion		free;
};

struct hns_roce_idx_que {
	struct hns_roce_buf		idx_buf;
	int				entry_sz;
	u32				buf_size;
	struct ib_umem			*umem;
	struct hns_roce_mtt		mtt;
	u64				*bitmap;
};

struct hns_roce_srq {
	struct ib_srq		ibsrq;
	int			srqn;
	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
	unsigned long		srqn;
	int			max;
	int			max_gs;
	int			wqe_shift;
	void __iomem		*db_reg_l;

	atomic_t		refcount;
	struct completion	free;

	struct hns_roce_buf	buf;
	u64		       *wrid;
	struct ib_umem	       *umem;
	struct hns_roce_mtt	mtt;
	struct hns_roce_idx_que idx_que;
	spinlock_t		lock;
	int			head;
	int			tail;
	u16			wqe_ctr;
	struct mutex		mutex;
};

struct hns_roce_uar_table {
@@ -761,6 +792,12 @@ struct hns_roce_caps {
	u32		cqe_ba_pg_sz;
	u32		cqe_buf_pg_sz;
	u32		cqe_hop_num;
	u32		srqwqe_ba_pg_sz;
	u32		srqwqe_buf_pg_sz;
	u32		srqwqe_hop_num;
	u32		idx_ba_pg_sz;
	u32		idx_buf_pg_sz;
	u32		idx_hop_num;
	u32		eqe_ba_pg_sz;
	u32		eqe_buf_pg_sz;
	u32		eqe_hop_num;
@@ -829,6 +866,17 @@ struct hns_roce_hw {
	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
	int (*init_eq)(struct hns_roce_dev *hr_dev);
	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
	void (*write_srqc)(struct hns_roce_dev *hr_dev,
			   struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
			   void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
			   dma_addr_t dma_handle_wqe,
			   dma_addr_t dma_handle_idx);
	int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
		       enum ib_srq_attr_mask srq_attr_mask,
		       struct ib_udata *udata);
	int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
	int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr);
};

struct hns_roce_dev {
@@ -1038,6 +1086,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
			       struct hns_roce_mtt *mtt, struct ib_umem *umem);

struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
				   struct ib_srq_init_attr *srq_init_attr,
				   struct ib_udata *udata);
int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
			enum ib_srq_attr_mask srq_attr_mask,
			struct ib_udata *udata);
int hns_roce_destroy_srq(struct ib_srq *ibsrq);

struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
				 struct ib_qp_init_attr *init_attr,
				 struct ib_udata *udata);
+31 −1
Original line number Diff line number Diff line
@@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
	    (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
	    (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
	    (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
	    (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT))
	    (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
	    (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
	    (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
		return true;

	return false;
@@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
		mhop->ba_l0_num = mhop->bt_chunk_size / 8;
		mhop->hop_num = hr_dev->caps.cqe_hop_num;
		break;
	case HEM_TYPE_SRQWQE:
		mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
					    + PAGE_SHIFT);
		mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
					    + PAGE_SHIFT);
		mhop->ba_l0_num = mhop->bt_chunk_size / 8;
		mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
		break;
	case HEM_TYPE_IDX:
		mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
				       + PAGE_SHIFT);
		mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
				       + PAGE_SHIFT);
		mhop->ba_l0_num = mhop->bt_chunk_size / 8;
		mhop->hop_num = hr_dev->caps.idx_hop_num;
		break;
	default:
		dev_err(dev, "Table %d not support multi-hop addressing!\n",
			 table->type);
@@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
			bt_chunk_size = buf_chunk_size;
			hop_num = hr_dev->caps.cqe_hop_num;
			break;
		case HEM_TYPE_SRQWQE:
			buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
					+ PAGE_SHIFT);
			bt_chunk_size = buf_chunk_size;
			hop_num = hr_dev->caps.srqwqe_hop_num;
			break;
		case HEM_TYPE_IDX:
			buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
					+ PAGE_SHIFT);
			bt_chunk_size = buf_chunk_size;
			hop_num = hr_dev->caps.idx_hop_num;
			break;
		default:
			dev_err(dev,
			  "Table %d not support to init hem table here!\n",
+360 −24
Original line number Diff line number Diff line
@@ -1339,6 +1339,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
	caps->cqe_ba_pg_sz	= 0;
	caps->cqe_buf_pg_sz	= 0;
	caps->cqe_hop_num	= HNS_ROCE_CQE_HOP_NUM;
	caps->srqwqe_ba_pg_sz	= 0;
	caps->srqwqe_buf_pg_sz	= 0;
	caps->srqwqe_hop_num	= HNS_ROCE_SRQWQE_HOP_NUM;
	caps->idx_ba_pg_sz	= 0;
	caps->idx_buf_pg_sz	= 0;
	caps->idx_hop_num	= HNS_ROCE_IDX_HOP_NUM;
	caps->eqe_ba_pg_sz	= 0;
	caps->eqe_buf_pg_sz	= 0;
	caps->eqe_hop_num	= HNS_ROCE_EQE_HOP_NUM;
@@ -2028,6 +2034,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
	return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
}

static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
{
	return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
}

static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
{
	u32 bitmap_num;
	int bit_num;

	/* always called with interrupts disabled. */
	spin_lock(&srq->lock);

	bitmap_num = wqe_index / (sizeof(u64) * 8);
	bit_num = wqe_index % (sizeof(u64) * 8);
	srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
	srq->tail++;

	spin_unlock(&srq->lock);
}

static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
	*hr_cq->set_ci_db = cons_index & 0xffffff;
@@ -2039,6 +2066,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
	struct hns_roce_v2_cqe *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	int wqe_index;
	u8 owner_bit;

	for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
@@ -2056,7 +2084,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
		if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
				    V2_CQE_BYTE_16_LCL_QPN_S) &
				    HNS_ROCE_V2_CQE_QPN_MASK) == qpn) {
			/* In v1 engine, not support SRQ */
			if (srq &&
			    roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) {
				wqe_index = roce_get_field(cqe->byte_4,
						     V2_CQE_BYTE_4_WQE_INDX_M,
						     V2_CQE_BYTE_4_WQE_INDX_S);
				hns_roce_free_srq_wqe(srq, wqe_index);
			}
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
@@ -2233,6 +2267,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
				struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
	struct hns_roce_srq *srq = NULL;
	struct hns_roce_dev *hr_dev;
	struct hns_roce_v2_cqe *cqe;
	struct hns_roce_qp *hr_qp;
@@ -2275,6 +2310,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
	wc->qp = &(*cur_qp)->ibqp;
	wc->vendor_err = 0;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		if ((*cur_qp)->sq_signal_bits) {
			/*
			 * If sg_signal_bit is 1,
			 * firstly tail pointer updated to wqe
			 * which current cqe correspond to
			 */
			wqe_ctr = (u16)roce_get_field(cqe->byte_4,
						      V2_CQE_BYTE_4_WQE_INDX_M,
						      V2_CQE_BYTE_4_WQE_INDX_S);
			wq->tail += (wqe_ctr - (u16)wq->tail) &
				    (wq->wqe_cnt - 1);
		}

		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if ((*cur_qp)->ibqp.srq) {
		srq = to_hr_srq((*cur_qp)->ibqp.srq);
		wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
						     V2_CQE_BYTE_4_WQE_INDX_M,
						     V2_CQE_BYTE_4_WQE_INDX_S));
		wc->wr_id = srq->wrid[wqe_ctr];
		hns_roce_free_srq_wqe(srq, wqe_ctr);
	} else {
		/* Update tail pointer, record wr_id */
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
				V2_CQE_BYTE_4_STATUS_S);
	switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
@@ -2394,23 +2460,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
			wc->status = IB_WC_GENERAL_ERR;
			break;
		}

		wq = &(*cur_qp)->sq;
		if ((*cur_qp)->sq_signal_bits) {
			/*
			 * If sg_signal_bit is 1,
			 * firstly tail pointer updated to wqe
			 * which current cqe correspond to
			 */
			wqe_ctr = (u16)roce_get_field(cqe->byte_4,
						      V2_CQE_BYTE_4_WQE_INDX_M,
						      V2_CQE_BYTE_4_WQE_INDX_S);
			wq->tail += (wqe_ctr - (u16)wq->tail) &
				    (wq->wqe_cnt - 1);
		}

		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else {
		/* RQ correspond to CQE */
		wc->byte_len = le32_to_cpu(cqe->byte_cnt);
@@ -2455,11 +2504,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
				return -EAGAIN;
		}

		/* Update tail pointer, record wr_id */
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;

		wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
					    V2_CQE_BYTE_32_SL_S);
		wc->src_qp = (u8)roce_get_field(cqe->byte_32,
@@ -2768,6 +2812,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,

	roce_set_field(context->byte_20_smac_sgid_idx,
		       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
		       (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
		       hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
		       ilog2((unsigned int)hr_qp->rq.wqe_cnt));
	roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
		       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3109,6 +3155,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,

	roce_set_field(context->byte_20_smac_sgid_idx,
		       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
		       (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
		       hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
		       ilog2((unsigned int)hr_qp->rq.wqe_cnt));
	roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
		       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3810,6 +3858,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
		set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);

	roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
		     ibqp->srq ? 1 : 0);
	roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
		     V2_QPC_BYTE_108_INV_CREDIT_S, 0);

	/* Every status migrate must change state */
	roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
		       V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -4095,7 +4148,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
			hns_roce_free_db(hr_dev, &hr_qp->rdb);
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
	if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
	     hr_qp->rq.wqe_cnt) {
		kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
		kfree(hr_qp->rq_inl_buf.wqe_list);
	}
@@ -5350,6 +5404,284 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
	destroy_workqueue(hr_dev->irq_workq);
}

static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
				   struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
				   u32 cqn, void *mb_buf, u64 *mtts_wqe,
				   u64 *mtts_idx, dma_addr_t dma_handle_wqe,
				   dma_addr_t dma_handle_idx)
{
	struct hns_roce_srq_context *srq_context;

	srq_context = mb_buf;
	memset(srq_context, 0, sizeof(*srq_context));

	roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
		       SRQC_BYTE_4_SRQ_ST_S, 1);

	roce_set_field(srq_context->byte_4_srqn_srqst,
		       SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
		       SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
		       (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
		       hr_dev->caps.srqwqe_hop_num));
	roce_set_field(srq_context->byte_4_srqn_srqst,
		       SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
		       ilog2(srq->max));

	roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
		       SRQC_BYTE_4_SRQN_S, srq->srqn);

	roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
		       SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);

	roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
		       SRQC_BYTE_12_SRQ_XRCD_S, xrcd);

	srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));

	roce_set_field(srq_context->byte_24_wqe_bt_ba,
		       SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
		       SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
		       cpu_to_le32(dma_handle_wqe >> 35));

	roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
		       SRQC_BYTE_28_PD_S, pdn);
	roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
		       SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
		       fls(srq->max_gs - 1));

	srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
	srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
	roce_set_field(srq_context->rsv_idx_bt_ba,
		       SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
		       SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
		       cpu_to_le32(dma_handle_idx >> 35));

	srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
	srq_context->idx_cur_blk_addr =
				     cpu_to_le32(srq_context->idx_cur_blk_addr);
	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
		       SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
		       SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
		       cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
		       SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
		       SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
		       hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
		       hr_dev->caps.idx_hop_num);

	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
		       SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
		       SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
		       hr_dev->caps.idx_ba_pg_sz);
	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
		       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
		       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
		       hr_dev->caps.idx_buf_pg_sz);

	srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
	srq_context->idx_nxt_blk_addr =
				   cpu_to_le32(srq_context->idx_nxt_blk_addr);
	roce_set_field(srq_context->rsv_idxnxtblkaddr,
		       SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
		       SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
		       cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
	roce_set_field(srq_context->byte_56_xrc_cqn,
		       SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
		       cqn);
	roce_set_field(srq_context->byte_56_xrc_cqn,
		       SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
		       SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
		       hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
	roce_set_field(srq_context->byte_56_xrc_cqn,
		       SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
		       SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
		       hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);

	roce_set_bit(srq_context->db_record_addr_record_en,
		     SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
}

static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
				  struct ib_srq_attr *srq_attr,
				  enum ib_srq_attr_mask srq_attr_mask,
				  struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
	struct hns_roce_srq *srq = to_hr_srq(ibsrq);
	struct hns_roce_srq_context *srq_context;
	struct hns_roce_srq_context *srqc_mask;
	struct hns_roce_cmd_mailbox *mailbox;
	int ret;

	if (srq_attr_mask & IB_SRQ_LIMIT) {
		if (srq_attr->srq_limit >= srq->max)
			return -EINVAL;

		mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
		if (IS_ERR(mailbox))
			return PTR_ERR(mailbox);

		srq_context = mailbox->buf;
		srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;

		memset(srqc_mask, 0xff, sizeof(*srqc_mask));

		roce_set_field(srq_context->byte_8_limit_wl,
			       SRQC_BYTE_8_SRQ_LIMIT_WL_M,
			       SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
		roce_set_field(srqc_mask->byte_8_limit_wl,
			       SRQC_BYTE_8_SRQ_LIMIT_WL_M,
			       SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);

		ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
					HNS_ROCE_CMD_MODIFY_SRQC,
					HNS_ROCE_CMD_TIMEOUT_MSECS);
		hns_roce_free_cmd_mailbox(hr_dev, mailbox);
		if (ret) {
			dev_err(hr_dev->dev,
				"MODIFY SRQ Failed to cmd mailbox.\n");
			return ret;
		}
	}

	return 0;
}

int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
	struct hns_roce_srq *srq = to_hr_srq(ibsrq);
	struct hns_roce_srq_context *srq_context;
	struct hns_roce_cmd_mailbox *mailbox;
	int limit_wl;
	int ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	srq_context = mailbox->buf;
	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
				HNS_ROCE_CMD_QUERY_SRQC,
				HNS_ROCE_CMD_TIMEOUT_MSECS);
	if (ret) {
		dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
		goto out;
	}

	limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
				  SRQC_BYTE_8_SRQ_LIMIT_WL_M,
				  SRQC_BYTE_8_SRQ_LIMIT_WL_S);

	attr->srq_limit = limit_wl;
	attr->max_wr    = srq->max - 1;
	attr->max_sge   = srq->max_gs;

	memcpy(srq_context, mailbox->buf, sizeof(*srq_context));

out:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
	return ret;
}

static int find_empty_entry(struct hns_roce_idx_que *idx_que)
{
	int bit_num;
	int i;

	/* bitmap[i] is set zero if all bits are allocated */
	for (i = 0; idx_que->bitmap[i] == 0; ++i)
		;
	bit_num = ffs(idx_que->bitmap[i]);
	idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));

	return i * sizeof(u64) * 8 + (bit_num - 1);
}

static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
			   int cur_idx, int wqe_idx)
{
	unsigned int *addr;

	addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
						   cur_idx * idx_que->entry_sz);
	*addr = wqe_idx;
}

static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
				     const struct ib_recv_wr *wr,
				     const struct ib_recv_wr **bad_wr)
{
	struct hns_roce_srq *srq = to_hr_srq(ibsrq);
	struct hns_roce_v2_wqe_data_seg *dseg;
	struct hns_roce_v2_db srq_db;
	unsigned long flags;
	int ret = 0;
	int wqe_idx;
	void *wqe;
	int nreq;
	int ind;
	int i;

	spin_lock_irqsave(&srq->lock, flags);

	ind = srq->head & (srq->max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(wr->num_sge > srq->max_gs)) {
			ret = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely(srq->head == srq->tail)) {
			ret = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		wqe_idx = find_empty_entry(&srq->idx_que);
		fill_idx_queue(&srq->idx_que, ind, wqe_idx);
		wqe = get_srq_wqe(srq, wqe_idx);
		dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;

		for (i = 0; i < wr->num_sge; ++i) {
			dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
			dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
			dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
		}

		if (i < srq->max_gs) {
			dseg->len = 0;
			dseg->lkey = cpu_to_le32(0x100);
			dseg->addr = 0;
		}

		srq->wrid[wqe_idx] = wr->wr_id;
		ind = (ind + 1) & (srq->max - 1);
	}

	if (likely(nreq)) {
		srq->head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
		srq_db.parameter = srq->head;

		hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);

	}

	spin_unlock_irqrestore(&srq->lock, flags);

	return ret;
}

static const struct hns_roce_hw hns_roce_hw_v2 = {
	.cmq_init = hns_roce_v2_cmq_init,
	.cmq_exit = hns_roce_v2_cmq_exit,
@@ -5377,6 +5709,10 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
	.poll_cq = hns_roce_v2_poll_cq,
	.init_eq = hns_roce_v2_init_eq_table,
	.cleanup_eq = hns_roce_v2_cleanup_eq_table,
	.write_srqc = hns_roce_v2_write_srqc,
	.modify_srq = hns_roce_v2_modify_srq,
	.query_srq = hns_roce_v2_query_srq,
	.post_srq_recv = hns_roce_v2_post_srq_recv,
};

static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
+86 −0
Original line number Diff line number Diff line
@@ -93,8 +93,10 @@
#define HNS_ROCE_CONTEXT_HOP_NUM		1
#define HNS_ROCE_MTT_HOP_NUM			1
#define HNS_ROCE_CQE_HOP_NUM			1
#define HNS_ROCE_SRQWQE_HOP_NUM			1
#define HNS_ROCE_PBL_HOP_NUM			2
#define HNS_ROCE_EQE_HOP_NUM			2
#define HNS_ROCE_IDX_HOP_NUM			1

#define HNS_ROCE_V2_GID_INDEX_NUM		256

@@ -334,6 +336,90 @@ struct hns_roce_v2_cq_context {
#define	V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define	V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)

struct hns_roce_srq_context {
	__le32	byte_4_srqn_srqst;
	__le32	byte_8_limit_wl;
	__le32	byte_12_xrcd;
	__le32	byte_16_pi_ci;
	__le32	wqe_bt_ba;
	__le32	byte_24_wqe_bt_ba;
	__le32	byte_28_rqws_pd;
	__le32	idx_bt_ba;
	__le32	rsv_idx_bt_ba;
	__le32	idx_cur_blk_addr;
	__le32	byte_44_idxbufpgsz_addr;
	__le32	idx_nxt_blk_addr;
	__le32	rsv_idxnxtblkaddr;
	__le32	byte_56_xrc_cqn;
	__le32	db_record_addr_record_en;
	__le32	db_record_addr;
};

#define SRQC_BYTE_4_SRQ_ST_S 0
#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)

#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2
#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2)

#define SRQC_BYTE_4_SRQ_SHIFT_S 4
#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4)

#define SRQC_BYTE_4_SRQN_S 8
#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8)

#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0
#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0)

#define SRQC_BYTE_12_SRQ_XRCD_S 0
#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0)

#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0
#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0)

#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0
#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16)

#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0
#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0)

#define SRQC_BYTE_28_PD_S 0
#define SRQC_BYTE_28_PD_M GENMASK(23, 0)

#define SRQC_BYTE_28_RQWS_S 24
#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24)

#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0
#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0)

#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0
#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0)

#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22
#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22)

#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24
#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24)

#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28
#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28)

#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0
#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0)

#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0
#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0)

#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24
#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24)

#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28
#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28)

#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0

#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)

enum{
	V2_MPT_ST_VALID = 0x1,
	V2_MPT_ST_FREE	= 0x2,
Loading