Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 93aa2187 authored by Wei Hu(Xavier)'s avatar Wei Hu(Xavier) Committed by Doug Ledford
Browse files

RDMA/hns: Add CQ operations support for hip08 RoCE driver



This patch adds CQ relevant operations for hip08 RoCE driver,
such as create CQ, destroy CQ, poll CQ and Request Completion
Notification(req_notify_cq).

Signed-off-by: default avatarLijun Ou <oulijun@huawei.com>
Signed-off-by: default avatarShaobo Xu <xushaobo2@huawei.com>
Signed-off-by: default avatarWei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 7afddafa
Loading
Loading
Loading
Loading
+14 −14
Original line number Diff line number Diff line
@@ -195,7 +195,7 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
	if (ret)
		dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
			hr_cq->cqn);

	if (hr_dev->eq_table.eq) {
		/* Waiting interrupt process procedure carried out */
		synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);

@@ -203,6 +203,7 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
		if (atomic_dec_and_test(&hr_cq->refcount))
			complete(&hr_cq->free);
		wait_for_completion(&hr_cq->free);
	}

	spin_lock_irq(&cq_table->lock);
	radix_tree_delete(&cq_table->tree, hr_cq->cqn);
@@ -303,7 +304,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
	struct hns_roce_uar *uar = NULL;
	int vector = attr->comp_vector;
	int cq_entries = attr->cqe;
	int ret = 0;
	int ret;

	if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
		dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
@@ -311,13 +312,12 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
		return ERR_PTR(-EINVAL);
	}

	hr_cq = kmalloc(sizeof(*hr_cq), GFP_KERNEL);
	hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
	if (!hr_cq)
		return ERR_PTR(-ENOMEM);

	/* In v1 engine, parameter verification */
	if (cq_entries < HNS_ROCE_MIN_CQE_NUM)
		cq_entries = HNS_ROCE_MIN_CQE_NUM;
	if (hr_dev->caps.min_cqes)
		cq_entries = max(cq_entries, hr_dev->caps.min_cqes);

	cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
	hr_cq->ib_cq.cqe = cq_entries - 1;
@@ -369,7 +369,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
	 * problems if tptr is set to zero here, so we initialze it in user
	 * space.
	 */
	if (!context)
	if (!context && hr_cq->tptr_addr)
		*hr_cq->tptr_addr = 0;

	/* Get created cq handler and carry out event */
+1 −0
Original line number Diff line number Diff line
@@ -508,6 +508,7 @@ struct hns_roce_caps {
	int		max_qp_dest_rdma;
	int		num_cqs;
	int		max_cqes;
	int		min_cqes;
	int		reserved_cqs;
	int		num_aeq_vectors;	/* 1 */
	int		num_comp_vectors;	/* 32 ceq */
+1 −0
Original line number Diff line number Diff line
@@ -1476,6 +1476,7 @@ int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
	caps->num_qps		= HNS_ROCE_V1_MAX_QP_NUM;
	caps->max_wqes		= HNS_ROCE_V1_MAX_WQE_NUM;
	caps->num_cqs		= HNS_ROCE_V1_MAX_CQ_NUM;
	caps->min_cqes		= HNS_ROCE_MIN_CQE_NUM;
	caps->max_cqes		= HNS_ROCE_V1_MAX_CQE_NUM;
	caps->max_sq_sg		= HNS_ROCE_V1_SG_NUM;
	caps->max_rq_sg		= HNS_ROCE_V1_SG_NUM;
+376 −0
Original line number Diff line number Diff line
@@ -765,6 +765,379 @@ static void hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
	roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val);
}

static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
	return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
				   n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
}

static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
	struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe);

	/* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
	return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^
		!!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL;
}

static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
{
	return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
}

static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
	struct hns_roce_v2_cq_db cq_db;

	cq_db.byte_4 = 0;
	cq_db.parameter = 0;

	roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_TAG_M,
		       V2_CQ_DB_BYTE_4_TAG_S, hr_cq->cqn);
	roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_CMD_M,
		       V2_CQ_DB_BYTE_4_CMD_S, HNS_ROCE_V2_CQ_DB_PTR);

	roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CONS_IDX_M,
		       V2_CQ_DB_PARAMETER_CONS_IDX_S,
		       cons_index & ((hr_cq->cq_depth << 1) - 1));
	roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CMD_SN_M,
		       V2_CQ_DB_PARAMETER_CMD_SN_S, 1);

	hns_roce_write64_k((__be32 *)&cq_db, hr_cq->cq_db_l);

}

static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
				  struct hns_roce_cq *hr_cq, void *mb_buf,
				  u64 *mtts, dma_addr_t dma_handle, int nent,
				  u32 vector)
{
	struct hns_roce_v2_cq_context *cq_context;

	cq_context = mb_buf;
	memset(cq_context, 0, sizeof(*cq_context));

	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CQ_ST_M,
		       V2_CQC_BYTE_4_CQ_ST_S, V2_CQ_STATE_VALID);
	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
		       V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
		       V2_CQC_BYTE_4_CEQN_S, vector);
	cq_context->byte_4_pg_ceqn = cpu_to_le32(cq_context->byte_4_pg_ceqn);

	roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
		       V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);

	cq_context->cqe_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
	cq_context->cqe_cur_blk_addr =
				cpu_to_le32(cq_context->cqe_cur_blk_addr);

	roce_set_field(cq_context->byte_16_hop_addr,
		       V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M,
		       V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S,
		       cpu_to_le32((mtts[0]) >> (32 + PAGE_ADDR_SHIFT)));
	roce_set_field(cq_context->byte_16_hop_addr,
		       V2_CQC_BYTE_16_CQE_HOP_NUM_M,
		       V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num ==
		       HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);

	cq_context->cqe_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
	roce_set_field(cq_context->byte_24_pgsz_addr,
		       V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M,
		       V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S,
		       cpu_to_le32((mtts[1]) >> (32 + PAGE_ADDR_SHIFT)));
	roce_set_field(cq_context->byte_24_pgsz_addr,
		       V2_CQC_BYTE_24_CQE_BA_PG_SZ_M,
		       V2_CQC_BYTE_24_CQE_BA_PG_SZ_S,
		       hr_dev->caps.cqe_ba_pg_sz);
	roce_set_field(cq_context->byte_24_pgsz_addr,
		       V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M,
		       V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S,
		       hr_dev->caps.cqe_buf_pg_sz);

	cq_context->cqe_ba = (u32)(dma_handle >> 3);

	roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
		       V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
}

static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
				     enum ib_cq_notify_flags flags)
{
	struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
	u32 notification_flag;
	u32 doorbell[2];

	doorbell[0] = 0;
	doorbell[1] = 0;

	notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
			     V2_CQ_DB_REQ_NOT : V2_CQ_DB_REQ_NOT_SOL;
	/*
	 * flags = 0; Notification Flag = 1, next
	 * flags = 1; Notification Flag = 0, solocited
	 */
	roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_TAG_M, V2_DB_BYTE_4_TAG_S,
		       hr_cq->cqn);
	roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S,
		       HNS_ROCE_V2_CQ_DB_NTR);
	roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CONS_IDX_M,
		       V2_CQ_DB_PARAMETER_CONS_IDX_S,
		       hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
	roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CMD_SN_M,
		       V2_CQ_DB_PARAMETER_CMD_SN_S, 1);
	roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S,
		     notification_flag);

	hns_roce_write64_k(doorbell, hr_cq->cq_db_l);

	return 0;
}

static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
				struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
	struct hns_roce_dev *hr_dev;
	struct hns_roce_v2_cqe *cqe;
	struct hns_roce_qp *hr_qp;
	struct hns_roce_wq *wq;
	int is_send;
	u16 wqe_ctr;
	u32 opcode;
	u32 status;
	int qpn;

	/* Find cqe according to consumer index */
	cqe = next_cqe_sw_v2(hr_cq);
	if (!cqe)
		return -EAGAIN;

	++hr_cq->cons_index;
	/* Memory barrier */
	rmb();

	/* 0->SQ, 1->RQ */
	is_send = !roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S);

	qpn = roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
				V2_CQE_BYTE_16_LCL_QPN_S);

	if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) {
		hr_dev = to_hr_dev(hr_cq->ib_cq.device);
		hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
		if (unlikely(!hr_qp)) {
			dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n",
				hr_cq->cqn, (qpn & HNS_ROCE_V2_CQE_QPN_MASK));
			return -EINVAL;
		}
		*cur_qp = hr_qp;
	}

	wc->qp = &(*cur_qp)->ibqp;
	wc->vendor_err = 0;

	status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
				V2_CQE_BYTE_4_STATUS_S);
	switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
	case HNS_ROCE_CQE_V2_SUCCESS:
		wc->status = IB_WC_SUCCESS;
		break;
	case HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case HNS_ROCE_CQE_V2_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case HNS_ROCE_CQE_V2_WR_FLUSH_ERR:
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case HNS_ROCE_CQE_V2_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case HNS_ROCE_CQE_V2_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case HNS_ROCE_CQE_V2_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		break;
	case HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		break;
	case HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	/* CQE status error, directly return */
	if (wc->status != IB_WC_SUCCESS)
		return 0;

	if (is_send) {
		wc->wc_flags = 0;
		/* SQ corresponding to CQE */
		switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M,
				       V2_CQE_BYTE_4_OPCODE_S) & 0x1f) {
		case HNS_ROCE_SQ_OPCODE_SEND:
			wc->opcode = IB_WC_SEND;
			break;
		case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV:
			wc->opcode = IB_WC_SEND;
			break;
		case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_IMM;
			break;
		case HNS_ROCE_SQ_OPCODE_RDMA_READ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = le32_to_cpu(cqe->byte_cnt);
			break;
		case HNS_ROCE_SQ_OPCODE_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM:
			wc->opcode = IB_WC_RDMA_WRITE;
			wc->wc_flags |= IB_WC_WITH_IMM;
			break;
		case HNS_ROCE_SQ_OPCODE_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
			wc->opcode = IB_WC_COMP_SWAP;
			wc->byte_len  = 8;
			break;
		case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
			wc->opcode = IB_WC_FETCH_ADD;
			wc->byte_len  = 8;
			break;
		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
			wc->opcode = IB_WC_MASKED_COMP_SWAP;
			wc->byte_len  = 8;
			break;
		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
			wc->opcode = IB_WC_MASKED_FETCH_ADD;
			wc->byte_len  = 8;
			break;
		case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
			wc->opcode = IB_WC_REG_MR;
			break;
		case HNS_ROCE_SQ_OPCODE_BIND_MW:
			wc->opcode = IB_WC_REG_MR;
			break;
		default:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		}

		wq = &(*cur_qp)->sq;
		if ((*cur_qp)->sq_signal_bits) {
			/*
			 * If sg_signal_bit is 1,
			 * firstly tail pointer updated to wqe
			 * which current cqe correspond to
			 */
			wqe_ctr = (u16)roce_get_field(cqe->byte_4,
						      V2_CQE_BYTE_4_WQE_INDX_M,
						      V2_CQE_BYTE_4_WQE_INDX_S);
			wq->tail += (wqe_ctr - (u16)wq->tail) &
				    (wq->wqe_cnt - 1);
		}

		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else {
		/* RQ correspond to CQE */
		wc->byte_len = le32_to_cpu(cqe->byte_cnt);

		opcode = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M,
					V2_CQE_BYTE_4_OPCODE_S);
		switch (opcode & 0x1f) {
		case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IB_WC_WITH_IMM;
			wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata);
			break;
		case HNS_ROCE_V2_OPCODE_SEND:
			wc->opcode = IB_WC_RECV;
			wc->wc_flags = 0;
			break;
		case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
			wc->opcode = IB_WC_RECV;
			wc->wc_flags = IB_WC_WITH_IMM;
			wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata);
			break;
		case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
			wc->opcode = IB_WC_RECV;
			wc->wc_flags = IB_WC_WITH_INVALIDATE;
			wc->ex.invalidate_rkey = cqe->rkey_immtdata;
			break;
		default:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		}

		/* Update tail pointer, record wr_id */
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;

		wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
					    V2_CQE_BYTE_32_SL_S);
		wc->src_qp = (u8)roce_get_field(cqe->byte_32,
						V2_CQE_BYTE_32_RMT_QPN_M,
						V2_CQE_BYTE_32_RMT_QPN_S);
		wc->wc_flags |= (roce_get_bit(cqe->byte_32,
					      V2_CQE_BYTE_32_GRH_S) ?
					      IB_WC_GRH : 0);
	}

	return 0;
}

static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
			       struct ib_wc *wc)
{
	struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
	struct hns_roce_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;

	spin_lock_irqsave(&hr_cq->lock, flags);

	for (npolled = 0; npolled < num_entries; ++npolled) {
		if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled))
			break;
	}

	if (npolled) {
		/* Memory barrier */
		wmb();
		hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
	}

	spin_unlock_irqrestore(&hr_cq->lock, flags);

	return npolled;
}

static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
			       struct hns_roce_hem_table *table, int obj,
			       int step_idx)
@@ -906,8 +1279,11 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
	.chk_mbox = hns_roce_v2_chk_mbox,
	.set_gid = hns_roce_v2_set_gid,
	.set_mac = hns_roce_v2_set_mac,
	.write_cqc = hns_roce_v2_write_cqc,
	.set_hem = hns_roce_v2_set_hem,
	.clear_hem = hns_roce_v2_clear_hem,
	.req_notify_cq = hns_roce_v2_req_notify_cq,
	.poll_cq = hns_roce_v2_poll_cq,
};

static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
+222 −0
Original line number Diff line number Diff line
@@ -100,6 +100,61 @@
	(step_idx == 1 && hop_num == 1) || \
	(step_idx == 2 && hop_num == 2))

#define V2_CQ_DB_REQ_NOT_SOL			0
#define V2_CQ_DB_REQ_NOT			1

#define V2_CQ_STATE_VALID			1

#define HNS_ROCE_V2_CQE_QPN_MASK		0x3ffff

enum {
	HNS_ROCE_SQ_OPCODE_SEND = 0x0,
	HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1,
	HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2,
	HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3,
	HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4,
	HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5,
	HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6,
	HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7,
	HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
	HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
	HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa,
	HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb,
	HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc,
};

enum {
	/* rq operations */
	HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0,
	HNS_ROCE_V2_OPCODE_SEND = 0x1,
	HNS_ROCE_V2_OPCODE_SEND_WITH_IMM = 0x2,
	HNS_ROCE_V2_OPCODE_SEND_WITH_INV = 0x3,
};

enum {
	HNS_ROCE_V2_CQ_DB_PTR	= 0x3,
	HNS_ROCE_V2_CQ_DB_NTR	= 0x4,
};

enum {
	HNS_ROCE_CQE_V2_SUCCESS				= 0x00,
	HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR		= 0x01,
	HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR			= 0x02,
	HNS_ROCE_CQE_V2_LOCAL_PROT_ERR			= 0x04,
	HNS_ROCE_CQE_V2_WR_FLUSH_ERR			= 0x05,
	HNS_ROCE_CQE_V2_MW_BIND_ERR			= 0x06,
	HNS_ROCE_CQE_V2_BAD_RESP_ERR			= 0x10,
	HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR		= 0x11,
	HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR		= 0x12,
	HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR		= 0x13,
	HNS_ROCE_CQE_V2_REMOTE_OP_ERR			= 0x14,
	HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR		= 0x15,
	HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR		= 0x16,
	HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR		= 0x22,

	HNS_ROCE_V2_CQE_STATUS_MASK			= 0xff,
};

/* CMQ command */
enum hns_roce_opcode_type {
	HNS_ROCE_OPC_QUERY_HW_VER			= 0x8000,
@@ -122,6 +177,173 @@ enum hns_roce_cmd_return_status {
	CMD_QUEUE_FULL		= 3,
};

struct hns_roce_v2_cq_context {
	u32	byte_4_pg_ceqn;
	u32	byte_8_cqn;
	u32	cqe_cur_blk_addr;
	u32	byte_16_hop_addr;
	u32	cqe_nxt_blk_addr;
	u32	byte_24_pgsz_addr;
	u32	byte_28_cq_pi;
	u32	byte_32_cq_ci;
	u32	cqe_ba;
	u32	byte_40_cqe_ba;
	u32	byte_44_db_record;
	u32	db_record_addr;
	u32	byte_52_cqe_cnt;
	u32	byte_56_cqe_period_maxcnt;
	u32	cqe_report_timer;
	u32	byte_64_se_cqe_idx;
};
#define	V2_CQC_BYTE_4_CQ_ST_S 0
#define V2_CQC_BYTE_4_CQ_ST_M GENMASK(1, 0)

#define	V2_CQC_BYTE_4_POLL_S 2

#define	V2_CQC_BYTE_4_SE_S 3

#define	V2_CQC_BYTE_4_OVER_IGNORE_S 4

#define	V2_CQC_BYTE_4_COALESCE_S 5

#define	V2_CQC_BYTE_4_ARM_ST_S 6
#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6)

#define	V2_CQC_BYTE_4_SHIFT_S 8
#define V2_CQC_BYTE_4_SHIFT_M GENMASK(12, 8)

#define	V2_CQC_BYTE_4_CMD_SN_S 13
#define V2_CQC_BYTE_4_CMD_SN_M GENMASK(14, 13)

#define	V2_CQC_BYTE_4_CEQN_S 15
#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15)

#define	V2_CQC_BYTE_4_PAGE_OFFSET_S 24
#define V2_CQC_BYTE_4_PAGE_OFFSET_M GENMASK(31, 24)

#define	V2_CQC_BYTE_8_CQN_S 0
#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)

#define	V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)

#define	V2_CQC_BYTE_16_CQE_HOP_NUM_S 30
#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30)

#define	V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S 0
#define V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M GENMASK(19, 0)

#define	V2_CQC_BYTE_24_CQE_BA_PG_SZ_S 24
#define V2_CQC_BYTE_24_CQE_BA_PG_SZ_M GENMASK(27, 24)

#define	V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S 28
#define V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M GENMASK(31, 28)

#define	V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0
#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0)

#define	V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0
#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0)

#define	V2_CQC_BYTE_40_CQE_BA_S 0
#define V2_CQC_BYTE_40_CQE_BA_M GENMASK(28, 0)

#define	V2_CQC_BYTE_44_DB_RECORD_EN_S 0

#define	V2_CQC_BYTE_52_CQE_CNT_S 0
#define	V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0)

#define	V2_CQC_BYTE_56_CQ_MAX_CNT_S 0
#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0)

#define	V2_CQC_BYTE_56_CQ_PERIOD_S 16
#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16)

#define	V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define	V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)

struct hns_roce_v2_cqe {
	u32	byte_4;
	u32	rkey_immtdata;
	u32	byte_12;
	u32	byte_16;
	u32	byte_cnt;
	u32	smac;
	u32	byte_28;
	u32	byte_32;
};

#define	V2_CQE_BYTE_4_OPCODE_S 0
#define V2_CQE_BYTE_4_OPCODE_M GENMASK(4, 0)

#define	V2_CQE_BYTE_4_RQ_INLINE_S 5

#define	V2_CQE_BYTE_4_S_R_S 6

#define	V2_CQE_BYTE_4_OWNER_S 7

#define	V2_CQE_BYTE_4_STATUS_S 8
#define V2_CQE_BYTE_4_STATUS_M GENMASK(15, 8)

#define	V2_CQE_BYTE_4_WQE_INDX_S 16
#define V2_CQE_BYTE_4_WQE_INDX_M GENMASK(31, 16)

#define	V2_CQE_BYTE_12_XRC_SRQN_S 0
#define V2_CQE_BYTE_12_XRC_SRQN_M GENMASK(23, 0)

#define	V2_CQE_BYTE_16_LCL_QPN_S 0
#define V2_CQE_BYTE_16_LCL_QPN_M GENMASK(23, 0)

#define	V2_CQE_BYTE_16_SUB_STATUS_S 24
#define V2_CQE_BYTE_16_SUB_STATUS_M GENMASK(31, 24)

#define	V2_CQE_BYTE_28_SMAC_4_S 0
#define V2_CQE_BYTE_28_SMAC_4_M	GENMASK(7, 0)

#define	V2_CQE_BYTE_28_SMAC_5_S 8
#define V2_CQE_BYTE_28_SMAC_5_M	GENMASK(15, 8)

#define	V2_CQE_BYTE_28_PORT_TYPE_S 16
#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)

#define	V2_CQE_BYTE_32_RMT_QPN_S 0
#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)

#define	V2_CQE_BYTE_32_SL_S 24
#define V2_CQE_BYTE_32_SL_M GENMASK(26, 24)

#define	V2_CQE_BYTE_32_PORTN_S 27
#define V2_CQE_BYTE_32_PORTN_M GENMASK(29, 27)

#define	V2_CQE_BYTE_32_GRH_S 30

#define	V2_CQE_BYTE_32_LPK_S 31

#define	V2_DB_BYTE_4_TAG_S 0
#define V2_DB_BYTE_4_TAG_M GENMASK(23, 0)

#define	V2_DB_BYTE_4_CMD_S 24
#define V2_DB_BYTE_4_CMD_M GENMASK(27, 24)

struct hns_roce_v2_cq_db {
	u32	byte_4;
	u32	parameter;
};

#define	V2_CQ_DB_BYTE_4_TAG_S 0
#define V2_CQ_DB_BYTE_4_TAG_M GENMASK(23, 0)

#define	V2_CQ_DB_BYTE_4_CMD_S 24
#define V2_CQ_DB_BYTE_4_CMD_M GENMASK(27, 24)

#define V2_CQ_DB_PARAMETER_CONS_IDX_S 0
#define V2_CQ_DB_PARAMETER_CONS_IDX_M GENMASK(23, 0)

#define V2_CQ_DB_PARAMETER_CMD_SN_S 25
#define V2_CQ_DB_PARAMETER_CMD_SN_M GENMASK(26, 25)

#define V2_CQ_DB_PARAMETER_NOTIFY_S 24

struct hns_roce_query_version {
	__le16 rocee_vendor_id;
	__le16 rocee_hw_version;