Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3b1ea430 authored by Devesh Sharma, committed by Doug Ledford
Browse files

RDMA/ocrdma: Depend on async link events from CNA



Recently Doug Ledford reported a deadlock happening
between ocrdma-load sequence and NetworkManager service
issuing "open" on be2net interface.

The deadlock happens when any be2net hook (e.g. open/close) is called
in parallel to insmod ocrdma.ko.

A. be2net sends administrative open/close events to ocrdma while holding
   device_list_mutex. It does this from the ndo_open/ndo_stop hooks of be2net.
   So the sequence of locks is rtnl_lock ---> device_list lock.

B. When a new ocrdma RoCE device gets registered, the infiniband stack now
   takes rtnl_lock in ib_register_device() in the GID initialization routines.
   So the sequence of locks in this path is device_list lock ---> rtnl_lock.

This improper locking sequence causes deadlock.

With this patch we stop using the administrative open and close events
injected by the be2net driver. These events were used to dispatch PORT_ACTIVE
and PORT_ERROR events to the IB-stack. This patch implements logic
to receive the async-link-events generated by the CNA whenever a
link-state-change is detected. From now on, these async-events will be used
to dispatch PORT_ACTIVE and PORT_ERROR events to the IB-stack.

Depending on async-events from CNA removes the need to hold device-list-mutex
and thus breaks the busy-wait scenario.

Reported-by: Doug Ledford <dledford@redhat.com>
CC: Sathya Perla <sathya.perla@avagotech.com>
Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
Signed-off-by: Selvin Xavier <selvin.xavier@avagotech.com>
Signed-off-by: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent d310a344
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -232,6 +232,10 @@ struct phy_info {
	u16 interface_type;
};

/* Bit flags stored in the 'flags' member of struct ocrdma_dev. */
enum ocrdma_flags {
	/*
	 * Set by ocrdma_update_link_state() on its first invocation;
	 * ocrdma_process_link_state() forwards async link events only
	 * while this bit is set.
	 */
	OCRDMA_FLAGS_LINK_STATUS_INIT = (1 << 0)
};

struct ocrdma_dev {
	struct ib_device ibdev;
	struct ocrdma_dev_attr attr;
@@ -287,6 +291,7 @@ struct ocrdma_dev {
	atomic_t update_sl;
	u16 pvid;
	u32 asic_id;
	u32 flags;

	ulong last_stats_time;
	struct mutex stats_lock; /* provide synch for debugfs operations */
@@ -591,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state)
		(state & OCRDMA_STATE_FLAG_SYNC);
}

/*
 * Pull the link-state field out of a link-state async event word:
 * keep the bits selected by OCRDMA_AE_LSC_LS_MASK, shift them down by
 * OCRDMA_AE_LSC_LS_SHIFT and return the result as a u8.
 */
static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
{
	u32 ls_bits = ae_state & OCRDMA_AE_LSC_LS_MASK;

	return ls_bits >> OCRDMA_AE_LSC_LS_SHIFT;
}

#endif
+35 −7
Original line number Diff line number Diff line
@@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,

	cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
	cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
	/* Request link events on this  MQ. */
	cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);

	cmd->async_cqid_ringsize = cq->id;
	cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -819,21 +821,43 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
	}
}

/*
 * Handle a link-state async event.
 *
 * The generic async MCQE is reinterpreted as a link-state MCQE and the
 * link-state field is extracted from its speed_state_ptn word. The event
 * is forwarded to ocrdma_update_link_state() only when the state carries
 * the OCRDMA_AE_LSC_LLINK_MASK bit and OCRDMA_FLAGS_LINK_STATUS_INIT is
 * already set in dev->flags; otherwise it is dropped.
 */
static void ocrdma_process_link_state(struct ocrdma_dev *dev,
				      struct ocrdma_ae_mcqe *cqe)
{
	struct ocrdma_ae_lnkst_mcqe *lnk_evt =
		(struct ocrdma_ae_lnkst_mcqe *)cqe;
	u8 lstate = ocrdma_get_ae_link_state(lnk_evt->speed_state_ptn);

	/* States without the LLINK bit are not acted upon. */
	if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
		return;

	/* Ignore events until the link status has been initialised. */
	if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT))
		return;

	ocrdma_update_link_state(dev, lstate & OCRDMA_LINK_ST_MASK);
}

/*
 * Route one async completion queue entry to the handler matching its
 * event code. The code is taken from cqe->valid_ae_event using
 * OCRDMA_AE_MCQE_EVENT_CODE_MASK / OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
 * codes with no handler are reported through pr_err().
 */
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
	/* async CQE processing */
	struct ocrdma_ae_mcqe *cqe = ae_cqe;
	u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
			OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;

	if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
	switch (evt_code) {
	case OCRDMA_ASYNC_LINK_EVE_CODE:
		ocrdma_process_link_state(dev, cqe);
		break;
	case OCRDMA_ASYNC_RDMA_EVE_CODE:
		ocrdma_dispatch_ibevent(dev, cqe);
	else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
		break;
	case OCRDMA_ASYNC_GRP5_EVE_CODE:
		ocrdma_process_grp5_aync(dev, cqe);
	else
		break;
	default:
		pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
		       dev->id, evt_code);
	}
}

static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
{
@@ -1363,7 +1387,8 @@ static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
	return status;
}

int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
			      u8 *lnk_state)
{
	int status = -ENOMEM;
	struct ocrdma_get_link_speed_rsp *rsp;
@@ -1384,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
		goto mbx_err;

	rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
	if (lnk_speed)
		*lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
			      >> OCRDMA_PHY_PS_SHIFT;
	if (lnk_state)
		*lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK);

mbx_err:
	kfree(cmd);
+3 −1
Original line number Diff line number Diff line
@@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
		       bool solicited, u16 cqe_popped);

/* verbs specific mailbox commands */
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
			      u8 *lnk_st);
int ocrdma_query_config(struct ocrdma_dev *,
			struct ocrdma_mbx_query_config *config);

@@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev);
void ocrdma_init_service_level(struct ocrdma_dev *);
void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev);
void ocrdma_free_pd_range(struct ocrdma_dev *dev);
void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate);

#endif				/* __OCRDMA_HW_H__ */
+25 −9
Original line number Diff line number Diff line
@@ -289,6 +289,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
	int status = 0, i;
	u8 lstate = 0;
	struct ocrdma_dev *dev;

	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -318,6 +319,11 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
	if (status)
		goto alloc_err;

	/* Query Link state and update */
	status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate);
	if (!status)
		ocrdma_update_link_state(dev, lstate);

	for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
		if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
			goto sysfs_err;
@@ -372,7 +378,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
	ocrdma_remove_free(dev);
}

static int ocrdma_open(struct ocrdma_dev *dev)
static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev)
{
	struct ib_event port_event;

@@ -383,7 +389,7 @@ static int ocrdma_open(struct ocrdma_dev *dev)
	return 0;
}

static int ocrdma_close(struct ocrdma_dev *dev)
static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev)
{
	struct ib_event err_event;

@@ -396,7 +402,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)

/*
 * Shutdown path: signal a port error for the device and then hand it to
 * ocrdma_remove().
 */
static void ocrdma_shutdown(struct ocrdma_dev *dev)
{
	ocrdma_close(dev);
	ocrdma_dispatch_port_error(dev);
	ocrdma_remove(dev);
}

@@ -407,16 +413,26 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev)
/*
 * Act on an event code delivered for this device. Event codes without a
 * matching case are silently ignored.
 */
static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
{
	switch (event) {
	case BE_DEV_UP:
		ocrdma_open(dev);
		break;
	case BE_DEV_DOWN:
		ocrdma_close(dev);
		break;
	case BE_DEV_SHUTDOWN:
		ocrdma_shutdown(dev);
		break;
	default:
		break;
	}
}

/*
 * Apply a link-state report to the device.
 *
 * The first call always sets OCRDMA_FLAGS_LINK_STATUS_INIT in dev->flags.
 * A non-zero lstate dispatches port-active. A zero lstate dispatches
 * port-error, except on that first call, where nothing is dispatched.
 */
void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate)
{
	int first_report = !(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT);

	if (first_report)
		dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT;

	if (lstate) {
		ocrdma_dispatch_port_active(dev);
		return;
	}

	/* Link is down: stay silent when this is the very first report. */
	if (!first_report)
		ocrdma_dispatch_port_error(dev);
}

static struct ocrdma_driver ocrdma_drv = {
+45 −4
Original line number Diff line number Diff line
@@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe {
	u32 valid_ae_event;
};

#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
/*
 * Async event codes. ocrdma_process_acqe() switches on these values, and
 * ocrdma_mbx_create_mq() uses them as bit positions (via BIT()) when
 * building the async event bitmap.
 */
enum ocrdma_async_event_code {
	/* handled by ocrdma_process_link_state() */
	OCRDMA_ASYNC_LINK_EVE_CODE	= 0x01,
	/* handled by ocrdma_process_grp5_aync() */
	OCRDMA_ASYNC_GRP5_EVE_CODE	= 0x05,
	/* handled by ocrdma_dispatch_ibevent() */
	OCRDMA_ASYNC_RDMA_EVE_CODE	= 0x14,
};

enum ocrdma_async_grp5_events {
	OCRDMA_ASYNC_EVENT_QOS_VALUE	= 0x01,
@@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
	OCRDMA_MAX_ASYNC_ERRORS
};

/*
 * Link-state async event MCQE. ocrdma_process_link_state() casts a
 * struct ocrdma_ae_mcqe pointer to this type to read the event.
 */
struct ocrdma_ae_lnkst_mcqe {
	/* link state is read from here via OCRDMA_AE_LSC_LS_MASK/_SHIFT */
	u32 speed_state_ptn;
	/*
	 * NOTE(review): "falut" looks like a misspelling of "fault"; left
	 * as is because renaming the member would change the interface.
	 */
	u32 qos_reason_falut;
	u32 evt_tag;
	/*
	 * NOTE(review): presumably the same valid/event-code word as in the
	 * generic async MCQE - confirm.
	 */
	u32 valid_ae_event;
};

/*
 * Field masks and shifts for the link-state async event.
 *
 * OCRDMA_AE_LSC_LS_* is applied to ocrdma_ae_lnkst_mcqe.speed_state_ptn
 * by ocrdma_get_ae_link_state(). NOTE(review): the PORT_NUM/PT/LD/PPS
 * fields presumably describe the same word and PPF/ER/QOS the following
 * word (the two groups reuse bit positions) - confirm against the
 * hardware specification.
 */
enum {
	/* bits 0-5 */
	OCRDMA_AE_LSC_PORT_NUM_MASK	= 0x3F,
	/* bits 6-7 */
	OCRDMA_AE_LSC_PT_SHIFT		= 6,
	OCRDMA_AE_LSC_PT_MASK		= (0x03 << OCRDMA_AE_LSC_PT_SHIFT),
	/* bits 8-15: link state */
	OCRDMA_AE_LSC_LS_SHIFT		= 8,
	OCRDMA_AE_LSC_LS_MASK		= (0xFF << OCRDMA_AE_LSC_LS_SHIFT),
	/* bits 16-23 */
	OCRDMA_AE_LSC_LD_SHIFT		= 16,
	OCRDMA_AE_LSC_LD_MASK		= (0xFF << OCRDMA_AE_LSC_LD_SHIFT),
	/* bits 24-31 */
	OCRDMA_AE_LSC_PPS_SHIFT		= 24,
	OCRDMA_AE_LSC_PPS_MASK		= (0xFF << OCRDMA_AE_LSC_PPS_SHIFT),

	/* bits 0-7 */
	OCRDMA_AE_LSC_PPF_MASK		= 0xFF,
	/* bits 8-15 */
	OCRDMA_AE_LSC_ER_SHIFT		= 8,
	OCRDMA_AE_LSC_ER_MASK		= (0xFF << OCRDMA_AE_LSC_ER_SHIFT),
	/* bits 16-31 */
	OCRDMA_AE_LSC_QOS_SHIFT		= 16,
	OCRDMA_AE_LSC_QOS_MASK		= (0xFFFF << OCRDMA_AE_LSC_QOS_SHIFT)
};

/*
 * Values of the link-state field returned by ocrdma_get_ae_link_state().
 * ocrdma_process_link_state() only acts on states that have the
 * OCRDMA_AE_LSC_LLINK_MASK bit set. NOTE(review): PLINK/LLINK presumably
 * stand for physical/logical link - confirm.
 */
enum {
	OCRDMA_AE_LSC_PLINK_DOWN	= 0x00,
	OCRDMA_AE_LSC_PLINK_UP		= 0x01,
	OCRDMA_AE_LSC_LLINK_DOWN	= 0x02,
	/* same bit as LLINK_DOWN; set in both LLINK_* values */
	OCRDMA_AE_LSC_LLINK_MASK	= 0x02,
	OCRDMA_AE_LSC_LLINK_UP		= 0x03,
};

/* mailbox command request and responses */
enum {
	OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT		= 2,
@@ -676,7 +717,7 @@ enum {
	OCRDMA_PHY_PFLT_SHIFT	= 0x18,
	OCRDMA_QOS_LNKSP_MASK	= 0xFFFF0000,
	OCRDMA_QOS_LNKSP_SHIFT	= 0x10,
	OCRDMA_LLST_MASK	= 0xFF,
	OCRDMA_LINK_ST_MASK	= 0x01,
	OCRDMA_PLFC_MASK	= 0x00000400,
	OCRDMA_PLFC_SHIFT	= 0x8,
	OCRDMA_PLRFC_MASK	= 0x00000200,
@@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp {

	u32 pflt_pps_ld_pnum;
	u32 qos_lsp;
	u32 res_lls;
	u32 res_lnk_st;
};

enum {
Loading