Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 989a1780 authored by Steve Wise's avatar Steve Wise Committed by Roland Dreier
Browse files

RDMA/cxgb3: Correctly serialize peer abort path



Open MPI and other stress testing exposed a few bad bugs in handling
aborts in the middle of a normal close.  Fix these by:

 - serializing abort reply and peer abort processing with disconnect
   processing

 - warning (and ignoring) if ep timer is stopped when it wasn't running

 - cleaning up disconnect path to correctly deal with aborting and
   dead endpoints

 - in iwch_modify_qp(), taking a ref on the ep before releasing the qp
   lock if iwch_ep_disconnect() will be called.  The ref is dropped
   after calling disconnect.

Signed-off-by: default avatarSteve Wise <swise@opengridcomputing.com>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent e463c7b1
Loading
Loading
Loading
Loading
+66 −34
Original line number Original line Diff line number Diff line
@@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
{
{
	PDBG("%s ep %p\n", __func__, ep);
	PDBG("%s ep %p\n", __func__, ep);
	if (!timer_pending(&ep->timer)) {
		printk(KERN_ERR "%s timer stopped when its not running!  ep %p state %u\n",
			__func__, ep, ep->com.state);
		WARN_ON(1);
		return;
	}
	del_timer_sync(&ep->timer);
	del_timer_sync(&ep->timer);
	put_ep(&ep->com);
	put_ep(&ep->com);
}
}
@@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
{
	struct iwch_ep *ep = ctx;
	struct iwch_ep *ep = ctx;
	unsigned long flags;
	int release = 0;


	PDBG("%s ep %p\n", __func__, ep);
	PDBG("%s ep %p\n", __func__, ep);
	BUG_ON(!ep);


	/*
	/*
	 * We get 2 abort replies from the HW.  The first one must
	 * We get 2 abort replies from the HW.  The first one must
@@ -1095,8 +1104,21 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
		return CPL_RET_BUF_DONE;
		return CPL_RET_BUF_DONE;
	}
	}


	spin_lock_irqsave(&ep->com.lock, flags);
	switch (ep->com.state) {
	case ABORTING:
		close_complete_upcall(ep);
		close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	default:
		printk(KERN_ERR "%s ep %p state %d\n",
		     __func__, ep, ep->com.state);
		break;
	}
	spin_unlock_irqrestore(&ep->com.lock, flags);

	if (release)
		release_ep_resources(ep);
		release_ep_resources(ep);
	return CPL_RET_BUF_DONE;
	return CPL_RET_BUF_DONE;
}
}
@@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
	struct sk_buff *rpl_skb;
	struct sk_buff *rpl_skb;
	struct iwch_qp_attributes attrs;
	struct iwch_qp_attributes attrs;
	int ret;
	int ret;
	int state;
	int release = 0;
	unsigned long flags;


	if (is_neg_adv_abort(req->status)) {
	if (is_neg_adv_abort(req->status)) {
		PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
		PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
@@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
		return CPL_RET_BUF_DONE;
		return CPL_RET_BUF_DONE;
	}
	}


	state = state_read(&ep->com);
	spin_lock_irqsave(&ep->com.lock, flags);
	PDBG("%s ep %p state %u\n", __func__, ep, state);
	PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
	switch (state) {
	switch (ep->com.state) {
	case CONNECTING:
	case CONNECTING:
		break;
		break;
	case MPA_REQ_WAIT:
	case MPA_REQ_WAIT:
@@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
		break;
		break;
	case DEAD:
	case DEAD:
		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
		spin_unlock_irqrestore(&ep->com.lock, flags);
		return CPL_RET_BUF_DONE;
		return CPL_RET_BUF_DONE;
	default:
	default:
		BUG_ON(1);
		BUG_ON(1);
		break;
		break;
	}
	}
	dst_confirm(ep->dst);
	dst_confirm(ep->dst);
	if (ep->com.state != ABORTING) {
		__state_set(&ep->com, DEAD);
		release = 1;
	}
	spin_unlock_irqrestore(&ep->com.lock, flags);


	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
	if (!rpl_skb) {
	if (!rpl_skb) {
		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
		       __func__);
		       __func__);
		dst_release(ep->dst);
		release = 1;
		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
		goto out;
		put_ep(&ep->com);
		return CPL_RET_BUF_DONE;
	}
	}
	rpl_skb->priority = CPL_PRIORITY_DATA;
	rpl_skb->priority = CPL_PRIORITY_DATA;
	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
@@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
	rpl->cmd = CPL_ABORT_NO_RST;
	rpl->cmd = CPL_ABORT_NO_RST;
	cxgb3_ofld_send(ep->com.tdev, rpl_skb);
	cxgb3_ofld_send(ep->com.tdev, rpl_skb);
	if (state != ABORTING) {
out:
		state_set(&ep->com, DEAD);
	if (release)
		release_ep_resources(ep);
		release_ep_resources(ep);
	}
	return CPL_RET_BUF_DONE;
	return CPL_RET_BUF_DONE;
}
}


@@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg)
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	struct iwch_qp_attributes attrs;
	unsigned long flags;
	unsigned long flags;
	int abort = 1;


	spin_lock_irqsave(&ep->com.lock, flags);
	spin_lock_irqsave(&ep->com.lock, flags);
	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
	     ep->com.state);
	     ep->com.state);
	switch (ep->com.state) {
	switch (ep->com.state) {
	case MPA_REQ_SENT:
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
		break;
	case MPA_REQ_WAIT:
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
		break;
	case CLOSING:
	case CLOSING:
	case MORIBUND:
	case MORIBUND:
@@ -1679,12 +1708,16 @@ static void ep_timeout(unsigned long arg)
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
				     &attrs, 1);
		}
		}
		__state_set(&ep->com, ABORTING);
		break;
		break;
	default:
	default:
		BUG();
		printk(KERN_ERR "%s unexpected state ep %p state %u\n",
			__func__, ep, ep->com.state);
		WARN_ON(1);
		abort = 0;
	}
	}
	__state_set(&ep->com, CLOSING);
	spin_unlock_irqrestore(&ep->com.lock, flags);
	spin_unlock_irqrestore(&ep->com.lock, flags);
	if (abort)
		abort_connection(ep, NULL, GFP_ATOMIC);
		abort_connection(ep, NULL, GFP_ATOMIC);
	put_ep(&ep->com);
	put_ep(&ep->com);
}
}
@@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
	     states[ep->com.state], abrupt);
	     states[ep->com.state], abrupt);


	if (ep->com.state == DEAD) {
		PDBG("%s already dead ep %p\n", __func__, ep);
		goto out;
	}

	if (abrupt) {
		if (ep->com.state != ABORTING) {
			ep->com.state = ABORTING;
			close = 1;
		}
		goto out;
	}

	switch (ep->com.state) {
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case MPA_REP_SENT:
	case FPDU_MODE:
	case FPDU_MODE:
		start_ep_timer(ep);
		ep->com.state = CLOSING;
		close = 1;
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		break;
		break;
	case CLOSING:
	case CLOSING:
		ep->com.state = MORIBUND;
		close = 1;
		close = 1;
		if (abrupt) {
			stop_ep_timer(ep);
			ep->com.state = ABORTING;
		} else
			ep->com.state = MORIBUND;
		break;
		break;
	case MORIBUND:
	case MORIBUND:
	case ABORTING:
	case DEAD:
		PDBG("%s ignoring disconnect ep %p state %u\n",
		     __func__, ep, ep->com.state);
		break;
		break;
	default:
	default:
		BUG();
		BUG();
		break;
		break;
	}
	}
out:

	spin_unlock_irqrestore(&ep->com.lock, flags);
	spin_unlock_irqrestore(&ep->com.lock, flags);
	if (close) {
	if (close) {
		if (abrupt)
		if (abrupt)
+1 −0
Original line number Original line Diff line number Diff line
@@ -56,6 +56,7 @@
#define put_ep(ep) { \
#define put_ep(ep) { \
	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
	     ep, atomic_read(&((ep)->kref.refcount))); \
	     ep, atomic_read(&((ep)->kref.refcount))); \
	WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
	kref_put(&((ep)->kref), __free_ep); \
	kref_put(&((ep)->kref), __free_ep); \
}
}


+5 −1
Original line number Original line Diff line number Diff line
@@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
				abort=0;
				abort=0;
				disconnect = 1;
				disconnect = 1;
				ep = qhp->ep;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			}
			flush_qp(qhp, &flag);
			flush_qp(qhp, &flag);
			break;
			break;
@@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
				abort=1;
				abort=1;
				disconnect = 1;
				disconnect = 1;
				ep = qhp->ep;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			}
			goto err;
			goto err;
			break;
			break;
@@ -929,8 +931,10 @@ out:
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	 */
	if (disconnect)
	if (disconnect) {
		iwch_ep_disconnect(ep, abort, GFP_KERNEL);
		iwch_ep_disconnect(ep, abort, GFP_KERNEL);
		put_ep(&ep->com);
	}


	/*
	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * If free is 1, then we've disassociated the EP from the QP