Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6492cdf3 authored by Faisal Latif, committed by Roland Dreier
Browse files

RDMA/nes: CM connection setup/teardown rework



Major rework of CM connection setup/teardown.  We had a number of issues
with MPI applications not starting/terminating properly over time.
With these changes we were able to run longer on larger clusters.

* Remove memory allocation from nes_connect() and nes_cm_connect().
* Fix mini_cm_dec_refcnt_listen() when destroying listener.
* Remove unnecessary code from schedule_nes_timer() and nes_cm_timer_tick().
* Functionalize mini_cm_recv_pkt() and process_packet().
* Clean up cm_node->ref_count usage.
* Reuse skbs if available.

Signed-off-by: Faisal Latif <flatif@neteffect.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent fb2e405f
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -276,6 +276,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r
	}
	nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);

	nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
	kfree(nesqp->allocated_buffer);

}
@@ -289,7 +290,6 @@ void nes_rem_ref(struct ib_qp *ibqp)
	struct nes_qp *nesqp;
	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
	struct nes_device *nesdev = nesvnic->nesdev;
	struct nes_adapter *nesadapter = nesdev->nesadapter;
	struct nes_hw_cqp_wqe *cqp_wqe;
	struct nes_cqp_request *cqp_request;
	u32 opcode;
@@ -303,8 +303,6 @@ void nes_rem_ref(struct ib_qp *ibqp)
	}

	if (atomic_dec_and_test(&nesqp->refcount)) {
		nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;

		/* Destroy the QP */
		cqp_request = nes_get_cqp_request(nesdev);
		if (cqp_request == NULL) {
+1135 −899

File changed.

Preview size limit exceeded, changes collapsed.

+20 −3
Original line number Diff line number Diff line
@@ -83,6 +83,8 @@ enum nes_timer_type {
#define SET_FIN 4
#define SET_RST 8

/*
 * NOTE(review): presumably a padding amount applied when building TCP
 * options; the unit and the code that uses it are not visible in this
 * hunk -- confirm against the connection-manager source.
 */
#define TCP_OPTIONS_PADDING	3

struct option_base {
	u8 optionnum;
	u8 length;
@@ -177,6 +179,7 @@ enum nes_cm_node_state {
	NES_CM_STATE_ESTABLISHED,
	NES_CM_STATE_ACCEPTING,
	NES_CM_STATE_MPAREQ_SENT,
	NES_CM_STATE_MPAREQ_RCVD,
	NES_CM_STATE_TSA,
	NES_CM_STATE_FIN_WAIT1,
	NES_CM_STATE_FIN_WAIT2,
@@ -187,6 +190,16 @@ enum nes_cm_node_state {
	NES_CM_STATE_CLOSED
};

/*
 * Packet classification codes.  The enumerator names correspond to the
 * TCP control-flag combinations SYN, SYN+ACK, ACK, FIN and RST;
 * NES_PKT_TYPE_UNKNOWN is the zero value.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the compiler would assign implicitly (0..5 in declaration order).
 */
enum nes_tcpip_pkt_type {
	NES_PKT_TYPE_UNKNOWN = 0,
	NES_PKT_TYPE_SYN     = 1,
	NES_PKT_TYPE_SYNACK  = 2,
	NES_PKT_TYPE_ACK     = 3,
	NES_PKT_TYPE_FIN     = 4,
	NES_PKT_TYPE_RST     = 5
};


/* type of nes connection */
enum nes_cm_conn_type {
	NES_CM_IWARP_CONN_TYPE,
@@ -257,7 +270,9 @@ struct nes_cm_node {
	struct net_device         *netdev;

	struct nes_cm_node        *loopbackpartner;
	struct list_head          retrans_list;

	struct nes_timer_entry	*send_entry;

	spinlock_t                retrans_list_lock;
	struct list_head          recv_list;
	spinlock_t                recv_list_lock;
@@ -276,6 +291,8 @@ struct nes_cm_node {
	struct nes_vnic           *nesvnic;
	int                       apbvt_set;
	int                       accept_pend;
	int			freed;
	struct nes_qp		*nesqp;
};

/* structure for client or CM to fill when making CM api calls. */
@@ -366,14 +383,14 @@ struct nes_cm_ops {
			struct nes_cm_info *);
	int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
	struct nes_cm_node * (*connect)(struct nes_cm_core *,
			struct nes_vnic *, struct ietf_mpa_frame *,
			struct nes_vnic *, u16, void *,
			struct nes_cm_info *);
	int (*close)(struct nes_cm_core *, struct nes_cm_node *);
	int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
			struct nes_cm_node *);
	int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
			struct nes_cm_node *);
	int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
	void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
			struct sk_buff *);
	int (*destroy_cm_core)(struct nes_cm_core *);
	int (*get)(struct nes_cm_core *);
+0 −9
Original line number Diff line number Diff line
@@ -2814,7 +2814,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
			nesqp = *((struct nes_qp **)&context);
			if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
				nesqp->cm_id->add_ref(nesqp->cm_id);
				nes_add_ref(&nesqp->ibqp);
				schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
						NES_TIMER_TYPE_CLOSE, 1, 0);
				nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
@@ -2838,7 +2837,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
			if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
				tcp_state = NES_AEQE_TCP_STATE_CLOSED;
			}
			nes_add_ref(&nesqp->ibqp);
			spin_lock_irqsave(&nesqp->lock, flags);
			nesqp->hw_iwarp_state = iwarp_state;
			nesqp->hw_tcp_state = tcp_state;
@@ -2876,7 +2874,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
				}
				spin_unlock_irqrestore(&nesqp->lock, flags);
				if (next_iwarp_state) {
					nes_add_ref(&nesqp->ibqp);
					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
							" also added another reference\n",
							nesqp->hwqp.qp_id, next_iwarp_state);
@@ -2888,7 +2885,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
					/* FIN Received but ib state not RTS,
							close complete will be on its way */
					spin_unlock_irqrestore(&nesqp->lock, flags);
					nes_rem_ref(&nesqp->ibqp);
					return;
				}
				spin_unlock_irqrestore(&nesqp->lock, flags);
@@ -2922,7 +2918,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
			if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
					((nesqp->ibqp_state == IB_QPS_RTS)&&
					(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
				nes_add_ref(&nesqp->ibqp);
				nes_cm_disconn(nesqp);
			} else {
				nesqp->in_disconnect = 0;
@@ -2931,7 +2926,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
			break;
		case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
			nesqp = *((struct nes_qp **)&context);
			nes_add_ref(&nesqp->ibqp);
			spin_lock_irqsave(&nesqp->lock, flags);
			nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
			nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
@@ -3042,7 +3036,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
			}
			/* tell cm to disconnect, cm will queue work to thread */
			nes_add_ref(&nesqp->ibqp);
			nes_cm_disconn(nesqp);
			break;
		case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
@@ -3062,7 +3055,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
			}
			/* tell cm to disconnect, cm will queue work to thread */
			nes_add_ref(&nesqp->ibqp);
			nes_cm_disconn(nesqp);
			break;
		case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
@@ -3082,7 +3074,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
			}
			/* tell cm to disconnect, cm will queue work to thread */
			nes_add_ref(&nesqp->ibqp);
			nes_cm_disconn(nesqp);
			break;
			/* TODO: additional AEs need to be here */
+0 −15
Original line number Diff line number Diff line
@@ -2867,7 +2867,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
			nesqp->iwarp_state, atomic_read(&nesqp->refcount));

	nes_add_ref(&nesqp->ibqp);
	spin_lock_irqsave(&nesqp->lock, qplockflags);

	nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
@@ -2882,7 +2881,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
						nesqp->hwqp.qp_id);
				if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return -EINVAL;
				}
				next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
@@ -2893,7 +2891,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
						nesqp->hwqp.qp_id);
				if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) {
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return -EINVAL;
				}
				next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
@@ -2904,14 +2901,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
						nesqp->hwqp.qp_id);
				if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) {
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return -EINVAL;
				}
				if (nesqp->cm_id == NULL) {
					nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
							nesqp->hwqp.qp_id );
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return -EINVAL;
				}
				next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
@@ -2929,7 +2924,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
						nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
				if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return 0;
				} else {
					if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
@@ -2937,7 +2931,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
								" ignored due to current iWARP state\n",
								nesqp->hwqp.qp_id);
						spin_unlock_irqrestore(&nesqp->lock, qplockflags);
						nes_rem_ref(&nesqp->ibqp);
						return -EINVAL;
					}
					if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
@@ -2969,7 +2962,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
						nesqp->hwqp.qp_id);
				if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return -EINVAL;
				}
				/* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
@@ -2982,7 +2974,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			case IB_QPS_RESET:
				if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
					nes_rem_ref(&nesqp->ibqp);
					return -EINVAL;
				}
				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
@@ -3008,7 +2999,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
				break;
			default:
				spin_unlock_irqrestore(&nesqp->lock, qplockflags);
				nes_rem_ref(&nesqp->ibqp);
				return -EINVAL;
				break;
		}
@@ -3088,7 +3078,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
							nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
							original_last_aeq, nesqp->last_aeq);
					/* this one is for the cm_disconnect thread */
					nes_add_ref(&nesqp->ibqp);
					spin_lock_irqsave(&nesqp->lock, qplockflags);
					nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
					nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
@@ -3097,14 +3086,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
				} else {
					nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
							nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
					nes_rem_ref(&nesqp->ibqp);
				}
			} else {
				spin_lock_irqsave(&nesqp->lock, qplockflags);
				if (nesqp->cm_id) {
					/* These two are for the timer thread */
					if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
						nes_add_ref(&nesqp->ibqp);
						nesqp->cm_id->add_ref(nesqp->cm_id);
						nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
								" need ae to finish up, original_last_aeq = 0x%04X."
@@ -3128,14 +3115,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
					" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
					nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
					original_last_aeq, nesqp->last_aeq);
			nes_rem_ref(&nesqp->ibqp);
		}
	} else {
		nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
				" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
				nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
				original_last_aeq, nesqp->last_aeq);
		nes_rem_ref(&nesqp->ibqp);
	}

	err = 0;