Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ba607808 authored by David S. Miller
Browse files

Merge branch 'smc-link-layer-control-enhancements'



Ursula Braun says:

====================
net/smc: Link Layer Control enhancements

here is a series of smc patches enabling SMC communication with peers
supporting more than one link per link group.

The first three patches are preparing code cleanups.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3c5aa0bc 9651b934
Loading
Loading
Loading
Loading
+60 −67
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@
 *  applicable with RoCE-cards only
 *
 *  Initial restrictions:
 *    - non-blocking connect postponed
 *    - IPv6 support postponed
 *    - support for alternate links postponed
 *    - partial support for non-blocking sockets only
@@ -24,7 +23,6 @@

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
@@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* determine subnet and mask of internal TCP socket
 *
 * @clcsock:    internal TCP socket whose bound address is looked up
 * @subnet:     out - matching interface address ANDed with its netmask
 * @prefix_len: out - inet_mask_len() of the matching netmask
 *
 * The outputs are written only when 0 is returned.
 *
 * Returns 0 on success, -ENOTCONN if sk_dst_get() yields no dst entry,
 * -ENODEV if the dst entry has no device or the device has no in_device,
 * the negative error of kernel_getsockname() if the bound address cannot
 * be read, or -ENOENT if no interface address matches the bound address.
 */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct in_device *in_dev;
	struct sockaddr_in addr;
	int rc;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	rc = kernel_getsockname(clcsock, (struct sockaddr *)&addr);
	if (rc < 0)
		goto out_rel;	/* addr is not filled in, do not match on it */

	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	rc = -ENOENT;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dst->dev);
	if (!in_dev) {
		/* for_ifa() dereferences in_dev, so bail out before it */
		rc = -ENODEV;
		goto out_unlock;
	}
	for_ifa(in_dev) {
		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(in_dev);
out_unlock:
	rcu_read_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
@@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
		return rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;
@@ -346,13 +308,35 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       gid, SMC_LLC_RESP);
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   &link->smcibdev->gid[link->ibport - 1],
				   SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	link->state = SMC_LNK_ACTIVE;

	return 0;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
@@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
	link->peer_mtu = clc->qp_mtu;
}

/* Take a link group off the smc_lgr_list list so that it is not used for
 * new connections. A link group whose list node is already empty is left
 * untouched. The list is modified under smc_lgr_list.lock.
 */
static void smc_lgr_forget(struct smc_link_group *lgr)
{
	struct list_head *entry = &lgr->list;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(entry))
		list_del_init(entry);	/* leaves the node empty again */
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
@@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc)

	srv_first_contact = aclc.hdr.flag;
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
					ibport, &aclc.lcl, srv_first_contact);
	local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
					srv_first_contact);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
@@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc)

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(
			smc, &smcibdev->gid[ibport - 1]);
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
@@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;
	smc->addr = addr;	/* needed for nonblocking connect */

	lock_sock(sk);
	switch (sk->sk_state) {
@@ -748,11 +720,36 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	if (link->llc_confirm_resp_rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   &link->smcibdev->gid[link->ibport - 1],
				   SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	link->state = SMC_LNK_ACTIVE;

	return 0;
}

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
@@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work)
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *smcibdev;
	struct sockaddr_in peeraddr;
	u8 buf[SMC_CLC_MAX_LEN];
	struct smc_link *link;
	int reason_code = 0;
@@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work)
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
@@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work)
		goto decline_rdma;
	}

	/* get address of the peer connected to the internal TCP socket */
	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);

	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
					smcibdev, ibport, &pclc->lcl, 0);
	local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
					0);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
+1 −4
Original line number Diff line number Diff line
@@ -172,7 +172,6 @@ struct smc_sock { /* smc sock container */
	struct sock		sk;
	struct socket		*clcsock;	/* internal tcp socket */
	struct smc_connection	conn;		/* smc connection */
	struct sockaddr		*addr;		/* inet connect address */
	struct smc_sock		*listen_smc;	/* listen parent */
	struct work_struct	tcp_listen_work;/* handle tcp socket accepts */
	struct work_struct	smc_listen_work;/* prepare new accept socket */
@@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc)

struct smc_clc_msg_local;

int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
			 u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
int smc_conn_create(struct smc_sock *smc,
		    struct smc_ib_device *smcibdev, u8 ibport,
		    struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
+45 −2
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
 */

#include <linux/in.h>
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>

@@ -22,6 +23,9 @@
#include "smc_clc.h"
#include "smc_ib.h"

/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};

/* check if received message has a correct header length and contains valid
 * heading and trailing eyecatchers
 */
@@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
	return true;
}

/* determine subnet and mask of internal TCP socket
 *
 * @clcsock:    internal TCP socket whose bound address is looked up
 * @subnet:     out - matching interface address ANDed with its netmask
 * @prefix_len: out - inet_mask_len() of the matching netmask
 *
 * The outputs are written only when 0 is returned.
 *
 * Returns 0 on success, -ENOTCONN if sk_dst_get() yields no dst entry,
 * -ENODEV if the dst entry has no device or the device has no in_device,
 * the negative error of kernel_getsockname() if the bound address cannot
 * be read, or -ENOENT if no interface address matches the bound address.
 */
int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
			     __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct in_device *in_dev;
	struct sockaddr_in addr;
	int rc;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	rc = kernel_getsockname(clcsock, (struct sockaddr *)&addr);
	if (rc < 0)
		goto out_rel;	/* addr is not filled in, do not match on it */

	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	rc = -ENOENT;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dst->dev);
	if (!in_dev) {
		/* for_ifa() dereferences in_dev, so bail out before it */
		rc = -ENODEV;
		goto out_unlock;
	}
	for_ifa(in_dev) {
		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(in_dev);
out_unlock:
	rcu_read_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

/* Wait for data on the tcp-socket, analyze received data
 * Returns:
 * 0 if success and it was not a decline that we received.
@@ -211,7 +254,7 @@ int smc_clc_send_proposal(struct smc_sock *smc,

	memset(&pclc_prfx, 0, sizeof(pclc_prfx));
	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
	rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
				      &pclc_prfx.prefix_len);
	if (rc)
		return SMC_CLC_DECL_CNFERR; /* configuration error */
+3 −6
Original line number Diff line number Diff line
@@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM		0x03
#define SMC_CLC_DECLINE		0x04

/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};

#define SMC_CLC_V1		0x1		/* SMC version                */
#define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */
#define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR	0x99990000  /* internal error                 */
#define SMC_CLC_DECL_TCL	0x02040000  /* timeout w4 QP confirm          */
#define SMC_CLC_DECL_SEND	0x07000000  /* sending problem                */
#define SMC_CLC_DECL_RMBE_EC	0x08000000  /* peer has eyecatcher in RMBE    */

struct smc_clc_msg_hdr {	/* header1 of clc messages */
	u8 eyecatcher[4];	/* eye catcher */
@@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}

struct smc_sock;
struct smc_ib_device;

int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
			     u8 *prefix_len);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
		     u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
+54 −23
Original line number Diff line number Diff line
@@ -144,7 +144,7 @@ static void smc_lgr_free_work(struct work_struct *work)
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
static int smc_lgr_create(struct smc_sock *smc,
			  struct smc_ib_device *smcibdev, u8 ibport,
			  char *peer_systemid, unsigned short vlan_id)
{
@@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
	}
	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	lgr->sync_err = false;
	lgr->daddr = peer_in_addr;
	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
	lgr->vlan_id = vlan_id;
	rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,

	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	/* initialize link */
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->smcibdev = smcibdev;
	lnk->ibport = ibport;
	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
		goto destroy_qp;
	init_completion(&lnk->llc_confirm);
	init_completion(&lnk->llc_confirm_resp);
	init_completion(&lnk->llc_add);
	init_completion(&lnk->llc_add_resp);

	smc->conn.lgr = lgr;
	rwlock_init(&lgr->conns_lock);
@@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
	kfree(lgr);
}

/* Take a link group off the smc_lgr_list list so that it is not used for
 * new connections. A link group whose list node is already empty is left
 * untouched. The list is modified under smc_lgr_list.lock.
 */
void smc_lgr_forget(struct smc_link_group *lgr)
{
	struct list_head *entry = &lgr->list;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(entry))
		list_del_init(entry);	/* leaves the node empty again */
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
@@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
	struct smc_sock *smc;
	struct rb_node *node;

	spin_lock_bh(&smc_lgr_list.lock);
	if (list_empty(&lgr->list)) {
		/* termination already triggered */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
	/* do not use this link group for new connections */
	list_del_init(&lgr->list);
	spin_unlock_bh(&smc_lgr_list.lock);
	smc_lgr_forget(lgr);

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
@@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
int smc_conn_create(struct smc_sock *smc,
		    struct smc_ib_device *smcibdev, u8 ibport,
		    struct smc_clc_msg_local *lcl, int srv_first_contact)
{
@@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,

create:
	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
		rc = smc_lgr_create(smc, smcibdev, ibport,
				    lcl->id_for_peer, vlan_id);
		if (rc)
			goto out;
@@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
	return -ENOSPC;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
/* add a new rtoken from peer
 *
 * @lgr:      link group whose rtokens table is searched and updated
 * @nw_vaddr: buffer address as received from the peer (big endian)
 * @nw_rkey:  rkey as received from the peer (big endian)
 *
 * Both values are converted to host byte order. If a slot that is marked
 * in rtokens_used_mask already holds the same rkey/dma_addr pair, its index
 * is returned. Otherwise a slot is reserved with
 * smc_rmb_reserve_rtoken_idx() and filled with the pair.
 *
 * Returns the slot index, or the negative value handed back by
 * smc_rmb_reserve_rtoken_idx().
 *
 * NOTE(review): this listing carries diff lines without +/- markers. The
 * statements using `clc` and `conn` below are not backed by any parameter
 * or local of this function and presumably belong to the previous
 * smc_rmb_rtoken_handling() body - confirm against the tree.
 */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);	/* NOTE(review): `clc` undeclared here */
	struct smc_link_group *lgr = conn->lgr;	/* NOTE(review): redeclares parameter lgr */
	u32 rkey = ntohl(clc->rmb_rkey);	/* NOTE(review): `clc` undeclared here */
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	/* look for a used slot that already stores this rkey/dma_addr pair */
	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			conn->rtoken_idx = i;	/* NOTE(review): `conn` undeclared here */
			/* already in list */
			return i;
		}
	}
	/* not stored yet: reserve a slot and record the pair in it */
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken
 *
 * @lgr:     link group whose rtokens table is searched
 * @nw_rkey: rkey of the rtoken to delete (big endian)
 *
 * Clears the first slot that is marked in rtokens_used_mask and stores
 * the given rkey (compared in host byte order): rkey and dma_addr are
 * zeroed and the slot's bit in rtokens_used_mask is cleared.
 *
 * Returns 0 if a slot was cleared, -ENOENT if no used slot holds the rkey.
 */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

			/* release the slot for reuse */
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	/* NOTE(review): `conn` is neither a parameter nor a local of this
	 * function; the next line presumably is a removed diff line of the
	 * previous smc_rmb_rtoken_handling() body - confirm against the tree
	 */
	conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake
 *
 * @conn: connection; its lgr receives the rtoken and its rtoken_idx is set
 *        to the value returned by smc_rtoken_add()
 * @clc:  accept/confirm message supplying rmb_dma_addr and rmb_rkey
 *
 * Returns 0 on success, or the negative value returned by smc_rtoken_add().
 */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	/* NOTE(review): `lgr`, `rkey` and `dma_addr` are not declared in this
	 * function; the next two lines presumably are removed diff lines of
	 * the previous body - confirm against the tree
	 */
	lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return 0;
}
Loading