Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b8124b53 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-smc-IPv6-support'



Ursula Braun says:

====================
net/smc: IPv6 support

these smc patches for the net-next tree add IPv6 support.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 8b7372c1 aaa4d33f
Loading
Loading
Loading
Loading
+51 −27
Original line number Diff line number Diff line
@@ -7,12 +7,11 @@
 *  applicable with RoCE-cards only
 *
 *  Initial restrictions:
 *    - IPv6 support postponed
 *    - support for alternate links postponed
 *    - partial support for non-blocking sockets only
 *    - support for urgent data postponed
 *
 *  Copyright IBM Corp. 2016
 *  Copyright IBM Corp. 2016, 2018
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *              based on prototype from Frank Blaschka
@@ -64,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -103,6 +106,18 @@ struct proto smc_proto = {
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
@@ -159,19 +174,22 @@ static void smc_destruct(struct sock *sk)
	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
@@ -198,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);
@@ -529,7 +550,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
@@ -571,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
	int rc;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
@@ -767,8 +788,6 @@ static void smc_listen_work(struct work_struct *work)
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0;
	__be32 subnet;
	u8 prefix_len;
	u8 ibport;

	/* check if peer is smc capable */
@@ -803,17 +822,11 @@ static void smc_listen_work(struct work_struct *work)
		goto decline_rdma;
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	pclc = (struct smc_clc_msg_proposal *)&buf;
	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (pclc_prfx->outgoing_subnet != subnet ||
	    pclc_prfx->prefix_len != prefix_len) {

	rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}
@@ -1375,6 +1388,7 @@ static const struct proto_ops smc_sock_ops = {
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;
@@ -1384,20 +1398,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
@@ -1437,16 +1451,23 @@ static int __init smc_init(void)

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
@@ -1459,6 +1480,8 @@ static int __init smc_init(void)

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
@@ -1483,6 +1506,7 @@ static void __exit smc_exit(void)
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}
+3 −1
Original line number Diff line number Diff line
@@ -18,11 +18,13 @@

#include "smc_ib.h"

#define SMCPROTO_SMC		0	/* SMC protocol */
#define SMCPROTO_SMC		0	/* SMC protocol, IPv4 */
#define SMCPROTO_SMC6		1	/* SMC protocol, IPv6 */

#define SMC_MAX_PORTS		2	/* Max # of ports */

extern struct proto smc_proto;
extern struct proto smc_proto6;

#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
+169 −34
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@
 *  CLC (connection layer control) handshake over initial TCP socket to
 *  prepare for RDMA traffic
 *
 *  Copyright IBM Corp. 2016
 *  Copyright IBM Corp. 2016, 2018
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
@@ -15,6 +15,7 @@
#include <linux/if_ether.h>
#include <linux/sched/signal.h>

#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>

@@ -74,15 +75,67 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
	return true;
}

/* determine subnet and mask of internal TCP socket */
int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
			     __be32 *subnet, u8 *prefix_len)
/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
				 struct smc_clc_msg_proposal_prefix *prop)
{
	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);

	if (!in_dev)
		return -ENODEV;
	for_ifa(in_dev) {
		if (!inet_ifa_match(ipv4, ifa))
			continue;
		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
		return 0;
	} endfor_ifa(in_dev);
	return -ENOENT;
}

/* fill CLC proposal msg with ipv6 prefixes from device */
static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
				 struct smc_clc_msg_proposal_prefix *prop,
				 struct smc_clc_ipv6_prefix *ipv6_prfx)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
	struct inet6_ifaddr *ifa;
	int cnt = 0;

	if (!in6_dev)
		return -ENODEV;
	/* use a maximum of 8 IPv6 prefixes from device */
	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
			continue;
		ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
				 &ifa->addr, ifa->prefix_len);
		ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
		cnt++;
		if (cnt == SMC_CLC_MAX_V6_PREFIX)
			break;
	}
	prop->ipv6_prefixes_cnt = cnt;
	if (cnt)
		return 0;
#endif
	return -ENOENT;
}

/* retrieve and set prefixes in CLC proposal msg */
static int smc_clc_prfx_set(struct socket *clcsock,
			    struct smc_clc_msg_proposal_prefix *prop,
			    struct smc_clc_ipv6_prefix *ipv6_prfx)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct in_device *in_dev;
	struct sockaddr_in addr;
	struct sockaddr_storage addrs;
	struct sockaddr_in6 *addr6;
	struct sockaddr_in *addr;
	int rc = -ENOENT;

	memset(prop, 0, sizeof(*prop));
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
@@ -91,22 +144,97 @@ int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	kernel_getsockname(clcsock, (struct sockaddr *)&addr);
	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
	/* analyze IP specific data of net_device belonging to TCP socket */
	addr6 = (struct sockaddr_in6 *)&addrs;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dst->dev);
	if (addrs.ss_family == PF_INET) {
		/* IPv4 */
		addr = (struct sockaddr_in *)&addrs;
		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
	} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
		/* mapped IPv4 address - peer is IPv4 only */
		rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
					   prop);
	} else {
		/* IPv6 */
		rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
	}
	rcu_read_unlock();
out_rel:
	dst_release(dst);
out:
	return rc;
}

/* match ipv4 addrs of dev against addr in CLC proposal */
static int smc_clc_prfx_match4_rcu(struct net_device *dev,
				   struct smc_clc_msg_proposal_prefix *prop)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);

	if (!in_dev)
		return -ENODEV;
	for_ifa(in_dev) {
		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
		    inet_ifa_match(prop->outgoing_subnet, ifa))
			return 0;
	} endfor_ifa(in_dev);
	rcu_read_unlock();

	return -ENOENT;
}

/* match ipv6 addrs of dev against addrs in CLC proposal */
static int smc_clc_prfx_match6_rcu(struct net_device *dev,
				   struct smc_clc_msg_proposal_prefix *prop)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct inet6_dev *in6_dev = __in6_dev_get(dev);
	struct smc_clc_ipv6_prefix *ipv6_prfx;
	struct inet6_ifaddr *ifa;
	int i, max;

	if (!in6_dev)
		return -ENODEV;
	/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
	ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
	max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
			continue;
		for (i = 0; i < max; i++) {
			if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
			    ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
					      ifa->prefix_len))
				return 0;
		}
	}
#endif
	return -ENOENT;
}

/* check if proposed prefixes match one of our device prefixes */
int smc_clc_prfx_match(struct socket *clcsock,
		       struct smc_clc_msg_proposal_prefix *prop)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	int rc;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}
	rcu_read_lock();
	if (!prop->ipv6_prefixes_cnt)
		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
	else
		rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
	rcu_read_unlock();
out_rel:
	dst_release(dst);
out:
@@ -232,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
			  struct smc_ib_device *smcibdev,
			  u8 ibport)
{
	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
	struct smc_clc_msg_proposal_prefix pclc_prfx;
	struct smc_clc_msg_proposal pclc;
	struct smc_clc_msg_trail trl;
	int len, i, plen, rc;
	int reason_code = 0;
	struct kvec vec[3];
	struct kvec vec[4];
	struct msghdr msg;
	int len, plen, rc;

	/* retrieve ip prefixes for CLC proposal msg */
	rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
	if (rc)
		return SMC_CLC_DECL_CNFERR; /* configuration error */

	/* send SMC Proposal CLC message */
	plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
	plen = sizeof(pclc) + sizeof(pclc_prfx) +
	       (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
	       sizeof(trl);
	memset(&pclc, 0, sizeof(pclc));
	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -252,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
	pclc.iparea_offset = htons(0);

	memset(&pclc_prfx, 0, sizeof(pclc_prfx));
	/* determine subnet and mask from internal TCP socket */
	rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
				      &pclc_prfx.prefix_len);
	if (rc)
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	pclc_prfx.ipv6_prefixes_cnt = 0;
	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	memset(&msg, 0, sizeof(msg));
	vec[0].iov_base = &pclc;
	vec[0].iov_len = sizeof(pclc);
	vec[1].iov_base = &pclc_prfx;
	vec[1].iov_len = sizeof(pclc_prfx);
	vec[2].iov_base = &trl;
	vec[2].iov_len = sizeof(trl);
	i = 0;
	vec[i].iov_base = &pclc;
	vec[i++].iov_len = sizeof(pclc);
	vec[i].iov_base = &pclc_prfx;
	vec[i++].iov_len = sizeof(pclc_prfx);
	if (pclc_prfx.ipv6_prefixes_cnt > 0) {
		vec[i].iov_base = &ipv6_prfx[0];
		vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
				   sizeof(ipv6_prfx[0]);
	}
	vec[i].iov_base = &trl;
	vec[i++].iov_len = sizeof(trl);
	/* due to the few bytes needed for clc-handshake this cannot block */
	len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
	len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
	if (len < sizeof(pclc)) {
		if (len >= 0) {
			reason_code = -ENETUNREACH;
+12 −5
Original line number Diff line number Diff line
@@ -60,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */
	u8 mac[6];		/* mac of ib_device port */
};

#define SMC_CLC_MAX_V6_PREFIX	8

/* Struct would be 4 byte aligned, but it is used in an array that is sent
 * to peers and must conform to RFC7609, hence we need to use packed here.
 */
struct smc_clc_ipv6_prefix {
	u8 prefix[4];
	struct in6_addr prefix;
	u8 prefix_len;
} __packed;
} __packed;			/* format defined in RFC7609 */

struct smc_clc_msg_proposal_prefix {	/* prefix part of clc proposal message*/
	__be32 outgoing_subnet;	/* subnet mask */
@@ -79,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);

#define SMC_CLC_PROPOSAL_MAX_OFFSET	0x28
#define SMC_CLC_PROPOSAL_MAX_PREFIX	(8 * sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_PROPOSAL_MAX_PREFIX	(SMC_CLC_MAX_V6_PREFIX * \
					 sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN		(sizeof(struct smc_clc_msg_proposal) + \
				 SMC_CLC_PROPOSAL_MAX_OFFSET + \
				 sizeof(struct smc_clc_msg_proposal_prefix) + \
				 SMC_CLC_PROPOSAL_MAX_PREFIX + \
				 sizeof(struct smc_clc_msg_trail))

@@ -122,8 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}

int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
			     u8 *prefix_len);
int smc_clc_prfx_match(struct socket *clcsock,
		       struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
		     u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);