Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit da19a102 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle with many of the commits being smallish
  code fixes and improvements across the drivers.

   - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and
     rxe

   - Memory window support in hns

   - mlx5 user API 'flow mutate/steering' allows accessing the full
     packet mangling and matching machinery from user space

   - Support inter-working with verbs API calls in the 'devx' mlx5 user
     API, and provide options to use devx with less privilege

   - Modernize the use of syfs and the device interface to use attribute
     groups and cdev properly for uverbs, and clean up some of the core
     code's device list management

   - More progress on net namespaces for RDMA devices

   - Consolidate driver BAR mmapping support into core code helpers and
     rework how RDMA holds poitners to mm_struct for get_user_pages
     cases

   - First pass to use 'dev_name' instead of ib_device->name

   - Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
  IB/mlx5: Add support for extended atomic operations
  RDMA/core: Fix comment for hw stats init for port == 0
  RDMA/core: Refactor ib_register_device() function
  RDMA/core: Fix unwinding flow in case of error to register device
  ib_srp: Remove WARN_ON in srp_terminate_io()
  IB/mlx5: Allow scatter to CQE without global signaled WRs
  IB/mlx5: Verify that driver supports user flags
  IB/mlx5: Support scatter to CQE for DC transport type
  RDMA/drivers: Use core provided API for registering device attributes
  RDMA/core: Allow existing drivers to set one sysfs group per device
  IB/rxe: Remove unnecessary enum values
  RDMA/umad: Use kernel API to allocate umad indexes
  RDMA/uverbs: Use kernel API to allocate uverbs indexes
  RDMA/core: Increase total number of RDMA ports across all devices
  IB/mlx4: Add port and TID to MAD debug print
  IB/mlx4: Enable debug print of SMPs
  RDMA/core: Rename ports_parent to ports_kobj
  RDMA/core: Do not expose unsupported counters
  IB/mlx4: Refer to the device kobject instead of ports_parent
  RDMA/nldev: Allow IB device rename through RDMA netlink
  ...
parents e5f6d9af a60109dc
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -91,6 +91,24 @@ Description:
		stacked (e.g: VLAN interfaces) but still have the same MAC
		address as their parent device.

What:		/sys/class/net/<iface>/dev_port
Date:		February 2014
KernelVersion:	3.15
Contact:	netdev@vger.kernel.org
Description:
		Indicates the port number of this network device, formatted
		as a decimal value. Some NICs have multiple independent ports
		on the same PCI bus, device and function. This attribute allows
		userspace to distinguish the respective interfaces.

		Note: some device drivers started to use 'dev_id' for this
		purpose since long before 3.15 and have not adopted the new
		attribute ever since. To query the port number, some tools look
		exclusively at 'dev_port', while others only consult 'dev_id'.
		If a network device has multiple client adapter ports as
		described in the previous paragraph and does not set this
		attribute to its port number, it's a kernel bug.

What:		/sys/class/net/<iface>/dormant
Date:		March 2006
KernelVersion:	2.6.17
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
	tristate "InfiniBand userspace access (verbs and CM)"
	select ANON_INODES
	depends on MMU
	---help---
	  Userspace InfiniBand access support.  This enables the
	  kernel side of userspace verbs and the userspace
+252 −154
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
@@ -61,6 +62,7 @@ struct addr_req {
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	struct delayed_work work;
	bool resolve_by_gid_attr;	/* Consider gid attr in resolve phase */
	int status;
	u32 seq;
};
@@ -219,18 +221,54 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
}
EXPORT_SYMBOL(rdma_addr_size_kss);

void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
		    const struct net_device *dev,
		    const unsigned char *dst_dev_addr)
/**
 * rdma_copy_src_l2_addr - Copy netdevice source addresses
 * @dev_addr:	Destination address pointer where to copy the addresses
 * @dev:	Netdevice whose source addresses to copy
 *
 * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
 * This includes unicast address, broadcast address, device type and
 * interface index.
 */
void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
			   const struct net_device *dev)
{
	dev_addr->dev_type = dev->type;
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	dev_addr->bound_dev_if = dev->ifindex;
}
EXPORT_SYMBOL(rdma_copy_addr);
EXPORT_SYMBOL(rdma_copy_src_l2_addr);

static struct net_device *
rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
{
	struct net_device *dev = NULL;
	int ret = -EADDRNOTAVAIL;

	switch (src_in->sa_family) {
	case AF_INET:
		dev = __ip_dev_find(net,
				    ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
				    false);
		if (dev)
			ret = 0;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		for_each_netdev_rcu(net, dev) {
			if (ipv6_chk_addr(net,
					  &((const struct sockaddr_in6 *)src_in)->sin6_addr,
					  dev, 1)) {
				ret = 0;
				break;
			}
		}
		break;
#endif
	}
	return ret ? ERR_PTR(ret) : dev;
}

int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr)
@@ -241,38 +279,17 @@ int rdma_translate_ip(const struct sockaddr *addr,
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		if (!dev)
			return -ENODEV;
		rdma_copy_addr(dev_addr, dev, NULL);
		rdma_copy_src_l2_addr(dev_addr, dev);
		dev_put(dev);
		return 0;
	}

	switch (addr->sa_family) {
	case AF_INET:
		dev = ip_dev_find(dev_addr->net,
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;

		rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
	rcu_read_lock();
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
					  dev, 1)) {
				rdma_copy_addr(dev_addr, dev, NULL);
				break;
			}
		}
	dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
	if (!IS_ERR(dev))
		rdma_copy_src_l2_addr(dev_addr, dev);
	rcu_read_unlock();
		break;
#endif
	}
	return 0;
	return PTR_ERR_OR_ZERO(dev);
}
EXPORT_SYMBOL(rdma_translate_ip);

@@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
	spin_unlock_bh(&lock);
}

static int ib_nl_fetch_ha(const struct dst_entry *dst,
			  struct rdma_dev_addr *dev_addr,
static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
			  const void *daddr, u32 seq, u16 family)
{
	if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
	if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
		return -EADDRNOTAVAIL;

	/* We fill in what we can, the response will fill the rest */
	rdma_copy_addr(dev_addr, dst->dev, NULL);
	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}

@@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
		neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
		rdma_copy_addr(dev_addr, dst->dev, n->ha);
		memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
	}

	neigh_release(n);
@@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
		(const void *)&dst_in6->sin6_addr;
	sa_family_t family = dst_in->sa_family;

	/* Gateway + ARPHRD_INFINIBAND -> IB router */
	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
	/* If we have a gateway in IB mode then it must be an IB network */
	if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
		return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
	else
		return dst_fetch_ha(dst, dev_addr, daddr);
}

static int addr4_resolve(struct sockaddr_in *src_in,
			 const struct sockaddr_in *dst_in,
static int addr4_resolve(struct sockaddr *src_sock,
			 const struct sockaddr *dst_sock,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
{
	struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
	const struct sockaddr_in *dst_in =
			(const struct sockaddr_in *)dst_sock;

	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
	struct rtable *rt;
@@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
	if (ret)
		return ret;

	src_in->sin_family = AF_INET;
	src_in->sin_addr.s_addr = fl4.saddr;

	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
	 * type accordingly.
	 */
	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV4;

	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);

	*prt = rt;
@@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
}

#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
static int addr6_resolve(struct sockaddr *src_sock,
			 const struct sockaddr *dst_sock,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
	const struct sockaddr_in6 *dst_in =
				(const struct sockaddr_in6 *)dst_sock;
	struct flowi6 fl6;
	struct dst_entry *dst;
	struct rt6_info *rt;
	int ret;

	memset(&fl6, 0, sizeof fl6);
@@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
	if (ret < 0)
		return ret;

	rt = (struct rt6_info *)dst;
	if (ipv6_addr_any(&src_in->sin6_addr)) {
		src_in->sin6_family = AF_INET6;
	if (ipv6_addr_any(&src_in->sin6_addr))
		src_in->sin6_addr = fl6.saddr;
	}

	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
	 * type accordingly.
	 */
	if (rt->rt6i_flags & RTF_GATEWAY &&
	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV6;

	addr->hoplimit = ip6_dst_hoplimit(dst);

@@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
	return 0;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
static int addr6_resolve(struct sockaddr *src_sock,
			 const struct sockaddr *dst_sock,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
@@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
static int addr_resolve_neigh(const struct dst_entry *dst,
			      const struct sockaddr *dst_in,
			      struct rdma_dev_addr *addr,
			      unsigned int ndev_flags,
			      u32 seq)
{
	if (dst->dev->flags & IFF_LOOPBACK) {
		int ret;
	int ret = 0;

		ret = rdma_translate_ip(dst_in, addr);
		if (!ret)
			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
			       MAX_ADDR_LEN);
	if (ndev_flags & IFF_LOOPBACK) {
		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
	} else {
		if (!(ndev_flags & IFF_NOARP)) {
			/* If the device doesn't do ARP internally */
			ret = fetch_ha(dst, addr, dst_in, seq);
		}
	}
	return ret;
}

static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
			    const struct sockaddr *dst_in,
			    const struct dst_entry *dst,
			    const struct net_device *ndev)
{
	int ret = 0;

	if (dst->dev->flags & IFF_LOOPBACK)
		ret = rdma_translate_ip(dst_in, dev_addr);
	else
		rdma_copy_src_l2_addr(dev_addr, dst->dev);

	/*
	 * If there's a gateway and type of device not ARPHRD_INFINIBAND,
	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
	 * network type accordingly.
	 */
	if (has_gateway(dst, dst_in->sa_family) &&
	    ndev->type != ARPHRD_INFINIBAND)
		dev_addr->network = dst_in->sa_family == AF_INET ?
						RDMA_NETWORK_IPV4 :
						RDMA_NETWORK_IPV6;
	else
		dev_addr->network = RDMA_NETWORK_IB;

	return ret;
}

	/* If the device doesn't do ARP internally */
	if (!(dst->dev->flags & IFF_NOARP))
		return fetch_ha(dst, addr, dst_in, seq);
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
				 unsigned int *ndev_flags,
				 const struct sockaddr *dst_in,
				 const struct dst_entry *dst)
{
	struct net_device *ndev = READ_ONCE(dst->dev);

	*ndev_flags = ndev->flags;
	/* A physical device must be the RDMA device to use */
	if (ndev->flags & IFF_LOOPBACK) {
		/*
		 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
		 * loopback IP address. So if route is resolved to loopback
		 * interface, translate that to a real ndev based on non
		 * loopback IP address.
		 */
		ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
		if (IS_ERR(ndev))
			return -ENODEV;
	}

	rdma_copy_addr(addr, dst->dev, NULL);
	return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
}

static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
{
	struct net_device *ndev;

	ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	/*
	 * Since we are holding the rcu, reading net and ifindex
	 * are safe without any additional reference; because
	 * change_net_namespace() in net/core/dev.c does rcu sync
	 * after it changes the state to IFF_DOWN and before
	 * updating netdev fields {net, ifindex}.
	 */
	addr->net = dev_net(ndev);
	addr->bound_dev_if = ndev->ifindex;
	return 0;
}

static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
{
	addr->net = &init_net;
	addr->bound_dev_if = 0;
}

static int addr_resolve(struct sockaddr *src_in,
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
			bool resolve_neigh,
			bool resolve_by_gid_attr,
			u32 seq)
{
	struct net_device *ndev;
	struct dst_entry *dst;
	struct dst_entry *dst = NULL;
	unsigned int ndev_flags = 0;
	struct rtable *rt = NULL;
	int ret;

	if (!addr->net) {
@@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
		return -EINVAL;
	}

	if (src_in->sa_family == AF_INET) {
		struct rtable *rt = NULL;
		const struct sockaddr_in *dst_in4 =
			(const struct sockaddr_in *)dst_in;

		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    dst_in4, addr, &rt);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = rt->dst.dev;
			dev_hold(ndev);
	rcu_read_lock();
	if (resolve_by_gid_attr) {
		if (!addr->sgid_attr) {
			rcu_read_unlock();
			pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
			return -EINVAL;
		}

		ip_rt_put(rt);
	} else {
		const struct sockaddr_in6 *dst_in6 =
			(const struct sockaddr_in6 *)dst_in;

		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    dst_in6, addr,
				    &dst);
		if (ret)
		/*
		 * If the request is for a specific gid attribute of the
		 * rdma_dev_addr, derive net from the netdevice of the
		 * GID attribute.
		 */
		ret = set_addr_netns_by_gid_rcu(addr);
		if (ret) {
			rcu_read_unlock();
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		}
	}
	if (src_in->sa_family == AF_INET) {
		ret = addr4_resolve(src_in, dst_in, addr, &rt);
		dst = &rt->dst;
	} else {
			ndev = dst->dev;
			dev_hold(ndev);
		ret = addr6_resolve(src_in, dst_in, addr, &dst);
	}

		dst_release(dst);
	if (ret) {
		rcu_read_unlock();
		goto done;
	}
	ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
	rcu_read_unlock();

	if (ndev) {
		if (ndev->flags & IFF_LOOPBACK)
			ret = rdma_translate_ip(dst_in, addr);
		else
			addr->bound_dev_if = ndev->ifindex;
		dev_put(ndev);
	}
	/*
	 * Resolve neighbor destination address if requested and
	 * only if src addr translation didn't fail.
	 */
	if (!ret && resolve_neigh)
		ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);

	if (src_in->sa_family == AF_INET)
		ip_rt_put(rt);
	else
		dst_release(dst);
done:
	/*
	 * Clear the addr net to go back to its original state, only if it was
	 * derived from GID attribute in this context.
	 */
	if (resolve_by_gid_attr)
		rdma_addr_set_net_defaults(addr);
	return ret;
}

@@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
		src_in = (struct sockaddr *)&req->src_addr;
		dst_in = (struct sockaddr *)&req->dst_addr;
		req->status = addr_resolve(src_in, dst_in, req->addr,
					   true, req->seq);
					   true, req->resolve_by_gid_attr,
					   req->seq);
		if (req->status && time_after_eq(jiffies, req->timeout)) {
			req->status = -ETIMEDOUT;
		} else if (req->status == -ENODATA) {
@@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
}

int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    struct rdma_dev_addr *addr, unsigned long timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
		    bool resolve_by_gid_attr, void *context)
{
	struct sockaddr *src_in, *dst_in;
	struct addr_req *req;
@@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
	req->addr = addr;
	req->callback = callback;
	req->context = context;
	req->resolve_by_gid_attr = resolve_by_gid_attr;
	INIT_DELAYED_WORK(&req->work, process_one_req);
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);

	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
	req->status = addr_resolve(src_in, dst_in, addr, true,
				   req->resolve_by_gid_attr, req->seq);
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
@@ -641,25 +716,53 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
}
EXPORT_SYMBOL(rdma_resolve_ip);

int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
int roce_resolve_route_from_path(struct sa_path_rec *rec,
				 const struct ib_gid_attr *attr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid, dgid;
	struct rdma_dev_addr dev_addr = {};
	int ret;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
	if (rec->roce.route_resolved)
		return 0;

	rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
	rdma_gid2ip(&dgid._sockaddr, &rec->dgid);

	if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
		return -EINVAL;

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}
	if (!attr || !attr->ndev)
		return -EINVAL;

	dev_addr.net = &init_net;
	dev_addr.sgid_attr = attr;

	return addr_resolve(src_in, dst_addr, addr, false, 0);
	ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
			   &dev_addr, false, true, 0);
	if (ret)
		return ret;

	if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
	     dev_addr.network == RDMA_NETWORK_IPV6) &&
	    rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
		return -EINVAL;

	rec->roce.route_resolved = true;
	return 0;
}

/**
 * rdma_addr_cancel - Cancel resolve ip request
 * @addr:	Pointer to address structure given previously
 *		during rdma_resolve_ip().
 * rdma_addr_cancel() is synchronous function which cancels any pending
 * request if there is any.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *temp_req;
@@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
	 * guarentees no work is running and none will be started.
	 */
	cancel_delayed_work_sync(&found->work);

	if (found->callback)
		found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
			      found->addr, found->context);

	kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
@@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,

int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
				 u8 *dmac, const struct net_device *ndev,
				 u8 *dmac, const struct ib_gid_attr *sgid_attr,
				 int *hoplimit)
{
	struct rdma_dev_addr dev_addr;
@@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.bound_dev_if = ndev->ifindex;
	dev_addr.net = &init_net;
	dev_addr.sgid_attr = sgid_attr;

	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
			      &dev_addr, 1000, resolve_cb, &ctx);
			      &dev_addr, 1000, resolve_cb, true, &ctx);
	if (ret)
		return ret;

+55 −24
Original line number Diff line number Diff line
@@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
	u8 port_num = entry->attr.port_num;
	struct ib_gid_table *table = rdma_gid_table(device, port_num);

	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
		 device->name, port_num, entry->attr.index,
		 entry->attr.gid.raw);
	dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
		port_num, entry->attr.index, entry->attr.gid.raw);

	if (rdma_cap_roce_gid_table(device, port_num) &&
	    entry->state != GID_TABLE_ENTRY_INVALID)
@@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
{
	entry->state = GID_TABLE_ENTRY_VALID;

	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
		 entry->attr.device->name, entry->attr.port_num,
		 entry->attr.index, entry->attr.gid.raw);
	dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
		__func__, entry->attr.port_num, entry->attr.index,
		entry->attr.gid.raw);

	lockdep_assert_held(&table->lock);
	write_lock_irq(&table->rwlock);
@@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
	int ret;

	if (!attr->ndev) {
		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
		       __func__, attr->device->name, attr->port_num,
		       attr->index);
		dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
			__func__, attr->port_num, attr->index);
		return -EINVAL;
	}
	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
		ret = attr->device->add_gid(attr, &entry->context);
		if (ret) {
			pr_err("%s GID add failed device=%s port=%d index=%d\n",
			       __func__, attr->device->name, attr->port_num,
			       attr->index);
			dev_err(&attr->device->dev,
				"%s GID add failed port=%d index=%d\n",
				__func__, attr->port_num, attr->index);
			return ret;
		}
	}
@@ -353,9 +351,8 @@ static void del_gid(struct ib_device *ib_dev, u8 port,

	lockdep_assert_held(&table->lock);

	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
		 ib_dev->name, port, ix,
		 table->data_vec[ix]->attr.gid.raw);
	dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
		ix, table->data_vec[ix]->attr.gid.raw);

	write_lock_irq(&table->rwlock);
	entry = table->data_vec[ix];
@@ -782,8 +779,8 @@ static void release_gid_table(struct ib_device *device, u8 port,
		if (is_gid_entry_free(table->data_vec[i]))
			continue;
		if (kref_read(&table->data_vec[i]->kref) > 1) {
			pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
			       device->name, i,
			dev_err(&device->dev,
				"GID entry ref leak for index %d ref=%d\n", i,
				kref_read(&table->data_vec[i]->kref));
			leak = true;
		}
@@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
}
EXPORT_SYMBOL(rdma_hold_gid_attr);

/**
 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
 * which must be in UP state.
 *
 * @attr:Pointer to the GID attribute
 *
 * Returns pointer to netdevice if the netdevice was attached to GID and
 * netdevice is in UP state. Caller must hold RCU lock as this API
 * reads the netdev flags which can change while netdevice migrates to
 * different net namespace. Returns ERR_PTR with error code otherwise.
 *
 */
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
			container_of(attr, struct ib_gid_table_entry, attr);
	struct ib_device *device = entry->attr.device;
	struct net_device *ndev = ERR_PTR(-ENODEV);
	u8 port_num = entry->attr.port_num;
	struct ib_gid_table *table;
	unsigned long flags;
	bool valid;

	table = rdma_gid_table(device, port_num);

	read_lock_irqsave(&table->rwlock, flags);
	valid = is_gid_entry_valid(table->data_vec[attr->index]);
	if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
		ndev = attr->ndev;
	read_unlock_irqrestore(&table->rwlock, flags);
	return ndev;
}

static int config_non_roce_gid_cache(struct ib_device *device,
				     u8 port, int gid_tbl_len)
{
@@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
			continue;
		ret = device->query_gid(device, port, i, &gid_attr.gid);
		if (ret) {
			pr_warn("query_gid failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			dev_warn(&device->dev,
				 "query_gid failed (%d) for index %d\n", ret,
				 i);
			goto err;
		}
		gid_attr.index = i;
@@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
		dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
		goto err;
	}

@@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			dev_warn(&device->dev,
				 "ib_query_pkey failed (%d) for index %d\n",
				 ret, i);
			goto err;
		}
	}
+6 −3
Original line number Diff line number Diff line
@@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
	if (ret)
		goto unlock;

	cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
			   cm_id_priv);
	ret = cm_init_av_by_path(param->alternate_path, NULL,
				 &cm_id_priv->alt_av, cm_id_priv);
	if (ret)
		goto unlock;

	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
	cm_id_priv->tid = lap_msg->hdr.tid;
	ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
	cm_dev->going_down = 0;
	cm_dev->device = device_create(&cm_class, &ib_device->dev,
				       MKDEV(0, 0), NULL,
				       "%s", ib_device->name);
				       "%s", dev_name(&ib_device->dev));
	if (IS_ERR(cm_dev->device)) {
		kfree(cm_dev);
		return;
Loading