Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c84a39c authored by Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (46 commits)
  IB/uverbs: Don't serialize with ib_uverbs_idr_mutex
  IB/mthca: Make all device methods truly reentrant
  IB/mthca: Fix memory leak on modify_qp error paths
  IB/uverbs: Factor out common idr code
  IB/uverbs: Don't decrement usecnt on error paths
  IB/uverbs: Release lock on error path
  IB/cm: Use address handle helpers
  IB/sa: Add ib_init_ah_from_path()
  IB: Add ib_init_ah_from_wc()
  IB/ucm: Get rid of duplicate P_Key parameter
  IB/srp: Factor out common request reset code
  IB/srp: Support SRP rev. 10 targets
  [SCSI] srp.h: Add I/O Class values
  IB/fmr: Use device's max_map_per_fmr attribute in FMR pool.
  IB/mthca: Fill in max_map_per_fmr device attribute
  IB/ipath: Add client reregister event generation
  IB/mthca: Add client reregister event generation
  IB: Move struct port_info from ipath to <rdma/ib_smi.h>
  IPoIB: Handle client reregister events
  IB: Add client reregister event type
  ...
parents d0b952a9 9ead190b
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
IP OVER INFINIBAND

  The ib_ipoib driver is an implementation of the IP over InfiniBand
  protocol as specified by the latest Internet-Drafts issued by the
  IETF ipoib working group.  It is a "native" implementation in the
  sense of setting the interface type to ARPHRD_INFINIBAND and the
  hardware address length to 20 (earlier proprietary implementations
  protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
  working group.  It is a "native" implementation in the sense of
  setting the interface type to ARPHRD_INFINIBAND and the hardware
  address length to 20 (earlier proprietary implementations
  masqueraded to the kernel as ethernet interfaces).

Partitions and P_Keys
@@ -53,3 +53,7 @@ References

  IETF IP over InfiniBand (ipoib) Working Group
    http://ietf.org/html.charters/ipoib-charter.html
  Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
    http://ietf.org/rfc/rfc4391.txt 
  IP over InfiniBand (IPoIB) Architecture (RFC 4392)
    http://ietf.org/rfc/rfc4392.txt 
+5 −0
Original line number Diff line number Diff line
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
	  libibverbs, libibcm and a hardware driver library from
	  <http://www.openib.org>.

config INFINIBAND_ADDR_TRANS
	bool
	depends on INFINIBAND && INET
	default y

source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"

+9 −2
Original line number Diff line number Diff line
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o

obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
					ib_cm.o
					ib_cm.o $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o

@@ -12,8 +14,13 @@ ib_sa-y := sa_query.o

ib_cm-y :=			cm.o

rdma_cm-y :=			cma.o

ib_addr-y :=			addr.o

ib_umad-y :=			user_mad.o

ib_ucm-y :=			ucm.o

ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o
ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o \
				uverbs_marshall.o
+367 −0
Original line number Diff line number Diff line
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This Software is licensed under one of the following licenses:
 *
 * 1) under the terms of the "Common Public License 1.0" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/cpl.php.
 *
 * 2) under the terms of the "The BSD License" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/bsd-license.php.
 *
 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 *    copy of which is available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/gpl-license.php.
 *
 * Licensee has the right to choose one of the above licenses.
 *
 * Redistributions of source code must retain the above copyright
 * notice and one of the license notices.
 *
 * Redistributions in binary form must reproduce both the above copyright
 * notice, one of the license notices in the documentation
 * and/or other materials provided with the distribution.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <rdma/ib_addr.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * One outstanding address resolution request.  Requests are allocated in
 * rdma_resolve_ip(), kept on req_list while pending, and freed by
 * process_req() after the callback has been invoked.
 */
struct addr_req {
	/* Links the request into req_list, or into process_req()'s done list. */
	struct list_head list;
	/* Private copies of the addresses passed to rdma_resolve_ip(). */
	struct sockaddr src_addr;
	struct sockaddr dst_addr;
	/* Caller-owned result buffer; also the key used by rdma_addr_cancel(). */
	struct rdma_dev_addr *addr;
	/* Opaque caller cookie handed back unchanged to the callback. */
	void *context;
	/* Completion callback; receives the final status and &src_addr. */
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	/* Absolute deadline, in jiffies. */
	unsigned long timeout;
	/* 0 once resolved, -ENODATA while pending, or another negative errno. */
	int status;
};

/* Forward declaration: DECLARE_WORK() below needs the handler's address. */
static void process_req(void *data);

/* Held around every access to req_list in this file. */
static DEFINE_MUTEX(lock);
/* Pending requests; queue_req() inserts them ordered by timeout. */
static LIST_HEAD(req_list);
/* Delayed work item (see set_timeout()) that runs process_req(). */
static DECLARE_WORK(work, process_req, NULL);
/* Workqueue the work item is queued on; created in addr_init(). */
static struct workqueue_struct *addr_wq;

static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     unsigned char *dst_dev_addr)
{
	switch (dev->type) {
	case ARPHRD_INFINIBAND:
		dev_addr->dev_type = IB_NODE_CA;
		break;
	default:
		return -EADDRNOTAVAIL;
	}

	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	return 0;
}

/*
 * Translate a local IPv4 address into the hardware address of the network
 * device that owns it.
 *
 * @addr:     address to look up; it is accessed as a struct sockaddr_in.
 * @dev_addr: filled in by copy_addr() on success (no destination address).
 *
 * Returns 0 on success, or -EADDRNOTAVAIL when no device owns the address
 * or copy_addr() rejects the device.
 */
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	struct sockaddr_in *addr_in = (struct sockaddr_in *) addr;
	struct net_device *netdev;
	int status;

	netdev = ip_dev_find(addr_in->sin_addr.s_addr);
	if (netdev == NULL)
		return -EADDRNOTAVAIL;

	status = copy_addr(dev_addr, netdev, NULL);

	/* Release the device obtained from ip_dev_find(). */
	dev_put(netdev);
	return status;
}
EXPORT_SYMBOL(rdma_translate_ip);

/*
 * (Re)arm the delayed work so that process_req() runs at absolute time
 * @time (in jiffies).  A deadline that is now or already in the past is
 * turned into a delay of one jiffy.
 */
static void set_timeout(unsigned long time)
{
	long remaining;

	cancel_delayed_work(&work);

	/* Signed difference so that a wrapped/expired deadline is negative. */
	remaining = (long) (time - jiffies);
	queue_delayed_work(addr_wq, &work, remaining > 0 ? remaining : 1);
}

static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_reverse(temp_req, &req_list, list) {
		if (time_after(req->timeout, temp_req->timeout))
			break;
	}

	list_add(&req->list, &temp_req->list);

	if (req_list.next == &req->list)
		set_timeout(req->timeout);
	mutex_unlock(&lock);
}

static void addr_send_arp(struct sockaddr_in *dst_in)
{
	struct rtable *rt;
	struct flowi fl;
	u32 dst_ip = dst_in->sin_addr.s_addr;

	memset(&fl, 0, sizeof fl);
	fl.nl_u.ip4_u.daddr = dst_ip;
	if (ip_route_output_key(&rt, &fl))
		return;

	arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
		 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
	ip_rt_put(rt);
}

/*
 * Try to resolve a non-local destination using the routing table and the
 * ARP neighbour table.  No packet is sent from here.
 *
 * @src_in: source address; when it is 0.0.0.0 and a valid neighbour entry
 *          is found, it is overwritten with the route's source address.
 * @dst_in: destination address to resolve.
 * @addr:   filled in by copy_addr() on success.
 *
 * Returns 0 when @addr has been filled in, -ENODATA when there is no valid
 * neighbour entry (yet), or another negative errno from the route lookup
 * or from copy_addr().
 */
static int addr_resolve_remote(struct sockaddr_in *src_in,
			       struct sockaddr_in *dst_in,
			       struct rdma_dev_addr *addr)
{
	u32 src_ip = src_in->sin_addr.s_addr;
	u32 dst_ip = dst_in->sin_addr.s_addr;
	struct flowi fl;
	struct rtable *rt;
	struct neighbour *neigh;
	int ret;

	memset(&fl, 0, sizeof fl);
	fl.nl_u.ip4_u.daddr = dst_ip;
	fl.nl_u.ip4_u.saddr = src_ip;
	ret = ip_route_output_key(&rt, &fl);
	if (ret)
		goto out;

	/*
	 * If the device does ARP internally, return 'done'.  The result of
	 * copy_addr() must still be propagated: it fails for devices that
	 * are not InfiniBand, and reporting success in that case would hand
	 * the caller an @addr that was never filled in.
	 */
	if (rt->idev->dev->flags & IFF_NOARP) {
		ret = copy_addr(addr, rt->idev->dev, NULL);
		goto put;
	}

	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
	if (!neigh) {
		ret = -ENODATA;
		goto put;
	}

	/* An entry exists but has not been resolved: caller should retry. */
	if (!(neigh->nud_state & NUD_VALID)) {
		ret = -ENODATA;
		goto release;
	}

	/* Caller did not pick a source address: use the one from the route. */
	if (!src_ip) {
		src_in->sin_family = dst_in->sin_family;
		src_in->sin_addr.s_addr = rt->rt_src;
	}

	ret = copy_addr(addr, neigh->dev, neigh->ha);
release:
	neigh_release(neigh);
put:
	ip_rt_put(rt);
out:
	return ret;
}

/*
 * Work handler: retry pending requests and complete the finished ones.
 *
 * Under the lock, every request that is still waiting for a neighbour
 * entry (-ENODATA) is resolved again; if it is still failing and past its
 * deadline it is marked -ETIMEDOUT, and if it is merely still pending it
 * stays on req_list.  Requests in any other state (already resolved, or
 * cancelled with -ECANCELED) are completed with the status they carry.
 * The work is then re-armed for the earliest remaining request.
 *
 * Callbacks are invoked after the lock has been dropped, and each
 * completed request is freed afterwards.
 *
 * @data: unused.
 */
static void process_req(void *data)
{
	struct addr_req *req, *temp_req;
	struct sockaddr_in *src_in, *dst_in;
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		/*
		 * Only retry requests that are genuinely pending.  Retrying
		 * on any non-zero status would overwrite -ECANCELED set by
		 * rdma_addr_cancel() and report the wrong result.
		 */
		if (req->status == -ENODATA) {
			src_in = (struct sockaddr_in *) &req->src_addr;
			dst_in = (struct sockaddr_in *) &req->dst_addr;
			req->status = addr_resolve_remote(src_in, dst_in,
							  req->addr);
			if (req->status && time_after(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA)
				continue;
		}

		list_del(&req->list);
		list_add_tail(&req->list, &done_list);
	}

	if (!list_empty(&req_list)) {
		req = list_entry(req_list.next, struct addr_req, list);
		set_timeout(req->timeout);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		req->callback(req->status, &req->src_addr, req->addr,
			      req->context);
		kfree(req);
	}
}

/*
 * Resolve a destination that is one of this host's own addresses.
 *
 * Returns -EADDRNOTAVAIL when no local device owns @dst_in.  Otherwise:
 *  - source in the zero network: *@src_in is set to the destination and
 *    @addr is filled from the destination's device;
 *  - loopback source: @addr is translated from the destination address;
 *  - any other source: @addr is translated from the source address.
 * In the last two cases the destination hardware address is then set to
 * that of the destination's device.
 */
static int addr_resolve_local(struct sockaddr_in *src_in,
			      struct sockaddr_in *dst_in,
			      struct rdma_dev_addr *addr)
{
	u32 src_ip = src_in->sin_addr.s_addr;
	u32 dst_ip = dst_in->sin_addr.s_addr;
	struct sockaddr_in *bind_in;
	struct net_device *dev;
	int ret;

	dev = ip_dev_find(dst_ip);
	if (!dev)
		return -EADDRNOTAVAIL;

	if (ZERONET(src_ip)) {
		src_in->sin_family = dst_in->sin_family;
		src_in->sin_addr.s_addr = dst_ip;
		ret = copy_addr(addr, dev, dev->dev_addr);
		goto out;
	}

	/* Pick which address determines the source device. */
	bind_in = LOOPBACK(src_ip) ? dst_in : src_in;

	ret = rdma_translate_ip((struct sockaddr *) bind_in, addr);
	if (ret == 0)
		memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
out:
	dev_put(dev);
	return ret;
}

/*
 * Start resolving an IP address pair to RDMA device addresses.
 *
 * @src_addr:   optional source address (may be NULL).
 * @dst_addr:   destination address to resolve.
 * @addr:       caller-owned result buffer; also identifies the request to
 *              rdma_addr_cancel().
 * @timeout_ms: how long to wait for the neighbour entry to become valid.
 * @callback:   invoked from the address workqueue with the final status.
 * @context:    opaque pointer passed back to @callback.
 *
 * Returns 0 when the request was queued; @callback is then called later,
 * also when the address could be resolved immediately.  Returns -ENOMEM or
 * the resolution error otherwise, in which case @callback is not called.
 */
int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
	struct sockaddr_in *src_in, *dst_in;
	struct addr_req *req;
	int ret = 0;

	req = kmalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;
	memset(req, 0, sizeof *req);

	if (src_addr)
		memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
	memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
	req->addr = addr;
	req->callback = callback;
	req->context = context;

	src_in = (struct sockaddr_in *) &req->src_addr;
	dst_in = (struct sockaddr_in *) &req->dst_addr;

	/* Try our own addresses first, then the routing/neighbour tables. */
	req->status = addr_resolve_local(src_in, dst_in, addr);
	if (req->status == -EADDRNOTAVAIL)
		req->status = addr_resolve_remote(src_in, dst_in, addr);

	switch (req->status) {
	case 0:
		/* Already resolved: complete asynchronously, right away. */
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		/*
		 * Once queued, the request belongs to the worker, which may
		 * complete and free it at any time.  Do not touch req (or
		 * dst_in, which points into it) from here on; use the
		 * caller's copy of the destination instead.
		 */
		addr_send_arp((struct sockaddr_in *) dst_addr);
		break;
	default:
		ret = req->status;
		kfree(req);
		break;
	}
	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

/*
 * Cancel the pending request whose result buffer is @addr, if any.
 *
 * The first matching request is marked -ECANCELED, given an immediate
 * deadline, moved to the head of req_list, and the delayed work is
 * re-armed so that process_req() picks it up promptly.  Nothing happens
 * when no request matches.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *cur, *next;

	mutex_lock(&lock);
	list_for_each_entry_safe(cur, next, &req_list, list) {
		if (cur->addr != addr)
			continue;

		cur->status = -ECANCELED;
		cur->timeout = jiffies;

		/* Move to the front of the queue. */
		list_del(&cur->list);
		list_add(&cur->list, &req_list);

		set_timeout(cur->timeout);
		break;
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

/*
 * Packet handler for ARP frames.  Any ARP request or reply triggers an
 * immediate run of the request worker so that pending requests are
 * re-checked.  The skb is always released here and 0 is returned.
 */
static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
			 struct packet_type *pkt, struct net_device *orig_dev)
{
	struct arphdr *hdr = (struct arphdr *) skb->nh.raw;
	int is_request = hdr->ar_op == htons(ARPOP_REQUEST);
	int is_reply = hdr->ar_op == htons(ARPOP_REPLY);

	if (is_request || is_reply)
		set_timeout(jiffies);

	kfree_skb(skb);
	return 0;
}

/*
 * Packet type used to hook addr_arp_recv() onto ETH_P_ARP frames; it is
 * registered in addr_init() and removed in addr_cleanup().
 * NOTE(review): the meaning of the non-NULL af_packet_priv marker is not
 * visible in this file - confirm against the packet handler core.
 */
static struct packet_type addr_arp = {
	.func		= addr_arp_recv,
	.type		= __constant_htons(ETH_P_ARP),
	.af_packet_priv	= (void *) 1,
};

/*
 * Module init: create the single-threaded workqueue used for request
 * processing, then register the ARP packet handler.
 *
 * Returns 0 on success or -ENOMEM when the workqueue cannot be created.
 */
static int addr_init(void)
{
	addr_wq = create_singlethread_workqueue("ib_addr_wq");
	if (addr_wq) {
		dev_add_pack(&addr_arp);
		return 0;
	}

	return -ENOMEM;
}

/*
 * Module exit: unregister the ARP packet handler, then destroy the
 * workqueue - the reverse of the order used in addr_init().
 * NOTE(review): requests still sitting on req_list are neither completed
 * nor freed here; presumably callers must have cancelled them - confirm.
 */
static void addr_cleanup(void)
{
	dev_remove_pack(&addr_arp);
	destroy_workqueue(addr_wq);
}

module_init(addr_init);
module_exit(addr_cleanup);
+29 −1
Original line number Diff line number Diff line
@@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_find_cached_pkey);

/*
 * Read the cached LMC value of a port.
 *
 * @device:   device whose cache is queried.
 * @port_num: port number; must lie within [start_port(), end_port()].
 * @lmc:      receives the cached value.
 *
 * Returns 0 on success or -EINVAL when @port_num is out of range, in
 * which case *@lmc is not written.
 */
int ib_get_cached_lmc(struct ib_device *device,
		      u8                port_num,
		      u8                *lmc)
{
	unsigned long flags;

	if (port_num < start_port(device) || port_num > end_port(device))
		return -EINVAL;

	/* The cache is read under its read lock with interrupts disabled. */
	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.lmc_cache[port_num - start_port(device)];
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

static void ib_cache_update(struct ib_device *device,
			    u8                port)
{
@@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device,
	device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
	device->cache.gid_cache [port - start_port(device)] = gid_cache;

	device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;

	write_unlock_irq(&device->cache.lock);

	kfree(old_pkey_cache);
@@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device)
		kmalloc(sizeof *device->cache.gid_cache *
			(end_port(device) - start_port(device) + 1), GFP_KERNEL);

	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
	device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
					  (end_port(device) -
					   start_port(device) + 1),
					  GFP_KERNEL);

	if (!device->cache.pkey_cache || !device->cache.gid_cache ||
	    !device->cache.lmc_cache) {
		printk(KERN_WARNING "Couldn't allocate cache "
		       "for %s\n", device->name);
		goto err;
@@ -333,6 +359,7 @@ static void ib_cache_setup_one(struct ib_device *device)
err:
	kfree(device->cache.pkey_cache);
	kfree(device->cache.gid_cache);
	kfree(device->cache.lmc_cache);
}

static void ib_cache_cleanup_one(struct ib_device *device)
@@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)

	kfree(device->cache.pkey_cache);
	kfree(device->cache.gid_cache);
	kfree(device->cache.lmc_cache);
}

static struct ib_client cache_client = {
Loading