Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c84a39c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (46 commits)
  IB/uverbs: Don't serialize with ib_uverbs_idr_mutex
  IB/mthca: Make all device methods truly reentrant
  IB/mthca: Fix memory leak on modify_qp error paths
  IB/uverbs: Factor out common idr code
  IB/uverbs: Don't decrement usecnt on error paths
  IB/uverbs: Release lock on error path
  IB/cm: Use address handle helpers
  IB/sa: Add ib_init_ah_from_path()
  IB: Add ib_init_ah_from_wc()
  IB/ucm: Get rid of duplicate P_Key parameter
  IB/srp: Factor out common request reset code
  IB/srp: Support SRP rev. 10 targets
  [SCSI] srp.h: Add I/O Class values
  IB/fmr: Use device's max_map_per_fmr attribute in FMR pool.
  IB/mthca: Fill in max_map_per_fmr device attribute
  IB/ipath: Add client reregister event generation
  IB/mthca: Add client reregister event generation
  IB: Move struct port_info from ipath to <rdma/ib_smi.h>
  IPoIB: Handle client reregister events
  IB: Add client reregister event type
  ...
parents d0b952a9 9ead190b
Loading
Loading
Loading
Loading
+8 −4
Original line number Original line Diff line number Diff line
IP OVER INFINIBAND
IP OVER INFINIBAND


  The ib_ipoib driver is an implementation of the IP over InfiniBand
  The ib_ipoib driver is an implementation of the IP over InfiniBand
  protocol as specified by the latest Internet-Drafts issued by the
  protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
  IETF ipoib working group.  It is a "native" implementation in the
  working group.  It is a "native" implementation in the sense of
  sense of setting the interface type to ARPHRD_INFINIBAND and the
  setting the interface type to ARPHRD_INFINIBAND and the hardware
  hardware address length to 20 (earlier proprietary implementations
  address length to 20 (earlier proprietary implementations
  masqueraded to the kernel as ethernet interfaces).
  masqueraded to the kernel as ethernet interfaces).


Partitions and P_Keys
Partitions and P_Keys
@@ -53,3 +53,7 @@ References


  IETF IP over InfiniBand (ipoib) Working Group
  IETF IP over InfiniBand (ipoib) Working Group
    http://ietf.org/html.charters/ipoib-charter.html
    http://ietf.org/html.charters/ipoib-charter.html
  Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
    http://ietf.org/rfc/rfc4391.txt
  IP over InfiniBand (IPoIB) Architecture (RFC 4392)
    http://ietf.org/rfc/rfc4392.txt
+5 −0
Original line number Original line Diff line number Diff line
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
	  libibverbs, libibcm and a hardware driver library from
	  libibverbs, libibcm and a hardware driver library from
	  <http://www.openib.org>.
	  <http://www.openib.org>.


config INFINIBAND_ADDR_TRANS
	bool
	depends on INFINIBAND && INET
	default y

source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"


+9 −2
Original line number Original line Diff line number Diff line
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o

obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
					ib_cm.o
					ib_cm.o $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o


@@ -12,8 +14,13 @@ ib_sa-y := sa_query.o


ib_cm-y :=			cm.o
ib_cm-y :=			cm.o


rdma_cm-y :=			cma.o

ib_addr-y :=			addr.o

ib_umad-y :=			user_mad.o
ib_umad-y :=			user_mad.o


ib_ucm-y :=			ucm.o
ib_ucm-y :=			ucm.o


ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o
ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o \
				uverbs_marshall.o
+367 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This Software is licensed under one of the following licenses:
 *
 * 1) under the terms of the "Common Public License 1.0" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/cpl.php.
 *
 * 2) under the terms of the "The BSD License" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/bsd-license.php.
 *
 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 *    copy of which is available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/gpl-license.php.
 *
 * Licensee has the right to choose one of the above licenses.
 *
 * Redistributions of source code must retain the above copyright
 * notice and one of the license notices.
 *
 * Redistributions in binary form must reproduce both the above copyright
 * notice, one of the license notices in the documentation
 * and/or other materials provided with the distribution.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <rdma/ib_addr.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * One pending address-resolution request.  Requests live on req_list,
 * which is kept sorted by ascending timeout (see queue_req()).
 */
struct addr_req {
	struct list_head list;		/* entry on req_list */
	struct sockaddr src_addr;	/* source IP (may be unspecified) */
	struct sockaddr dst_addr;	/* destination IP to resolve */
	struct rdma_dev_addr *addr;	/* where the result is written */
	void *context;			/* opaque value handed to callback */
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;		/* jiffies deadline for this request */
	int status;			/* 0 on success, -ENODATA while pending,
					 * other negative errno on failure */
};

static void process_req(void *data);

static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static DECLARE_WORK(work, process_req, NULL);
static struct workqueue_struct *addr_wq;

/*
 * Populate the link-layer fields of @dev_addr from net device @dev:
 * source and broadcast hardware addresses always, and the destination
 * hardware address when @dst_dev_addr is non-NULL.  Only InfiniBand
 * link types are supported; anything else yields -EADDRNOTAVAIL.
 */
static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     unsigned char *dst_dev_addr)
{
	if (dev->type != ARPHRD_INFINIBAND)
		return -EADDRNOTAVAIL;

	dev_addr->dev_type = IB_NODE_CA;
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	return 0;
}

/*
 * Translate a local IP address into device hardware addresses.
 * Returns -EADDRNOTAVAIL when no local interface owns @addr.
 */
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
	struct net_device *dev;
	int ret;

	dev = ip_dev_find(sin->sin_addr.s_addr);
	if (!dev)
		return -EADDRNOTAVAIL;

	ret = copy_addr(dev_addr, dev, NULL);
	dev_put(dev);
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);

/*
 * (Re)arm the delayed work item to fire at jiffies value @time,
 * clamping to a minimum of one tick if the deadline has passed.
 */
static void set_timeout(unsigned long time)
{
	long ticks = time - jiffies;

	cancel_delayed_work(&work);

	if (ticks <= 0)
		ticks = 1;

	queue_delayed_work(addr_wq, &work, (unsigned long) ticks);
}

/*
 * Insert @req into req_list, keeping the list sorted by ascending
 * timeout.  If the new request becomes the list head it expires
 * soonest, so the delayed work is re-armed for its deadline.
 */
static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	/* Walk backwards: new requests usually have the latest timeout,
	 * so the insertion point is normally found near the tail. */
	list_for_each_entry_reverse(temp_req, &req_list, list) {
		if (time_after(req->timeout, temp_req->timeout))
			break;
	}

	list_add(&req->list, &temp_req->list);

	if (req_list.next == &req->list)
		set_timeout(req->timeout);
	mutex_unlock(&lock);
}

/*
 * Transmit an ARP request toward the next hop for @dst_in to kick off
 * neighbour resolution.  Best-effort: routing failures are silently
 * ignored and the pending request will simply time out.
 */
static void addr_send_arp(struct sockaddr_in *dst_in)
{
	struct rtable *rt;
	struct flowi fl;
	u32 dst_ip = dst_in->sin_addr.s_addr;

	memset(&fl, 0, sizeof fl);
	fl.nl_u.ip4_u.daddr = dst_ip;
	if (ip_route_output_key(&rt, &fl))
		return;

	arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
		 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
	ip_rt_put(rt);
}

/*
 * Try to resolve @dst_in's hardware address via the routing table and
 * the ARP neighbour cache.  On success the addresses are copied into
 * @addr, and an unspecified source IP is filled in from the route.
 * Returns -ENODATA while the neighbour entry is missing or not yet
 * valid; the caller is expected to trigger an ARP request and retry.
 */
static int addr_resolve_remote(struct sockaddr_in *src_in,
			       struct sockaddr_in *dst_in,
			       struct rdma_dev_addr *addr)
{
	u32 src_ip = src_in->sin_addr.s_addr;
	u32 dst_ip = dst_in->sin_addr.s_addr;
	struct flowi fl;
	struct rtable *rt;
	struct neighbour *neigh;
	int ret;

	memset(&fl, 0, sizeof fl);
	fl.nl_u.ip4_u.daddr = dst_ip;
	fl.nl_u.ip4_u.saddr = src_ip;
	ret = ip_route_output_key(&rt, &fl);
	if (ret)
		goto out;

	/* If the device does ARP internally, return 'done' */
	if (rt->idev->dev->flags & IFF_NOARP) {
		copy_addr(addr, rt->idev->dev, NULL);
		goto put;
	}

	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
	if (!neigh) {
		ret = -ENODATA;
		goto put;
	}

	if (!(neigh->nud_state & NUD_VALID)) {
		ret = -ENODATA;
		goto release;
	}

	/* Caller left the source unspecified: take it from the route. */
	if (!src_ip) {
		src_in->sin_family = dst_in->sin_family;
		src_in->sin_addr.s_addr = rt->rt_src;
	}

	ret = copy_addr(addr, neigh->dev, neigh->ha);
release:
	neigh_release(neigh);
put:
	ip_rt_put(rt);
out:
	return ret;
}

/*
 * Delayed-work handler: retry resolution for every queued request,
 * move requests that succeeded, failed permanently or timed out onto a
 * local done list, then re-arm the timer for whatever remains.  User
 * callbacks run only after the mutex is dropped, so they may safely
 * re-enter this module (e.g. call rdma_resolve_ip() again).
 */
static void process_req(void *data)
{
	struct addr_req *req, *temp_req;
	struct sockaddr_in *src_in, *dst_in;
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		/* Retry anything that has not yet succeeded. */
		if (req->status) {
			src_in = (struct sockaddr_in *) &req->src_addr;
			dst_in = (struct sockaddr_in *) &req->dst_addr;
			req->status = addr_resolve_remote(src_in, dst_in,
							  req->addr);
		}
		/* Expired requests complete with -ETIMEDOUT; requests
		 * still awaiting an ARP reply (-ENODATA) stay queued. */
		if (req->status && time_after(jiffies, req->timeout))
			req->status = -ETIMEDOUT;
		else if (req->status == -ENODATA)
			continue;

		list_del(&req->list);
		list_add_tail(&req->list, &done_list);
	}

	/* req_list is sorted by timeout, so the head expires first. */
	if (!list_empty(&req_list)) {
		req = list_entry(req_list.next, struct addr_req, list);
		set_timeout(req->timeout);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		req->callback(req->status, &req->src_addr, req->addr,
			      req->context);
		kfree(req);
	}
}

/*
 * Handle resolution when the destination address belongs to this host.
 * Returns -EADDRNOTAVAIL when @dst_in is not local, letting the caller
 * fall back to remote (ARP-based) resolution.
 */
static int addr_resolve_local(struct sockaddr_in *src_in,
			      struct sockaddr_in *dst_in,
			      struct rdma_dev_addr *addr)
{
	struct net_device *dev;
	u32 src_ip = src_in->sin_addr.s_addr;
	u32 dst_ip = dst_in->sin_addr.s_addr;
	int ret;

	dev = ip_dev_find(dst_ip);
	if (!dev)
		return -EADDRNOTAVAIL;

	if (ZERONET(src_ip)) {
		/* No source given: make the request loop back through
		 * the destination's own interface. */
		src_in->sin_family = dst_in->sin_family;
		src_in->sin_addr.s_addr = dst_ip;
		ret = copy_addr(addr, dev, dev->dev_addr);
	} else if (LOOPBACK(src_ip)) {
		/* Loopback source: source side resolves through the
		 * destination address instead. */
		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
		if (!ret)
			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	} else {
		/* Distinct local source and destination interfaces. */
		ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
		if (!ret)
			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	}

	dev_put(dev);
	return ret;
}

/*
 * Resolve source/destination IP addresses to hardware addresses.
 *
 * @src_addr:   optional source IP; if NULL or unspecified it is filled
 *              in from the chosen route.
 * @dst_addr:   destination IP to resolve.
 * @addr:       result buffer; also serves as the cancellation handle
 *              for rdma_addr_cancel().
 * @timeout_ms: how long to keep retrying before failing with -ETIMEDOUT.
 * @callback:   invoked (from workqueue context) with the final status.
 * @context:    opaque value passed through to @callback.
 *
 * Returns 0 if the request was queued (the callback always runs, even
 * on immediate success), or a negative errno on synchronous failure.
 */
int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
	struct sockaddr_in *src_in, *dst_in;
	struct addr_req *req;
	int ret = 0;

	/* kzalloc() replaces the open-coded kmalloc() + memset() pair. */
	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	if (src_addr)
		memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
	memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
	req->addr = addr;
	req->callback = callback;
	req->context = context;

	src_in = (struct sockaddr_in *) &req->src_addr;
	dst_in = (struct sockaddr_in *) &req->dst_addr;

	req->status = addr_resolve_local(src_in, dst_in, addr);
	if (req->status == -EADDRNOTAVAIL)
		req->status = addr_resolve_remote(src_in, dst_in, addr);

	switch (req->status) {
	case 0:
		/* Already resolved: queue for immediate completion. */
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		/* Neighbour unknown: queue with deadline and start ARP. */
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		addr_send_arp(dst_in);
		break;
	default:
		/* Hard failure: report synchronously, no callback. */
		ret = req->status;
		kfree(req);
		break;
	}
	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

/*
 * Cancel the outstanding rdma_resolve_ip() request identified by @addr.
 * The request is marked -ECANCELED, moved to the head of the queue and
 * the work timer fired immediately so process_req() completes it.
 *
 * NOTE(review): process_req() retries resolution for any request whose
 * status is non-zero, so a cancelled request whose retry then succeeds
 * could still complete with status 0 — confirm whether that is intended.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->addr == addr) {
			req->status = -ECANCELED;
			req->timeout = jiffies;
			list_del(&req->list);
			list_add(&req->list, &req_list);
			set_timeout(req->timeout);
			break;
		}
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

/*
 * Packet handler for ETH_P_ARP.  Any incoming ARP request or reply may
 * have populated the neighbour table, so schedule an immediate run of
 * process_req() to retry pending resolutions.  The packet itself is
 * only peeked at, never consumed on behalf of the stack.
 */
static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
			 struct packet_type *pkt, struct net_device *orig_dev)
{
	struct arphdr *arp_hdr;

	arp_hdr = (struct arphdr *) skb->nh.raw;

	if (arp_hdr->ar_op == htons(ARPOP_REQUEST) ||
	    arp_hdr->ar_op == htons(ARPOP_REPLY))
		set_timeout(jiffies);

	/* Drop our reference to the cloned/shared skb. */
	kfree_skb(skb);
	return 0;
}

/*
 * Protocol tap registered via dev_add_pack() to observe all inbound
 * ARP traffic.  NOTE(review): af_packet_priv is set to a non-NULL
 * dummy value; presumably some consumer checks it for non-NULL —
 * confirm against the users of packet_type in this kernel version.
 */
static struct packet_type addr_arp = {
	.type           = __constant_htons(ETH_P_ARP),
	.func           = addr_arp_recv,
	.af_packet_priv = (void*) 1,
};

/*
 * Module init: create the single-threaded resolution workqueue and
 * register the ARP packet tap.  Returns -ENOMEM if the workqueue
 * cannot be created.
 */
static int addr_init(void)
{
	struct workqueue_struct *wq;

	wq = create_singlethread_workqueue("ib_addr_wq");
	if (!wq)
		return -ENOMEM;

	addr_wq = wq;
	dev_add_pack(&addr_arp);
	return 0;
}

/*
 * Module exit: unregister the ARP tap first so no new work is queued,
 * then destroy the workqueue (which flushes any pending work).
 */
static void addr_cleanup(void)
{
	dev_remove_pack(&addr_arp);
	destroy_workqueue(addr_wq);
}

module_init(addr_init);
module_exit(addr_cleanup);
+29 −1
Original line number Original line Diff line number Diff line
@@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device,
}
}
EXPORT_SYMBOL(ib_find_cached_pkey);
EXPORT_SYMBOL(ib_find_cached_pkey);


/*
 * ib_get_cached_lmc - Return the cached LMC value for a port.
 * @device:   device whose cache to read.
 * @port_num: port number (validated against the device's port range).
 * @lmc:      output; receives the cached LMC on success.
 *
 * Returns 0 on success or -EINVAL for an out-of-range port.  The
 * cache lock is taken for the read; the local 'ret' variable of the
 * original (which was always 0) has been removed.
 */
int ib_get_cached_lmc(struct ib_device *device,
		      u8                port_num,
		      u8                *lmc)
{
	unsigned long flags;

	if (port_num < start_port(device) || port_num > end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.lmc_cache[port_num - start_port(device)];
	read_unlock_irqrestore(&device->cache.lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

static void ib_cache_update(struct ib_device *device,
static void ib_cache_update(struct ib_device *device,
			    u8                port)
			    u8                port)
{
{
@@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device,
	device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
	device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
	device->cache.gid_cache [port - start_port(device)] = gid_cache;
	device->cache.gid_cache [port - start_port(device)] = gid_cache;


	device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;

	write_unlock_irq(&device->cache.lock);
	write_unlock_irq(&device->cache.lock);


	kfree(old_pkey_cache);
	kfree(old_pkey_cache);
@@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device)
		kmalloc(sizeof *device->cache.gid_cache *
		kmalloc(sizeof *device->cache.gid_cache *
			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
			(end_port(device) - start_port(device) + 1), GFP_KERNEL);


	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
	device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
					  (end_port(device) -
					   start_port(device) + 1),
					  GFP_KERNEL);

	if (!device->cache.pkey_cache || !device->cache.gid_cache ||
	    !device->cache.lmc_cache) {
		printk(KERN_WARNING "Couldn't allocate cache "
		printk(KERN_WARNING "Couldn't allocate cache "
		       "for %s\n", device->name);
		       "for %s\n", device->name);
		goto err;
		goto err;
@@ -333,6 +359,7 @@ err_cache:
err:
err:
	kfree(device->cache.pkey_cache);
	kfree(device->cache.pkey_cache);
	kfree(device->cache.gid_cache);
	kfree(device->cache.gid_cache);
	kfree(device->cache.lmc_cache);
}
}


static void ib_cache_cleanup_one(struct ib_device *device)
static void ib_cache_cleanup_one(struct ib_device *device)
@@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)


	kfree(device->cache.pkey_cache);
	kfree(device->cache.pkey_cache);
	kfree(device->cache.gid_cache);
	kfree(device->cache.gid_cache);
	kfree(device->cache.lmc_cache);
}
}


static struct ib_client cache_client = {
static struct ib_client cache_client = {
Loading