Loading drivers/infiniband/core/Makefile +3 −9 Original line number Original line Diff line number Diff line infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ ib_cm.o iw_cm.o ib_addr.o \ $(infiniband-y) $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ Loading @@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o ib_mad-y := mad.o smi.o agent.o mad_rmpp.o ib_sa-y := sa_query.o multicast.o ib_cm-y := cm.o ib_cm-y := cm.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o Loading @@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o rdma_ucm-y := ucma.o rdma_ucm-y := ucma.o ib_addr-y := addr.o ib_umad-y := user_mad.o ib_umad-y := user_mad.o ib_ucm-y := ucm.o ib_ucm-y := ucm.o Loading drivers/infiniband/core/addr.c +194 −32 Original line number Original line Diff line number Diff line Loading @@ -46,10 +46,10 @@ #include <net/ip6_route.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> #include <rdma/ib.h> #include <rdma/rdma_netlink.h> #include <net/netlink.h> MODULE_AUTHOR("Sean Hefty"); #include "core_priv.h" MODULE_DESCRIPTION("IB Address Translation"); MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct addr_req { struct list_head list; struct list_head list; Loading @@ -62,8 +62,11 @@ struct addr_req { struct rdma_dev_addr *addr, void *context); struct rdma_dev_addr *addr, void *context); unsigned long timeout; unsigned long timeout; int status; int status; u32 seq; }; }; static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); static void process_req(struct work_struct *work); static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); static DEFINE_MUTEX(lock); Loading @@ -71,6 +74,126 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; static struct workqueue_struct *addr_wq; static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid)}, }; static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) { struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; int ret; if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return false; ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_addr_policy); if (ret) return false; return true; } static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) { const struct nlattr *head, *curr; union ib_gid gid; struct addr_req *req; int len, rem; int found = 0; head = (const struct nlattr *)nlmsg_data(nlh); len = nlmsg_len(nlh); nla_for_each_attr(curr, head, len, rem) { if (curr->nla_type == LS_NLA_TYPE_DGID) memcpy(&gid, nla_data(curr), nla_len(curr)); } mutex_lock(&lock); list_for_each_entry(req, &req_list, list) { if (nlh->nlmsg_seq != req->seq) continue; /* We set the DGID part, the rest was set earlier */ rdma_addr_set_dgid(req->addr, &gid); req->status = 0; found = 1; break; } mutex_unlock(&lock); if (!found) pr_info("Couldn't find request waiting for DGID: %pI6\n", &gid); } int ib_nl_handle_ip_res_resp(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk) || !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); return skb->len; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; struct rdma_ls_ip_resolve_header *header; void *data; size_t size; int attrtype; int len; if (family == AF_INET) { size = sizeof(struct in_addr); attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; } else { size = sizeof(struct in6_addr); attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; } len = nla_total_size(sizeof(size)); len += NLMSG_ALIGN(sizeof(*header)); skb = nlmsg_new(len, GFP_KERNEL); if (!skb) return -ENOMEM; data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); if (!data) { nlmsg_free(skb); return -ENODATA; } /* Construct the family header first */ header = (struct rdma_ls_ip_resolve_header *) skb_put(skb, NLMSG_ALIGN(sizeof(*header))); header->ifindex = dev_addr->bound_dev_if; nla_put(skb, attrtype, size, daddr); /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. */ return -ENODATA; } int rdma_addr_size(struct sockaddr *addr) int rdma_addr_size(struct sockaddr *addr) { { switch (addr->sa_family) { switch (addr->sa_family) { Loading Loading @@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); mutex_unlock(&lock); } } static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { if (ibnl_chk_listeners(RDMA_NL_GROUP_LS)) return -EADDRNOTAVAIL; /* We fill in what we can, the response will fill the rest */ rdma_copy_addr(dev_addr, dst->dev, NULL); return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); } static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) const void *daddr) { { Loading @@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, return ret; return ret; } } static bool has_gateway(struct dst_entry *dst, sa_family_t family) { struct rtable *rt; struct rt6_info *rt6; if (family == AF_INET) { rt = container_of(dst, struct rtable, dst); return rt->rt_uses_gateway; } rt6 = container_of(dst, struct rt6_info, dst); return rt6->rt6i_flags & RTF_GATEWAY; } static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const struct sockaddr *dst_in, u32 seq) { const struct sockaddr_in *dst_in4 = (const struct sockaddr_in *)dst_in; const struct sockaddr_in6 *dst_in6 = (const struct sockaddr_in6 *)dst_in; const void *daddr = (dst_in->sa_family == AF_INET) ? (const void *)&dst_in4->sin_addr.s_addr : (const void *)&dst_in6->sin6_addr; sa_family_t family = dst_in->sa_family; /* Gateway + ARPHRD_INFINIBAND -> IB router */ if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); else return dst_fetch_ha(dst, dev_addr, daddr); } static int addr4_resolve(struct sockaddr_in *src_in, static int addr4_resolve(struct sockaddr_in *src_in, const struct sockaddr_in *dst_in, const struct sockaddr_in *dst_in, struct rdma_dev_addr *addr, struct rdma_dev_addr *addr, Loading @@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in, src_in->sin_family = AF_INET; src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; src_in->sin_addr.s_addr = fl4.saddr; /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're * routable) and we could set the network type accordingly. * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network * type accordingly. */ */ if (rt->rt_uses_gateway) if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV4; addr->network = RDMA_NETWORK_IPV4; addr->hoplimit = ip4_dst_hoplimit(&rt->dst); addr->hoplimit = ip4_dst_hoplimit(&rt->dst); Loading Loading @@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, src_in->sin6_addr = fl6.saddr; src_in->sin6_addr = fl6.saddr; } } /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're * routable) and we could set the network type accordingly. * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network * type accordingly. */ */ if (rt->rt6i_flags & RTF_GATEWAY) if (rt->rt6i_flags & RTF_GATEWAY && ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV6; addr->network = RDMA_NETWORK_IPV6; addr->hoplimit = ip6_dst_hoplimit(dst); addr->hoplimit = ip6_dst_hoplimit(dst); Loading @@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve_neigh(struct dst_entry *dst, const struct sockaddr *dst_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr) struct rdma_dev_addr *addr, u32 seq) { { if (dst->dev->flags & IFF_LOOPBACK) { if (dst->dev->flags & IFF_LOOPBACK) { int ret; int ret; Loading @@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, } } /* If the device doesn't do ARP internally */ /* If the device doesn't do ARP internally */ if (!(dst->dev->flags & IFF_NOARP)) { if (!(dst->dev->flags & IFF_NOARP)) const struct sockaddr_in *dst_in4 = return fetch_ha(dst, addr, dst_in, seq); (const struct sockaddr_in *)dst_in; const struct sockaddr_in6 *dst_in6 = (const struct sockaddr_in6 *)dst_in; return dst_fetch_ha(dst, addr, dst_in->sa_family == AF_INET ? (const void *)&dst_in4->sin_addr.s_addr : (const void *)&dst_in6->sin6_addr); } return rdma_copy_addr(addr, dst->dev, NULL); return rdma_copy_addr(addr, dst->dev, NULL); } } Loading @@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve(struct sockaddr *src_in, static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, struct rdma_dev_addr *addr, bool resolve_neigh) bool resolve_neigh, u32 seq) { { struct net_device *ndev; struct net_device *ndev; struct dst_entry *dst; struct dst_entry *dst; Loading @@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; return ret; if (resolve_neigh) if (resolve_neigh) ret = addr_resolve_neigh(&rt->dst, dst_in, addr); ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); ndev = rt->dst.dev; ndev = rt->dst.dev; dev_hold(ndev); dev_hold(ndev); Loading @@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; return ret; if (resolve_neigh) if (resolve_neigh) ret = addr_resolve_neigh(dst, dst_in, addr); ret = addr_resolve_neigh(dst, dst_in, addr, seq); ndev = dst->dev; ndev = dst->dev; dev_hold(ndev); dev_hold(ndev); Loading Loading @@ -412,7 +575,7 @@ static void process_req(struct work_struct *work) src_in = (struct sockaddr *) &req->src_addr; src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, req->status = addr_resolve(src_in, dst_in, req->addr, true); true, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; req->status = -ETIMEDOUT; else if (req->status == -ENODATA) else if (req->status == -ENODATA) Loading Loading @@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->context = context; req->context = context; req->client = client; req->client = client; atomic_inc(&client->refcount); atomic_inc(&client->refcount); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); req->status = addr_resolve(src_in, dst_in, addr, true); req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); switch (req->status) { switch (req->status) { case 0: case 0: req->timeout = jiffies; req->timeout = jiffies; Loading Loading @@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, src_in->sa_family = dst_addr->sa_family; src_in->sa_family = dst_addr->sa_family; } } return addr_resolve(src_in, dst_addr, addr, false); return addr_resolve(src_in, dst_addr, addr, false, 0); } } EXPORT_SYMBOL(rdma_resolve_ip_route); EXPORT_SYMBOL(rdma_resolve_ip_route); Loading Loading @@ -634,7 +798,7 @@ static struct notifier_block nb = { .notifier_call = netevent_callback .notifier_call = netevent_callback }; }; static int __init addr_init(void) int addr_init(void) { { addr_wq = create_singlethread_workqueue("ib_addr"); addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) if (!addr_wq) Loading @@ -642,15 +806,13 @@ static int __init addr_init(void) register_netevent_notifier(&nb); register_netevent_notifier(&nb); rdma_addr_register_client(&self); rdma_addr_register_client(&self); return 0; return 0; } } static void __exit addr_cleanup(void) void addr_cleanup(void) { { rdma_addr_unregister_client(&self); rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); destroy_workqueue(addr_wq); } } module_init(addr_init); module_exit(addr_cleanup); drivers/infiniband/core/core_priv.h +16 −0 Original line number Original line Diff line number Diff line Loading @@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, return _upper == upper; return _upper == upper; } } int addr_init(void); void addr_cleanup(void); int ib_mad_init(void); void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_callback *cb); int ib_nl_handle_set_timeout(struct sk_buff *skb, struct netlink_callback *cb); int ib_nl_handle_ip_res_resp(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _CORE_PRIV_H */ #endif /* _CORE_PRIV_H */ drivers/infiniband/core/device.c +58 −0 Original line number Original line Diff line number Diff line Loading @@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } } EXPORT_SYMBOL(ib_get_net_dev_by_params); EXPORT_SYMBOL(ib_get_net_dev_by_params); static struct ibnl_client_cbs ibnl_ls_cb_table[] = { [RDMA_NL_LS_OP_RESOLVE] = { .dump = ib_nl_handle_resolve_resp, .module = THIS_MODULE }, [RDMA_NL_LS_OP_SET_TIMEOUT] = { .dump = ib_nl_handle_set_timeout, .module = THIS_MODULE }, [RDMA_NL_LS_OP_IP_RESOLVE] = { .dump = ib_nl_handle_ip_res_resp, .module = THIS_MODULE }, }; static int ib_add_ibnl_clients(void) { return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table), ibnl_ls_cb_table); } static void ib_remove_ibnl_clients(void) { ibnl_remove_client(RDMA_NL_LS); } static int __init ib_core_init(void) static int __init ib_core_init(void) { { int ret; int ret; Loading Loading @@ -983,10 +1006,41 @@ static int __init ib_core_init(void) goto err_sysfs; goto err_sysfs; } } ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); goto err_ibnl; } ret = ib_mad_init(); if (ret) { pr_warn("Couldn't init IB MAD\n"); goto err_addr; } ret = ib_sa_init(); if (ret) { pr_warn("Couldn't init SA\n"); goto err_mad; } if (ib_add_ibnl_clients()) { pr_warn("Couldn't register ibnl clients\n"); goto err_sa; } ib_cache_setup(); ib_cache_setup(); return 0; return 0; err_sa: ib_sa_cleanup(); err_mad: ib_mad_cleanup(); err_addr: addr_cleanup(); err_ibnl: ibnl_cleanup(); err_sysfs: err_sysfs: class_unregister(&ib_class); class_unregister(&ib_class); err_comp: err_comp: Loading @@ -999,6 +1053,10 @@ static int __init ib_core_init(void) static void __exit ib_core_cleanup(void) static void __exit ib_core_cleanup(void) { { ib_cache_cleanup(); ib_cache_cleanup(); ib_remove_ibnl_clients(); ib_sa_cleanup(); ib_mad_cleanup(); addr_cleanup(); ibnl_cleanup(); ibnl_cleanup(); class_unregister(&ib_class); class_unregister(&ib_class); destroy_workqueue(ib_comp_wq); destroy_workqueue(ib_comp_wq); Loading drivers/infiniband/core/mad.c +3 −10 Original line number Original line Diff line number Diff line Loading @@ -47,11 +47,7 @@ #include "smi.h" #include "smi.h" #include "opa_smi.h" #include "opa_smi.h" #include "agent.h" #include "agent.h" #include "core_priv.h" MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); MODULE_AUTHOR("Sean Hefty"); static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; Loading Loading @@ -3316,7 +3312,7 @@ static struct ib_client mad_client = { .remove = ib_mad_remove_device .remove = ib_mad_remove_device }; }; static int __init ib_mad_init_module(void) int ib_mad_init(void) { { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); Loading @@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void) return 0; return 0; } } static void __exit ib_mad_cleanup_module(void) void ib_mad_cleanup(void) { { ib_unregister_client(&mad_client); ib_unregister_client(&mad_client); } } module_init(ib_mad_init_module); module_exit(ib_mad_cleanup_module); Loading
drivers/infiniband/core/Makefile +3 −9 Original line number Original line Diff line number Diff line infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ ib_cm.o iw_cm.o ib_addr.o \ $(infiniband-y) $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ Loading @@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o ib_mad-y := mad.o smi.o agent.o mad_rmpp.o ib_sa-y := sa_query.o multicast.o ib_cm-y := cm.o ib_cm-y := cm.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o Loading @@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o rdma_ucm-y := ucma.o rdma_ucm-y := ucma.o ib_addr-y := addr.o ib_umad-y := user_mad.o ib_umad-y := user_mad.o ib_ucm-y := ucm.o ib_ucm-y := ucm.o Loading
drivers/infiniband/core/addr.c +194 −32 Original line number Original line Diff line number Diff line Loading @@ -46,10 +46,10 @@ #include <net/ip6_route.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> #include <rdma/ib.h> #include <rdma/rdma_netlink.h> #include <net/netlink.h> MODULE_AUTHOR("Sean Hefty"); #include "core_priv.h" MODULE_DESCRIPTION("IB Address Translation"); MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct addr_req { struct list_head list; struct list_head list; Loading @@ -62,8 +62,11 @@ struct addr_req { struct rdma_dev_addr *addr, void *context); struct rdma_dev_addr *addr, void *context); unsigned long timeout; unsigned long timeout; int status; int status; u32 seq; }; }; static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); static void process_req(struct work_struct *work); static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); static DEFINE_MUTEX(lock); Loading @@ -71,6 +74,126 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; static struct workqueue_struct *addr_wq; static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid)}, }; static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) { struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; int ret; if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return false; ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_addr_policy); if (ret) return false; return true; } static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) { const struct nlattr *head, *curr; union ib_gid gid; struct addr_req *req; int len, rem; int found = 0; head = (const struct nlattr *)nlmsg_data(nlh); len = nlmsg_len(nlh); nla_for_each_attr(curr, head, len, rem) { if (curr->nla_type == LS_NLA_TYPE_DGID) memcpy(&gid, nla_data(curr), nla_len(curr)); } mutex_lock(&lock); list_for_each_entry(req, &req_list, list) { if (nlh->nlmsg_seq != req->seq) continue; /* We set the DGID part, the rest was set earlier */ rdma_addr_set_dgid(req->addr, &gid); req->status = 0; found = 1; break; } mutex_unlock(&lock); if (!found) pr_info("Couldn't find request waiting for DGID: %pI6\n", &gid); } int ib_nl_handle_ip_res_resp(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk) || !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); return skb->len; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; struct rdma_ls_ip_resolve_header *header; void *data; size_t size; int attrtype; int len; if (family == AF_INET) { size = sizeof(struct in_addr); attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; } else { size = sizeof(struct in6_addr); attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; } len = nla_total_size(sizeof(size)); len += NLMSG_ALIGN(sizeof(*header)); skb = nlmsg_new(len, GFP_KERNEL); if (!skb) return -ENOMEM; data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); if (!data) { nlmsg_free(skb); return -ENODATA; } /* Construct the family header first */ header = (struct rdma_ls_ip_resolve_header *) skb_put(skb, NLMSG_ALIGN(sizeof(*header))); header->ifindex = dev_addr->bound_dev_if; nla_put(skb, attrtype, size, daddr); /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. */ return -ENODATA; } int rdma_addr_size(struct sockaddr *addr) int rdma_addr_size(struct sockaddr *addr) { { switch (addr->sa_family) { switch (addr->sa_family) { Loading Loading @@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); mutex_unlock(&lock); } } static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { if (ibnl_chk_listeners(RDMA_NL_GROUP_LS)) return -EADDRNOTAVAIL; /* We fill in what we can, the response will fill the rest */ rdma_copy_addr(dev_addr, dst->dev, NULL); return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); } static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) const void *daddr) { { Loading @@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, return ret; return ret; } } static bool has_gateway(struct dst_entry *dst, sa_family_t family) { struct rtable *rt; struct rt6_info *rt6; if (family == AF_INET) { rt = container_of(dst, struct rtable, dst); return rt->rt_uses_gateway; } rt6 = container_of(dst, struct rt6_info, dst); return rt6->rt6i_flags & RTF_GATEWAY; } static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const struct sockaddr *dst_in, u32 seq) { const struct sockaddr_in *dst_in4 = (const struct sockaddr_in *)dst_in; const struct sockaddr_in6 *dst_in6 = (const struct sockaddr_in6 *)dst_in; const void *daddr = (dst_in->sa_family == AF_INET) ? (const void *)&dst_in4->sin_addr.s_addr : (const void *)&dst_in6->sin6_addr; sa_family_t family = dst_in->sa_family; /* Gateway + ARPHRD_INFINIBAND -> IB router */ if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); else return dst_fetch_ha(dst, dev_addr, daddr); } static int addr4_resolve(struct sockaddr_in *src_in, static int addr4_resolve(struct sockaddr_in *src_in, const struct sockaddr_in *dst_in, const struct sockaddr_in *dst_in, struct rdma_dev_addr *addr, struct rdma_dev_addr *addr, Loading @@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in, src_in->sin_family = AF_INET; src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; src_in->sin_addr.s_addr = fl4.saddr; /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're * routable) and we could set the network type accordingly. * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network * type accordingly. */ */ if (rt->rt_uses_gateway) if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV4; addr->network = RDMA_NETWORK_IPV4; addr->hoplimit = ip4_dst_hoplimit(&rt->dst); addr->hoplimit = ip4_dst_hoplimit(&rt->dst); Loading Loading @@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, src_in->sin6_addr = fl6.saddr; src_in->sin6_addr = fl6.saddr; } } /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're * routable) and we could set the network type accordingly. * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network * type accordingly. */ */ if (rt->rt6i_flags & RTF_GATEWAY) if (rt->rt6i_flags & RTF_GATEWAY && ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV6; addr->network = RDMA_NETWORK_IPV6; addr->hoplimit = ip6_dst_hoplimit(dst); addr->hoplimit = ip6_dst_hoplimit(dst); Loading @@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve_neigh(struct dst_entry *dst, const struct sockaddr *dst_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr) struct rdma_dev_addr *addr, u32 seq) { { if (dst->dev->flags & IFF_LOOPBACK) { if (dst->dev->flags & IFF_LOOPBACK) { int ret; int ret; Loading @@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, } } /* If the device doesn't do ARP internally */ /* If the device doesn't do ARP internally */ if (!(dst->dev->flags & IFF_NOARP)) { if (!(dst->dev->flags & IFF_NOARP)) const struct sockaddr_in *dst_in4 = return fetch_ha(dst, addr, dst_in, seq); (const struct sockaddr_in *)dst_in; const struct sockaddr_in6 *dst_in6 = (const struct sockaddr_in6 *)dst_in; return dst_fetch_ha(dst, addr, dst_in->sa_family == AF_INET ? (const void *)&dst_in4->sin_addr.s_addr : (const void *)&dst_in6->sin6_addr); } return rdma_copy_addr(addr, dst->dev, NULL); return rdma_copy_addr(addr, dst->dev, NULL); } } Loading @@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve(struct sockaddr *src_in, static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, struct rdma_dev_addr *addr, bool resolve_neigh) bool resolve_neigh, u32 seq) { { struct net_device *ndev; struct net_device *ndev; struct dst_entry *dst; struct dst_entry *dst; Loading @@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; return ret; if (resolve_neigh) if (resolve_neigh) ret = addr_resolve_neigh(&rt->dst, dst_in, addr); ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); ndev = rt->dst.dev; ndev = rt->dst.dev; dev_hold(ndev); dev_hold(ndev); Loading @@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; return ret; if (resolve_neigh) if (resolve_neigh) ret = addr_resolve_neigh(dst, dst_in, addr); ret = addr_resolve_neigh(dst, dst_in, addr, seq); ndev = dst->dev; ndev = dst->dev; dev_hold(ndev); dev_hold(ndev); Loading Loading @@ -412,7 +575,7 @@ static void process_req(struct work_struct *work) src_in = (struct sockaddr *) &req->src_addr; src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, req->status = addr_resolve(src_in, dst_in, req->addr, true); true, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; req->status = -ETIMEDOUT; else if (req->status == -ENODATA) else if (req->status == -ENODATA) Loading Loading @@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->context = context; req->context = context; req->client = client; req->client = client; atomic_inc(&client->refcount); atomic_inc(&client->refcount); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); req->status = addr_resolve(src_in, dst_in, addr, true); req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); switch (req->status) { switch (req->status) { case 0: case 0: req->timeout = jiffies; req->timeout = jiffies; Loading Loading @@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, src_in->sa_family = dst_addr->sa_family; src_in->sa_family = dst_addr->sa_family; } } return addr_resolve(src_in, dst_addr, addr, false); return addr_resolve(src_in, dst_addr, addr, false, 0); } } EXPORT_SYMBOL(rdma_resolve_ip_route); EXPORT_SYMBOL(rdma_resolve_ip_route); Loading Loading @@ -634,7 +798,7 @@ static struct notifier_block nb = { .notifier_call = netevent_callback .notifier_call = netevent_callback }; }; static int __init addr_init(void) int addr_init(void) { { addr_wq = create_singlethread_workqueue("ib_addr"); addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) if (!addr_wq) Loading @@ -642,15 +806,13 @@ static int __init addr_init(void) register_netevent_notifier(&nb); register_netevent_notifier(&nb); rdma_addr_register_client(&self); rdma_addr_register_client(&self); return 0; return 0; } } static void __exit addr_cleanup(void) void addr_cleanup(void) { { rdma_addr_unregister_client(&self); rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); destroy_workqueue(addr_wq); } } module_init(addr_init); module_exit(addr_cleanup);
drivers/infiniband/core/core_priv.h +16 −0 Original line number Original line Diff line number Diff line Loading @@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, return _upper == upper; return _upper == upper; } } int addr_init(void); void addr_cleanup(void); int ib_mad_init(void); void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_callback *cb); int ib_nl_handle_set_timeout(struct sk_buff *skb, struct netlink_callback *cb); int ib_nl_handle_ip_res_resp(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _CORE_PRIV_H */ #endif /* _CORE_PRIV_H */
drivers/infiniband/core/device.c +58 −0 Original line number Original line Diff line number Diff line Loading @@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } } EXPORT_SYMBOL(ib_get_net_dev_by_params); EXPORT_SYMBOL(ib_get_net_dev_by_params); static struct ibnl_client_cbs ibnl_ls_cb_table[] = { [RDMA_NL_LS_OP_RESOLVE] = { .dump = ib_nl_handle_resolve_resp, .module = THIS_MODULE }, [RDMA_NL_LS_OP_SET_TIMEOUT] = { .dump = ib_nl_handle_set_timeout, .module = THIS_MODULE }, [RDMA_NL_LS_OP_IP_RESOLVE] = { .dump = ib_nl_handle_ip_res_resp, .module = THIS_MODULE }, }; static int ib_add_ibnl_clients(void) { return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table), ibnl_ls_cb_table); } static void ib_remove_ibnl_clients(void) { ibnl_remove_client(RDMA_NL_LS); } static int __init ib_core_init(void) static int __init ib_core_init(void) { { int ret; int ret; Loading Loading @@ -983,10 +1006,41 @@ static int __init ib_core_init(void) goto err_sysfs; goto err_sysfs; } } ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); goto err_ibnl; } ret = ib_mad_init(); if (ret) { pr_warn("Couldn't init IB MAD\n"); goto err_addr; } ret = ib_sa_init(); if (ret) { pr_warn("Couldn't init SA\n"); goto err_mad; } if (ib_add_ibnl_clients()) { pr_warn("Couldn't register ibnl clients\n"); goto err_sa; } ib_cache_setup(); ib_cache_setup(); return 0; return 0; err_sa: ib_sa_cleanup(); err_mad: ib_mad_cleanup(); err_addr: addr_cleanup(); err_ibnl: ibnl_cleanup(); err_sysfs: err_sysfs: class_unregister(&ib_class); class_unregister(&ib_class); err_comp: err_comp: Loading @@ -999,6 +1053,10 @@ static int __init ib_core_init(void) static void __exit ib_core_cleanup(void) static void __exit ib_core_cleanup(void) { { ib_cache_cleanup(); ib_cache_cleanup(); ib_remove_ibnl_clients(); ib_sa_cleanup(); ib_mad_cleanup(); addr_cleanup(); ibnl_cleanup(); ibnl_cleanup(); class_unregister(&ib_class); class_unregister(&ib_class); destroy_workqueue(ib_comp_wq); destroy_workqueue(ib_comp_wq); Loading
drivers/infiniband/core/mad.c +3 −10 Original line number Original line Diff line number Diff line Loading @@ -47,11 +47,7 @@ #include "smi.h" #include "smi.h" #include "opa_smi.h" #include "opa_smi.h" #include "agent.h" #include "agent.h" #include "core_priv.h" MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); MODULE_AUTHOR("Sean Hefty"); static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; Loading Loading @@ -3316,7 +3312,7 @@ static struct ib_client mad_client = { .remove = ib_mad_remove_device .remove = ib_mad_remove_device }; }; static int __init ib_mad_init_module(void) int ib_mad_init(void) { { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); Loading @@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void) return 0; return 0; } } static void __exit ib_mad_cleanup_module(void) void ib_mad_cleanup(void) { { ib_unregister_client(&mad_client); ib_unregister_client(&mad_client); } } module_init(ib_mad_init_module); module_exit(ib_mad_cleanup_module);