Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a1cdde8c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull rdma updates from Jason Gunthorpe:
 "This has been a quiet cycle for RDMA, the big bulk is the usual
  smallish driver updates and bug fixes. About four new uAPI related
  things. Not as many syzkaller patches this time, the bugs it finds are
  getting harder to fix.

  Summary:

   - More work cleaning up the RDMA CM code

   - Usual driver bug fixes and cleanups for qedr, qib, hfi1, hns,
     i40iw, iw_cxgb4, mlx5, rxe

   - Driver specific resource tracking and reporting via netlink

   - Continued work for name space support from Parav

   - MPLS support for the verbs flow steering uAPI

   - A few tricky IPoIB fixes improving robustness

   - HFI1 driver support for the '16B' management packet format

   - Some auditing to not print kernel pointers via %llx or similar

   - Mark the entire 'UCM' user-space interface as BROKEN with the
     intent to remove it entirely. The user space side of this was long
     ago replaced with RDMA-CM and syzkaller is finding bugs in the
     residual UCM interface nobody wishes to fix because nobody uses it.

   - Purge more bogus BUG_ON's from Leon

   - 'flow counters' verbs uAPI

   - T10 fixups for iser/isert, these are Acked by Martin but going
     through the RDMA tree due to dependencies"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (138 commits)
  RDMA/mlx5: Update SPDX tags to show proper license
  RDMA/restrack: Change SPDX tag to properly reflect license
  IB/hfi1: Fix comment on default hdr entry size
  IB/hfi1: Rename exp_lock to exp_mutex
  IB/hfi1: Add bypass register defines and replace blind constants
  IB/hfi1: Remove unused variable
  IB/hfi1: Ensure VL index is within bounds
  IB/hfi1: Fix user context tail allocation for DMA_RTAIL
  IB/hns: Use zeroing memory allocator instead of allocator/memset
  infiniband: fix a possible use-after-free bug
  iw_cxgb4: add INFINIBAND_ADDR_TRANS dependency
  IB/isert: use T10-PI check mask definitions from core layer
  IB/iser: use T10-PI check mask definitions from core layer
  RDMA/core: introduce check masks for T10-PI offload
  IB/isert: fix T10-pi check mask setting
  IB/mlx5: Add counters read support
  IB/mlx5: Add flow counters read support
  IB/mlx5: Add flow counters binding support
  IB/mlx5: Add counters create and destroy support
  IB/uverbs: Add support for flow counters
  ...
parents 3a3869f1 c1191a19
Loading
Loading
Loading
Loading
+11 −0
Original line number Original line Diff line number Diff line
@@ -35,6 +35,17 @@ config INFINIBAND_USER_ACCESS
	  libibverbs, libibcm and a hardware driver library from
	  libibverbs, libibcm and a hardware driver library from
	  rdma-core <https://github.com/linux-rdma/rdma-core>.
	  rdma-core <https://github.com/linux-rdma/rdma-core>.


config INFINIBAND_USER_ACCESS_UCM
	bool "Userspace CM (UCM, DEPRECATED)"
	depends on BROKEN
	depends on INFINIBAND_USER_ACCESS
	help
	  The UCM module has known security flaws, which no one is
	  interested to fix. The user-space part of this code was
	  dropped from the upstream a long time ago.

	  This option is DEPRECATED and planned to be removed.

config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
	bool "Allow experimental legacy verbs in new ioctl uAPI  (EXPERIMENTAL)"
	bool "Allow experimental legacy verbs in new ioctl uAPI  (EXPERIMENTAL)"
	depends on INFINIBAND_USER_ACCESS
	depends on INFINIBAND_USER_ACCESS
+5 −4
Original line number Original line Diff line number Diff line
@@ -5,15 +5,16 @@ user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_cm.o iw_cm.o \
obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_cm.o iw_cm.o \
					$(infiniband-y)
					$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
					$(user_access-y)
obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y)


ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
				device.o fmr_pool.o cache.o netlink.o \
				device.o fmr_pool.o cache.o netlink.o \
				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
				multicast.o mad.o smi.o agent.o mad_rmpp.o \
				multicast.o mad.o smi.o agent.o mad_rmpp.o \
				security.o nldev.o restrack.o
				nldev.o restrack.o


ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -36,4 +37,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
				rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
				rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
				uverbs_ioctl_merge.o uverbs_std_types_cq.o \
				uverbs_ioctl_merge.o uverbs_std_types_cq.o \
				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
				uverbs_std_types_mr.o
				uverbs_std_types_mr.o uverbs_std_types_counters.o
+59 −113
Original line number Original line Diff line number Diff line
@@ -56,7 +56,6 @@ struct addr_req {
	struct sockaddr_storage src_addr;
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage dst_addr;
	struct rdma_dev_addr *addr;
	struct rdma_dev_addr *addr;
	struct rdma_addr_client *client;
	void *context;
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
			 struct rdma_dev_addr *addr, void *context);
@@ -68,11 +67,8 @@ struct addr_req {


static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);


static void process_req(struct work_struct *work);
static DEFINE_SPINLOCK(lock);

static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
static struct workqueue_struct *addr_wq;


static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
@@ -112,7 +108,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
			memcpy(&gid, nla_data(curr), nla_len(curr));
			memcpy(&gid, nla_data(curr), nla_len(curr));
	}
	}


	mutex_lock(&lock);
	spin_lock_bh(&lock);
	list_for_each_entry(req, &req_list, list) {
	list_for_each_entry(req, &req_list, list) {
		if (nlh->nlmsg_seq != req->seq)
		if (nlh->nlmsg_seq != req->seq)
			continue;
			continue;
@@ -122,7 +118,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
		found = 1;
		found = 1;
		break;
		break;
	}
	}
	mutex_unlock(&lock);
	spin_unlock_bh(&lock);


	if (!found)
	if (!found)
		pr_info("Couldn't find request waiting for DGID: %pI6\n",
		pr_info("Couldn't find request waiting for DGID: %pI6\n",
@@ -223,28 +219,6 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
}
}
EXPORT_SYMBOL(rdma_addr_size_kss);
EXPORT_SYMBOL(rdma_addr_size_kss);


static struct rdma_addr_client self;

void rdma_addr_register_client(struct rdma_addr_client *client)
{
	atomic_set(&client->refcount, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);

static inline void put_client(struct rdma_addr_client *client)
{
	if (atomic_dec_and_test(&client->refcount))
		complete(&client->comp);
}

void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
		    const struct net_device *dev,
		    const struct net_device *dev,
		    const unsigned char *dst_dev_addr)
		    const unsigned char *dst_dev_addr)
@@ -302,7 +276,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
}
}
EXPORT_SYMBOL(rdma_translate_ip);
EXPORT_SYMBOL(rdma_translate_ip);


static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
static void set_timeout(struct addr_req *req, unsigned long time)
{
{
	unsigned long delay;
	unsigned long delay;


@@ -310,23 +284,15 @@ static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
	if ((long)delay < 0)
	if ((long)delay < 0)
		delay = 0;
		delay = 0;


	mod_delayed_work(addr_wq, delayed_work, delay);
	mod_delayed_work(addr_wq, &req->work, delay);
}
}


static void queue_req(struct addr_req *req)
static void queue_req(struct addr_req *req)
{
{
	struct addr_req *temp_req;
	spin_lock_bh(&lock);

	list_add_tail(&req->list, &req_list);
	mutex_lock(&lock);
	set_timeout(req, req->timeout);
	list_for_each_entry_reverse(temp_req, &req_list, list) {
	spin_unlock_bh(&lock);
		if (time_after_eq(req->timeout, temp_req->timeout))
			break;
	}

	list_add(&req->list, &temp_req->list);

	set_timeout(&req->work, req->timeout);
	mutex_unlock(&lock);
}
}


static int ib_nl_fetch_ha(const struct dst_entry *dst,
static int ib_nl_fetch_ha(const struct dst_entry *dst,
@@ -584,7 +550,6 @@ static void process_one_req(struct work_struct *_work)
	struct addr_req *req;
	struct addr_req *req;
	struct sockaddr *src_in, *dst_in;
	struct sockaddr *src_in, *dst_in;


	mutex_lock(&lock);
	req = container_of(_work, struct addr_req, work.work);
	req = container_of(_work, struct addr_req, work.work);


	if (req->status == -ENODATA) {
	if (req->status == -ENODATA) {
@@ -596,72 +561,33 @@ static void process_one_req(struct work_struct *_work)
			req->status = -ETIMEDOUT;
			req->status = -ETIMEDOUT;
		} else if (req->status == -ENODATA) {
		} else if (req->status == -ENODATA) {
			/* requeue the work for retrying again */
			/* requeue the work for retrying again */
			set_timeout(&req->work, req->timeout);
			spin_lock_bh(&lock);
			mutex_unlock(&lock);
			if (!list_empty(&req->list))
				set_timeout(req, req->timeout);
			spin_unlock_bh(&lock);
			return;
			return;
		}
		}
	}
	}
	list_del(&req->list);
	mutex_unlock(&lock);

	/*
	 * Although the work will normally have been canceled by the
	 * workqueue, it can still be requeued as long as it is on the
	 * req_list, so it could have been requeued before we grabbed &lock.
	 * We need to cancel it after it is removed from req_list to really be
	 * sure it is safe to free.
	 */
	cancel_delayed_work(&req->work);


	req->callback(req->status, (struct sockaddr *)&req->src_addr,
	req->callback(req->status, (struct sockaddr *)&req->src_addr,
		req->addr, req->context);
		req->addr, req->context);
	put_client(req->client);
	req->callback = NULL;
	kfree(req);
}


static void process_req(struct work_struct *work)
	spin_lock_bh(&lock);
{
	if (!list_empty(&req->list)) {
	struct addr_req *req, *temp_req;
		/*
	struct sockaddr *src_in, *dst_in;
		 * Although the work will normally have been canceled by the
	struct list_head done_list;
		 * workqueue, it can still be requeued as long as it is on the

		 * req_list.
	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->status == -ENODATA) {
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
			req->status = addr_resolve(src_in, dst_in, req->addr,
						   true, req->seq);
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA) {
				set_timeout(&req->work, req->timeout);
				continue;
			}
		}
		list_move_tail(&req->list, &done_list);
	}

	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		/* It is safe to cancel other work items from this work item
		 * because at a time there can be only one work item running
		 * with this single threaded work queue.
		 */
		 */
		cancel_delayed_work(&req->work);
		cancel_delayed_work(&req->work);
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
		list_del_init(&req->list);
			req->addr, req->context);
		put_client(req->client);
		kfree(req);
		kfree(req);
	}
	}
	spin_unlock_bh(&lock);
}
}


int rdma_resolve_ip(struct rdma_addr_client *client,
int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
				     struct rdma_dev_addr *addr, void *context),
@@ -693,8 +619,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
	req->addr = addr;
	req->addr = addr;
	req->callback = callback;
	req->callback = callback;
	req->context = context;
	req->context = context;
	req->client = client;
	atomic_inc(&client->refcount);
	INIT_DELAYED_WORK(&req->work, process_one_req);
	INIT_DELAYED_WORK(&req->work, process_one_req);
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);


@@ -710,7 +634,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
		break;
		break;
	default:
	default:
		ret = req->status;
		ret = req->status;
		atomic_dec(&client->refcount);
		goto err;
		goto err;
	}
	}
	return ret;
	return ret;
@@ -742,18 +665,36 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
void rdma_addr_cancel(struct rdma_dev_addr *addr)
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
{
	struct addr_req *req, *temp_req;
	struct addr_req *req, *temp_req;
	struct addr_req *found = NULL;


	mutex_lock(&lock);
	spin_lock_bh(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->addr == addr) {
		if (req->addr == addr) {
			req->status = -ECANCELED;
			/*
			req->timeout = jiffies;
			 * Removing from the list means we take ownership of
			list_move(&req->list, &req_list);
			 * the req
			set_timeout(&req->work, req->timeout);
			 */
			list_del_init(&req->list);
			found = req;
			break;
			break;
		}
		}
	}
	}
	mutex_unlock(&lock);
	spin_unlock_bh(&lock);

	if (!found)
		return;

	/*
	 * sync canceling the work after removing it from the req_list
	 * guarantees no work is running and none will be started.
	 */
	cancel_delayed_work_sync(&found->work);

	if (found->callback)
		found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
			      found->addr, found->context);

	kfree(found);
}
}
EXPORT_SYMBOL(rdma_addr_cancel);
EXPORT_SYMBOL(rdma_addr_cancel);


@@ -791,7 +732,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
	dev_addr.net = &init_net;
	dev_addr.net = &init_net;


	init_completion(&ctx.comp);
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
	ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
			      &dev_addr, 1000, resolve_cb, &ctx);
			      &dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
	if (ret)
		return ret;
		return ret;
@@ -810,11 +751,17 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
static int netevent_callback(struct notifier_block *self, unsigned long event,
static int netevent_callback(struct notifier_block *self, unsigned long event,
	void *ctx)
	void *ctx)
{
{
	struct addr_req *req;

	if (event == NETEVENT_NEIGH_UPDATE) {
	if (event == NETEVENT_NEIGH_UPDATE) {
		struct neighbour *neigh = ctx;
		struct neighbour *neigh = ctx;


		if (neigh->nud_state & NUD_VALID)
		if (neigh->nud_state & NUD_VALID) {
			set_timeout(&work, jiffies);
			spin_lock_bh(&lock);
			list_for_each_entry(req, &req_list, list)
				set_timeout(req, jiffies);
			spin_unlock_bh(&lock);
		}
	}
	}
	return 0;
	return 0;
}
}
@@ -830,14 +777,13 @@ int addr_init(void)
		return -ENOMEM;
		return -ENOMEM;


	register_netevent_notifier(&nb);
	register_netevent_notifier(&nb);
	rdma_addr_register_client(&self);


	return 0;
	return 0;
}
}


void addr_cleanup(void)
void addr_cleanup(void)
{
{
	rdma_addr_unregister_client(&self);
	unregister_netevent_notifier(&nb);
	unregister_netevent_notifier(&nb);
	destroy_workqueue(addr_wq);
	destroy_workqueue(addr_wq);
	WARN_ON(!list_empty(&req_list));
}
}
+49 −63
Original line number Original line Diff line number Diff line
@@ -125,6 +125,16 @@ const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
}
}
EXPORT_SYMBOL(ib_cache_gid_type_str);
EXPORT_SYMBOL(ib_cache_gid_type_str);


/** rdma_is_zero_gid - Check if given GID is zero or not.
 * @gid:	GID to check
 * Returns true if given GID is zero, returns false otherwise.
 */
bool rdma_is_zero_gid(const union ib_gid *gid)
{
	return !memcmp(gid, &zgid, sizeof(*gid));
}
EXPORT_SYMBOL(rdma_is_zero_gid);

int ib_cache_gid_parse_type_str(const char *buf)
int ib_cache_gid_parse_type_str(const char *buf)
{
{
	unsigned int i;
	unsigned int i;
@@ -149,6 +159,11 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);


static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
	return device->cache.ports[port - rdma_start_port(device)].gid;
}

static void del_roce_gid(struct ib_device *device, u8 port_num,
static void del_roce_gid(struct ib_device *device, u8 port_num,
			 struct ib_gid_table *table, int ix)
			 struct ib_gid_table *table, int ix)
{
{
@@ -231,7 +246,7 @@ static int add_modify_gid(struct ib_gid_table *table,
		 * So ignore such behavior for IB link layer and don't
		 * So ignore such behavior for IB link layer and don't
		 * fail the call, but don't add such entry to GID cache.
		 * fail the call, but don't add such entry to GID cache.
		 */
		 */
		if (!memcmp(gid, &zgid, sizeof(*gid)))
		if (rdma_is_zero_gid(gid))
			return 0;
			return 0;
	}
	}


@@ -264,7 +279,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port,


	if (rdma_protocol_roce(ib_dev, port))
	if (rdma_protocol_roce(ib_dev, port))
		del_roce_gid(ib_dev, port, table, ix);
		del_roce_gid(ib_dev, port, table, ix);
	memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
	memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
	table->data_vec[ix].context = NULL;
	table->data_vec[ix].context = NULL;
}
}
@@ -363,10 +378,10 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
	 * IB spec version 1.3 section 4.1.1 point (6) and
	 * IB spec version 1.3 section 4.1.1 point (6) and
	 * section 12.7.10 and section 12.7.20
	 * section 12.7.10 and section 12.7.20
	 */
	 */
	if (!memcmp(gid, &zgid, sizeof(*gid)))
	if (rdma_is_zero_gid(gid))
		return -EINVAL;
		return -EINVAL;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	mutex_lock(&table->lock);
	mutex_lock(&table->lock);


@@ -433,7 +448,7 @@ _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
	int ret = 0;
	int ret = 0;
	int ix;
	int ix;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	mutex_lock(&table->lock);
	mutex_lock(&table->lock);


@@ -472,7 +487,7 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
	int ix;
	int ix;
	bool deleted = false;
	bool deleted = false;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	mutex_lock(&table->lock);
	mutex_lock(&table->lock);


@@ -496,7 +511,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
{
{
	struct ib_gid_table *table;
	struct ib_gid_table *table;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	if (index < 0 || index >= table->sz)
	if (index < 0 || index >= table->sz)
		return -EINVAL;
		return -EINVAL;
@@ -589,7 +604,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
	if (!rdma_is_port_valid(ib_dev, port))
	if (!rdma_is_port_valid(ib_dev, port))
		return -ENOENT;
		return -ENOENT;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	if (ndev)
	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;
		mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -647,7 +662,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
	    !rdma_protocol_roce(ib_dev, port))
	    !rdma_protocol_roce(ib_dev, port))
		return -EPROTONOSUPPORT;
		return -EPROTONOSUPPORT;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	read_lock_irqsave(&table->rwlock, flags);
	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
	for (i = 0; i < table->sz; i++) {
@@ -724,8 +739,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,


	mutex_lock(&table->lock);
	mutex_lock(&table->lock);
	for (i = 0; i < table->sz; ++i) {
	for (i = 0; i < table->sz; ++i) {
		if (memcmp(&table->data_vec[i].gid, &zgid,
		if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
			   sizeof(table->data_vec[i].gid))) {
			del_gid(ib_dev, port, table, i);
			del_gid(ib_dev, port, table, i);
			deleted = true;
			deleted = true;
		}
		}
@@ -747,7 +761,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
	unsigned int gid_type;
	unsigned int gid_type;
	unsigned long mask;
	unsigned long mask;


	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
	table = rdma_gid_table(ib_dev, port);


	mask = GID_ATTR_FIND_MASK_GID_TYPE |
	mask = GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_DEFAULT |
	       GID_ATTR_FIND_MASK_DEFAULT |
@@ -772,7 +786,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
	}
	}
}
}


static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				      struct ib_gid_table *table)
				      struct ib_gid_table *table)
{
{
	unsigned int i;
	unsigned int i;
@@ -783,8 +797,7 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	num_default_gids = hweight_long(roce_gid_type_mask);
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
		struct ib_gid_table_entry *entry =
		struct ib_gid_table_entry *entry = &table->data_vec[i];
			&table->data_vec[i];


		entry->props |= GID_TABLE_ENTRY_DEFAULT;
		entry->props |= GID_TABLE_ENTRY_DEFAULT;
		current_gid = find_next_bit(&roce_gid_type_mask,
		current_gid = find_next_bit(&roce_gid_type_mask,
@@ -792,59 +805,42 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
					    current_gid);
					    current_gid);
		entry->attr.gid_type = current_gid++;
		entry->attr.gid_type = current_gid++;
	}
	}
}


	return 0;

static void gid_table_release_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		release_gid_table(table);
		ib_dev->cache.ports[port].gid = NULL;
	}
}
}


static int _gid_table_setup_one(struct ib_device *ib_dev)
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
{
	u8 port;
	u8 port;
	struct ib_gid_table *table;
	struct ib_gid_table *table;
	int err = 0;


	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		u8 rdma_port = port + rdma_start_port(ib_dev);
		u8 rdma_port = port + rdma_start_port(ib_dev);


		table =
		table =	alloc_gid_table(
			alloc_gid_table(
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
		if (!table) {
		if (!table)
			err = -ENOMEM;
			goto rollback_table_setup;
			goto rollback_table_setup;
		}


		err = gid_table_reserve_default(ib_dev,
		gid_table_reserve_default(ib_dev, rdma_port, table);
						port + rdma_start_port(ib_dev),
						table);
		if (err)
			goto rollback_table_setup;
		ib_dev->cache.ports[port].gid = table;
		ib_dev->cache.ports[port].gid = table;
	}
	}

	return 0;
	return 0;


rollback_table_setup:
rollback_table_setup:
	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
	gid_table_release_one(ib_dev);
		table = ib_dev->cache.ports[port].gid;
	return -ENOMEM;

		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table);
		release_gid_table(table);
	}

	return err;
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	u8 port;

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		table = ib_dev->cache.ports[port].gid;
		release_gid_table(table);
		ib_dev->cache.ports[port].gid = NULL;
	}
}
}


static void gid_table_cleanup_one(struct ib_device *ib_dev)
static void gid_table_cleanup_one(struct ib_device *ib_dev)
@@ -886,7 +882,7 @@ int ib_get_cached_gid(struct ib_device *device,
	if (!rdma_is_port_valid(device, port_num))
	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;
		return -EINVAL;


	table = device->cache.ports[port_num - rdma_start_port(device)].gid;
	table = rdma_gid_table(device, port_num);
	read_lock_irqsave(&table->rwlock, flags);
	read_lock_irqsave(&table->rwlock, flags);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);
	read_unlock_irqrestore(&table->rwlock, flags);
@@ -1104,7 +1100,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,


	gid_attr.device = device;
	gid_attr.device = device;
	gid_attr.port_num = port;
	gid_attr.port_num = port;
	table = device->cache.ports[port - rdma_start_port(device)].gid;
	table = rdma_gid_table(device, port);


	mutex_lock(&table->lock);
	mutex_lock(&table->lock);
	for (i = 0; i < gid_tbl_len; ++i) {
	for (i = 0; i < gid_tbl_len; ++i) {
@@ -1137,7 +1133,7 @@ static void ib_cache_update(struct ib_device *device,
	if (!rdma_is_port_valid(device, port))
	if (!rdma_is_port_valid(device, port))
		return;
		return;


	table = device->cache.ports[port - rdma_start_port(device)].gid;
	table = rdma_gid_table(device, port);


	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
	if (!tprops)
@@ -1300,13 +1296,3 @@ void ib_cache_cleanup_one(struct ib_device *device)
	flush_workqueue(ib_wq);
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);
	gid_table_cleanup_one(device);
}
}

void __init ib_cache_setup(void)
{
	roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
	roce_gid_mgmt_cleanup();
}
+51 −11
Original line number Original line Diff line number Diff line
@@ -452,6 +452,32 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
	cm_id_priv->private_data_len = private_data_len;
	cm_id_priv->private_data_len = private_data_len;
}
}


static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
			      struct ib_grh *grh, struct cm_av *av)
{
	struct rdma_ah_attr new_ah_attr;
	int ret;

	av->port = port;
	av->pkey_index = wc->pkey_index;

	/*
	 * av->ah_attr might be initialized based on past wc during incoming
	 * connect request or while sending out connect request. So initialize
	 * a new ah_attr on stack. If initialization fails, old ah_attr is
	 * used for sending any responses. If initialization is successful,
	 * than new ah_attr is used by overwriting old one.
	 */
	ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
				      port->port_num, wc,
				      grh, &new_ah_attr);
	if (ret)
		return ret;

	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
	return 0;
}

static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
				   struct ib_grh *grh, struct cm_av *av)
				   struct ib_grh *grh, struct cm_av *av)
{
{
@@ -509,6 +535,7 @@ static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
			      struct cm_id_private *cm_id_priv)
			      struct cm_id_private *cm_id_priv)
{
{
	struct rdma_ah_attr new_ah_attr;
	struct cm_device *cm_dev;
	struct cm_device *cm_dev;
	struct cm_port *port;
	struct cm_port *port;
	int ret;
	int ret;
@@ -524,15 +551,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
		return ret;
		return ret;


	av->port = port;
	av->port = port;

	/*
	 * av->ah_attr might be initialized based on wc or during
	 * request processing time. So initialize a new ah_attr on stack.
	 * If initialization fails, old ah_attr is used for sending any
	 * responses. If initialization is successful, than new ah_attr
	 * is used by overwriting the old one.
	 */
	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
					&av->ah_attr);
					&new_ah_attr);
	if (ret)
	if (ret)
		return ret;
		return ret;


	av->timeout = path->packet_life_time + 1;
	av->timeout = path->packet_life_time + 1;


	ret = add_cm_id_to_port_list(cm_id_priv, av, port);
	ret = add_cm_id_to_port_list(cm_id_priv, av, port);
	if (ret)
		return ret;
		return ret;
	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
	return 0;
}
}


static int cm_alloc_id(struct cm_id_private *cm_id_priv)
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -1669,7 +1707,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
		spin_lock_irq(&cm_id_priv->lock);
		spin_lock_irq(&cm_id_priv->lock);
		work = cm_dequeue_work(cm_id_priv);
		work = cm_dequeue_work(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		spin_unlock_irq(&cm_id_priv->lock);
		BUG_ON(!work);
		if (!work)
			return;

		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
						&work->cm_event);
						&work->cm_event);
		cm_free_work(work);
		cm_free_work(work);
@@ -3189,12 +3229,6 @@ static int cm_lap_handler(struct cm_work *work)
	if (!cm_id_priv)
	if (!cm_id_priv)
		return -EINVAL;
		return -EINVAL;


	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
				      work->mad_recv_wc->recv_buf.grh,
				      &cm_id_priv->av);
	if (ret)
		goto deref;

	param = &work->cm_event.param.lap_rcvd;
	param = &work->cm_event.param.lap_rcvd;
	memset(&work->path[0], 0, sizeof(work->path[1]));
	memset(&work->path[0], 0, sizeof(work->path[1]));
	cm_path_set_rec_type(work->port->cm_dev->ib_device,
	cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3239,10 +3273,16 @@ static int cm_lap_handler(struct cm_work *work)
		goto unlock;
		goto unlock;
	}
	}


	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
	cm_id_priv->tid = lap_msg->hdr.tid;
				 work->mad_recv_wc->recv_buf.grh,
				 &cm_id_priv->av);
	if (ret)
		goto unlock;

	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
			   cm_id_priv);
			   cm_id_priv);
	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
	cm_id_priv->tid = lap_msg->hdr.tid;
	ret = atomic_inc_and_test(&cm_id_priv->work_count);
	ret = atomic_inc_and_test(&cm_id_priv->work_count);
	if (!ret)
	if (!ret)
		list_add_tail(&work->list, &cm_id_priv->work_list);
		list_add_tail(&work->list, &cm_id_priv->work_list);
Loading