
Commit 4c929fee authored by Linus Torvalds
Pull infiniband updates from Roland Dreier:
 "Main batch of InfiniBand/RDMA changes for 3.19:

   - On-demand paging support in core midlayer and mlx5 driver.  This
     lets userspace create non-pinned memory regions and have the
     adapter HW trigger page faults.
   - iSER and IPoIB updates and fixes.
   - Low-level HW driver updates for cxgb4, mlx4 and ocrdma.
   - Other miscellaneous fixes"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (56 commits)
  IB/mlx5: Implement on demand paging by adding support for MMU notifiers
  IB/mlx5: Add support for RDMA read/write responder page faults
  IB/mlx5: Handle page faults
  IB/mlx5: Page faults handling infrastructure
  IB/mlx5: Add mlx5_ib_update_mtt to update page tables after creation
  IB/mlx5: Changes in memory region creation to support on-demand paging
  IB/mlx5: Implement the ODP capability query verb
  mlx5_core: Add support for page faults events and low level handling
  mlx5_core: Re-add MLX5_DEV_CAP_FLAG_ON_DMND_PG flag
  IB/srp: Allow newline separator for connection string
  IB/core: Implement support for MMU notifiers regarding on demand paging regions
  IB/core: Add support for on demand paging regions
  IB/core: Add flags for on demand paging support
  IB/core: Add support for extended query device caps
  IB/mlx5: Add function to read WQE from user-space
  IB/core: Add umem function to read data from user-space
  IB/core: Replace ib_umem's offset field with a full address
  IB/mlx5: Enhance UMR support to allow partial page table update
  IB/mlx5: Remove per-MR pas and dma pointers
  RDMA/ocrdma: Always resolve destination mac from GRH for UD QPs
  ...
parents 018cb13e a7cfef21
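
For context, the on-demand paging support described above is requested from userspace with a new access flag at memory-region registration time. A minimal libibverbs sketch, assuming a library that exposes IBV_ACCESS_ON_DEMAND and ibv_query_device_ex (this code is illustrative only, not part of the commit; error handling is trimmed):

/* Sketch only: register an ODP (non-pinned) memory region via libibverbs.
 * Assumes a libibverbs with ODP support (IBV_ACCESS_ON_DEMAND,
 * ibv_query_device_ex); not taken from this kernel commit. */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **devs = ibv_get_device_list(NULL);
	struct ibv_context *ctx = ibv_open_device(devs[0]);
	struct ibv_pd *pd = ibv_alloc_pd(ctx);
	struct ibv_device_attr_ex attr = {};
	size_t len = 1UL << 20;
	void *buf = malloc(len);          /* plain malloc'ed memory, never pinned */
	struct ibv_mr *mr;

	/* Check that the device reports ODP support before asking for it. */
	if (ibv_query_device_ex(ctx, NULL, &attr) ||
	    !(attr.odp_caps.general_caps & IBV_ODP_SUPPORT)) {
		fprintf(stderr, "ODP not supported by this device\n");
		return 1;
	}

	/* IBV_ACCESS_ON_DEMAND maps to IB_ACCESS_ON_DEMAND in the kernel;
	 * the pages backing buf are faulted in by the HCA on first access. */
	mr = ibv_reg_mr(pd, buf, len,
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_ON_DEMAND);
	if (!mr) {
		perror("ibv_reg_mr");
		return 1;
	}

	ibv_dereg_mr(mr);
	return 0;
}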
drivers/infiniband/Kconfig  +11 −0
@@ -38,6 +38,17 @@ config INFINIBAND_USER_MEM
 	depends on INFINIBAND_USER_ACCESS != n
 	default y
 
+config INFINIBAND_ON_DEMAND_PAGING
+	bool "InfiniBand on-demand paging support"
+	depends on INFINIBAND_USER_MEM
+	select MMU_NOTIFIER
+	default y
+	---help---
+	  On demand paging support for the InfiniBand subsystem.
+	  Together with driver support this allows registration of
+	  memory regions without pinning their pages, fetching the
+	  pages on demand instead.
+
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INFINIBAND
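
For what it's worth, once CONFIG_INFINIBAND_ON_DEMAND_PAGING is set, the usual Kconfig patterns apply in C code, either an #ifdef or IS_ENABLED(). A small illustrative sketch (the helper name is invented, not from this commit):

/* Illustrative only: consuming the new Kconfig symbol from C code.
 * report_odp_build() is a hypothetical helper, not part of the commit. */
#include <linux/kconfig.h>
#include <linux/printk.h>

static void report_odp_build(void)
{
	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		pr_info("ib_core: on-demand paging support is built in\n");
	else
		pr_info("ib_core: on-demand paging support is not built in\n");
}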
drivers/infiniband/core/Makefile  +1 −0
@@ -11,6 +11,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
 ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o

drivers/infiniband/core/addr.c  +2 −2
@@ -176,8 +176,8 @@ static void set_timeout(unsigned long time)
 	unsigned long delay;
 
 	delay = time - jiffies;
-	if ((long)delay <= 0)
-		delay = 1;
+	if ((long)delay < 0)
+		delay = 0;
 
 	mod_delayed_work(addr_wq, &work, delay);
 }
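
The fix above relies on the standard jiffies idiom: time - jiffies is computed in unsigned arithmetic and then viewed as a signed long, so an expiry that is already in the past shows up as a negative delay. Since mod_delayed_work() treats a delay of 0 as "run immediately", clamping to 0 rather than 1 avoids waiting an extra jiffy. A standalone demonstration of the arithmetic (the values are made up):

/* Userspace demo of the signed-difference idiom used in set_timeout() above.
 * Values are invented; only the wrap-around arithmetic matters. */
#include <stdio.h>

int main(void)
{
	unsigned long jiffies = 1000;           /* pretend current time           */
	unsigned long time    = jiffies - 8;    /* a deadline 8 ticks in the past */
	unsigned long delay   = time - jiffies; /* wraps to a huge unsigned value */

	if ((long)delay < 0)                    /* signed view recovers -8        */
		delay = 0;                      /* i.e. schedule the work now     */

	printf("delay = %lu\n", delay);         /* prints: delay = 0              */
	return 0;
}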
drivers/infiniband/core/multicast.c  +8 −3
@@ -525,17 +525,22 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
 	if (status)
 		process_join_error(group, status);
 	else {
+		int mgids_changed, is_mgid0;
 		ib_find_pkey(group->port->dev->device, group->port->port_num,
 			     be16_to_cpu(rec->pkey), &pkey_index);
 
 		spin_lock_irq(&group->port->lock);
-		group->rec = *rec;
 		if (group->state == MCAST_BUSY &&
 		    group->pkey_index == MCAST_INVALID_PKEY_INDEX)
 			group->pkey_index = pkey_index;
-		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+		mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
+				       sizeof(group->rec.mgid));
+		group->rec = *rec;
+		if (mgids_changed) {
 			rb_erase(&group->node, &group->port->table);
-			mcast_insert(group->port, group, 1);
+			is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
+					   sizeof(mgid0));
+			mcast_insert(group->port, group, is_mgid0);
 		}
 		spin_unlock_irq(&group->port->lock);
 	}
drivers/infiniband/core/umem.c  +66 −6
@@ -39,6 +39,7 @@
 #include <linux/hugetlb.h>
 #include <linux/dma-attrs.h>
 #include <linux/slab.h>
+#include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"

@@ -69,6 +70,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d

 /**
  * ib_umem_get - Pin and DMA map userspace memory.
+ *
+ * If access flags indicate ODP memory, avoid pinning. Instead, stores
+ * the mm for future page fault handling in conjunction with MMU notifiers.
+ *
  * @context: userspace context to pin memory for
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
@@ -103,17 +108,30 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,

 	umem->context   = context;
 	umem->length    = size;
-	umem->offset    = addr & ~PAGE_MASK;
+	umem->address   = addr;
 	umem->page_size = PAGE_SIZE;
 	umem->pid       = get_task_pid(current, PIDTYPE_PID);
 	/*
-	 * We ask for writable memory if any access flags other than
-	 * "remote read" are set.  "Local write" and "remote write"
+	 * We ask for writable memory if any of the following
+	 * access flags are set.  "Local write" and "remote write"
 	 * obviously require write access.  "Remote atomic" can do
 	 * things like fetch and add, which will modify memory, and
 	 * "MW bind" can change permissions by binding a window.
 	 */
-	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
+	umem->writable  = !!(access &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+
+	if (access & IB_ACCESS_ON_DEMAND) {
+		ret = ib_umem_odp_get(context, umem);
+		if (ret) {
+			kfree(umem);
+			return ERR_PTR(ret);
+		}
+		return umem;
+	}
+
+	umem->odp_data = NULL;
 
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb   = 1;
@@ -132,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (!vma_list)
 		umem->hugetlb = 0;
 
-	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+	npages = ib_umem_num_pages(umem);
 
 	down_write(&current->mm->mmap_sem);

@@ -235,6 +253,11 @@ void ib_umem_release(struct ib_umem *umem)
 	struct task_struct *task;
 	unsigned long diff;
 
+	if (umem->odp_data) {
+		ib_umem_odp_release(umem);
+		return;
+	}
+
 	__ib_umem_release(umem->context->device, umem, 1);
 
 	task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -246,7 +269,7 @@ void ib_umem_release(struct ib_umem *umem)
 	if (!mm)
 		goto out;
 
-	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+	diff = ib_umem_num_pages(umem);
 
 	/*
 	 * We may be called with the mm's mmap_sem already held.  This
@@ -283,6 +306,9 @@ int ib_umem_page_count(struct ib_umem *umem)
 	int n;
 	struct scatterlist *sg;
 
+	if (umem->odp_data)
+		return ib_umem_num_pages(umem);
+
 	shift = ilog2(umem->page_size);
 
 	n = 0;
@@ -292,3 +318,37 @@ int ib_umem_page_count(struct ib_umem *umem)
 	return n;
 }
 EXPORT_SYMBOL(ib_umem_page_count);
+
+/*
+ * Copy from the given ib_umem's pages to the given buffer.
+ *
+ * umem - the umem to copy from
+ * offset - offset to start copying from
+ * dst - destination buffer
+ * length - buffer length
+ *
+ * Returns 0 on success, or an error code.
+ */
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length)
+{
+	size_t end = offset + length;
+	int ret;
+
+	if (offset > umem->length || length > umem->length - offset) {
+		pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
+		       offset, umem->length, end);
+		return -EINVAL;
+	}
+
+	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+				 offset + ib_umem_offset(umem));
+
+	if (ret < 0)
+		return ret;
+	else if (ret != length)
+		return -EINVAL;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(ib_umem_copy_from);
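
The new ib_umem_copy_from() helper is consumed later in this series (for example by the mlx5 patch that reads a WQE from user-space). A hypothetical driver-side caller, assuming the prototype is exported via <rdma/ib_umem.h>; the wrapper below is invented purely to illustrate the contract:

/* Sketch of a hypothetical caller of the new ib_umem_copy_from() helper;
 * read_user_wqe() is made up for illustration and is not in this commit. */
#include <rdma/ib_umem.h>

static int read_user_wqe(struct ib_umem *umem, size_t wqe_offset,
			 void *buf, size_t wqe_size)
{
	/* ib_umem_copy_from() checks that [wqe_offset, wqe_offset + wqe_size)
	 * lies inside the registered region and returns -EINVAL otherwise,
	 * so the caller does not need its own bounds check. */
	int ret = ib_umem_copy_from(buf, umem, wqe_offset, wqe_size);

	if (ret)
		return ret;

	/* On success buf holds wqe_size bytes copied out of the umem pages. */
	return 0;
}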