
Commit d10bcf94 authored by Shiraz Saleem, committed by Jason Gunthorpe

RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs



Combine contiguous regions of PAGE_SIZE pages into a single scatter list
entry while building the scatter table for a umem. This minimizes the
number of entries in the scatter list and reduces the DMA mapping
overhead, particularly with the IOMMU.

Set the default max_seg_size for IB devices to 2G in the core, and do not
combine pages if the combined entry would exceed this limit.

Also, purge npages from struct ib_umem: the umem SGL is now DMA mapped
with sg_nents, so the npages computation is no longer needed. Drivers
should now be using ib_umem_num_pages(), so fix the last stragglers.

Move npages tracking to ib_umem_odp as ODP drivers still need it.
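
As a rough illustration of the idea, here is a userspace sketch with
made-up names (coalesce() and struct seg are not kernel APIs): runs of
physically contiguous pages fold into one segment, capped by a maximum
segment size, which is what ib_umem_add_sg_table() below does per
scatterlist entry.

#include <stdio.h>

struct seg {
	unsigned long start_pfn;
	unsigned long nr_pages;
};

static int coalesce(const unsigned long *pfns, unsigned long npfns,
		    unsigned long max_seg_pages, struct seg *out)
{
	int nents = 0;
	unsigned long i = 0;

	while (i < npfns) {
		unsigned long first = pfns[i];
		unsigned long len = 0;

		/* count contiguous pfns starting at i, up to the cap */
		while (i < npfns && pfns[i] == first + len &&
		       len < max_seg_pages) {
			len++;
			i++;
		}
		out[nents].start_pfn = first;
		out[nents].nr_pages = len;
		nents++;
	}
	return nents;
}

int main(void)
{
	unsigned long pfns[] = { 100, 101, 102, 200, 201, 300 };
	struct seg segs[6];
	int i, n = coalesce(pfns, 6, 512, segs);

	for (i = 0; i < n; i++)
		printf("seg %d: pfn %lu, %lu pages\n",
		       i, segs[i].start_pfn, segs[i].nr_pages);
	return 0;	/* prints 3 segments: 100x3, 200x2, 300x1 */
}

The in-kernel version additionally merges a new batch of pinned pages
into the tail entry left by the previous call when the two happen to be
contiguous, so a umem pinned in several batches can still end up as a
single SGE.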

Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Tested-by: Gal Pressman <galpress@amazon.com>
Tested-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent c7252a65
drivers/infiniband/core/device.c  +3 −0
@@ -1089,6 +1089,9 @@ static void setup_dma_device(struct ib_device *device)
		WARN_ON_ONCE(!parent);
		device->dma_device = parent;
	}
	/* Setup default max segment size for all IB devices */
	dma_set_max_seg_size(device->dma_device, SZ_2G);

}

/*
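
The hunk above gives every IB device's dma_device a 2G default maximum
DMA segment; ib_umem_get() passes that value (via dma_get_max_seg_size())
into ib_umem_add_sg_table(), which refuses to grow an SGE past it. A
device with a tighter hardware limit can lower the cap before memory
registration happens; a minimal sketch, where example_limit_seg_size()
and the 1MB figure are illustrative rather than from this patch:

#include <linux/dma-mapping.h>	/* dma_set_max_seg_size() */
#include <linux/sizes.h>	/* SZ_1M */
#include <rdma/ib_verbs.h>	/* struct ib_device */

static void example_limit_seg_size(struct ib_device *ibdev)
{
	/* cap combined umem SGEs at 1MB instead of the core's 2G default */
	dma_set_max_seg_size(ibdev->dma_device, SZ_1M);
}
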
drivers/infiniband/core/umem.c  +81 −20
@@ -39,25 +39,22 @@
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"


static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
	struct scatterlist *sg;
	struct sg_page_iter sg_iter;
	struct page *page;
	int i;

	if (umem->nmap > 0)
		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
				umem->npages,
		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
				DMA_BIDIRECTIONAL);

	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {

		page = sg_page(sg);
	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
		page = sg_page_iter_page(&sg_iter);
		if (!PageDirty(page) && umem->writable && dirty)
			set_page_dirty_lock(page);
		put_page(page);
@@ -66,6 +63,69 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
	sg_free_table(&umem->sg_head);
}

/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
 *
 * sg: current scatterlist entry
 * page_list: array of npage struct page pointers
 * npages: number of pages in page_list
 * max_seg_sz: maximum segment size in bytes
 * nents: [out] number of entries in the scatterlist
 *
 * Return new end of scatterlist
 */
static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
						struct page **page_list,
						unsigned long npages,
						unsigned int max_seg_sz,
						int *nents)
{
	unsigned long first_pfn;
	unsigned long i = 0;
	bool update_cur_sg = false;
	bool first = !sg_page(sg);

	/* Check if new page_list is contiguous with end of previous page_list.
	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
	 */
	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
		       page_to_pfn(page_list[0])))
		update_cur_sg = true;

	while (i != npages) {
		unsigned long len;
		struct page *first_page = page_list[i];

		first_pfn = page_to_pfn(first_page);

		/* Compute the number of contiguous pages we have starting
		 * at i
		 */
		for (len = 0; i != npages &&
			      first_pfn + len == page_to_pfn(page_list[i]);
		     len++)
			i++;

		/* Squash N contiguous pages from page_list into current sge */
		if (update_cur_sg &&
		    ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) {
			sg_set_page(sg, sg_page(sg),
				    sg->length + (len << PAGE_SHIFT), 0);
			update_cur_sg = false;
			continue;
		}

		/* Squash N contiguous pages into next sge or first sge */
		if (!first)
			sg = sg_next(sg);

		(*nents)++;
		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
		first = false;
	}

	return sg;
}

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
@@ -93,7 +153,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
	int ret;
	int i;
	unsigned long dma_attrs = 0;
	struct scatterlist *sg, *sg_list_start;
	struct scatterlist *sg;
	unsigned int gup_flags = FOLL_WRITE;

	if (!udata)
@@ -190,7 +250,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
	if (!umem->writable)
		gup_flags |= FOLL_FORCE;

	sg_list_start = umem->sg_head.sgl;
	sg = umem->sg_head.sgl;

	while (npages) {
		down_read(&mm->mmap_sem);
@@ -203,28 +263,29 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
			goto umem_release;
		}

		umem->npages += ret;
		cur_base += ret * PAGE_SIZE;
		npages   -= ret;

		sg = ib_umem_add_sg_table(sg, page_list, ret,
			dma_get_max_seg_size(context->device->dma_device),
			&umem->sg_nents);

		/* Continue to hold the mmap_sem as vma_list access
		 * needs to be protected.
		 */
		for_each_sg(sg_list_start, sg, ret, i) {
		for (i = 0; i < ret && umem->hugetlb; i++) {
			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
				umem->hugetlb = 0;

			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
		}
		up_read(&mm->mmap_sem);

		/* preparing for next loop */
		sg_list_start = sg;
		up_read(&mm->mmap_sem);
	}

	sg_mark_end(sg);

	umem->nmap = ib_dma_map_sg_attrs(context->device,
				  umem->sg_head.sgl,
				  umem->npages,
				  umem->sg_nents,
				  DMA_BIDIRECTIONAL,
				  dma_attrs);
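
Because this inline view interleaves removed and added lines, the
reshaped pinning loop is easier to follow as a condensed sketch (not the
verbatim kernel code; locking, error paths, and the hugetlb/vma_list
bookkeeping are elided, and pin_next_batch() is a made-up stand-in for
the kernel's get_user_pages call):

	sg = umem->sg_head.sgl;		/* fresh table: sg_page(sg) == NULL */

	while (npages) {
		ret = pin_next_batch(cur_base, page_list, vma_list);
		cur_base += ret * PAGE_SIZE;
		npages   -= ret;

		/* append the batch, merging into the tail SGE while the
		 * pages stay physically contiguous and under the segment cap
		 */
		sg = ib_umem_add_sg_table(sg, page_list, ret,
			dma_get_max_seg_size(context->device->dma_device),
			&umem->sg_nents);
	}

	sg_mark_end(sg);		/* terminate the table at the last used SGE */

	umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl,
					 umem->sg_nents, DMA_BIDIRECTIONAL,
					 dma_attrs);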

@@ -320,8 +381,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
		return -EINVAL;
	}

	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
				 offset + ib_umem_offset(umem));
	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, ib_umem_num_pages(umem),
				 dst, length, offset + ib_umem_offset(umem));

	if (ret < 0)
		return ret;
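
With coalescing in place a driver can no longer assume one page per SGE.
A hypothetical sketch of the usual pattern after this patch (the function
name is illustrative): size buffers with ib_umem_num_pages() and expand
each mapped SGE in PAGE_SIZE steps:

#include <linux/scatterlist.h>
#include <rdma/ib_umem.h>

/* 'pas' must hold at least ib_umem_num_pages(umem) entries */
static void example_fill_page_list(struct ib_umem *umem, u64 *pas)
{
	struct scatterlist *sg;
	unsigned int off;
	int i, n = 0;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		u64 dma = sg_dma_address(sg);
		unsigned int len = sg_dma_len(sg);	/* multiple of PAGE_SIZE */

		for (off = 0; off < len; off += PAGE_SIZE)
			pas[n++] = dma + off;
	}
}
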
drivers/infiniband/core/umem_odp.c  +2 −2
@@ -526,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page(
		}
		umem_odp->dma_list[page_index] = dma_addr | access_mask;
		umem_odp->page_list[page_index] = page;
		umem->npages++;
		umem_odp->npages++;
	} else if (umem_odp->page_list[page_index] == page) {
		umem_odp->dma_list[page_index] |= access_mask;
	} else {
@@ -752,7 +752,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
			}
			umem_odp->page_list[idx] = NULL;
			umem_odp->dma_list[idx] = 0;
			umem->npages--;
			umem_odp->npages--;
		}
	}
	mutex_unlock(&umem_odp->umem_mutex);
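
The live page counter that ODP relies on now lives on ib_umem_odp rather
than on the generic umem; the companion change that adds the field to the
struct is in the umem_odp header and is not shown on this page. A minimal
sketch of reading it after this patch (the helper name is made up):

#include <rdma/ib_umem_odp.h>

static bool example_odp_has_mapped_pages(struct ib_umem_odp *umem_odp)
{
	/* pages currently mapped for on-demand paging */
	return umem_odp->npages != 0;
}
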
drivers/infiniband/hw/mlx5/odp.c  +1 −1
@@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,

	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);

	if (unlikely(!umem->npages && mr->parent &&
	if (unlikely(!umem_odp->npages && mr->parent &&
		     !umem_odp->dying)) {
		WRITE_ONCE(umem_odp->dying, 1);
		atomic_inc(&mr->parent->num_leaf_free);
drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c  +6 −5
@@ -119,7 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
	struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
	int ret;
	int ret, npages;

	if (length == 0 || length > dev->dsr->caps.max_mr_size) {
		dev_warn(&dev->pdev->dev, "invalid mem region length\n");
@@ -133,9 +133,10 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
		return ERR_CAST(umem);
	}

	if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
	npages = ib_umem_num_pages(umem);
	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
		dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
			 umem->npages);
			 npages);
		ret = -EINVAL;
		goto err_umem;
	}
@@ -150,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
	mr->mmr.size = length;
	mr->umem = umem;

	ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false);
	ret = pvrdma_page_dir_init(dev, &mr->pdir, npages, false);
	if (ret) {
		dev_warn(&dev->pdev->dev,
			 "could not allocate page directory\n");
@@ -167,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
	cmd->length = length;
	cmd->pd_handle = to_vpd(pd)->pd_handle;
	cmd->access_flags = access_flags;
	cmd->nchunks = umem->npages;
	cmd->nchunks = npages;
	cmd->pdir_dma = mr->pdir.dir_dma;

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
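
pvrdma previously read umem->npages directly; with that field gone, the
page count comes from ib_umem_num_pages(), which derives it from the
umem's address and length rather than from the scatterlist. A rough
restatement of that computation for a normal PAGE_SIZE umem (the helper
name is hypothetical; the real one lives in include/rdma/ib_umem.h):

#include <linux/kernel.h>	/* ALIGN(), ALIGN_DOWN() */
#include <linux/mm.h>		/* PAGE_SIZE, PAGE_SHIFT */

static inline size_t example_umem_num_pages(unsigned long address,
					    size_t length)
{
	/* number of PAGE_SIZE pages spanned by [address, address + length) */
	return (ALIGN(address + length, PAGE_SIZE) -
		ALIGN_DOWN(address, PAGE_SIZE)) >> PAGE_SHIFT;
}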