Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cc5dfd59 authored by Jason Gunthorpe
Browse files

Merge branch 'hmm-devmem-cleanup.4' into rdma.git hmm



Christoph Hellwig says:

====================
Below is a series that cleans up the dev_pagemap interface so that it is
more easily usable, which removes the need to wrap it in hmm and thus
allowing to kill a lot of code

Changes since v3:
 - pull in "mm/swap: Fix release_pages() when releasing devmap pages" and
   rebase the other patches on top of that
 - fold the hmm_devmem_add_resource into the DEVICE_PUBLIC memory removal
   patch
 - remove _vm_normal_page as it isn't needed without DEVICE_PUBLIC memory
 - pick up various ACKs

Changes since v2:
 - fix nvdimm kunit build
 - add a new memory type for device dax
 - fix a few issues in intermediate patches that didn't show up in the end
   result
 - incorporate feedback from Michal Hocko, including killing of
   the DEVICE_PUBLIC memory type entirely

Changes since v1:
 - rebase
 - also switch p2pdma to the internal refcount
 - add type checking for pgmap->type
 - rename the migrate method to migrate_to_ram
 - cleanup the altmap_valid flag
 - various tidbits from the reviews
====================

Conflicts resolved by:
 - Keeping Ira's version of the code in swap.c
 - Using the delete for the section in hmm.rst
 - Using the delete for the devmap code in hmm.c and .h

* branch 'hmm-devmem-cleanup.4': (24 commits)
  mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  mm: remove the HMM config option
  mm: sort out the DEVICE_PRIVATE Kconfig mess
  mm: simplify ZONE_DEVICE page private data
  mm: remove hmm_devmem_add
  mm: remove hmm_vma_alloc_locked_page
  nouveau: use devm_memremap_pages directly
  nouveau: use alloc_page_vma directly
  PCI/P2PDMA: use the dev_pagemap internal refcount
  device-dax: use the dev_pagemap internal refcount
  memremap: provide an optional internal refcount in struct dev_pagemap
  memremap: replace the altmap_valid field with a PGMAP_ALTMAP_VALID flag
  memremap: remove the data field in struct dev_pagemap
  memremap: add a migrate_to_ram method to struct dev_pagemap_ops
  memremap: lift the devmap_enable manipulation into devm_memremap_pages
  memremap: pass a struct dev_pagemap to ->kill and ->cleanup
  memremap: move dev_pagemap callbacks into a separate structure
  memremap: validate the pagemap type passed to devm_memremap_pages
  mm: factor out a devm_request_free_mem_region helper
  mm: export alloc_pages_vma
  ...

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parents 9ec3f4cb b6b346a0
Loading
Loading
Loading
Loading
+0 −27
Original line number Diff line number Diff line
@@ -336,33 +336,6 @@ directly using struct page for device memory which left most kernel code paths
unaware of the difference. We only need to make sure that no one ever tries to
map those pages from the CPU side.

HMM provides a set of helpers to register and hotplug device memory as a new
region needing a struct page. This is offered through a very simple API::

 struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
                                   struct device *device,
                                   unsigned long size);
 void hmm_devmem_remove(struct hmm_devmem *devmem);

The hmm_devmem_ops is where most of the important things are::

 struct hmm_devmem_ops {
     void (*free)(struct hmm_devmem *devmem, struct page *page);
     vm_fault_t (*fault)(struct hmm_devmem *devmem,
                  struct vm_area_struct *vma,
                  unsigned long addr,
                  struct page *page,
                  unsigned flags,
                  pmd_t *pmdp);
 };

The first callback (free()) happens when the last reference on a device page is
dropped. This means the device page is now free and no longer used by anyone.
The second callback happens whenever the CPU tries to access a device page
which it cannot do. This second callback must trigger a migration back to
system memory.


Migration to and from device memory
===================================

+1 −9
Original line number Diff line number Diff line
@@ -131,17 +131,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	struct page *page;
	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
	int ret;

	/*
	 * If we have an altmap then we need to skip over any reserved PFNs
	 * when querying the zone.
	 */
	page = pfn_to_page(start_pfn);
	if (altmap)
		page += vmem_altmap_offset(altmap);

	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);

	/* Remove htab bolted mappings for this section of memory */
+2 −6
Original line number Diff line number Diff line
@@ -1213,13 +1213,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	struct page *page = pfn_to_page(start_pfn);
	struct zone *zone;
	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
	struct zone *zone = page_zone(page);

	/* With altmap the first mapped page is offset from @start */
	if (altmap)
		page += vmem_altmap_offset(altmap);
	zone = page_zone(page);
	__remove_pages(zone, start_pfn, nr_pages, altmap);
	kernel_physical_mapping_remove(start, start + size);
}
+0 −4
Original line number Diff line number Diff line
@@ -43,8 +43,6 @@ struct dax_region {
 * @target_node: effective numa node if dev_dax memory range is onlined
 * @dev - device core
 * @pgmap - pgmap for memmap setup / lifetime (driver owned)
 * @ref: pgmap reference count (driver owned)
 * @cmp: @ref final put completion (driver owned)
 */
struct dev_dax {
	struct dax_region *region;
@@ -52,8 +50,6 @@ struct dev_dax {
	int target_node;
	struct device dev;
	struct dev_pagemap pgmap;
	struct percpu_ref ref;
	struct completion cmp;
};

static inline struct dev_dax *to_dev_dax(struct device *dev)
+1 −40
Original line number Diff line number Diff line
@@ -14,37 +14,6 @@
#include "dax-private.h"
#include "bus.h"

static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
{
	return container_of(ref, struct dev_dax, ref);
}

static void dev_dax_percpu_release(struct percpu_ref *ref)
{
	struct dev_dax *dev_dax = ref_to_dev_dax(ref);

	dev_dbg(&dev_dax->dev, "%s\n", __func__);
	complete(&dev_dax->cmp);
}

static void dev_dax_percpu_exit(struct percpu_ref *ref)
{
	struct dev_dax *dev_dax = ref_to_dev_dax(ref);

	dev_dbg(&dev_dax->dev, "%s\n", __func__);
	wait_for_completion(&dev_dax->cmp);
	percpu_ref_exit(ref);
}

static void dev_dax_percpu_kill(struct percpu_ref *data)
{
	struct percpu_ref *ref = data;
	struct dev_dax *dev_dax = ref_to_dev_dax(ref);

	dev_dbg(&dev_dax->dev, "%s\n", __func__);
	percpu_ref_kill(ref);
}

static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
		const char *func)
{
@@ -459,15 +428,7 @@ int dev_dax_probe(struct device *dev)
		return -EBUSY;
	}

	init_completion(&dev_dax->cmp);
	rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
			GFP_KERNEL);
	if (rc)
		return rc;

	dev_dax->pgmap.ref = &dev_dax->ref;
	dev_dax->pgmap.kill = dev_dax_percpu_kill;
	dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
	dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);
Loading