
Commit d0dc12e8 authored by Pavel Tatashin, committed by Linus Torvalds

mm/memory_hotplug: optimize memory hotplug

During memory hotplugging we traverse struct pages three times:

1. memset(0) in sparse_add_one_section()
2. loop in __add_section() to call set_page_node(page, nid); and
   SetPageReserved(page);
3. loop in memmap_init_zone() to call __init_single_pfn()

This patch removes the first two loops, and leaves only loop 3.  All
struct pages are initialized in one place, the same as it is done during
boot.

The benefits:

 - We improve memory hotplug performance because we no longer evict the
   cache several times; we also reduce loop branching overhead (a minimal
   userspace sketch of this effect follows this list).

 - Remove a condition from the hot path in __init_single_pfn() that was
   added in order to fix the problem reported by Bharata in the email
   thread linked below, thus also improving performance during normal
   boot.

 - Make memory hotplug more similar to the boot memory initialization
   path because we zero and initialize struct pages only in one
   function.

 - Simplify memory hotplug struct page initialization code, and thus
   enable future improvements, such as multi-threading the
   initialization of struct pages in order to improve hotplug
   performance even further on larger machines.
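
As a rough illustration of the cache effect in the first bullet above,
here is a minimal userspace sketch; it is not kernel code, and
struct fake_page, N, and the timing scaffolding are invented for the
demo. It initializes a large array of page-sized records in three
passes versus one:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* 64-byte record standing in for struct page (invented for this demo). */
struct fake_page {
	unsigned long flags;
	int nid;
	long pad[6];
};

#define N (1UL << 22)	/* ~4M records, ~256 MB: well beyond the LLC */

static double since(struct timespec t0)
{
	struct timespec t1;

	clock_gettime(CLOCK_MONOTONIC, &t1);
	return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
}

int main(void)
{
	struct fake_page *p = malloc(sizeof(*p) * N);
	struct timespec t0;
	unsigned long i;

	if (!p)
		return 1;

	/* Old shape: three separate walks (memset, set node, init). */
	clock_gettime(CLOCK_MONOTONIC, &t0);
	memset(p, 0, sizeof(*p) * N);
	for (i = 0; i < N; i++)
		p[i].nid = 0;
	for (i = 0; i < N; i++)
		p[i].flags = 1;
	printf("three passes: %.3fs\n", since(t0));

	/* New shape: everything in one walk, touching each line once. */
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < N; i++) {
		memset(&p[i], 0, sizeof(p[i]));
		p[i].nid = 0;
		p[i].flags = 1;
	}
	printf("one pass:     %.3fs\n", since(t0));

	free(p);
	return 0;
}

When the array greatly exceeds the last-level cache, the three-pass
variant streams the whole array through the cache three times, so the
single pass typically wins; the exact ratio is hardware-dependent.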

[pasha.tatashin@oracle.com: v5]
  Link: http://lkml.kernel.org/r/20180228030308.1116-7-pasha.tatashin@oracle.com
Link: http://lkml.kernel.org/r/20180215165920.8570-7-pasha.tatashin@oracle.com


Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent fc44f7f9
drivers/base/node.c  +2 −0
@@ -407,6 +407,8 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
 
 	if (!mem_blk)
 		return -EFAULT;
+
+	mem_blk->nid = nid;
 	if (!node_online(nid))
 		return 0;
include/linux/memory.h  +1 −0
@@ -33,6 +33,7 @@ struct memory_block {
 	void *hw;			/* optional pointer to fw/hw data */
 	int (*phys_callback)(struct memory_block *);
 	struct device dev;
+	int nid;			/* NID for this memory block */
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
mm/memory_hotplug.c  +8 −19
@@ -250,7 +250,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
 		struct vmem_altmap *altmap, bool want_memblock)
 {
 	int ret;
-	int i;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
@@ -259,23 +258,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
 	if (ret < 0)
 		return ret;
 
-	/*
-	 * Make all the pages reserved so that nobody will stumble over half
-	 * initialized state.
-	 * FIXME: We also have to associate it with a node because page_to_nid
-	 * relies on having page with the proper node.
-	 */
-	for (i = 0; i < PAGES_PER_SECTION; i++) {
-		unsigned long pfn = phys_start_pfn + i;
-		struct page *page;
-		if (!pfn_valid(pfn))
-			continue;
-
-		page = pfn_to_page(pfn);
-		set_page_node(page, nid);
-		SetPageReserved(page);
-	}
-
 	if (!want_memblock)
 		return 0;
 
@@ -908,8 +890,15 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	int nid;
 	int ret;
 	struct memory_notify arg;
+	struct memory_block *mem;
+
+	/*
+	 * We can't use pfn_to_nid() because nid might be stored in struct page
+	 * which is not yet initialized. Instead, we find nid from memory block.
+	 */
+	mem = find_memory_block(__pfn_to_section(pfn));
+	nid = mem->nid;
 
-	nid = pfn_to_nid(pfn);
 	/* associate pfn range with the zone */
 	zone = move_pfn_range(online_type, nid, pfn, nr_pages);

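Reading the last hunk together with the drivers/base/node.c and
include/linux/memory.h changes above: since __add_section() no longer
pre-initializes the section's struct pages, online_pages() can no
longer derive the node id from the memmap, so the node id is cached in
the new memory_block::nid field at registration time and read back here
in place of the pfn_to_nid() call.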
mm/page_alloc.c  +10 −18
@@ -1143,9 +1143,8 @@ static void free_one_page(struct zone *zone,
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
-				unsigned long zone, int nid, bool zero)
+				unsigned long zone, int nid)
 {
-	if (zero)
-		mm_zero_struct_page(page);
+	mm_zero_struct_page(page);
 	set_page_links(page, zone, nid, pfn);
 	init_page_count(page);
@@ -1160,12 +1159,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 #endif
 }
 
-static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
-					int nid, bool zero)
-{
-	return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero);
-}
-
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static void __meminit init_reserved_page(unsigned long pfn)
 {
@@ -1184,7 +1177,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
 		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
 			break;
 	}
-	__init_single_pfn(pfn, zid, nid, true);
+	__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
 }
 #else
 static inline void init_reserved_page(unsigned long pfn)
@@ -1501,7 +1494,7 @@ static unsigned long __init deferred_init_pages(int nid, int zid,
 		} else {
 			page++;
 		}
-		__init_single_page(page, pfn, zid, nid, true);
+		__init_single_page(page, pfn, zid, nid);
 		nr_pages++;
 	}
 	return (nr_pages);
@@ -5434,6 +5427,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long pfn;
 	unsigned long nr_initialised = 0;
+	struct page *page;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	struct memblock_region *r = NULL, *tmp;
 #endif
@@ -5486,6 +5480,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 #endif
 
 not_early:
+		page = pfn_to_page(pfn);
+		__init_single_page(page, pfn, zone, nid);
+		if (context == MEMMAP_HOTPLUG)
+			SetPageReserved(page);
+
 		/*
 		 * Mark the block movable so that blocks are reserved for
 		 * movable at startup. This will force kernel allocations
@@ -5502,15 +5501,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * because this is done early in sparse_add_one_section
 		 */
 		if (!(pfn & (pageblock_nr_pages - 1))) {
-			struct page *page = pfn_to_page(pfn);
-
-			__init_single_page(page, pfn, zone, nid,
-					context != MEMMAP_HOTPLUG);
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 			cond_resched();
-		} else {
-			__init_single_pfn(pfn, zone, nid,
-					context != MEMMAP_HOTPLUG);
 		}
 	}
 }
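One behavioral detail worth noting in the memmap_init_zone() hunks
above: marking hotplugged pages reserved, previously done by the
removed loop in __add_section(), now happens in the same single pass,
guarded by context == MEMMAP_HOTPLUG, immediately after each struct
page is initialized.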
mm/sparse.c  +7 −1
@@ -779,7 +779,13 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
 		goto out;
 	}
 
-	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Poison uninitialized struct pages in order to catch invalid flags
+	 * combinations.
+	 */
+	memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
+#endif
 
 	section_mark_present(ms);
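
For context on the CONFIG_DEBUG_VM hunk above: instead of zeroing the
memmap early, the struct pages are filled with PAGE_POISON_PATTERN so
that any use before __init_single_page() runs shows up as an obviously
bogus value. A minimal userspace sketch of that poison-then-detect
idea follows; struct rec and POISON are invented for the demo, and
this is not the kernel's actual check:

#include <assert.h>
#include <string.h>

struct rec {
	unsigned long flags;
	int nid;
};

#define POISON 0xaa	/* stand-in for PAGE_POISON_PATTERN */

int main(void)
{
	struct rec r;
	unsigned long poisoned;

	/* Poison the record instead of zeroing it. */
	memset(&r, POISON, sizeof(r));
	memset(&poisoned, POISON, sizeof(poisoned));

	/* Use-before-init is now detectable: flags still hold poison. */
	assert(r.flags == poisoned);

	/* Proper one-pass initialization clears the poison. */
	r.flags = 0;
	r.nid = 0;
	assert(r.flags != poisoned);
	return 0;
}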