Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 03e85f9d authored by Oscar Salvador's avatar Oscar Salvador Committed by Linus Torvalds
Browse files

mm/page_alloc: Introduce free_area_init_core_hotplug

Currently, whenever a new node is created/re-used from the memhotplug
path, we call free_area_init_node()->free_area_init_core().  But there is
some code that we do not really need to run when we are coming from such
path.

free_area_init_core() performs the following actions:

1) Initializes pgdat internals, such as spinlock, waitqueues and more.
2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on
   when creating hash tables.
3) Account number of managed_pages per zone, substracting dma_reserved and
   memmap pages.
4) Initializes some fields of the zone structure data
5) Calls init_currently_empty_zone to initialize all the freelists
6) Calls memmap_init to initialize all pages belonging to certain zone

When called from memhotplug path, free_area_init_core() only performs
actions #1 and #4.

Action #2 is pointless as the zones do not have any pages since either the
node was freed, or we are re-using it, eitherway all zones belonging to
this node should have 0 pages.  For the same reason, action #3 results
always in manages_pages being 0.

Action #5 and #6 are performed later on when onlining the pages:
 online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone()
 online_pages()->move_pfn_range_to_zone()->memmap_init_zone()

This patch does two things:

First, moves the node/zone initializtion to their own function, so it
allows us to create a small version of free_area_init_core, where we only
perform:

1) Initialization of pgdat internals, such as spinlock, waitqueues and more
4) Initialization of some fields of the zone structure data

These two functions are: pgdat_init_internals() and zone_init_internals().

The second thing this patch does, is to introduce
free_area_init_core_hotplug(), the memhotplug version of
free_area_init_core():

Currently, we call free_area_init_node() from the memhotplug path.  In
there, we set some pgdat's fields, and call calculate_node_totalpages().
calculate_node_totalpages() calculates the # of pages the node has.

Since the node is either new, or we are re-using it, the zones belonging
to this node should not have any pages, so there is no point to calculate
this now.

Actually, we re-set these values to 0 later on with the calls to:

reset_node_managed_pages()
reset_node_present_pages()

The # of pages per node and the # of pages per zone will be calculated when
onlining the pages:

online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range()
online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range()

Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace
__paginginit with __init, so their code gets freed up.

[osalvador@techadventures.net: fix section usage]
  Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net
[osalvador@suse.de: v6]
  Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net
Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net


Signed-off-by: default avatarOscar Salvador <osalvador@suse.de>
Reviewed-by: default avatarPavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: default avatarMichal Hocko <mhocko@suse.com>
Acked-by: default avatarVlastimil Babka <vbabka@suse.cz>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 0188dc98
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
static inline void remove_memory(int nid, u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */

extern void __ref free_area_init_core_hotplug(int nid);
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
		void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
+1 −1
Original line number Diff line number Diff line
@@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)

extern void __init pagecache_init(void);
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, unsigned long * zones_size,
extern void __init free_area_init_node(int nid, unsigned long * zones_size,
		unsigned long zone_start_pfn, unsigned long *zholes_size);
extern void free_initmem(void);

+5 −11
Original line number Diff line number Diff line
@@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
	struct pglist_data *pgdat;
	unsigned long zones_size[MAX_NR_ZONES] = {0};
	unsigned long zholes_size[MAX_NR_ZONES] = {0};
	unsigned long start_pfn = PFN_DOWN(start);

	pgdat = NODE_DATA(nid);
@@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)

	/* we can use NODE_DATA(nid) from here */

	pgdat->node_id = nid;
	pgdat->node_start_pfn = start_pfn;

	/* init node's zones as empty zones, we don't have any present pages.*/
	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
	free_area_init_core_hotplug(nid);
	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);

	/*
@@ -1016,19 +1017,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
	 */
	build_all_zonelists(pgdat);

	/*
	 * zone->managed_pages is set to an approximate value in
	 * free_area_init_core(), which will cause
	 * /sys/device/system/node/nodeX/meminfo has wrong data.
	 * So reset it to 0 before any memory is onlined.
	 */
	reset_node_managed_pages(pgdat);

	/*
	 * When memory is hot-added, all the memory is in offline state. So
	 * clear all zones' present_pages because they will be updated in
	 * online_pages() and offline_pages().
	 */
	reset_node_managed_pages(pgdat);
	reset_node_present_pages(pgdat);

	return pgdat;
+54 −24
Original line number Diff line number Diff line
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE

/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
void __meminit set_pageblock_order(void)
void __init set_pageblock_order(void)
{
	unsigned int order;

@@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
 * include/linux/pageblock-flags.h for the values of pageblock_order based on
 * the kernel config
 */
void __meminit set_pageblock_order(void)
void __init set_pageblock_order(void)
{
}

#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
						unsigned long present_pages)
{
	unsigned long pages = spanned_pages;
@@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
#endif

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 *
 * NOTE: pgdat should get zeroed by caller.
 */
static void __meminit free_area_init_core(struct pglist_data *pgdat)
static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
{
	enum zone_type j;
	int nid = pgdat->node_id;

	pgdat_resize_init(pgdat);

	pgdat_init_numabalancing(pgdat);
@@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
	pgdat_page_ext_init(pgdat);
	spin_lock_init(&pgdat->lru_lock);
	lruvec_init(node_lruvec(pgdat));
}

static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
							unsigned long remaining_pages)
{
	zone->managed_pages = remaining_pages;
	zone_set_nid(zone, nid);
	zone->name = zone_names[idx];
	zone->zone_pgdat = NODE_DATA(nid);
	spin_lock_init(&zone->lock);
	zone_seqlock_init(zone);
	zone_pcp_init(zone);
}

/*
 * Set up the zone data structures
 * - init pgdat internals
 * - init all zones belonging to this node
 *
 * NOTE: this function is only called during memory hotplug
 */
#ifdef CONFIG_MEMORY_HOTPLUG
void __ref free_area_init_core_hotplug(int nid)
{
	enum zone_type z;
	pg_data_t *pgdat = NODE_DATA(nid);

	pgdat_init_internals(pgdat);
	for (z = 0; z < MAX_NR_ZONES; z++)
		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
}
#endif

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 *
 * NOTE: pgdat should get zeroed by caller.
 * NOTE: this function is only called during early init.
 */
static void __init free_area_init_core(struct pglist_data *pgdat)
{
	enum zone_type j;
	int nid = pgdat->node_id;

	pgdat_init_internals(pgdat);
	pgdat->per_cpu_nodestats = &boot_nodestats;

	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
		 * when the bootmem allocator frees pages into the buddy system.
		 * And all highmem pages will be managed by the buddy system.
		 */
		zone->managed_pages = freesize;
		zone_set_nid(zone, nid);
		zone->name = zone_names[j];
		zone->zone_pgdat = pgdat;
		spin_lock_init(&zone->lock);
		zone_seqlock_init(zone);
		zone_pcp_init(zone);
		zone_init_internals(zone, j, nid, freesize);

		if (!size)
			continue;
@@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
#endif

void __meminit free_area_init_node(int nid, unsigned long *zones_size,
void __init free_area_init_node(int nid, unsigned long *zones_size,
				   unsigned long node_start_pfn,
				   unsigned long *zholes_size)
{
@@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
 * may be accessed (for example page_to_pfn() on some configuration accesses
 * flags). We must explicitly zero those struct pages.
 */
void __meminit zero_resv_unavail(void)
void __init zero_resv_unavail(void)
{
	phys_addr_t start, end;
	unsigned long pfn;