Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0c0e6195 authored by KAMEZAWA Hiroyuki's avatar KAMEZAWA Hiroyuki Committed by Linus Torvalds
Browse files

memory unplug: page offline



Logic.
 - set all pages in  [start,end)  as isolated migration-type.
   by this, all free pages in the range will be not-for-use.
 - Migrate all LRU pages in the range.
 - Test all pages in the range's refcnt is zero or not.

Todo:
 - allocate migration destination page from better area.
 - confirm page_count(page)== 0 && PageReserved(page) page is safe to be freed..
 (I don't like this kind of page but..
 - Find out pages which cannot be migrated.
 - more running tests.
 - Use reclaim for unplugging other memory type area.

Signed-off-by: default avatarKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: default avatarYasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent a5d76b54
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -305,6 +305,9 @@ config HOTPLUG_CPU
config ARCH_ENABLE_MEMORY_HOTPLUG
	def_bool y

config ARCH_ENABLE_MEMORY_HOTREMOVE
	def_bool y

config SCHED_SMT
	bool "SMT scheduler support"
	depends on SMP
+1 −0
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@ extern const char linux_proc_banner[];
#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
#define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
#define IS_ALIGNED(x,a)		(((x) % ((typeof(x))(a))) == 0)

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

+4 −1
Original line number Diff line number Diff line
@@ -58,7 +58,10 @@ extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
extern void online_page(struct page *page);
/* VM interface that may be used by firmware interface */
extern int online_pages(unsigned long, unsigned long);

#ifdef CONFIG_MEMORY_HOTREMOVE
extern int offline_pages(unsigned long, unsigned long, unsigned long);
extern void __offline_isolated_pages(unsigned long, unsigned long);
#endif
/* reasonably generic interface to expand the physical pages in a zone  */
extern int __add_pages(struct zone *zone, unsigned long start_pfn,
	unsigned long nr_pages);
+5 −0
Original line number Diff line number Diff line
@@ -139,6 +139,11 @@ config MEMORY_HOTPLUG_SPARSE
	def_bool y
	depends on SPARSEMEM && MEMORY_HOTPLUG

config MEMORY_HOTREMOVE
	bool "Allow for memory hot remove"
	depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
	depends on MIGRATION

# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
# space can be handled with less contention: split it at this NR_CPUS.
+254 −0
Original line number Diff line number Diff line
@@ -23,6 +23,9 @@
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/cpuset.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>

#include <asm/tlbflush.h>

@@ -302,3 +305,254 @@ int add_memory(int nid, u64 start, u64 size)
	return ret;
}
EXPORT_SYMBOL_GPL(add_memory);

#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * Confirm all pages in a range [start, end) is belongs to the same zone.
 */
static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct zone *zone = NULL;
	struct page *page;
	int i;
	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += MAX_ORDER_NR_PAGES) {
		i = 0;
		/* This is just a CONFIG_HOLES_IN_ZONE check.*/
		while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
			i++;
		if (i == MAX_ORDER_NR_PAGES)
			continue;
		page = pfn_to_page(pfn + i);
		if (zone && page_zone(page) != zone)
			return 0;
		zone = page_zone(page);
	}
	return 1;
}

/*
 * Scanning pfn is much easier than scanning lru list.
 * Scan pfn from start to end and Find LRU page.
 */
int scan_lru_pages(unsigned long start, unsigned long end)
{
	unsigned long pfn;
	struct page *page;
	for (pfn = start; pfn < end; pfn++) {
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageLRU(page))
				return pfn;
		}
	}
	return 0;
}

static struct page *
hotremove_migrate_alloc(struct page *page,
			unsigned long private,
			int **x)
{
	/* This should be improoooooved!! */
	return alloc_page(GFP_HIGHUSER_PAGECACHE);
}


#define NR_OFFLINE_AT_ONCE_PAGES	(256)
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct page *page;
	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
	int not_managed = 0;
	int ret = 0;
	LIST_HEAD(source);

	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		if (!page_count(page))
			continue;
		/*
		 * We can skip free pages. And we can only deal with pages on
		 * LRU.
		 */
		ret = isolate_lru_page(page, &source);
		if (!ret) { /* Success */
			move_pages--;
		} else {
			/* Becasue we don't have big zone->lock. we should
			   check this again here. */
			if (page_count(page))
				not_managed++;
#ifdef CONFIG_DEBUG_VM
			printk(KERN_INFO "removing from LRU failed"
					 " %lx/%d/%lx\n",
				pfn, page_count(page), page->flags);
#endif
		}
	}
	ret = -EBUSY;
	if (not_managed) {
		if (!list_empty(&source))
			putback_lru_pages(&source);
		goto out;
	}
	ret = 0;
	if (list_empty(&source))
		goto out;
	/* this function returns # of failed pages */
	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);

out:
	return ret;
}

/*
 * remove from free_area[] and mark all as Reserved.
 */
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
			void *data)
{
	__offline_isolated_pages(start, start + nr_pages);
	return 0;
}

static void
offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
	walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
				offline_isolated_pages_cb);
}

/*
 * Check all pages in range, recoreded as memory resource, are isolated.
 */
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
			void *data)
{
	int ret;
	long offlined = *(long *)data;
	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
	offlined = nr_pages;
	if (!ret)
		*(long *)data += offlined;
	return ret;
}

static long
check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
{
	long offlined = 0;
	int ret;

	ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
			check_pages_isolated_cb);
	if (ret < 0)
		offlined = (long)ret;
	return offlined;
}

extern void drain_all_local_pages(void);

int offline_pages(unsigned long start_pfn,
		  unsigned long end_pfn, unsigned long timeout)
{
	unsigned long pfn, nr_pages, expire;
	long offlined_pages;
	int ret, drain, retry_max;
	struct zone *zone;

	BUG_ON(start_pfn >= end_pfn);
	/* at least, alignment against pageblock is necessary */
	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
		return -EINVAL;
	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
		return -EINVAL;
	/* This makes hotplug much easier...and readable.
	   we assume this for now. .*/
	if (!test_pages_in_a_zone(start_pfn, end_pfn))
		return -EINVAL;
	/* set above range as isolated */
	ret = start_isolate_page_range(start_pfn, end_pfn);
	if (ret)
		return ret;
	nr_pages = end_pfn - start_pfn;
	pfn = start_pfn;
	expire = jiffies + timeout;
	drain = 0;
	retry_max = 5;
repeat:
	/* start memory hot removal */
	ret = -EAGAIN;
	if (time_after(jiffies, expire))
		goto failed_removal;
	ret = -EINTR;
	if (signal_pending(current))
		goto failed_removal;
	ret = 0;
	if (drain) {
		lru_add_drain_all();
		flush_scheduled_work();
		cond_resched();
		drain_all_local_pages();
	}

	pfn = scan_lru_pages(start_pfn, end_pfn);
	if (pfn) { /* We have page on LRU */
		ret = do_migrate_range(pfn, end_pfn);
		if (!ret) {
			drain = 1;
			goto repeat;
		} else {
			if (ret < 0)
				if (--retry_max == 0)
					goto failed_removal;
			yield();
			drain = 1;
			goto repeat;
		}
	}
	/* drain all zone's lru pagevec, this is asyncronous... */
	lru_add_drain_all();
	flush_scheduled_work();
	yield();
	/* drain pcp pages , this is synchrouns. */
	drain_all_local_pages();
	/* check again */
	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
	if (offlined_pages < 0) {
		ret = -EBUSY;
		goto failed_removal;
	}
	printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
	/* Ok, all of our target is islaoted.
	   We cannot do rollback at this point. */
	offline_isolated_pages(start_pfn, end_pfn);
	/* reset pagetype flags */
	start_isolate_page_range(start_pfn, end_pfn);
	/* removal success */
	zone = page_zone(pfn_to_page(start_pfn));
	zone->present_pages -= offlined_pages;
	zone->zone_pgdat->node_present_pages -= offlined_pages;
	totalram_pages -= offlined_pages;
	num_physpages -= offlined_pages;
	vm_total_pages = nr_free_pagecache_pages();
	writeback_set_ratelimit();
	return 0;

failed_removal:
	printk(KERN_INFO "memory offlining %lx to %lx failed\n",
		start_pfn, end_pfn);
	/* pushback to free area */
	undo_isolate_page_range(start_pfn, end_pfn);
	return ret;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
Loading