
Commit e8589cc1 authored by KAMEZAWA Hiroyuki, committed by Linus Torvalds

memcg: better migration handling



This patch changes page migration under the memory controller to use a
different algorithm.  (Thanks to Christoph for the new idea.)

Before:
 - page_cgroup is migrated from an old page to a new page.
After:
 - a new page is accounted; page_cgroup is not reused.

Pros:

 - We can avoid complicated lock dependencies and races in migration.

Cons:

 - a new parameter is added to mem_cgroup_charge_common().

 - mem_cgroup_getref() is added for handling ref_cnt ping-pong.

This version simplifies the complicated lock dependencies in page migration
under the memory resource controller.

  The new refcnt sequence is as follows.

a mapped page:
  prepare_migration() ..... +1 to NEW page
  try_to_unmap()      ..... all refs to OLD page are gone.
  move_pages()        ..... +1 to NEW page if page cache.
  remap...            ..... all refs from *map* are added to NEW one.
  end_migration()     ..... -1 to NEW page.

  The page's mapcount + (page_is_cache) refs are added to the NEW one.
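
To make the arithmetic concrete, here is a small self-contained userspace
model of the sequence above. It is only an illustration, not kernel code:
the toy_page struct and the helper names are hypothetical stand-ins that
mirror the +1/-1 bookkeeping, with page_is_cache modeled as a simple flag.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a page's memcg accounting state (not the kernel struct). */
struct toy_page {
	int refs;	/* page_cgroup-style reference count */
	int mapcount;	/* number of ptes mapping the page */
	bool is_cache;	/* is this a page-cache page? */
};

/* prepare_migration(): the NEW page is accounted up front (+1). */
static void prepare_migration(struct toy_page *newpage)
{
	newpage->refs += 1;
}

/* try_to_unmap(): every map reference to the OLD page goes away. */
static void try_to_unmap(struct toy_page *oldpage)
{
	oldpage->refs -= oldpage->mapcount;
	oldpage->mapcount = 0;
}

/* move_pages(): the page cache now references the NEW page (+1). */
static void move_pages(struct toy_page *newpage)
{
	if (newpage->is_cache)
		newpage->refs += 1;
}

/* remap: each pte that pointed at OLD is re-established against NEW. */
static void remap(struct toy_page *newpage, int mapcount)
{
	newpage->mapcount = mapcount;
	newpage->refs += mapcount;
}

/* end_migration(): drop the extra charge taken in prepare_migration (-1). */
static void end_migration(struct toy_page *newpage)
{
	newpage->refs -= 1;
}

int main(void)
{
	struct toy_page oldp = { .refs = 3, .mapcount = 3, .is_cache = true };
	struct toy_page newp = { .refs = 0, .mapcount = 0, .is_cache = true };

	prepare_migration(&newp);	/* +1 to NEW page            */
	try_to_unmap(&oldp);		/* refs to OLD page are gone */
	move_pages(&newp);		/* +1 to NEW page (cache)    */
	remap(&newp, 3);		/* mapcount refs move to NEW */
	end_migration(&newp);		/* -1 to NEW page            */

	/* Net: mapcount + (page is cache ? 1 : 0) refs end up on NEW. */
	assert(newp.refs == newp.mapcount + (newp.is_cache ? 1 : 0));
	printf("NEW page refs after migration: %d\n", newp.refs);
	return 0;
}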

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 508b7be0
include/linux/memcontrol.h  +6 −5
@@ -50,9 +50,10 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
#define mm_match_cgroup(mm, cgroup)	\
	((cgroup) == mem_cgroup_from_task((mm)->owner))

extern int mem_cgroup_prepare_migration(struct page *page);
extern int
mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
extern void mem_cgroup_end_migration(struct page *page);
extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
extern int mem_cgroup_getref(struct page *page);

/*
 * For memory reclaim.
@@ -112,7 +113,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
	return 1;
}

static inline int mem_cgroup_prepare_migration(struct page *page)
static inline int
mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
{
	return 0;
}
@@ -121,8 +123,7 @@ static inline void mem_cgroup_end_migration(struct page *page)
{
}

static inline void
mem_cgroup_page_migration(struct page *page, struct page *newpage)
static inline void mem_cgroup_getref(struct page *page)
{
}

mm/memcontrol.c  +65 −63
@@ -524,7 +524,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 * < 0 if the cgroup is over its limit
 */
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask, enum charge_type ctype)
				gfp_t gfp_mask, enum charge_type ctype,
				struct mem_cgroup *memcg)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
@@ -569,6 +570,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
	 * thread group leader migrates. It's possible that mm is not
	 * set, if so charge the init_mm (happens for pagecache usage).
	 */
	if (!memcg) {
		if (!mm)
			mm = &init_mm;

@@ -579,6 +581,10 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
		 */
		css_get(&mem->css);
		rcu_read_unlock();
	} else {
		mem = memcg;
		css_get(&memcg->css);
	}

	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
		if (!(gfp_mask & __GFP_WAIT))
@@ -648,7 +654,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
{
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_MAPPED);
				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -657,7 +663,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
	if (!mm)
		mm = &init_mm;
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_CACHE);
				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
}

int mem_cgroup_getref(struct page *page)
{
	struct page_cgroup *pc;

	if (mem_cgroup_subsys.disabled)
		return 0;

	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	VM_BUG_ON(!pc);
	pc->ref_cnt++;
	unlock_page_cgroup(page);
	return 0;
}

/*
@@ -707,65 +728,39 @@ void mem_cgroup_uncharge_page(struct page *page)
}

/*
 * Returns non-zero if a page (under migration) has valid page_cgroup member.
 * Refcnt of page_cgroup is incremented.
 * Before starting migration, account against new page.
 */
int mem_cgroup_prepare_migration(struct page *page)
int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
{
	struct page_cgroup *pc;
	struct mem_cgroup *mem = NULL;
	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
	int ret = 0;

	if (mem_cgroup_subsys.disabled)
		return 0;

	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	if (pc)
		pc->ref_cnt++;
	if (pc) {
		mem = pc->mem_cgroup;
		css_get(&mem->css);
		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
	}
	unlock_page_cgroup(page);
	return pc != NULL;
	if (mem) {
		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
			ctype, mem);
		css_put(&mem->css);
	}

void mem_cgroup_end_migration(struct page *page)
{
	mem_cgroup_uncharge_page(page);
	return ret;
}

/*
 * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
 * And no race with uncharge() routines because page_cgroup for *page*
 * has extra one reference by mem_cgroup_prepare_migration.
 */
void mem_cgroup_page_migration(struct page *page, struct page *newpage)
/* remove redundant charge */
void mem_cgroup_end_migration(struct page *newpage)
{
	struct page_cgroup *pc;
	struct mem_cgroup_per_zone *mz;
	unsigned long flags;

	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	if (!pc) {
		unlock_page_cgroup(page);
		return;
	}

	mz = page_cgroup_zoneinfo(pc);
	spin_lock_irqsave(&mz->lru_lock, flags);
	__mem_cgroup_remove_list(mz, pc);
	spin_unlock_irqrestore(&mz->lru_lock, flags);

	page_assign_page_cgroup(page, NULL);
	unlock_page_cgroup(page);

	pc->page = newpage;
	lock_page_cgroup(newpage);
	page_assign_page_cgroup(newpage, pc);

	mz = page_cgroup_zoneinfo(pc);
	spin_lock_irqsave(&mz->lru_lock, flags);
	__mem_cgroup_add_list(mz, pc);
	spin_unlock_irqrestore(&mz->lru_lock, flags);

	unlock_page_cgroup(newpage);
	mem_cgroup_uncharge_page(newpage);
}

/*
@@ -795,12 +790,19 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
		page = pc->page;
		get_page(page);
		spin_unlock_irqrestore(&mz->lru_lock, flags);
		/*
		 * Check if this page is on LRU. !LRU page can be found
		 * if it's under page migration.
		 */
		if (PageLRU(page)) {
			mem_cgroup_uncharge_page(page);
			put_page(page);
			if (--count <= 0) {
				count = FORCE_UNCHARGE_BATCH;
				cond_resched();
			}
		} else
			cond_resched();
		spin_lock_irqsave(&mz->lru_lock, flags);
	}
	spin_unlock_irqrestore(&mz->lru_lock, flags);
mm/migrate.c  +15 −7
@@ -358,6 +358,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
	__inc_zone_page_state(newpage, NR_FILE_PAGES);

	write_unlock_irq(&mapping->tree_lock);
	if (!PageSwapCache(newpage)) {
		mem_cgroup_uncharge_page(page);
		mem_cgroup_getref(newpage);
	}

	return 0;
}
@@ -611,7 +615,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
		rc = fallback_migrate_page(mapping, newpage, page);

	if (!rc) {
		mem_cgroup_page_migration(page, newpage);
		remove_migration_ptes(page, newpage);
	} else
		newpage->mapping = NULL;
@@ -641,6 +644,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
		/* page was freed from under us. So we are done. */
		goto move_newpage;

	charge = mem_cgroup_prepare_migration(page, newpage);
	if (charge == -ENOMEM) {
		rc = -ENOMEM;
		goto move_newpage;
	}
	/* prepare cgroup just returns 0 or -ENOMEM */
	BUG_ON(charge);

	rc = -EAGAIN;
	if (TestSetPageLocked(page)) {
		if (!force)
@@ -692,19 +703,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
		goto rcu_unlock;
	}

	charge = mem_cgroup_prepare_migration(page);
	/* Establish migration ptes or remove ptes */
	try_to_unmap(page, 1);

	if (!page_mapped(page))
		rc = move_to_new_page(newpage, page);

	if (rc) {
	if (rc)
		remove_migration_ptes(page, page);
		if (charge)
			mem_cgroup_end_migration(page);
	} else if (charge)
 		mem_cgroup_end_migration(newpage);
rcu_unlock:
	if (rcu_locked)
		rcu_read_unlock();
@@ -725,6 +731,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
	}

move_newpage:
	if (!charge)
		mem_cgroup_end_migration(newpage);
	/*
	 * Move the new page to the LRU. If migration was not successful
	 * then this will free the page.