Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 02491447 authored by Daisuke Nishimura's avatar Daisuke Nishimura Committed by Linus Torvalds
Browse files

memcg: move charges of anonymous swap



This patch is another core part of this move-charge-at-task-migration
feature.  It enables moving charges of anonymous swaps.

To move the charge of swap, we need to exchange swap_cgroup's record.

In current implementation, swap_cgroup's record is protected by:

  - page lock: if the entry is on swap cache.
  - swap_lock: if the entry is not on swap cache.

This works well in usual swap-in/out activity.

But this behavior make the feature of moving swap charge check many
conditions to exchange swap_cgroup's record safely.

So I changed modification of swap_cgroup's recored(swap_cgroup_record())
to use xchg, and define a new function to cmpxchg swap_cgroup's record.

This patch also enables moving charge of non pte_present but not uncharged
swap caches, which can be exist on swap-out path, by getting the target
pages via find_get_page() as do_mincore() does.

[kosaki.motohiro@jp.fujitsu.com: fix ia64 build]
[akpm@linux-foundation.org: fix typos]
Signed-off-by: default avatarDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: default avatarKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: default avatarKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 8033b97c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -420,6 +420,8 @@ NOTE2: It is recommended to set the soft limit always below the hard limit,

Users can move charges associated with a task along with task migration, that
is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
This feature is not supported in !CONFIG_MMU environments because of lack of
page tables.

8.1 Interface

+2 −0
Original line number Diff line number Diff line
@@ -118,6 +118,8 @@ static inline void __init page_cgroup_init_flatmem(void)
#include <linux/swap.h>

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new);
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
+9 −0
Original line number Diff line number Diff line
@@ -355,6 +355,7 @@ static inline void disable_swap_token(void)
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
extern void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep);
#else
static inline void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
@@ -485,6 +486,14 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
{
}

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
static inline int
mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
{
	return 0;
}
#endif

#endif /* CONFIG_SWAP */
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
+147 −36
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
@@ -2270,6 +2271,54 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
	}
	rcu_read_unlock();
}

/**
 * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
 * @entry: swap entry to be moved
 * @from:  mem_cgroup which the entry is moved from
 * @to:  mem_cgroup which the entry is moved to
 *
 * It succeeds only when the swap_cgroup's record for this entry is the same
 * as the mem_cgroup's id of @from.
 *
 * Returns 0 on success, -EINVAL on failure.
 *
 * The caller must have charged to @to, IOW, called res_counter_charge() about
 * both res and memsw, and called css_get().
 */
static int mem_cgroup_move_swap_account(swp_entry_t entry,
				struct mem_cgroup *from, struct mem_cgroup *to)
{
	unsigned short old_id, new_id;

	old_id = css_id(&from->css);
	new_id = css_id(&to->css);

	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
		if (!mem_cgroup_is_root(from))
			res_counter_uncharge(&from->memsw, PAGE_SIZE);
		mem_cgroup_swap_statistics(from, false);
		mem_cgroup_put(from);
		/*
		 * we charged both to->res and to->memsw, so we should uncharge
		 * to->res.
		 */
		if (!mem_cgroup_is_root(to))
			res_counter_uncharge(&to->res, PAGE_SIZE);
		mem_cgroup_swap_statistics(to, true);
		mem_cgroup_get(to);
		css_put(&to->css);

		return 0;
	}
	return -EINVAL;
}
#else
static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
				struct mem_cgroup *from, struct mem_cgroup *to)
{
	return -EINVAL;
}
#endif

/*
@@ -2949,6 +2998,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
	return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
}

#ifdef CONFIG_MMU
static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
					struct cftype *cft, u64 val)
{
@@ -2967,6 +3017,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,

	return 0;
}
#else
static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
					struct cftype *cft, u64 val)
{
	return -ENOSYS;
}
#endif


/* For read statistics */
@@ -3489,6 +3546,7 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
	return ret;
}

#ifdef CONFIG_MMU
/* Handlers for move charge at task migration. */
#define PRECHARGE_COUNT_AT_ONCE	256
static int mem_cgroup_do_precharge(unsigned long count)
@@ -3544,77 +3602,124 @@ one_by_one:
	}
	return ret;
}
#else	/* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
				struct cgroup *cgroup,
				struct task_struct *p,
				bool threadgroup)
{
	return 0;
}
static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
				struct cgroup *cgroup,
				struct task_struct *p,
				bool threadgroup)
{
}
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
				struct cgroup *cont,
				struct cgroup *old_cont,
				struct task_struct *p,
				bool threadgroup)
{
}
#endif

/**
 * is_target_pte_for_mc - check a pte whether it is valid for move charge
 * @vma: the vma the pte to be checked belongs
 * @addr: the address corresponding to the pte to be checked
 * @ptent: the pte to be checked
 * @target: the pointer the target page will be stored(can be NULL)
 * @target: the pointer the target page or swap ent will be stored(can be NULL)
 *
 * Returns
 *   0(MC_TARGET_NONE): if the pte is not a target for move charge.
 *   1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
 *     move charge. if @target is not NULL, the page is stored in target->page
 *     with extra refcnt got(Callers should handle it).
 *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
 *     target for charge migration. if @target is not NULL, the entry is stored
 *     in target->ent.
 *
 * Called with pte lock held.
 */
/* We add a new member later. */
union mc_target {
	struct page	*page;
	swp_entry_t	ent;
};

/* We add a new type later. */
enum mc_target_type {
	MC_TARGET_NONE,	/* not used */
	MC_TARGET_PAGE,
	MC_TARGET_SWAP,
};

static int is_target_pte_for_mc(struct vm_area_struct *vma,
		unsigned long addr, pte_t ptent, union mc_target *target)
{
	struct page *page;
	struct page *page = NULL;
	struct page_cgroup *pc;
	int ret = 0;
	swp_entry_t ent = { .val = 0 };
	int usage_count = 0;
	bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON,
					&mc.to->move_charge_at_immigrate);

	if (!pte_present(ptent))
	if (!pte_present(ptent)) {
		/* TODO: handle swap of shmes/tmpfs */
		if (pte_none(ptent) || pte_file(ptent))
			return 0;

		else if (is_swap_pte(ptent)) {
			ent = pte_to_swp_entry(ptent);
			if (!move_anon || non_swap_entry(ent))
				return 0;
			usage_count = mem_cgroup_count_swap_user(ent, &page);
		}
	} else {
		page = vm_normal_page(vma, addr, ptent);
		if (!page || !page_mapped(page))
			return 0;
		/*
	 * TODO: We don't move charges of file(including shmem/tmpfs) pages for
	 * now.
		 * TODO: We don't move charges of file(including shmem/tmpfs)
		 * pages for now.
		 */
		if (!move_anon || !PageAnon(page))
			return 0;
		if (!get_page_unless_zero(page))
			return 0;
		usage_count = page_mapcount(page);
	}
	if (usage_count > 1) {
		/*
	 * TODO: We don't move charges of shared(used by multiple processes)
	 * pages for now.
		 * TODO: We don't move charges of shared(used by multiple
		 * processes) pages for now.
		 */
	if (page_mapcount(page) > 1)
		return 0;
	if (!get_page_unless_zero(page))
		if (page)
			put_page(page);
		return 0;

	}
	if (page) {
		pc = lookup_page_cgroup(page);
		/*
	 * Do only loose check w/o page_cgroup lock. mem_cgroup_move_account()
	 * checks the pc is valid or not under the lock.
		 * Do only loose check w/o page_cgroup lock.
		 * mem_cgroup_move_account() checks the pc is valid or not under
		 * the lock.
		 */
		if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
			ret = MC_TARGET_PAGE;
			if (target)
				target->page = page;
		}

		if (!ret || !target)
			put_page(page);

	}
	/* throught */
	if (ent.val && do_swap_account && !ret &&
			css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
		ret = MC_TARGET_SWAP;
		if (target)
			target->ent = ent;
	}
	return ret;
}

@@ -3754,6 +3859,7 @@ retry:
		int type;
		struct page *page;
		struct page_cgroup *pc;
		swp_entry_t ent;

		if (!mc.precharge)
			break;
@@ -3775,6 +3881,11 @@ retry:
put:			/* is_target_pte_for_mc() gets the page */
			put_page(page);
			break;
		case MC_TARGET_SWAP:
			ent = target.ent;
			if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to))
				mc.precharge--;
			break;
		default:
			break;
		}
+32 −2
Original line number Diff line number Diff line
@@ -334,6 +334,37 @@ not_enough_page:
	return -ENOMEM;
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @end: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns old id at success, 0 at failure.
 * (There is no mem_cgroup useing 0 as its id)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;

	ctrl = &swap_cgroup_ctrl[type];

	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	if (cmpxchg(&sc->id, old, new) == old)
		return old;
	else
		return 0;
}

/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
@@ -358,8 +389,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	old = sc->id;
	sc->id = id;
	old = xchg(&sc->id, id);

	return old;
}
Loading