
Commit 925b7673 authored by Johannes Weiner, committed by Linus Torvalds

mm: make per-memcg LRU lists exclusive



Now that all code that operated on global per-zone LRU lists is
converted to operate on per-memory cgroup LRU lists instead, there is no
reason to keep the double-LRU scheme around any longer.

The pc->lru member is removed and page->lru is linked directly to the
per-memory cgroup LRU lists, which removes two pointers from a
descriptor that exists for every page frame in the system.
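
To make the before/after concrete, here is a small user-space sketch of the new ownership model (an illustration only, not kernel code: the list helpers mimic <linux/list.h>, and the per-memcg lookup is collapsed into a single static lruvec). The contract this patch introduces is that the memcg accounting helper returns the lruvec the page belongs on, and the call site does the physical linking of page->lru, which is now the page's only LRU link:

/*
 * Minimal user-space sketch of the exclusive scheme -- not kernel
 * code.  The list helpers mimic <linux/list.h>; the per-memcg lookup
 * is collapsed into one static lruvec.  struct page carries a single
 * list_head, so it can sit on exactly one lruvec list at a time.
 */
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_head_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add(struct list_head *entry, struct list_head *head)
{
	entry->next = head->next;
	entry->prev = head;
	head->next->prev = entry;
	head->next = entry;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	list_head_init(entry);
}

enum lru_list { LRU_INACTIVE_ANON, LRU_ACTIVE_ANON, NR_LRU_LISTS };

struct lruvec { struct list_head lists[NR_LRU_LISTS]; };	/* per (zone, memcg) */

struct page { struct list_head lru; };	/* the page's only LRU link */

static struct lruvec memcg_lruvec;	/* stand-in for the memcg's lruvec */

/* Mirrors mem_cgroup_lru_add_list(): do the accounting, then hand the
 * call site the lruvec it must physically link the page into. */
static struct lruvec *memcg_lru_add_list(struct page *page, enum lru_list lru)
{
	(void)page; (void)lru;	/* real accounting would use these */
	return &memcg_lruvec;
}

static void add_page_to_lru_list(struct page *page, enum lru_list lru)
{
	struct lruvec *lruvec = memcg_lru_add_list(page, lru);

	list_add(&page->lru, &lruvec->lists[lru]);
}

static void del_page_from_lru_list(struct page *page, enum lru_list lru)
{
	(void)lru;	/* mem_cgroup_lru_del_list() would un-account here */
	list_del(&page->lru);
}

int main(void)
{
	struct page page;
	int i;

	for (i = 0; i < NR_LRU_LISTS; i++)
		list_head_init(&memcg_lruvec.lists[i]);

	add_page_to_lru_list(&page, LRU_ACTIVE_ANON);
	printf("linked: %d\n",
	       memcg_lruvec.lists[LRU_ACTIVE_ANON].next == &page.lru);
	del_page_from_lru_list(&page, LRU_ACTIVE_ANON);
	printf("linked: %d\n",
	       memcg_lruvec.lists[LRU_ACTIVE_ANON].next == &page.lru);
	return 0;
}

Built with a plain C compiler, this prints "linked: 1" then "linked: 0", showing the page living on exactly one list at a time.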

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Ying Han <yinghan@google.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 6290df54

include/linux/memcontrol.h  +22 −29
@@ -32,14 +32,6 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
 };
 
-extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file);
-
 struct mem_cgroup_reclaim_cookie {
 	struct zone *zone;
 	int priority;
@@ -69,13 +61,14 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
-extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_rotate_reclaimable_page(struct page *page);
-extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page,
-				  enum lru_list from, enum lru_list to);
+
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
+struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
+				       enum lru_list);
+void mem_cgroup_lru_del_list(struct page *, enum lru_list);
+void mem_cgroup_lru_del(struct page *);
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
+					 enum lru_list, enum lru_list);
 
 /* For coalescing uncharge for reducing memcg' overhead*/
 extern void mem_cgroup_uncharge_start(void);
@@ -223,33 +216,33 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
-{
-}
-
-static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
+static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+						    struct mem_cgroup *memcg)
 {
-	return ;
+	return &zone->lruvec;
 }
 
-static inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
+static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone,
+						     struct page *page,
+						     enum lru_list lru)
 {
-	return ;
+	return &zone->lruvec;
 }
 
-static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
+static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
-	return ;
 }
 
-static inline void mem_cgroup_del_lru(struct page *page)
+static inline void mem_cgroup_lru_del(struct page *page)
 {
-	return ;
 }
 
-static inline void
-mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
+static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+						       struct page *page,
+						       enum lru_list from,
+						       enum lru_list to)
 {
+	return &zone->lruvec;
 }
 
 static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)

include/linux/mm_inline.h  +8 −13
@@ -21,27 +21,22 @@ static inline int page_is_file_cache(struct page *page)
 	return !PageSwapBacked(page);
 }
 
-static inline void
-__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
-		       struct list_head *head)
-{
-	list_add(&page->lru, head);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
-	mem_cgroup_add_lru_list(page, l);
-}
-
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-	__add_page_to_lru_list(zone, page, l, &zone->lruvec.lists[l]);
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lru_add_list(zone, page, l);
+	list_add(&page->lru, &lruvec->lists[l]);
+	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
 }
 
 static inline void
 del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
+	mem_cgroup_lru_del_list(page, l);
 	list_del(&page->lru);
 	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
 }
 
 /**
@@ -64,7 +59,6 @@ del_page_from_lru(struct zone *zone, struct page *page)
 {
 	enum lru_list l;
 
-	list_del(&page->lru);
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
 		l = LRU_UNEVICTABLE;
@@ -75,8 +69,9 @@ del_page_from_lru(struct zone *zone, struct page *page)
 			l += LRU_ACTIVE;
 		}
 	}
+	mem_cgroup_lru_del_list(page, l);
+	list_del(&page->lru);
 	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
 }
 
 /**

include/linux/page_cgroup.h  +0 −1
@@ -31,7 +31,6 @@ enum {
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct list_head lru;		/* per cgroup LRU list */
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
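
Dropping the list_head from struct page_cgroup is where the commit message's "two pointers" saving comes from: on a 64-bit build that is 16 bytes for every page frame, i.e. with 4 KiB pages roughly 4 MiB of page_cgroup overhead per GiB of RAM (a back-of-the-envelope figure, not one quoted in the patch).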

mm/memcontrol.c  +151 −160
@@ -995,6 +995,27 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 }
 EXPORT_SYMBOL(mem_cgroup_count_vm_event);
 
+/**
+ * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
+ * @zone: zone of the wanted lruvec
+ * @mem: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for the given @zone and
+ * @mem.  This can be the global zone lruvec, if the memory controller
+ * is disabled.
+ */
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+				      struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_zone *mz;
+
+	if (mem_cgroup_disabled())
+		return &zone->lruvec;
+
+	mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
+	return &mz->lruvec;
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -1009,104 +1030,123 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event);
  * When moving account, the page is not on LRU. It's isolated.
  */
 
-void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
+/**
+ * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
+ * @zone: zone of the page
+ * @page: the page
+ * @lru: current lru
+ *
+ * This function accounts for @page being added to @lru, and returns
+ * the lruvec for the given @zone and the memcg @page is charged to.
+ *
+ * The callsite is then responsible for physically linking the page to
+ * the returned lruvec->lists[@lru].
+ */
+struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
+				       enum lru_list lru)
 {
-	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
 
 	if (mem_cgroup_disabled())
-		return;
+		return &zone->lruvec;
 
 	pc = lookup_page_cgroup(page);
-	/* can happen while we handle swapcache. */
-	if (!TestClearPageCgroupAcctLRU(pc))
-		return;
-	VM_BUG_ON(!pc->mem_cgroup);
+	VM_BUG_ON(PageCgroupAcctLRU(pc));
 	/*
-	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
-	 * removed from global LRU.
+	 * putback:				charge:
+	 * SetPageLRU				SetPageCgroupUsed
+	 * smp_mb				smp_mb
+	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
+	 *
+	 * Ensure that one of the two sides adds the page to the memcg
+	 * LRU during a race.
 	 */
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
-	VM_BUG_ON(list_empty(&pc->lru));
-	list_del_init(&pc->lru);
-}
-
-void mem_cgroup_del_lru(struct page *page)
-{
-	mem_cgroup_del_lru_list(page, page_lru(page));
-}
-
+	smp_mb();
 	/*
- * Writeback is about to end against a page which has been marked for immediate
- * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.
+	 * If the page is uncharged, it may be freed soon, but it
+	 * could also be swap cache (readahead, swapoff) that needs to
+	 * be reclaimable in the future.  root_mem_cgroup will babysit
+	 * it for the time being.
 	 */
-void mem_cgroup_rotate_reclaimable_page(struct page *page)
-{
-	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc;
-	enum lru_list lru = page_lru(page);
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(page);
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
+	if (PageCgroupUsed(pc)) {
 		/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 		smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move_tail(&pc->lru, &mz->lruvec.lists[lru]);
+		memcg = pc->mem_cgroup;
+		SetPageCgroupAcctLRU(pc);
+	} else
+		memcg = root_mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* compound_order() is stabilized through lru_lock */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
+	return &mz->lruvec;
 }
 
-void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
+/**
+ * mem_cgroup_lru_del_list - account for removing an lru page
+ * @page: the page
+ * @lru: target lru
+ *
+ * This function accounts for @page being removed from @lru.
+ *
+ * The callsite is then responsible for physically unlinking
+ * @page->lru.
+ */
+void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
 
 	if (mem_cgroup_disabled())
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move(&pc->lru, &mz->lruvec.lists[lru]);
+	/*
+	 * root_mem_cgroup babysits uncharged LRU pages, but
+	 * PageCgroupUsed is cleared when the page is about to get
+	 * freed.  PageCgroupAcctLRU remembers whether the
+	 * LRU-accounting happened against pc->mem_cgroup or
+	 * root_mem_cgroup.
+	 */
+	if (TestClearPageCgroupAcctLRU(pc)) {
+		VM_BUG_ON(!pc->mem_cgroup);
+		memcg = pc->mem_cgroup;
+	} else
+		memcg = root_mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 }
 
-void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
+void mem_cgroup_lru_del(struct page *page)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
+	mem_cgroup_lru_del_list(page, page_lru(page));
+}
 
-	if (mem_cgroup_disabled())
-		return;
-	pc = lookup_page_cgroup(page);
-	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
+/**
+ * mem_cgroup_lru_move_lists - account for moving a page between lrus
+ * @zone: zone of the page
+ * @page: the page
+ * @from: current lru
+ * @to: target lru
  *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
+ * This function accounts for @page being moved between the lrus @from
+ * and @to, and returns the lruvec for the given @zone and the memcg
+ * @page is charged to.
+ *
+ * The callsite is then responsible for physically relinking
+ * @page->lru to the returned lruvec->lists[@to].
  */
-	smp_mb();
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
-	SetPageCgroupAcctLRU(pc);
-	list_add(&pc->lru, &mz->lruvec.lists[lru]);
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+					 struct page *page,
+					 enum lru_list from,
+					 enum lru_list to)
+{
+	/* XXX: Optimize this, especially for @from == @to */
+	mem_cgroup_lru_del_list(page, from);
+	return mem_cgroup_lru_add_list(zone, page, to);
 }
 
 /*
@@ -1117,6 +1157,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
  */
 static void mem_cgroup_lru_del_before_commit(struct page *page)
 {
+	enum lru_list lru;
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -1133,17 +1174,28 @@ static void mem_cgroup_lru_del_before_commit(struct page *page)
 		return;
 
 	spin_lock_irqsave(&zone->lru_lock, flags);
+	lru = page_lru(page);
 	/*
-	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
-	 * is guarded by lock_page() because the page is SwapCache.
+	 * The uncharged page could still be registered to the LRU of
+	 * the stale pc->mem_cgroup.
+	 *
+	 * As pc->mem_cgroup is about to get overwritten, the old LRU
+	 * accounting needs to be taken care of.  Let root_mem_cgroup
+	 * babysit the page until the new memcg is responsible for it.
+	 *
+	 * The PCG_USED bit is guarded by lock_page() as the page is
+	 * swapcache/pagecache.
 	 */
-	if (!PageCgroupUsed(pc))
-		mem_cgroup_del_lru_list(page, page_lru(page));
+	if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) {
+		del_page_from_lru_list(zone, page, lru);
+		add_page_to_lru_list(zone, page, lru);
+	}
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
 static void mem_cgroup_lru_add_after_commit(struct page *page)
 {
+	enum lru_list lru;
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -1161,20 +1213,20 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
 	if (likely(!PageLRU(page)))
 		return;
 	spin_lock_irqsave(&zone->lru_lock, flags);
-	/* link when the page is linked to LRU but page_cgroup isn't */
-	if (PageLRU(page) && !PageCgroupAcctLRU(pc))
-		mem_cgroup_add_lru_list(page, page_lru(page));
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
+	lru = page_lru(page);
+	/*
+	 * If the page is not on the LRU, someone will soon put it
+	 * there.  If it is, and also already accounted for on the
+	 * memcg-side, it must be on the right lruvec as setting
+	 * pc->mem_cgroup and PageCgroupUsed is properly ordered.
+	 * Otherwise, root_mem_cgroup has been babysitting the page
+	 * during the charge.  Move it to the new memcg now.
+	 */
+	if (PageLRU(page) && !PageCgroupAcctLRU(pc)) {
+		del_page_from_lru_list(zone, page, lru);
+		add_page_to_lru_list(zone, page, lru);
+	}
-}
-
-void mem_cgroup_move_lists(struct page *page,
-			   enum lru_list from, enum lru_list to)
-{
-	if (mem_cgroup_disabled())
-		return;
-	mem_cgroup_del_lru_list(page, from);
-	mem_cgroup_add_lru_list(page, to);
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
 /*
@@ -1282,68 +1334,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 	return &mz->reclaim_stat;
 }
 
-unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file)
-{
-	unsigned long nr_taken = 0;
-	struct page *page;
-	unsigned long scan;
-	LIST_HEAD(pc_list);
-	struct list_head *src;
-	struct page_cgroup *pc, *tmp;
-	int nid = zone_to_nid(z);
-	int zid = zone_idx(z);
-	struct mem_cgroup_per_zone *mz;
-	int lru = LRU_FILE * file + active;
-	int ret;
-
-	BUG_ON(!mem_cont);
-	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	src = &mz->lruvec.lists[lru];
-
-	scan = 0;
-	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
-		if (scan >= nr_to_scan)
-			break;
-
-		if (unlikely(!PageCgroupUsed(pc)))
-			continue;
-
-		page = lookup_cgroup_page(pc);
-
-		if (unlikely(!PageLRU(page)))
-			continue;
-
-		scan++;
-		ret = __isolate_lru_page(page, mode, file);
-		switch (ret) {
-		case 0:
-			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
-			nr_taken += hpage_nr_pages(page);
-			break;
-		case -EBUSY:
-			/* we don't affect global LRU but rotate in our LRU */
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
-			break;
-		default:
-			break;
-		}
-	}
-
-	*scanned = scan;
-
-	trace_mm_vmscan_memcg_isolate(0, nr_to_scan, scan, nr_taken,
-				      0, 0, 0, mode);
-
-	return nr_taken;
-}
-
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -3726,11 +3716,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
-	struct zone *zone;
 	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc, *busy;
 	unsigned long flags, loop;
 	struct list_head *list;
+	struct page *busy;
+	struct zone *zone;
 	int ret = 0;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
@@ -3742,6 +3732,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 	loop += 256;
 	busy = NULL;
 	while (loop--) {
+		struct page_cgroup *pc;
 		struct page *page;
 
 		ret = 0;
@@ -3750,16 +3741,16 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			break;
 		}
-		pc = list_entry(list->prev, struct page_cgroup, lru);
-		if (busy == pc) {
-			list_move(&pc->lru, list);
+		page = list_entry(list->prev, struct page, lru);
+		if (busy == page) {
+			list_move(&page->lru, list);
 			busy = NULL;
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			continue;
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = lookup_cgroup_page(pc);
+		pc = lookup_page_cgroup(page);
 
 		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
 		if (ret == -ENOMEM)
@@ -3767,7 +3758,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
 		if (ret == -EBUSY || ret == -EINVAL) {
 			/* found lock contention or "pc" is obsolete. */
-			busy = pc;
+			busy = page;
 			cond_resched();
 		} else
 			busy = NULL;
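
The force_empty loop above also shows the general call-site pattern after this patch: walk lruvec->lists[] through page->lru directly, and consult lookup_page_cgroup() only for accounting. A hypothetical caller of the new move helper (illustrative only, not a function from this patch) would pair the accounting call with the physical relink like so:

/* Hypothetical call site, for illustration: zone->lru_lock is assumed
 * to be held, as the lru lists and memcg accounting require. */
static void move_between_lrus(struct zone *zone, struct page *page,
			      enum lru_list from, enum lru_list to)
{
	struct lruvec *lruvec;

	/* account the move and learn which lruvec to link into */
	lruvec = mem_cgroup_lru_move_lists(zone, page, from, to);
	/* the call site does the physical relinking of page->lru */
	list_move(&page->lru, &lruvec->lists[to]);
}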

mm/page_cgroup.c  +0 −1
@@ -16,7 +16,6 @@ static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 	pc->flags = 0;
 	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
 