
Commit 6b3ae58e authored by Johannes Weiner, committed by Linus Torvalds

memcg: remove direct page_cgroup-to-page pointer



In struct page_cgroup, we have a full word for flags but only a few are
reserved.  Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.

This saves a full word for every page in a system with memory cgroups
enabled.
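
For illustration, here is a minimal, self-contained userspace sketch of the encoding this patch introduces: the array ID (node or section number) is packed into the upper bits of the flags word, the PCG_* flag bits keep the lower bits, and the ID can be read back to find the owning page_cgroup array. The EXAMPLE_* names and widths below are made-up stand-ins; the real values come from NODES_SHIFT/SECTIONS_SHIFT and NR_PCG_FLAGS, and the real helpers are set_page_cgroup_array_id() and page_cgroup_array_id() in the diff.

/*
 * Standalone sketch of the pc->flags layout: ARRAY-ID | FLAGS.
 * EXAMPLE_ARRAYID_WIDTH and EXAMPLE_NR_FLAGS are illustrative values only,
 * not the kernel's SECTIONS_SHIFT/NODES_SHIFT or NR_PCG_FLAGS.
 */
#include <assert.h>
#include <stdio.h>

#define EXAMPLE_BITS_PER_LONG	(8 * sizeof(unsigned long))
#define EXAMPLE_NR_FLAGS	7	/* lower bits: PCG_* style flags */
#define EXAMPLE_ARRAYID_WIDTH	10	/* upper bits: node/section id */
#define EXAMPLE_ARRAYID_MASK	((1UL << EXAMPLE_ARRAYID_WIDTH) - 1)
#define EXAMPLE_ARRAYID_SHIFT	(EXAMPLE_BITS_PER_LONG - EXAMPLE_ARRAYID_WIDTH)

struct example_page_cgroup {
	unsigned long flags;	/* array id in the top bits, flags below */
};

static void example_set_array_id(struct example_page_cgroup *pc, unsigned long id)
{
	pc->flags &= ~(EXAMPLE_ARRAYID_MASK << EXAMPLE_ARRAYID_SHIFT);
	pc->flags |= (id & EXAMPLE_ARRAYID_MASK) << EXAMPLE_ARRAYID_SHIFT;
}

static unsigned long example_array_id(struct example_page_cgroup *pc)
{
	return (pc->flags >> EXAMPLE_ARRAYID_SHIFT) & EXAMPLE_ARRAYID_MASK;
}

int main(void)
{
	struct example_page_cgroup pc = { .flags = 0 };

	/* mirrors the patch's compile-time check that ID and flags fit one word */
	assert(EXAMPLE_ARRAYID_WIDTH <= EXAMPLE_BITS_PER_LONG - EXAMPLE_NR_FLAGS);

	pc.flags |= 1UL << 2;		/* pretend some low flag bit is set */
	example_set_array_id(&pc, 42);	/* store node/section number 42 */

	assert(example_array_id(&pc) == 42);	/* id survives the round trip */
	assert(pc.flags & (1UL << 2));		/* flag bits are untouched */
	printf("array id = %lu\n", example_array_id(&pc));
	return 0;
}

With SPARSEMEM the stored ID is the memory section number, otherwise it is the node ID, which is why the patch sizes the field with SECTIONS_SHIFT or NODES_SHIFT respectively.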

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent: 5564e88b
include/linux/page_cgroup.h (+58 −17)
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
 
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	PCG_MIGRATION, /* under page migration */
+	/* flags for mem_cgroup and file and I/O status */
+	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+	/* No lock in page_cgroup */
+	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
+	__NR_PCG_FLAGS,
+};
+
+#ifndef __GENERATING_BOUNDS_H
+#include <generated/bounds.h>
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #include <linux/bit_spinlock.h>
 
 /*
  * Page Cgroup can be considered as an extended mem_map.
  * A page_cgroup page is associated with every page descriptor. The
@@ -13,7 +31,6 @@
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct page *page;
 	struct list_head lru;		/* per cgroup LRU list */
 };
 
@@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void)
 #endif
 
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-	PCG_CACHE, /* charged as cache */
-	PCG_USED, /* this object is in use. */
-	PCG_MIGRATION, /* under page migration */
-	/* flags for mem_cgroup and file and I/O status */
-	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
-	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
-};
+struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
@@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }
 
+#ifdef CONFIG_SPARSEMEM
+#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
+#else
+#define PCG_ARRAYID_WIDTH	NODES_SHIFT
+#endif
+
+#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
+#error Not enough space left in pc->flags to store page_cgroup array IDs
+#endif
+
+/* pc->flags: ARRAY-ID | FLAGS */
+
+#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
+
+#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
+/*
+ * Zero the shift count for non-existant fields, to prevent compiler
+ * warnings and ensure references are optimized away.
+ */
+#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
+
+static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
+					    unsigned long id)
+{
+	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
+	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
+}
+
+static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
+{
+	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
@@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }
 
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
 
 #include <linux/swap.h>
 
@@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type)
 	return;
 }
 
-#endif
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+
+#endif /* !__GENERATING_BOUNDS_H */
+
+#endif /* __LINUX_PAGE_CGROUP_H */
kernel/bounds.c (+2 −0)
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
mm/memcontrol.c (+2 −2)
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageCgroupUsed(pc)))
 			continue;
 
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 
 		if (unlikely(!PageLRU(page)))
 			continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 
 		ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
 		if (ret == -ENOMEM)
mm/page_cgroup.c (+55 −36)
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
 	pc->flags = 0;
+	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	pc->page = pfn_to_page(pfn);
 	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	unsigned long pfn;
+	struct page *page;
+	pg_data_t *pgdat;
+
+	pgdat = NODE_DATA(page_cgroup_array_id(pc));
+	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+	page = pfn_to_page(pfn);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
 	struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
 		return -ENOMEM;
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, start_pfn + index);
+		init_page_cgroup(pc, nid);
 	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
@@ -105,15 +117,34 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	struct mem_section *section;
+	struct page *page;
+	unsigned long nr;
+
+	nr = page_cgroup_array_id(pc);
+	section = __nr_to_section(nr);
+	page = pfn_to_page(pc - section->page_cgroup);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-	struct mem_section *section = __pfn_to_section(pfn);
 	struct page_cgroup *base, *pc;
+	struct mem_section *section;
 	unsigned long table_size;
+	unsigned long nr;
 	int nid, index;
 
-	if (!section->page_cgroup) {
+	nr = pfn_to_section_nr(pfn);
+	section = __nr_to_section(nr);
+
+	if (section->page_cgroup)
+		return 0;
+
 	nid = page_to_nid(pfn_to_page(pfn));
 	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
 	VM_BUG_ON(!slab_is_available());
@@ -133,18 +164,6 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 	 * causing kmemleak false positives.
 	 */
 	kmemleak_not_leak(base);
-	} else {
-		/*
-		 * We don't have to allocate page_cgroup again, but
-		 * address of memmap may be changed. So, we have to initialize
-		 * again.
-		 */
-		base = section->page_cgroup + pfn;
-		table_size = 0;
-		/* check address of memmap is changed or not. */
-		if (base->page == pfn_to_page(pfn))
-			return 0;
-	}
 
 	if (!base) {
 		printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 
 	for (index = 0; index < PAGES_PER_SECTION; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, pfn + index);
+		init_page_cgroup(pc, nr);
 	}
 
 	section->page_cgroup = base - pfn;