Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 19770b32 authored by Mel Gorman's avatar Mel Gorman Committed by Linus Torvalds
Browse files

mm: filter based on a nodemask as well as a gfp_mask



The MPOL_BIND policy creates a zonelist that is used for allocations
controlled by that mempolicy.  As the per-node zonelist is already being
filtered based on a zone id, this patch adds a version of __alloc_pages() that
takes a nodemask for further filtering.  This eliminates the need for
MPOL_BIND to create a custom zonelist.

A positive benefit of this is that allocations using MPOL_BIND now use the
local node's distance-ordered zonelist instead of a custom node-id-ordered
zonelist.  I.e., pages will be allocated from the closest allowed node with
available memory.

[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: default avatarMel Gorman <mel@csn.ul.ie>
Acked-by: default avatarChristoph Lameter <clameter@sgi.com>
Signed-off-by: default avatarLee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent dd1a239f
Loading
Loading
Loading
Loading
+3 −8
Original line number Diff line number Diff line
@@ -182,14 +182,9 @@ Components of Memory Policies
	    The Default mode does not use the optional set of nodes.

	MPOL_BIND:  This mode specifies that memory must come from the
-	set of nodes specified by the policy.
-
-	    The memory policy APIs do not specify an order in which the nodes
-	    will be searched.  However, unlike "local allocation", the Bind
-	    policy does not consider the distance between the nodes.  Rather,
-	    allocations will fallback to the nodes specified by the policy in
-	    order of numeric node id.  Like everything in Linux, this is subject
-	    to change.
+	set of nodes specified by the policy.  Memory will be allocated from
+	the node in the set with sufficient free memory that is closest to
+	the node where the allocation takes place.

	MPOL_PREFERRED:  This mode specifies that the allocation should be
	attempted from the single node specified in the policy.  If that
+5 −4
Original line number Diff line number Diff line
@@ -360,16 +360,17 @@ void invalidate_bdev(struct block_device *bdev)
 */
/*
 * Kick pdflush and shrink memory on every online node so a subsequent
 * buffer/page allocation has a better chance of succeeding (GFP_NOFS
 * context: we must not recurse into the filesystem).
 *
 * NOTE(review): the scraped diff fused the pre- and post-patch bodies
 * (a stale "zrefs" zoneref variant alongside the new one); this is the
 * coherent post-commit version, which passes a NULL nodemask and
 * receives the first usable zone through the new &zone out-parameter.
 */
static void free_more_memory(void)
{
	struct zone *zone;
	int nid;

	wakeup_pdflush(1024);
	yield();

	for_each_online_node(nid) {
		/* Result zoneref is unneeded; we only care whether a zone exists. */
		(void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
						gfp_zone(GFP_NOFS), NULL,
						&zone);
		if (zone)
			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
						GFP_NOFS);
	}
}
+2 −2
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
void cpuset_update_task_memory_state(void);
-int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
+int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);

extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask);
@@ -103,7 +103,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
static inline void cpuset_init_current_mems_allowed(void) {}
static inline void cpuset_update_task_memory_state(void) {}

/*
 * Stub for kernels built without cpusets: with no cpuset restrictions,
 * every nodemask is trivially within the allowed set, so always report
 * success.  (The scraped diff fused the old zonelist-based prototype
 * with this new nodemask-based one; only the latter belongs here.)
 */
static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
	return 1;
}
+4 −0
Original line number Diff line number Diff line
@@ -182,6 +182,10 @@ static inline void arch_alloc_page(struct page *page, int order) { }

extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);

extern struct page *
__alloc_pages_nodemask(gfp_t, unsigned int,
				struct zonelist *, nodemask_t *nodemask);

static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
+12 −7
Original line number Diff line number Diff line
@@ -54,19 +54,20 @@ struct mm_struct;
 * mmap_sem.
 *
 * Freeing policy:
- * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
- * All other policies don't have any external state. mpol_free() handles this.
+ * Mempolicy objects are reference counted.  A mempolicy will be freed when
+ * mpol_free() decrements the reference count to zero.
  *
  * Copying policy objects:
- * For MPOL_BIND the zonelist must be always duplicated. mpol_clone() does this.
+ * mpol_copy() allocates a new mempolicy and copies the specified mempolicy
+ * to the new storage.  The reference count of the new object is initialized
+ * to 1, representing the caller of mpol_copy().
 */
struct mempolicy {
	atomic_t refcnt;
	short policy; 	/* See MPOL_* above */
	union {
-		struct zonelist  *zonelist;	/* bind */
 		short 		 preferred_node; /* preferred */
-		nodemask_t	 nodes;		/* interleave */
+		nodemask_t	 nodes;		/* interleave/bind */
		/* undefined for default */
	} v;
	nodemask_t cpuset_mems_allowed;	/* mempolicy relative to these nodes */
@@ -151,7 +152,8 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);

extern struct mempolicy default_policy;
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
-		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
+				unsigned long addr, gfp_t gfp_flags,
+				struct mempolicy **mpol, nodemask_t **nodemask);
extern unsigned slab_node(struct mempolicy *policy);

extern enum zone_type policy_zone;
@@ -239,8 +241,11 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
}

static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
				unsigned long addr, gfp_t gfp_flags,
				struct mempolicy **mpol, nodemask_t **nodemask)
{
	*mpol = NULL;
	*nodemask = NULL;
	return node_zonelist(0, gfp_flags);
}

Loading