
Commit 54a6eb5c authored by Mel Gorman, committed by Linus Torvalds

mm: use two zonelist that are filtered by GFP mask



Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations.  Based on the zones
allowed by a gfp mask, one of these zonelists is selected.  All of these
zonelists consume memory and occupy cache lines.
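
The pre-patch layout on a NUMA node looks roughly like this (a condensed sketch
assembled from the lines this patch removes; the pg_data_t field is shown only
for context, and nid/flags come from the caller):

	/* One zonelist per zone type, doubled so GFP_THISNODE gets the upper half */
	#define MAX_ZONELISTS	(2 * MAX_NR_ZONES)

	typedef struct pglist_data {
		/* ... */
		struct zonelist node_zonelists[MAX_ZONELISTS];
		/* ... */
	} pg_data_t;

	/* gfp_zone() folds the GFP_THISNODE offset into the index it returns */
	struct zonelist *zl = NODE_DATA(nid)->node_zonelists + gfp_zone(flags);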

This patch replaces the multiple zonelists per-node with two zonelists.  The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages.  The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.
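
Selecting a zonelist then reduces to a 0/1 decision on __GFP_THISNODE, made by
the new gfp_zonelist() helper added below.  A minimal sketch of how a caller
lands on each list (assuming a NUMA build; the variable names are illustrative):

	int nid = numa_node_id();

	/* Index 0: every populated zone in the system, nearest first */
	struct zonelist *fallback = node_zonelist(nid, GFP_KERNEL);

	/* Index 1: only this node's zones, no fallback */
	struct zonelist *thisnode = node_zonelist(nid, GFP_THISNODE);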

An iterator macro called for_each_zone_zonelist() is introduced that iterates
through each zone allowed by the GFP flags in the selected zonelist.
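
A usage sketch, modelled on the hugetlb and parisc conversions below (the
printk body is illustrative only):

	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_HIGHUSER);
	struct zone *zone, **z;

	/* Visit only zones usable for GFP_HIGHUSER, in zonelist order */
	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(GFP_HIGHUSER))
		printk(KERN_DEBUG "[%d/%s]\n", zone_to_nid(zone), zone->name);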

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 18ea7e71

arch/parisc/mm/init.c  +7 −4

@@ -603,15 +603,18 @@ void show_mem(void)
 #ifdef CONFIG_DISCONTIGMEM
 	{
 		struct zonelist *zl;
-		int i, j, k;
+		int i, j;
 
 		for (i = 0; i < npmem_ranges; i++) {
+			zl = node_zonelist(i, 0);
 			for (j = 0; j < MAX_NR_ZONES; j++) {
-				zl = NODE_DATA(i)->node_zonelists + j;
+				struct zone **z;
+				struct zone *zone;
 
 				printk("Zone list for zone %d on node %d: ", j, i);
-				for (k = 0; zl->zones[k] != NULL; k++)
-					printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
+				for_each_zone_zonelist(zone, z, zl, j)
+					printk("[%d/%s] ", zone_to_nid(zone),
+								zone->name);
 				printk("\n");
 			}
 		}

fs/buffer.c  +6 −4

@@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev)
  */
 static void free_more_memory(void)
 {
-	struct zonelist *zonelist;
+	struct zone **zones;
 	int nid;
 
 	wakeup_pdflush(1024);
 	yield();
 
 	for_each_online_node(nid) {
-		zonelist = node_zonelist(nid, GFP_NOFS);
-		if (zonelist->zones[0])
-			try_to_free_pages(zonelist, 0, GFP_NOFS);
+		zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
+						gfp_zone(GFP_NOFS));
+		if (*zones)
+			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
+						GFP_NOFS);
 	}
 }
 

include/linux/gfp.h  +11 −2

@@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags)
  * virtual kernel addresses to the allocated page(s).
  */
 
+static inline int gfp_zonelist(gfp_t flags)
+{
+	if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE))
+		return 1;
+
+	return 0;
+}
+
 /*
  * We get the zone list from the current node and the gfp_mask.
  * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
- * There are many zonelists per node, two for each active zone.
+ * There are two zonelists per node, one for all zones with memory and
+ * one containing just zones from the node the zonelist belongs to.
  *
  * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
  * optimized to &contig_page_data at compile-time.
  */
 static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
 {
-	return NODE_DATA(nid)->node_zonelists + gfp_zone(flags);
+	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
 }
 
 #ifndef HAVE_ARCH_FREE_PAGE

include/linux/mmzone.h  +43 −22

@@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone)
  * The NUMA zonelists are doubled becausse we need zonelists that restrict the
  * allocations to a single node for GFP_THISNODE.
  *
- * [0 .. MAX_NR_ZONES -1] 		: Zonelists with fallback
- * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1]  : No fallback (GFP_THISNODE)
+ * [0]	: Zonelist with fallback
+ * [1]	: No fallback (GFP_THISNODE)
  */
-#define MAX_ZONELISTS (2 * MAX_NR_ZONES)
+#define MAX_ZONELISTS 2
 
 
 /*
@@ -464,7 +464,7 @@ struct zonelist_cache {
 	unsigned long last_full_zap;		/* when last zap'd (jiffies) */
 };
 #else
-#define MAX_ZONELISTS MAX_NR_ZONES
+#define MAX_ZONELISTS 1
 struct zonelist_cache;
 #endif
 
@@ -486,24 +486,6 @@ struct zonelist {
 #endif
 };
 
-#ifdef CONFIG_NUMA
-/*
- * Only custom zonelists like MPOL_BIND need to be filtered as part of
- * policies. As described in the comment for struct zonelist_cache, these
- * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
- * that to determine if the zonelists needs to be filtered or not.
- */
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
-	return !zonelist->zlcache_ptr;
-}
-#else
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
-	return 0;
-}
-#endif /* CONFIG_NUMA */
-
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 struct node_active_region {
 	unsigned long start_pfn;
@@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone);
 	     zone;					\
 	     zone = next_zone(zone))
 
+/* Returns the first zone at or below highest_zoneidx in a zonelist */
+static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
+					enum zone_type highest_zoneidx)
+{
+	struct zone **z;
+
+	/* Find the first suitable zone to use for the allocation */
+	z = zonelist->zones;
+	while (*z && zone_idx(*z) > highest_zoneidx)
+		z++;
+
+	return z;
+}
+
+/* Returns the next zone at or below highest_zoneidx in a zonelist */
+static inline struct zone **next_zones_zonelist(struct zone **z,
+					enum zone_type highest_zoneidx)
+{
+	/* Find the next suitable zone to use for the allocation */
+	while (*z && zone_idx(*z) > highest_zoneidx)
+		z++;
+
+	return z;
+}
+
+/**
+ * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
+ * @zone - The current zone in the iterator
+ * @z - The current pointer within zonelist->zones being iterated
+ * @zlist - The zonelist being iterated
+ * @highidx - The zone index of the highest zone to return
+ *
+ * This iterator iterates though all zones at or below a given zone index.
+ */
+#define for_each_zone_zonelist(zone, z, zlist, highidx) \
+	for (z = first_zones_zonelist(zlist, highidx), zone = *z++;	\
+		zone;							\
+		z = next_zones_zonelist(z, highidx), zone = *z++)
+
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
 #endif

mm/hugetlb.c  +4 −4

@@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct mempolicy *mpol;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol);
-	struct zone **z;
+	struct zone *zone, **z;
 
-	for (z = zonelist->zones; *z; z++) {
-		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+	for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
+		nid = zone_to_nid(zone);
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
 			page = list_entry(hugepage_freelists[nid].next,
 					  struct page, lru);