Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca889e6c authored by Christoph Lameter's avatar Christoph Lameter Committed by Linus Torvalds
Browse files

[PATCH] Use Zoned VM Counters for NUMA statistics

The numa statistics are really event counters.  But they are per node and
so we have had special treatment for these counters through additional
fields on the pcp structure.  We can now use the per zone nature of the
zoned VM counters to realize these.

This will shrink the size of the pcp structure on NUMA systems.  We will
have some room to add additional per zone counters that will all still fit
in the same cacheline.

 Bits	Prior pcp size	  	Size after patch	We can add
 ------------------------------------------------------------------
 64	128 bytes (16 words)	80 bytes (10 words)	48
 32	 76 bytes (19 words)	56 bytes (14 words)	8 (64 byte cacheline)
							72 (128 byte)

Remove the special statistics for numa and replace them with zoned vm
counters.  This has the side effect that global sums of these events now
show up in /proc/vmstat.

Also take the opportunity to move the zone_statistics() function from
page_alloc.c into vmstat.c.

Discussions:
V2 http://marc.theaimsgroup.com/?t=115048227000002&r=1&w=2



Signed-off-by: default avatarChristoph Lameter <clameter@sgi.com>
Acked-by: default avatarAndi Kleen <ak@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent bab1846a
Loading
Loading
Loading
Loading
+6 −28
Original line number Diff line number Diff line
@@ -94,28 +94,6 @@ static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);

static ssize_t node_read_numastat(struct sys_device * dev, char * buf)
{
	unsigned long numa_hit, numa_miss, interleave_hit, numa_foreign;
	unsigned long local_node, other_node;
	int i, cpu;
	pg_data_t *pg = NODE_DATA(dev->id);
	numa_hit = 0;
	numa_miss = 0;
	interleave_hit = 0;
	numa_foreign = 0;
	local_node = 0;
	other_node = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zone *z = &pg->node_zones[i];
		for_each_online_cpu(cpu) {
			struct per_cpu_pageset *ps = zone_pcp(z,cpu);
			numa_hit += ps->numa_hit;
			numa_miss += ps->numa_miss;
			numa_foreign += ps->numa_foreign;
			interleave_hit += ps->interleave_hit;
			local_node += ps->local_node;
			other_node += ps->other_node;
		}
	}
	return sprintf(buf,
		       "numa_hit %lu\n"
		       "numa_miss %lu\n"
@@ -123,12 +101,12 @@ static ssize_t node_read_numastat(struct sys_device * dev, char * buf)
		       "interleave_hit %lu\n"
		       "local_node %lu\n"
		       "other_node %lu\n",
		       numa_hit,
		       numa_miss,
		       numa_foreign,
		       interleave_hit,
		       local_node,
		       other_node);
		       node_page_state(dev->id, NUMA_HIT),
		       node_page_state(dev->id, NUMA_MISS),
		       node_page_state(dev->id, NUMA_FOREIGN),
		       node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
		       node_page_state(dev->id, NUMA_LOCAL),
		       node_page_state(dev->id, NUMA_OTHER));
}
static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);

+8 −9
Original line number Diff line number Diff line
@@ -57,6 +57,14 @@ enum zone_stat_item {
	NR_WRITEBACK,
	NR_UNSTABLE_NFS,	/* NFS unstable pages */
	NR_BOUNCE,
#ifdef CONFIG_NUMA
	NUMA_HIT,		/* allocated in intended node */
	NUMA_MISS,		/* allocated in non intended node */
	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
	NUMA_LOCAL,		/* allocation from local node */
	NUMA_OTHER,		/* allocation from other node */
#endif
	NR_VM_ZONE_STAT_ITEMS };

struct per_cpu_pages {
@@ -71,15 +79,6 @@ struct per_cpu_pageset {
#ifdef CONFIG_SMP
	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif

#ifdef CONFIG_NUMA
	unsigned long numa_hit;		/* allocated in intended node */
	unsigned long numa_miss;	/* allocated in non intended node */
	unsigned long numa_foreign;	/* was intended here, hit elsewhere */
	unsigned long interleave_hit; 	/* interleaver prefered this zone */
	unsigned long local_node;	/* allocation from local node */
	unsigned long other_node;	/* allocation from other node */
#endif
} ____cacheline_aligned_in_smp;

#ifdef CONFIG_NUMA
+9 −1
Original line number Diff line number Diff line
@@ -173,9 +173,15 @@ static inline unsigned long node_page_state(int node,
#endif
		zone_page_state(&zones[ZONE_DMA], item);
}

extern void zone_statistics(struct zonelist *, struct zone *);

#else

#define node_page_state(node, item) global_page_state(item)
#endif
#define zone_statistics(_zl,_z) do { } while (0)

#endif /* CONFIG_NUMA */

#define __add_zone_page_state(__z, __i, __d)	\
		__mod_zone_page_state(__z, __i, __d)
@@ -190,6 +196,8 @@ static inline void zap_zone_vm_stats(struct zone *zone)
	memset(zone->vm_stat, 0, sizeof(zone->vm_stat));
}

extern void inc_zone_state(struct zone *, enum zone_stat_item);

#ifdef CONFIG_SMP
void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int);
void __inc_zone_page_state(struct page *, enum zone_stat_item);
+2 −4
Original line number Diff line number Diff line
@@ -1209,10 +1209,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,

	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
	page = __alloc_pages(gfp, order, zl);
	if (page && page_zone(page) == zl->zones[0]) {
		zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
		put_cpu();
	}
	if (page && page_zone(page) == zl->zones[0])
		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
	return page;
}

+1 −22
Original line number Diff line number Diff line
@@ -709,27 +709,6 @@ void drain_local_pages(void)
}
#endif /* CONFIG_PM */

static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
{
#ifdef CONFIG_NUMA
	pg_data_t *pg = z->zone_pgdat;
	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
	struct per_cpu_pageset *p;

	p = zone_pcp(z, cpu);
	if (pg == orig) {
		p->numa_hit++;
	} else {
		p->numa_miss++;
		zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
	}
	if (pg == NODE_DATA(numa_node_id()))
		p->local_node++;
	else
		p->other_node++;
#endif
}

/*
 * Free a 0-order page
 */
@@ -827,7 +806,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
	}

	__mod_page_state_zone(zone, pgalloc, 1 << order);
	zone_statistics(zonelist, zone, cpu);
	zone_statistics(zonelist, zone);
	local_irq_restore(flags);
	put_cpu();

Loading