Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 55779ec7 authored by Johannes Weiner's avatar Johannes Weiner Committed by Linus Torvalds
Browse files

mm: fix vm-scalability regression in cgroup-aware workingset code

Commit 23047a96 ("mm: workingset: per-cgroup cache thrash
detection") added a page->mem_cgroup lookup to the cache eviction,
refault, and activation paths, as well as locking to the activation
path, and the vm-scalability tests showed a regression of -23%.

While the test in question is an artificial worst-case scenario that
doesn't occur in real workloads - reading two sparse files in parallel
at full CPU speed just to hammer the LRU paths - there is still some
optimizations that can be done in those paths.

Inline the lookup functions to eliminate calls.  Also, page->mem_cgroup
doesn't need to be stabilized when counting an activation; we merely
need to hold the RCU lock to prevent the memcg from being freed.

This cuts down on overhead quite a bit:

23047a96 063f6715e77a7be5770d6081fe
---------------- --------------------------
         %stddev     %change         %stddev
             \          |                \
  21621405 +- 0%     +11.3%   24069657 +- 2%  vm-scalability.throughput

[linux@roeck-us.net: drop unnecessary include file]
[hannes@cmpxchg.org: add WARN_ON_ONCE()s]
  Link: http://lkml.kernel.org/r/20160707194024.GA26580@cmpxchg.org
Link: http://lkml.kernel.org/r/20160624175101.GA3024@cmpxchg.org


Reported-by: default avatarYe Xiaolong <xiaolong.ye@intel.com>
Signed-off-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Acked-by: default avatarMichal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: default avatarGuenter Roeck <linux@roeck-us.net>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 400bc7fd
Loading
Loading
Loading
Loading
+42 −1
Original line number Diff line number Diff line
@@ -314,7 +314,48 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);

void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);

struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
static inline struct mem_cgroup_per_zone *
mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
{
	int nid = zone_to_nid(zone);
	int zid = zone_idx(zone);

	return &memcg->nodeinfo[nid]->zoneinfo[zid];
}

/**
 * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
 * @zone: zone of the wanted lruvec
 * @memcg: memcg of the wanted lruvec
 *
 * Returns the lru list vector holding pages for the given @zone and
 * @mem.  This can be the global zone lruvec, if the memory controller
 * is disabled.
 */
static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
						    struct mem_cgroup *memcg)
{
	struct mem_cgroup_per_zone *mz;
	struct lruvec *lruvec;

	if (mem_cgroup_disabled()) {
		lruvec = &zone->lruvec;
		goto out;
	}

	mz = mem_cgroup_zone_zoneinfo(memcg, zone);
	lruvec = &mz->lruvec;
out:
	/*
	 * Since a node can be onlined after the mem_cgroup was created,
	 * we have to be prepared to initialize lruvec->zone here;
	 * and if offlined then reonlined, we need to reinitialize it.
	 */
	if (unlikely(lruvec->zone != zone))
		lruvec->zone = zone;
	return lruvec;
}

struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);

bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
+10 −0
Original line number Diff line number Diff line
@@ -973,11 +973,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
{
	return page->mem_cgroup;
}
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return READ_ONCE(page->mem_cgroup);
}
#else
static inline struct mem_cgroup *page_memcg(struct page *page)
{
	return NULL;
}
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return NULL;
}
#endif

/*
+0 −42
Original line number Diff line number Diff line
@@ -323,15 +323,6 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);

#endif /* !CONFIG_SLOB */

static struct mem_cgroup_per_zone *
mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
{
	int nid = zone_to_nid(zone);
	int zid = zone_idx(zone);

	return &memcg->nodeinfo[nid]->zoneinfo[zid];
}

/**
 * mem_cgroup_css_from_page - css of the memcg associated with a page
 * @page: page of interest
@@ -943,39 +934,6 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
	     iter != NULL;				\
	     iter = mem_cgroup_iter(NULL, iter, NULL))

/**
 * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
 * @zone: zone of the wanted lruvec
 * @memcg: memcg of the wanted lruvec
 *
 * Returns the lru list vector holding pages for the given @zone and
 * @mem.  This can be the global zone lruvec, if the memory controller
 * is disabled.
 */
struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
				      struct mem_cgroup *memcg)
{
	struct mem_cgroup_per_zone *mz;
	struct lruvec *lruvec;

	if (mem_cgroup_disabled()) {
		lruvec = &zone->lruvec;
		goto out;
	}

	mz = mem_cgroup_zone_zoneinfo(memcg, zone);
	lruvec = &mz->lruvec;
out:
	/*
	 * Since a node can be onlined after the mem_cgroup was created,
	 * we have to be prepared to initialize lruvec->zone here;
	 * and if offlined then reonlined, we need to reinitialize it.
	 */
	if (unlikely(lruvec->zone != zone))
		lruvec->zone = zone;
	return lruvec;
}

/**
 * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
 * @page: the page
+6 −4
Original line number Diff line number Diff line
@@ -305,9 +305,10 @@ bool workingset_refault(void *shadow)
 */
void workingset_activation(struct page *page)
{
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;

	lock_page_memcg(page);
	rcu_read_lock();
	/*
	 * Filter non-memcg pages here, e.g. unmap can call
	 * mark_page_accessed() on VDSO pages.
@@ -315,12 +316,13 @@ void workingset_activation(struct page *page)
	 * XXX: See workingset_refault() - this should return
	 * root_mem_cgroup even for !CONFIG_MEMCG.
	 */
	if (!mem_cgroup_disabled() && !page_memcg(page))
	memcg = page_memcg_rcu(page);
	if (!mem_cgroup_disabled() && !memcg)
		goto out;
	lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page));
	lruvec = mem_cgroup_zone_lruvec(page_zone(page), memcg);
	atomic_long_inc(&lruvec->inactive_age);
out:
	unlock_page_memcg(page);
	rcu_read_unlock();
}

/*