
Commit a52633d8 authored by Mel Gorman, committed by Linus Torvalds

mm, vmscan: move lru_lock to the node

Node-based reclaim requires node-based LRUs and locking.  This is a
preparation patch that just moves the lru_lock to the node so later
patches are easier to review.  It is a mechanical change but note this
patch makes contention worse because the LRU lock is hotter and direct
reclaim and kswapd can contend on the same lock even when reclaiming
from different zones.
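
As a rough sketch of what the mechanical change means for call sites (illustrative only, not part of the patch; reclaim_zone_pages() is a hypothetical caller): the per-zone &zone->lru_lock is replaced by the zone_lru_lock() helper introduced below, which returns the lock embedded in the zone's pglist_data, so every zone in a node now shares one lock.

/*
 * Illustrative sketch only -- not part of this patch.
 * reclaim_zone_pages() is a made-up caller; zone_lru_lock()
 * is the helper this patch adds to include/linux/mmzone.h.
 */
static void reclaim_zone_pages(struct zone *zone)
{
	unsigned long flags;

	/* Before this patch: a lock private to each zone. */
	/* spin_lock_irqsave(&zone->lru_lock, flags); */

	/*
	 * After this patch: the lock lives in zone->zone_pgdat, so all
	 * zones of the node resolve to the same spinlock -- the source
	 * of the extra contention noted above.
	 */
	spin_lock_irqsave(zone_lru_lock(zone), flags);

	/* ... isolate pages from the node's LRU lists ... */

	spin_unlock_irqrestore(zone_lru_lock(zone), flags);
}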

Link: http://lkml.kernel.org/r/1467970510-21195-3-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 75ef7184
+2 −2
@@ -107,9 +107,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 
 8. LRU
         Each memcg has its own private LRU. Now, its handling is under global
-	VM's control (means that it's handled under global zone->lru_lock).
+	VM's control (means that it's handled under global zone_lru_lock).
 	Almost all routines around memcg's LRU is called by global LRU's
-	list management functions under zone->lru_lock().
+	list management functions under zone_lru_lock().
 
 	A special function is mem_cgroup_isolate_pages(). This scans
 	memcg's private LRU and call __isolate_lru_page() to extract a page
+2 −2
@@ -267,11 +267,11 @@ When oom event notifier is registered, event will be delivered.
   Other lock order is following:
   PG_locked.
   mm->page_table_lock
-      zone->lru_lock
+      zone_lru_lock
 	  lock_page_cgroup.
  In many cases, just lock_page_cgroup() is called.
  per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
-  zone->lru_lock, it has no lock of its own.
+  zone_lru_lock, it has no lock of its own.
 
 2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)

+1 −1
@@ -118,7 +118,7 @@ struct page {
 	 */
 	union {
 		struct list_head lru;	/* Pageout list, eg. active_list
-					 * protected by zone->lru_lock !
+					 * protected by zone_lru_lock !
					 * Can be used as a generic list
 					 * by the page owner.
 					 */
+8 −2
@@ -93,7 +93,7 @@ struct free_area {
 struct pglist_data;
 
 /*
- * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
+ * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
  * So add a wild amount of padding here to ensure that they fall into separate
  * cachelines.  There are very few zone structures in the machine, so space
  * consumption is not a concern here.
@@ -496,7 +496,6 @@ struct zone {
 	/* Write-intensive fields used by page reclaim */
 
 	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;
 	struct lruvec		lruvec;
 
 	/*
@@ -690,6 +689,9 @@ typedef struct pglist_data {
 	/* Number of pages migrated during the rate limiting time interval */
 	unsigned long numabalancing_migrate_nr_pages;
 #endif
+	/* Write-intensive fields used by page reclaim */
+	ZONE_PADDING(_pad1_)
+	spinlock_t		lru_lock;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 	/*
@@ -721,6 +723,10 @@ typedef struct pglist_data {
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
+static inline spinlock_t *zone_lru_lock(struct zone *zone)
+{
+	return &zone->zone_pgdat->lru_lock;
+}
 
 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
 {
+5 −5
@@ -752,7 +752,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 * if contended.
 		 */
 		if (!(low_pfn % SWAP_CLUSTER_MAX)
-		    && compact_unlock_should_abort(&zone->lru_lock, flags,
+		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
 								&locked, cc))
 			break;
 
@@ -813,7 +813,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			if (unlikely(__PageMovable(page)) &&
 					!PageIsolated(page)) {
 				if (locked) {
-					spin_unlock_irqrestore(&zone->lru_lock,
+					spin_unlock_irqrestore(zone_lru_lock(zone),
 									flags);
 					locked = false;
 				}
@@ -836,7 +836,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 		/* If we already hold the lock, we can skip some rechecking */
 		if (!locked) {
-			locked = compact_trylock_irqsave(&zone->lru_lock,
+			locked = compact_trylock_irqsave(zone_lru_lock(zone),
 								&flags, cc);
 			if (!locked)
 				break;
@@ -899,7 +899,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 */
 		if (nr_isolated) {
 			if (locked) {
-				spin_unlock_irqrestore(&zone->lru_lock,	flags);
+				spin_unlock_irqrestore(zone_lru_lock(zone), flags);
 				locked = false;
 			}
 			acct_isolated(zone, cc);
@@ -927,7 +927,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		low_pfn = end_pfn;
 
 	if (locked)
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
+		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
 
 	/*
 	 * Update the pageblock-skip information and cached scanner pfn,