
Commit 6f739662 authored by Vinayak Menon

mm: zbud: fix the locking scenarios with zcache



With zcache using zbud, strange locking scenarios are
observed. The first problem seen is:

Core 2 is waiting on mapping->tree_lock, which is held by core 6:
do_raw_spin_lock

raw_spin_lock_irq
atomic_cmpxchg
page_freeze_refs
__remove_mapping
shrink_page_list

Core 6, after taking mapping->tree_lock, is waiting on the zbud pool
lock, which is held by core 5:
zbud_alloc
zcache_store_page
__cleancache_put_page
cleancache_put_page
__delete_from_page_cache
spin_unlock_irq
__remove_mapping
shrink_page_list
shrink_inactive_list

Core 5, after taking the zbud pool lock in zbud_free, received an IRQ;
after IRQ exit, softirqs were scheduled, and end_page_writeback tried
to take mapping->tree_lock, which is already held by core 6. Deadlock:
do_raw_spin_lock
raw_spin_lock_irqsave
test_clear_page_writeback
end_page_writeback
ext4_finish_bio
ext4_end_bio
bio_endio
blk_update_request
end_clone_bio
bio_endio
blk_update_request
blk_update_bidi_request
blk_end_bidi_request
blk_end_request
mmc_blk_cmdq_complete_rq
mmc_cmdq_softirq_done
blk_done_softirq
static_key_count
static_key_false
trace_softirq_exit
__do_softirq()
tick_irq_exit
irq_exit()
set_irq_regs
__handle_domain_irq
gic_handle_irq
el1_irq
exception
__list_del_entry
list_del
zbud_free
zcache_load_page
__cleancache_get_page
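
Condensed, the three traces form the following cycle (a summary in
comment form, using only the locks and functions named above; core 2
is a bystander queued on the same lock):

	/*
	 * core 6: holds mapping->tree_lock, spins on the zbud pool lock
	 *         <- held by core 5
	 * core 5: holds the pool lock (zbud_free); an IRQ arrives, and on
	 *         IRQ exit blk_done_softirq -> end_page_writeback spins
	 *         on mapping->tree_lock  <- held by core 6
	 * core 2: spins on mapping->tree_lock  <- held by core 6
	 *
	 * Cores 5 and 6 wait on each other forever; core 2 is stuck
	 * behind them.
	 */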

This shows that allowing softirqs while holding the zbud pool lock
can result in deadlocks. To fix this, commit 6a1fdaa3 ("mm: zbud:
prevent softirq during zbud alloc, free and reclaim") took
spin_lock_bh during zbud_free, zbud_alloc and zbud_reclaim_page. But
this resulted in another deadlock:

spin_bug()
do_raw_spin_lock()
_raw_spin_lock_irqsave()
test_clear_page_writeback()
end_page_writeback()
ext4_finish_bio()
ext4_end_bio()
bio_endio()
blk_update_request()
end_clone_bio()
bio_endio()
blk_update_request()
blk_update_bidi_request()
blk_end_request()
mmc_blk_cmdq_complete_rq()
mmc_cmdq_softirq_done()
blk_done_softirq()
__do_softirq()
do_softirq()
__local_bh_enable_ip()
_raw_spin_unlock_bh()
zbud_alloc()
zcache_store_page()
__cleancache_put_page()
__delete_from_page_cache()
__remove_mapping()
shrink_page_list()

Here, spin_unlock_bh explicitly invoked do_softirq, which then tried
to acquire mapping->tree_lock, already taken by __remove_mapping.
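
Reduced to its essence, the broken pattern looks like the sketch below
(illustrative only, not the actual kernel code; it merely mirrors the
call chain above, with __remove_mapping holding mapping->tree_lock
with IRQs off while zbud_alloc uses _bh locking underneath it):

	#include <linux/spinlock.h>

	/* Sketch: an irq-disabling lock held around a _bh-locked section. */
	static void remove_mapping_path(spinlock_t *tree_lock,
					spinlock_t *pool_lock)
	{
		spin_lock_irq(tree_lock);	/* __remove_mapping: IRQs off */

		spin_lock_bh(pool_lock);	/* zbud_alloc */
		/* ... store the page in the pool ... */
		spin_unlock_bh(pool_lock);	/* re-enabling bottom halves runs
						 * pending softirqs synchronously
						 * via do_softirq();
						 * blk_done_softirq ->
						 * end_page_writeback then tries
						 * tree_lock, which this CPU
						 * already holds: deadlock */

		spin_unlock_irq(tree_lock);
	}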

The new fix considers the following facts:
1) zcache_store_page is always called from __delete_from_page_cache
with mapping->tree_lock held and interrupts disabled. Thus zbud_alloc,
which is called only from zcache_store_page, is always called with
interrupts disabled.
2) zbud_free and zbud_reclaim_page can be called from zcache with or
without interrupts disabled. So an interrupt received while holding
the zbud pool lock can result in do_softirq running and an attempt to
take mapping->tree_lock.

(1) implies zbud_alloc need not explicitly disable bh. But disable
interrupts anyway, to make sure zbud_alloc stays safe with zcache
irrespective of future changes. This fixes the second scenario.
(2) implies zbud_free and zbud_reclaim_page should use
spin_lock_irqsave, so that interrupts, and in turn softirqs, cannot be
triggered while the pool lock is held. spin_lock_bh can't be used
because spin_unlock_bh can trigger a softirq even when interrupts are
disabled. This fixes the first scenario.
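
In code, the fix boils down to the pattern below (a sketch of the
before/after; the real change is in the diff that follows).
spin_lock_irqsave keeps local interrupts off for the whole critical
section, so no IRQ, and hence no softirq on IRQ exit, can run on this
CPU while pool->lock is held; spin_unlock_irqrestore restores the
caller's previous interrupt state, which keeps it correct both when
entered with IRQs already disabled (zbud_alloc under
mapping->tree_lock) and when entered with IRQs enabled (zbud_free,
zbud_reclaim_page):

	unsigned long flags;

	/* Before (deadlock-prone): spin_unlock_bh may run softirqs. */
	spin_lock_bh(&pool->lock);
	/* ... touch pool lists ... */
	spin_unlock_bh(&pool->lock);

	/* After: IRQs, and therefore softirqs, stay off in the section. */
	spin_lock_irqsave(&pool->lock, flags);
	/* ... touch pool lists ... */
	spin_unlock_irqrestore(&pool->lock, flags);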

Change-Id: Ibc810525dddf97614db41643642fec7472bd6a2c
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
parent 450bd44d
@@ -342,6 +342,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	struct zbud_header *zhdr = NULL;
 	enum buddy bud;
 	struct page *page;
+	unsigned long flags;
 	int found = 0;
 
 	if (!size || (gfp & __GFP_HIGHMEM))
@@ -349,7 +350,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
 	chunks = size_to_chunks(size);
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 
 	/* First, try to find an unbuddied zbud page. */
 	zhdr = NULL;
@@ -368,11 +369,11 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	}
 
 	/* Couldn't find unbuddied zbud page, create new one */
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 	page = alloc_page(gfp);
 	if (!page)
 		return -ENOMEM;
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	pool->pages_nr++;
 	zhdr = init_zbud_page(page);
 	bud = FIRST;
@@ -400,7 +401,7 @@ found:
 	*handle = encode_handle(zhdr, bud);
 	if ((gfp & __GFP_ZERO) && found)
 		memset((void *)*handle, 0, size);
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 
 	return 0;
 }
@@ -419,8 +420,9 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 {
 	struct zbud_header *zhdr;
 	int freechunks;
+	unsigned long flags;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	zhdr = handle_to_zbud_header(handle);
 
 	/* If first buddy, handle will be page aligned */
@@ -431,7 +433,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 
 	if (zhdr->under_reclaim) {
 		/* zbud page is under reclaim, reclaim will free */
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return;
 	}
 
@@ -449,7 +451,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 	}
 
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
 #define list_tail_entry(ptr, type, member) \
@@ -494,12 +496,13 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 {
 	int i, ret, freechunks;
 	struct zbud_header *zhdr;
+	unsigned long flags;
 	unsigned long first_handle = 0, last_handle = 0;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
 			retries == 0) {
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
@@ -518,7 +521,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 			first_handle = encode_handle(zhdr, FIRST);
 		if (zhdr->last_chunks)
 			last_handle = encode_handle(zhdr, LAST);
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		/* Issue the eviction callback(s) */
 		if (first_handle) {
@@ -532,7 +535,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock_bh(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
 		zhdr->under_reclaim = false;
 		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 			/*
@@ -541,7 +544,7 @@ next:
 			 */
 			free_zbud_page(zhdr);
 			pool->pages_nr--;
-			spin_unlock_bh(&pool->lock);
+			spin_unlock_irqrestore(&pool->lock, flags);
 			return 0;
 		} else if (zhdr->first_chunks == 0 ||
 				zhdr->last_chunks == 0) {
@@ -556,7 +559,7 @@ next:
 		/* add to beginning of LRU */
 		list_add(&zhdr->lru, &pool->lru);
 	}
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 	return -EAGAIN;
 }