
Commit 778dd893 authored by Hugh Dickins, committed by Linus Torvalds

tmpfs: fix race between umount and swapoff

The use of igrab() in swapoff's shmem_unuse_inode() is just as vulnerable
to umount as that in shmem_writepage().

Fix this instance by extending the protection of shmem_swaplist_mutex
right across shmem_unuse_inode(): while it's on the list, the inode cannot
be evicted (and the filesystem cannot be unmounted) without
shmem_evict_inode() taking that mutex to remove it from the list.
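
As a minimal user-space sketch of that lifetime rule (all names here, obj, obj_list, obj_list_mutex, evict() and walk(), are invented for illustration; the kernel's equivalents are the shmem swaplist, shmem_swaplist_mutex and shmem_evict_inode()): because the destructor must take the same mutex to unlink the object before freeing it, a walker holding that mutex needs no reference count on the object.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	struct obj *next;
	int id;
};

static struct obj *obj_list;		/* cf. shmem_swaplist */
static pthread_mutex_t obj_list_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Destructor (cf. shmem_evict_inode): unlink under the mutex, then
 * free.  It cannot reach free() while any walker holds the mutex. */
static void evict(struct obj *victim)
{
	struct obj **pp;

	pthread_mutex_lock(&obj_list_mutex);
	for (pp = &obj_list; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;
			break;
		}
	}
	pthread_mutex_unlock(&obj_list_mutex);
	free(victim);
}

/* Walker (cf. shmem_unuse_inode under the extended protection): the
 * object is used entirely under the mutex, so no igrab()/iput(). */
static void walk(void)
{
	struct obj *p;

	pthread_mutex_lock(&obj_list_mutex);
	for (p = obj_list; p; p = p->next)
		printf("visiting obj %d\n", p->id);	/* cannot be freed here */
	pthread_mutex_unlock(&obj_list_mutex);
}

int main(void)
{
	struct obj *a = malloc(sizeof(*a));

	if (!a)
		return 1;
	a->next = NULL;
	a->id = 1;
	obj_list = a;
	walk();
	evict(a);
	return 0;
}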

But since shmem_writepage() might take that mutex, we should avoid making
memory allocations or memcg charges while holding it: prepare them at the
outer level in shmem_unuse().  When mem_cgroup_cache_charge() was
originally placed, we didn't know until that point that the page from swap
was actually a shmem page; but nowadays it's noted in the swap_map, so
we're safe to charge upfront.  For the radix_tree, do as is done in
shmem_getpage(): preload upfront, but don't pin to the cpu; so we make a
habit of refreshing the node pool, but might dip into GFP_NOWAIT reserves
on occasion if subsequently preempted.
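
A rough user-space analogue of that prepare-then-consume pattern (the names node_pool, pool_refill() and pool_take_atomic() are invented; the kernel's per-cpu radix-tree preload differs in mechanism): top up a node cache while blocking is still allowed, release any claim on it at once, and later consume from it where blocking is forbidden, tolerating the occasional miss.

#include <pthread.h>
#include <stdlib.h>

#define POOL_MAX 4

struct node {
	struct node *next;
};

static struct node *node_pool;
static int pool_count;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

/* The GFP_KERNEL side: may block in malloc(), so it is called before
 * the contended mutex is taken.  Like radix_tree_preload() followed
 * by radix_tree_preload_end(), it leaves nothing pinned: others may
 * drain the pool before we come back to use it. */
static int pool_refill(void)
{
	pthread_mutex_lock(&pool_lock);
	while (pool_count < POOL_MAX) {
		struct node *n = malloc(sizeof(*n));

		if (!n) {
			pthread_mutex_unlock(&pool_lock);
			return -1;
		}
		n->next = node_pool;
		node_pool = n;
		pool_count++;
	}
	pthread_mutex_unlock(&pool_lock);
	return 0;
}

/* The GFP_NOWAIT side: never allocates, only takes a cached node;
 * returning NULL is the occasional, acceptable failure the commit
 * message mentions. */
static struct node *pool_take_atomic(void)
{
	struct node *n;

	pthread_mutex_lock(&pool_lock);
	n = node_pool;
	if (n) {
		node_pool = n->next;
		pool_count--;
	}
	pthread_mutex_unlock(&pool_lock);
	return n;
}

int main(void)
{
	if (pool_refill())
		return 1;
	free(pool_take_atomic());	/* may legitimately be NULL */
	return 0;
}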

With the allocation and charge moved out of shmem_unuse_inode(),
we can also keep the index map and info->lock held from finding the
entry through to inserting the page.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b1dea800
mm/shmem.c +43 −45
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
 
 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 {
-	struct inode *inode;
+	struct address_space *mapping;
 	unsigned long idx;
 	unsigned long size;
 	unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	if (size > SHMEM_NR_DIRECT)
 		size = SHMEM_NR_DIRECT;
 	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0)
+	if (offset >= 0) {
+		shmem_swp_balance_unmap();
 		goto found;
+	}
 	if (!info->i_indirect)
 		goto lost2;
 
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			offset = shmem_find_swp(entry, ptr, ptr+size);
-			shmem_swp_unmap(ptr);
 			if (offset >= 0) {
 				shmem_dir_unmap(dir);
 				goto found;
 			}
+			shmem_swp_unmap(ptr);
 		}
 	}
 lost1:
@@ -928,8 +930,7 @@ lost2:
 	return 0;
 found:
 	idx += offset;
-	inode = igrab(&info->vfs_inode);
-	spin_unlock(&info->lock);
+	ptr += offset;
 
 	/*
 	 * Move _head_ to start search for next from here.
@@ -940,37 +941,18 @@ found:
 	 */
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
-	mutex_unlock(&shmem_swaplist_mutex);
 
-	error = 1;
-	if (!inode)
-		goto out;
 	/*
-	 * Charge page using GFP_KERNEL while we can wait.
-	 * Charged back to the user(not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
+	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-	if (error)
-		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
-		goto out;
-	}
-	error = 1;
-
-	spin_lock(&info->lock);
-	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache_locked(page, inode->i_mapping,
-						idx, GFP_NOWAIT);
-		/* does mem_cgroup_uncharge_cache_page on error */
-	} else	/* we must compensate for our precharge above */
-		mem_cgroup_uncharge_cache_page(page);
+	mapping = info->vfs_inode.i_mapping;
+	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+	/* which does mem_cgroup_uncharge_cache_page on error */
 
 	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(inode->i_mapping, idx);
+		struct page *filepage = find_get_page(mapping, idx);
 		error = 1;
 		if (filepage) {
 			/*
@@ -990,14 +972,8 @@ found:
 		swap_free(entry);
 		error = 1;	/* not an error, but entry was found */
 	}
-	if (ptr)
-		shmem_swp_unmap(ptr);
+	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
-	radix_tree_preload_end();
-out:
-	unlock_page(page);
-	page_cache_release(page);
-	iput(inode);		/* allows for NULL */
 	return error;
 }
 
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	struct list_head *p, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
+	int error;
+
+	/*
+	 * Charge page using GFP_KERNEL while we can wait, before taking
+	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
+	 * Charged back to the user (not to caller) when swap account is used.
+	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 */
+	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+	if (error)
+		goto out;
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu,
+	 * it's okay if sometimes we get rescheduled after this.
+	 */
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto uncharge;
+	radix_tree_preload_end();
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 		found = shmem_unuse_inode(info, entry, page);
 		cond_resched();
 		if (found)
-			goto out;
+			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
-	/*
-	 * Can some race bring us here?  We've been holding page lock,
-	 * so I think not; but would rather try again later than BUG()
-	 */
+
+uncharge:
+	if (!found)
+		mem_cgroup_uncharge_cache_page(page);
+	if (found < 0)
+		error = found;
+out:
 	unlock_page(page);
 	page_cache_release(page);
-out:
-	return (found < 0) ? found : 0;
+	return error;
 }
 
 /*