Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9ea7df53 authored by Jan Kara, committed by Al Viro
Browse files

ext4: Rewrite ext4_page_mkwrite() to use generic helpers



Rewrite ext4_page_mkwrite() to use the __block_page_mkwrite() helper. This
removes the need to use i_alloc_sem to avoid races with truncate, which
seems to be the wrong locking order according to the lock ordering documented in
mm/rmap.c. Also, calling ext4_da_write_begin() as the old code did seems to
be problematic, because we can decide to flush delay-allocated blocks, which
will acquire the s_umount semaphore — again creating an unpleasant lock
dependency, if not directly a deadlock.

Also add a check for a frozen filesystem so that we don't busy-loop in the
page fault path when the filesystem is frozen.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 58268691
Loading
Loading
Loading
Loading
+55 −51
Original line number Original line Diff line number Diff line
@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	struct page *page = vmf->page;
	struct page *page = vmf->page;
	loff_t size;
	loff_t size;
	unsigned long len;
	unsigned long len;
	int ret = -EINVAL;
	int ret;
	void *fsdata;
	struct file *file = vma->vm_file;
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	struct address_space *mapping = inode->i_mapping;
	handle_t *handle;
	get_block_t *get_block;
	int retries = 0;


	/*
	/*
	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
	 * This check is racy but catches the common case. We rely on
	 * get i_mutex because we are already holding mmap_sem.
	 * __block_page_mkwrite() to do a reliable check.
	 */
	 */
	down_read(&inode->i_alloc_sem);
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	size = i_size_read(inode);
	/* Delalloc case is easy... */
	if (page->mapping != mapping || size <= page_offset(page)
	if (test_opt(inode->i_sb, DELALLOC) &&
	    || !PageUptodate(page)) {
	    !ext4_should_journal_data(inode) &&
		/* page got truncated from under us? */
	    !ext4_nonda_switch(inode->i_sb)) {
		goto out_unlock;
		do {
			ret = __block_page_mkwrite(vma, vmf,
						   ext4_da_get_block_prep);
		} while (ret == -ENOSPC &&
		       ext4_should_retry_alloc(inode->i_sb, &retries));
		goto out_ret;
	}
	}
	ret = 0;


	lock_page(page);
	lock_page(page);
	wait_on_page_writeback(page);
	size = i_size_read(inode);
	if (PageMappedToDisk(page)) {
	/* Page got truncated from under us? */
		up_read(&inode->i_alloc_sem);
	if (page->mapping != mapping || page_offset(page) > size) {
		return VM_FAULT_LOCKED;
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}
	}


	if (page->index == size >> PAGE_CACHE_SHIFT)
	if (page->index == size >> PAGE_CACHE_SHIFT)
		len = size & ~PAGE_CACHE_MASK;
		len = size & ~PAGE_CACHE_MASK;
	else
	else
		len = PAGE_CACHE_SIZE;
		len = PAGE_CACHE_SIZE;

	/*
	/*
	 * return if we have all the buffers mapped. This avoid
	 * Return if we have all the buffers mapped. This avoids the need to do
	 * the need to call write_begin/write_end which does a
	 * journal_start/journal_stop which can block and take a long time
	 * journal_start/journal_stop which can block and take
	 * long time
	 */
	 */
	if (page_has_buffers(page)) {
	if (page_has_buffers(page)) {
		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
					ext4_bh_unmapped)) {
					ext4_bh_unmapped)) {
			up_read(&inode->i_alloc_sem);
			/* Wait so that we don't change page under IO */
			return VM_FAULT_LOCKED;
			wait_on_page_writeback(page);
			ret = VM_FAULT_LOCKED;
			goto out;
		}
		}
	}
	}
	unlock_page(page);
	unlock_page(page);
	/*
	/* OK, we need to fill the hole... */
	 * OK, we need to fill the hole... Do write_begin write_end
	if (ext4_should_dioread_nolock(inode))
	 * to do block allocation/reservation.We are not holding
		get_block = ext4_get_block_write;
	 * inode.i__mutex here. That allow * parallel write_begin,
	else
	 * write_end call. lock_page prevent this from happening
		get_block = ext4_get_block;
	 * on the same page though
retry_alloc:
	 */
	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
	if (IS_ERR(handle)) {
			len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
		ret = VM_FAULT_SIGBUS;
	if (ret < 0)
		goto out;
		goto out_unlock;
	}
	ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
	ret = __block_page_mkwrite(vma, vmf, get_block);
			len, len, page, fsdata);
	if (!ret && ext4_should_journal_data(inode)) {
	if (ret < 0)
		if (walk_page_buffers(handle, page_buffers(page), 0,
		goto out_unlock;
			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
	ret = 0;
			unlock_page(page);

	/*
	 * write_begin/end might have created a dirty page and someone
	 * could wander in and start the IO.  Make sure that hasn't
	 * happened.
	 */
	lock_page(page);
	wait_on_page_writeback(page);
	up_read(&inode->i_alloc_sem);
	return VM_FAULT_LOCKED;
out_unlock:
	if (ret)
			ret = VM_FAULT_SIGBUS;
			ret = VM_FAULT_SIGBUS;
	up_read(&inode->i_alloc_sem);
			goto out;
		}
		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	}
	ext4_journal_stop(handle);
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry_alloc;
out_ret:
	ret = block_page_mkwrite_return(ret);
out:
	return ret;
	return ret;
}
}