Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5cffff9e authored by Wengang Wang's avatar Wengang Wang Committed by Sunil Mushran
Browse files

ocfs2: Fix ocfs2_page_mkwrite()



This patch address two shortcomings in ocfs2_page_mkwrite():
1. Makes the function return better VM_FAULT_* errors.
2. It handles a error that is triggered when a page is dropped from the mapping
due to memory pressure. This patch locks the page to prevent that.

[Patch was cleaned up by Sunil Mushran.]

Signed-off-by: default avatarWengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: default avatarSunil Mushran <sunil.mushran@oracle.com>
parent a035bff6
Loading
Loading
Loading
Loading
+43 −8
Original line number Diff line number Diff line
@@ -862,6 +862,12 @@ struct ocfs2_write_ctxt {
	struct page			*w_pages[OCFS2_MAX_CTXT_PAGES];
	struct page			*w_target_page;

	/*
	 * w_target_locked is used for page_mkwrite path indicating no unlocking
	 * against w_target_page in ocfs2_write_end_nolock.
	 */
	unsigned int			w_target_locked:1;

	/*
	 * ocfs2_write_end() uses this to know what the real range to
	 * write in the target should be.
@@ -895,6 +901,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)

static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
{
	int i;

	/*
	 * w_target_locked is only set to true in the page_mkwrite() case.
	 * The intent is to allow us to lock the target page from write_begin()
	 * to write_end(). The caller must hold a ref on w_target_page.
	 */
	if (wc->w_target_locked) {
		BUG_ON(!wc->w_target_page);
		for (i = 0; i < wc->w_num_pages; i++) {
			if (wc->w_target_page == wc->w_pages[i]) {
				wc->w_pages[i] = NULL;
				break;
			}
		}
		mark_page_accessed(wc->w_target_page);
		page_cache_release(wc->w_target_page);
	}
	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);

	brelse(wc->w_di_bh);
@@ -1132,20 +1156,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
			 */
			lock_page(mmap_page);

			/* Exit and let the caller retry */
			if (mmap_page->mapping != mapping) {
				WARN_ON(mmap_page->mapping);
				unlock_page(mmap_page);
				/*
				 * Sanity check - the locking in
				 * ocfs2_pagemkwrite() should ensure
				 * that this code doesn't trigger.
				 */
				ret = -EINVAL;
				mlog_errno(ret);
				ret = -EAGAIN;
				goto out;
			}

			page_cache_get(mmap_page);
			wc->w_pages[i] = mmap_page;
			wc->w_target_locked = true;
		} else {
			wc->w_pages[i] = find_or_create_page(mapping, index,
							     GFP_NOFS);
@@ -1160,6 +1181,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
			wc->w_target_page = wc->w_pages[i];
	}
out:
	if (ret)
		wc->w_target_locked = false;
	return ret;
}

@@ -1817,11 +1840,23 @@ int ocfs2_write_begin_nolock(struct file *filp,
	 */
	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
					 cluster_of_pages, mmap_page);
	if (ret) {
	if (ret && ret != -EAGAIN) {
		mlog_errno(ret);
		goto out_quota;
	}

	/*
	 * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
	 * the target page. In this case, we exit with no error and no target
	 * page. This will trigger the caller, page_mkwrite(), to re-try
	 * the operation.
	 */
	if (ret == -EAGAIN) {
		BUG_ON(wc->w_target_page);
		ret = 0;
		goto out_quota;
	}

	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
					  len);
	if (ret) {
+24 −29
Original line number Diff line number Diff line
@@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
				struct page *page)
{
	int ret;
	int ret = VM_FAULT_NOPAGE;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	loff_t pos = page_offset(page);
@@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
	void *fsdata;
	loff_t size = i_size_read(inode);

	/*
	 * Another node might have truncated while we were waiting on
	 * cluster locks.
	 * We don't check size == 0 before the shift. This is borrowed
	 * from do_generic_file_read.
	 */
	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
	if (unlikely(!size || page->index > last_index)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The i_size check above doesn't catch the case where nodes
	 * truncated and then re-extended the file. We'll re-check the
	 * page mapping after taking the page lock inside of
	 * ocfs2_write_begin_nolock().
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
		/*
		 * the page has been umapped in ocfs2_data_downconvert_worker.
		 * So return 0 here and let VFS retry.
	 * There are cases that lead to the page no longer bebongs to the
	 * mapping.
	 * 1) pagecache truncates locally due to memory pressure.
	 * 2) pagecache truncates when another is taking EX lock against 
	 * inode lock. see ocfs2_data_convert_worker.
	 * 
	 * The i_size check doesn't catch the case where nodes truncated and
	 * then re-extended the file. We'll re-check the page mapping after
	 * taking the page lock inside of ocfs2_write_begin_nolock().
	 *
	 * Let VM retry with these cases.
	 */
		ret = 0;
	if ((page->mapping != inode->i_mapping) ||
	    (!PageUptodate(page)) ||
	    (page_offset(page) >= size))
		goto out;
	}

	/*
	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
@@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		if (ret == -ENOMEM)
			ret = VM_FAULT_OOM;
		else
			ret = VM_FAULT_SIGBUS;
		goto out;
	}

	ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
				     fsdata);
	if (ret < 0) {
		mlog_errno(ret);
	if (!locked_page) {
		ret = VM_FAULT_NOPAGE;
		goto out;
	}
	ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
				     fsdata);
	BUG_ON(ret != len);
	ret = 0;
	ret = VM_FAULT_LOCKED;
out:
	return ret;
}
@@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)

out:
	ocfs2_unblock_signals(&oldset);
	if (ret)
		ret = VM_FAULT_SIGBUS;
	return ret;
}