Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 90e775b7, authored by Jan Kara, committed by Theodore Ts'o
Browse files

ext4: fix lost truncate due to race with writeback



The following race can lead to the loss of an i_disksize update from
truncate, resulting in a wrong inode size if the inode size isn't updated
again before the inode is reclaimed:

ext4_setattr()				mpage_map_and_submit_extent()
  EXT4_I(inode)->i_disksize = attr->ia_size;
  ...					  ...
					  disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT
					  /* False because i_size isn't
					   * updated yet */
					  if (disksize > i_size_read(inode))
					  /* True, because i_disksize is
					   * already truncated */
					  if (disksize > EXT4_I(inode)->i_disksize)
					    /* Overwrite i_disksize
					     * update from truncate */
					    ext4_update_i_disksize()
  i_size_write(inode, attr->ia_size);

For other places updating i_disksize such race cannot happen because
i_mutex prevents these races. Writeback is the only place where we do
not hold i_mutex and we cannot grab it there because of lock ordering.

We fix the race by doing both i_disksize and i_size update in truncate
atomically under i_data_sem and in mpage_map_and_submit_extent() we move
the check against i_size under i_data_sem as well.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
parent 5208386c
Loading
Loading
Loading
Loading
+20 −4
Original line number Diff line number Diff line
@@ -2432,16 +2432,32 @@ do { \
#define EXT4_FREECLUSTERS_WATERMARK 0
#endif

/*
 * Raise i_disksize to @newsize when @newsize is the larger of the two; a
 * smaller or equal @newsize leaves i_disksize untouched. The comparison and
 * the store both happen with i_data_sem held for writing.
 *
 * Callers must hold i_mutex so that the update cannot race with truncate;
 * for regular files a one-time warning is emitted if the mutex is not held.
 */
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
	/* Complain (once) about regular-file callers that skipped i_mutex. */
	WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
		     !mutex_is_locked(&inode->i_mutex));

	down_write(&EXT4_I(inode)->i_data_sem);
	if (EXT4_I(inode)->i_disksize < newsize)
		EXT4_I(inode)->i_disksize = newsize;
	up_write(&EXT4_I(inode)->i_data_sem);
}

/*
 * Writeback-side update of i_disksize, used after writeback has been
 * started. @newsize is first clamped to the current i_size, and i_disksize
 * is then only ever raised, never lowered. Both steps run with i_data_sem
 * held for writing; checking i_size under that semaphore is what avoids
 * races with truncate.
 *
 * XXX: replace with spinlock if seen contended -bzzz
 */
static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
{
	loff_t cur_size;

	down_write(&EXT4_I(inode)->i_data_sem);

	/* Never let the on-disk size run ahead of the in-core size. */
	cur_size = i_size_read(inode);
	if (cur_size < newsize)
		newsize = cur_size;

	if (EXT4_I(inode)->i_disksize < newsize)
		EXT4_I(inode)->i_disksize = newsize;

	up_write(&EXT4_I(inode)->i_data_sem);
}

struct ext4_group_info {
+12 −5
Original line number Diff line number Diff line
@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,

	/* Update on-disk size after IO is submitted */
	disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
	if (disksize > i_size_read(inode))
		disksize = i_size_read(inode);
	if (disksize > EXT4_I(inode)->i_disksize) {
		int err2;

		ext4_update_i_disksize(inode, disksize);
		ext4_wb_update_i_disksize(inode, disksize);
		err2 = ext4_mark_inode_dirty(handle, inode);
		if (err2)
			ext4_error(inode->i_sb,
@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
				error = ext4_orphan_add(handle, inode);
				orphan = 1;
			}
			down_write(&EXT4_I(inode)->i_data_sem);
			EXT4_I(inode)->i_disksize = attr->ia_size;
			rc = ext4_mark_inode_dirty(handle, inode);
			if (!error)
				error = rc;
			/*
			 * We have to update i_size under i_data_sem together
			 * with i_disksize to avoid races with writeback code
			 * running ext4_wb_update_i_disksize().
			 */
			if (!error)
				i_size_write(inode, attr->ia_size);
			up_write(&EXT4_I(inode)->i_data_sem);
			ext4_journal_stop(handle);
			if (error) {
				ext4_orphan_del(NULL, inode);
				goto err_out;
			}
		}

		} else
			i_size_write(inode, attr->ia_size);

		/*
		 * Blocks are going to be removed from the inode. Wait
		 * for dio in flight.  Temporarily disable