Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fe8bc91c authored by Jan Kara's avatar Jan Kara
Browse files

ext3: Wait for proper transaction commit on fsync



We cannot rely on buffer dirty bits during fsync because pdflush can come
before fsync is called and clear dirty bits without forcing a transaction
commit. What we do is that we track which transaction has last changed
the inode and which transaction last changed allocation and force it to
disk on fsync.

Signed-off-by: default avatarJan Kara <jack@suse.cz>
Reviewed-by: default avatarAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
parent ea0174a7
Loading
Loading
Loading
Loading
+16 −20
Original line number Diff line number Diff line
@@ -46,19 +46,21 @@
int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;
	struct ext3_inode_info *ei = EXT3_I(inode);
	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
	int ret = 0;
	tid_t commit_tid;

	if (inode->i_sb->s_flags & MS_RDONLY)
		return 0;

	J_ASSERT(ext3_journal_current_handle() == NULL);

	/*
	 * data=writeback:
	 * data=writeback,ordered:
	 *  The caller's filemap_fdatawrite()/wait will sync the data.
	 *  sync_inode() will sync the metadata
	 *
	 * data=ordered:
	 *  The caller's filemap_fdatawrite() will write the data and
	 *  sync_inode() will write the inode if it is dirty.  Then the caller's
	 *  filemap_fdatawait() will wait on the pages.
	 *  Metadata is in the journal, we wait for a proper transaction
	 *  to commit here.
	 *
	 * data=journal:
	 *  filemap_fdatawrite won't do anything (the buffers are clean).
@@ -73,22 +75,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
		goto out;
	}

	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
		goto flush;
	if (datasync)
		commit_tid = atomic_read(&ei->i_datasync_tid);
	else
		commit_tid = atomic_read(&ei->i_sync_tid);

	/*
	 * The VFS has written the file data.  If the inode is unaltered
	 * then we need not start a commit.
	 */
	if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 0, /* sys_fsync did this */
		};
		ret = sync_inode(inode, &wbc);
	if (log_start_commit(journal, commit_tid)) {
		log_wait_commit(journal, commit_tid);
		goto out;
	}
flush:

	/*
	 * In case we didn't commit a transaction, we have to flush
	 * disk caches manually so that data really is on persistent
+31 −1
Original line number Diff line number Diff line
@@ -699,8 +699,9 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
	int err = 0;
	struct ext3_block_alloc_info *block_i;
	ext3_fsblk_t current_block;
	struct ext3_inode_info *ei = EXT3_I(inode);

	block_i = EXT3_I(inode)->i_block_alloc_info;
	block_i = ei->i_block_alloc_info;
	/*
	 * If we're splicing into a [td]indirect block (as opposed to the
	 * inode) then we need to get write access to the [td]indirect block
@@ -741,6 +742,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,

	inode->i_ctime = CURRENT_TIME_SEC;
	ext3_mark_inode_dirty(handle, inode);
	/* ext3_mark_inode_dirty already updated i_sync_tid */
	atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);

	/* had we spliced it onto indirect block? */
	if (where->bh) {
@@ -2754,6 +2757,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
	struct ext3_inode_info *ei;
	struct buffer_head *bh;
	struct inode *inode;
	journal_t *journal = EXT3_SB(sb)->s_journal;
	transaction_t *transaction;
	long ret;
	int block;

@@ -2831,6 +2836,30 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
		ei->i_data[block] = raw_inode->i_block[block];
	INIT_LIST_HEAD(&ei->i_orphan);

	/*
	 * Set transaction id's of transactions that have to be committed
	 * to finish f[data]sync. We set them to currently running transaction
	 * as we cannot be sure that the inode or some of its metadata isn't
	 * part of the transaction - the inode could have been reclaimed and
	 * now it is reread from disk.
	 */
	if (journal) {
		tid_t tid;

		spin_lock(&journal->j_state_lock);
		if (journal->j_running_transaction)
			transaction = journal->j_running_transaction;
		else
			transaction = journal->j_committing_transaction;
		if (transaction)
			tid = transaction->t_tid;
		else
			tid = journal->j_commit_sequence;
		spin_unlock(&journal->j_state_lock);
		atomic_set(&ei->i_sync_tid, tid);
		atomic_set(&ei->i_datasync_tid, tid);
	}

	if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
	    EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
		/*
@@ -3015,6 +3044,7 @@ static int ext3_do_update_inode(handle_t *handle,
		err = rc;
	ei->i_state &= ~EXT3_STATE_NEW;

	atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
out_brelse:
	brelse (bh);
	ext3_std_error(inode->i_sb, err);
+2 −0
Original line number Diff line number Diff line
@@ -466,6 +466,8 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
		return NULL;
	ei->i_block_alloc_info = NULL;
	ei->vfs_inode.i_version = 1;
	atomic_set(&ei->i_datasync_tid, 0);
	atomic_set(&ei->i_sync_tid, 0);
	return &ei->vfs_inode;
}

+8 −0
Original line number Diff line number Diff line
@@ -137,6 +137,14 @@ struct ext3_inode_info {
	 * by other means, so we have truncate_mutex.
	 */
	struct mutex truncate_mutex;

	/*
	 * Transactions that contain inode's metadata needed to complete
	 * fsync and fdatasync, respectively.
	 */
	atomic_t i_sync_tid;
	atomic_t i_datasync_tid;

	struct inode vfs_inode;
};