Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 03891159 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull lazytime mount option support from Al Viro:
 "Lazytime stuff from tytso"

* 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ext4: add optimization for the lazytime mount option
  vfs: add find_inode_nowait() function
  vfs: add support for a lazytime mount option
parents 66dc830d a26f4992
Loading
Loading
Loading
Loading
+68 −2
Original line number Diff line number Diff line
@@ -4174,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
	return 0;
}

struct other_inode {
	unsigned long		orig_ino;
	struct ext4_inode	*raw_inode;
};

static int other_inode_match(struct inode * inode, unsigned long ino,
			     void *data)
{
	struct other_inode *oi = (struct other_inode *) data;

	if ((inode->i_ino != ino) ||
	    (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
			       I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
	    ((inode->i_state & I_DIRTY_TIME) == 0))
		return 0;
	spin_lock(&inode->i_lock);
	if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
				I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
	    (inode->i_state & I_DIRTY_TIME)) {
		struct ext4_inode_info	*ei = EXT4_I(inode);

		inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
		spin_unlock(&inode->i_lock);

		spin_lock(&ei->i_raw_lock);
		EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
		EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
		EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
		ext4_inode_csum_set(inode, oi->raw_inode, ei);
		spin_unlock(&ei->i_raw_lock);
		trace_ext4_other_inode_update_time(inode, oi->orig_ino);
		return -1;
	}
	spin_unlock(&inode->i_lock);
	return -1;
}

/*
 * Opportunistically update the other time fields for other inodes in
 * the same inode table block.
 */
static void ext4_update_other_inodes_time(struct super_block *sb,
					  unsigned long orig_ino, char *buf)
{
	struct other_inode oi;
	unsigned long ino;
	int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	int inode_size = EXT4_INODE_SIZE(sb);

	oi.orig_ino = orig_ino;
	ino = orig_ino & ~(inodes_per_block - 1);
	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
		if (ino == orig_ino)
			continue;
		oi.raw_inode = (struct ext4_inode *) buf;
		(void) find_inode_nowait(sb, ino, other_inode_match, &oi);
	}
}

/*
 * Post the struct inode info into an on-disk inode location in the
 * buffer-cache.  This gobbles the caller's reference to the
@@ -4283,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle,
				cpu_to_le16(ei->i_extra_isize);
		}
	}

	ext4_inode_csum_set(inode, raw_inode, ei);

	spin_unlock(&ei->i_raw_lock);
	if (inode->i_sb->s_flags & MS_LAZYTIME)
		ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
					      bh->b_data);

	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	rc = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -4875,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 * If the inode is marked synchronous, we don't honour that here - doing
 * so would cause a commit on atime updates, which we don't bother doing.
 * We handle synchronous inodes at the highest possible level.
 *
 * If only the I_DIRTY_TIME flag is set, we can skip everything.  If
 * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
 * to copy into the on-disk inode structure are the timestamp files.
 */
void ext4_dirty_inode(struct inode *inode, int flags)
{
	handle_t *handle;

	if (flags == I_DIRTY_TIME)
		return;
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
	if (IS_ERR(handle))
		goto out;
+10 −0
Original line number Diff line number Diff line
@@ -1126,6 +1126,7 @@ enum {
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
	Opt_lazytime, Opt_nolazytime,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
@@ -1190,6 +1191,8 @@ static const match_table_t tokens = {
	{Opt_dax, "dax"},
	{Opt_stripe, "stripe=%u"},
	{Opt_delalloc, "delalloc"},
	{Opt_lazytime, "lazytime"},
	{Opt_nolazytime, "nolazytime"},
	{Opt_nodelalloc, "nodelalloc"},
	{Opt_removed, "mblk_io_submit"},
	{Opt_removed, "nomblk_io_submit"},
@@ -1448,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
	case Opt_i_version:
		sb->s_flags |= MS_I_VERSION;
		return 1;
	case Opt_lazytime:
		sb->s_flags |= MS_LAZYTIME;
		return 1;
	case Opt_nolazytime:
		sb->s_flags &= ~MS_LAZYTIME;
		return 1;
	}

	for (m = ext4_mount_opts; m->token != Opt_err; m++)
@@ -5044,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
	}
#endif

	*flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
	kfree(orig_data);
	return 0;
+51 −11
Original line number Diff line number Diff line
@@ -253,14 +253,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
	return ret;
}

#define EXPIRE_DIRTY_ATIME 0x0001

/*
 * Move expired (dirtied before work->older_than_this) dirty inodes from
 * @delaying_queue to @dispatch_queue.
 */
static int move_expired_inodes(struct list_head *delaying_queue,
			       struct list_head *dispatch_queue,
			       int flags,
			       struct wb_writeback_work *work)
{
	unsigned long *older_than_this = NULL;
	unsigned long expire_time;
	LIST_HEAD(tmp);
	struct list_head *pos, *node;
	struct super_block *sb = NULL;
@@ -268,13 +273,21 @@ static int move_expired_inodes(struct list_head *delaying_queue,
	int do_sb_sort = 0;
	int moved = 0;

	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
		older_than_this = work->older_than_this;
	else if ((work->reason == WB_REASON_SYNC) == 0) {
		expire_time = jiffies - (HZ * 86400);
		older_than_this = &expire_time;
	}
	while (!list_empty(delaying_queue)) {
		inode = wb_inode(delaying_queue->prev);
		if (work->older_than_this &&
		    inode_dirtied_after(inode, *work->older_than_this))
		if (older_than_this &&
		    inode_dirtied_after(inode, *older_than_this))
			break;
		list_move(&inode->i_wb_list, &tmp);
		moved++;
		if (flags & EXPIRE_DIRTY_ATIME)
			set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
		if (sb_is_blkdev_sb(inode->i_sb))
			continue;
		if (sb && sb != inode->i_sb)
@@ -315,9 +328,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
	int moved;

	assert_spin_locked(&wb->list_lock);
	list_splice_init(&wb->b_more_io, &wb->b_io);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
	moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
				     EXPIRE_DIRTY_ATIME, work);
	trace_writeback_queue_io(wb, work, moved);
}

@@ -441,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
		 * updates after data IO completion.
		 */
		redirty_tail(inode, wb);
	} else if (inode->i_state & I_DIRTY_TIME) {
		list_move(&inode->i_wb_list, &wb->b_dirty_time);
	} else {
		/* The inode is clean. Remove from writeback lists. */
		list_del_init(&inode->i_wb_list);
@@ -487,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
	spin_lock(&inode->i_lock);

	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~I_DIRTY;
	if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
	     (inode->i_state & I_DIRTY_TIME)) ||
	    (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
		dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
		trace_writeback_lazytime(inode);
	}
	inode->i_state &= ~dirty;

	/*
	 * Paired with smp_mb() in __mark_inode_dirty().  This allows
@@ -507,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)

	spin_unlock(&inode->i_lock);

	if (dirty & I_DIRTY_TIME)
		mark_inode_dirty_sync(inode);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
	if (dirty & ~I_DIRTY_PAGES) {
		int err = write_inode(inode, wbc);
		if (ret == 0)
			ret = err;
@@ -556,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
	 * make sure inode is on some writeback list and leave it there unless
	 * we have completely cleaned the inode.
	 */
	if (!(inode->i_state & I_DIRTY) &&
	if (!(inode->i_state & I_DIRTY_ALL) &&
	    (wbc->sync_mode != WB_SYNC_ALL ||
	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
		goto out;
@@ -571,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
	 * If inode is clean, remove it from writeback lists. Otherwise don't
	 * touch it. See comment above for explanation.
	 */
	if (!(inode->i_state & I_DIRTY))
	if (!(inode->i_state & I_DIRTY_ALL))
		list_del_init(&inode->i_wb_list);
	spin_unlock(&wb->list_lock);
	inode_sync_complete(inode);
@@ -713,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb,
		wrote += write_chunk - wbc.nr_to_write;
		spin_lock(&wb->list_lock);
		spin_lock(&inode->i_lock);
		if (!(inode->i_state & I_DIRTY))
		if (!(inode->i_state & I_DIRTY_ALL))
			wrote++;
		requeue_inode(inode, wb, &wbc);
		inode_sync_complete(inode);
@@ -1151,16 +1177,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 * page->mapping->host, so the page-dirtying time is recorded in the internal
 * blockdev inode.
 */
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
void __mark_inode_dirty(struct inode *inode, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct backing_dev_info *bdi = NULL;
	int dirtytime;

	trace_writeback_mark_inode_dirty(inode, flags);

	/*
	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
	 * dirty the inode itself
	 */
	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
		trace_writeback_dirty_inode_start(inode, flags);

		if (sb->s_op->dirty_inode)
@@ -1168,6 +1198,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)

		trace_writeback_dirty_inode(inode, flags);
	}
	if (flags & I_DIRTY_INODE)
		flags &= ~I_DIRTY_TIME;
	dirtytime = flags & I_DIRTY_TIME;

	/*
	 * Paired with smp_mb() in __writeback_single_inode() for the
@@ -1175,16 +1208,21 @@ void __mark_inode_dirty(struct inode *inode, int flags)
	 */
	smp_mb();

	if ((inode->i_state & flags) == flags)
	if (((inode->i_state & flags) == flags) ||
	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
		return;

	if (unlikely(block_dump))
		block_dump___mark_inode_dirty(inode);

	spin_lock(&inode->i_lock);
	if (dirtytime && (inode->i_state & I_DIRTY_INODE))
		goto out_unlock_inode;
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		if (flags & I_DIRTY_INODE)
			inode->i_state &= ~I_DIRTY_TIME;
		inode->i_state |= flags;

		/*
@@ -1231,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
			}

			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			list_move(&inode->i_wb_list, dirtytime ?
				  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
			spin_unlock(&bdi->wb.list_lock);
			trace_writeback_dirty_inode_enqueue(inode);

			if (wakeup_bdi)
				bdi_wakeup_thread_delayed(bdi);
+2 −2
Original line number Diff line number Diff line
@@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	int sync_state = inode->i_state & I_DIRTY;
	int sync_state = inode->i_state & I_DIRTY_ALL;
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret = 0, ret1 = 0;

@@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
	if (datasync)
		sync_state &= ~I_DIRTY_SYNC;
		sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);

	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
+90 −16
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <trace/events/writeback.h>
#include "internal.h"

/*
@@ -30,7 +31,7 @@
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
@@ -403,7 +404,8 @@ static void inode_lru_list_add(struct inode *inode)
 */
void inode_add_lru(struct inode *inode)
{
	if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
	if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
				I_FREEING | I_WILL_FREE)) &&
	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
		inode_lru_list_add(inode);
}
@@ -634,7 +636,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & I_DIRTY && !kill_dirty) {
		if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
			spin_unlock(&inode->i_lock);
			busy = 1;
			continue;
@@ -1268,6 +1270,56 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
}
EXPORT_SYMBOL(ilookup);

/**
 * find_inode_nowait - find an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @match:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @match
 *
 * Search for the inode specified by @hashval and @data in the inode
 * cache, where the helper function @match will return 0 if the inode
 * does not match, 1 if the inode does match, and -1 if the search
 * should be stopped.  The @match function must be responsible for
 * taking the i_lock spin_lock and checking i_state for an inode being
 * freed or being initialized, and incrementing the reference count
 * before returning 1.  It also must not sleep, since it is called with
 * the inode_hash_lock spinlock held.
 *
 * This is a even more generalized version of ilookup5() when the
 * function must never block --- find_inode() can block in
 * __wait_on_freeing_inode() --- or when the caller can not increment
 * the reference count because the resulting iput() might cause an
 * inode eviction.  The tradeoff is that the @match funtion must be
 * very carefully implemented.
 */
struct inode *find_inode_nowait(struct super_block *sb,
				unsigned long hashval,
				int (*match)(struct inode *, unsigned long,
					     void *),
				void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode, *ret_inode = NULL;
	int mval;

	spin_lock(&inode_hash_lock);
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		mval = match(inode, hashval, data);
		if (mval == 0)
			continue;
		if (mval == 1)
			ret_inode = inode;
		goto out;
	}
out:
	spin_unlock(&inode_hash_lock);
	return ret_inode;
}
EXPORT_SYMBOL(find_inode_nowait);

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
@@ -1418,10 +1470,19 @@ static void iput_final(struct inode *inode)
 */
void iput(struct inode *inode)
{
	if (inode) {
	if (!inode)
		return;
	BUG_ON(inode->i_state & I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
retry:
	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
			atomic_inc(&inode->i_count);
			inode->i_state &= ~I_DIRTY_TIME;
			spin_unlock(&inode->i_lock);
			trace_writeback_lazytime_iput(inode);
			mark_inode_dirty_sync(inode);
			goto retry;
		}
		iput_final(inode);
	}
}
@@ -1481,14 +1542,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
	return 0;
}

/*
 * This does the actual work of updating an inodes time or version.  Must have
 * had called mnt_want_write() before calling this.
 */
static int update_time(struct inode *inode, struct timespec *time, int flags)
int generic_update_time(struct inode *inode, struct timespec *time, int flags)
{
	if (inode->i_op->update_time)
		return inode->i_op->update_time(inode, time, flags);
	int iflags = I_DIRTY_TIME;

	if (flags & S_ATIME)
		inode->i_atime = *time;
@@ -1498,9 +1554,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
		inode->i_ctime = *time;
	if (flags & S_MTIME)
		inode->i_mtime = *time;
	mark_inode_dirty_sync(inode);

	if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
		iflags |= I_DIRTY_SYNC;
	__mark_inode_dirty(inode, iflags);
	return 0;
}
EXPORT_SYMBOL(generic_update_time);

/*
 * This does the actual work of updating an inodes time or version.  Must have
 * had called mnt_want_write() before calling this.
 */
static int update_time(struct inode *inode, struct timespec *time, int flags)
{
	int (*update_time)(struct inode *, struct timespec *, int);

	update_time = inode->i_op->update_time ? inode->i_op->update_time :
		generic_update_time;

	return update_time(inode, time, flags);
}

/**
 *	touch_atime	-	update the access time
Loading