Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a2f48706 authored by Theodore Ts'o's avatar Theodore Ts'o
Browse files

fs: make sure the timestamps for lazytime inodes eventually get written



Jan Kara pointed out that if there is an inode which is constantly
getting dirtied with I_DIRTY_PAGES, an inode with an updated timestamp
will never be written since inode->dirtied_when is constantly getting
updated.  We fix this by adding an extra field to the inode,
dirtied_time_when, so inodes with a stale dirtytime can get detected
and handled.

In addition, if we have a dirtytime inode caused by an atime update,
and there is no write activity on the file system, we need to have a
secondary system to make sure these inodes get written out.  We do
this by setting up a second delayed work structure which wakes up the
CPU much more rarely compared to writeback_expire_centisecs.

Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
parent 13a7a6ac
Loading
Loading
Loading
Loading
+72 −10
Original line number Diff line number Diff line
@@ -53,6 +53,18 @@ struct wb_writeback_work {
	struct completion *done;	/* set if the caller waits */
};

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not their timestamps updated for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,

	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
		older_than_this = work->older_than_this;
	else if ((work->reason == WB_REASON_SYNC) == 0) {
		expire_time = jiffies - (HZ * 86400);
	else if (!work->for_sync) {
		expire_time = jiffies - (dirtytime_expire_interval * HZ);
		older_than_this = &expire_time;
	}
	while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
		 */
		redirty_tail(inode, wb);
	} else if (inode->i_state & I_DIRTY_TIME) {
		inode->dirtied_when = jiffies;
		list_move(&inode->i_wb_list, &wb->b_dirty_time);
	} else {
		/* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
	spin_lock(&inode->i_lock);

	dirty = inode->i_state & I_DIRTY;
	if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
	     (inode->i_state & I_DIRTY_TIME)) ||
	    (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
	if (inode->i_state & I_DIRTY_TIME) {
		if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
		    unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
		    unlikely(time_after(jiffies,
					(inode->dirtied_time_when +
					 dirtytime_expire_interval * HZ)))) {
			dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
			trace_writeback_lazytime(inode);
		}
	} else
		inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
	inode->i_state &= ~dirty;

	/*
@@ -1131,6 +1149,45 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
	rcu_read_unlock();
}

/*
 * Wake up bdi's periodically to make sure dirtytime inodes gets
 * written back periodically.  We deliberately do *not* check the
 * b_dirtytime list in wb_has_dirty_io(), since this would cause the
 * kernel to be constantly waking up once there are any dirtytime
 * inodes on the system.  So instead we define a separate delayed work
 * function which gets called much more rarely.  (By default, only
 * once every 12 hours.)
 *
 * If there is any other write activity going on in the file system,
 * this function won't be necessary.  But if the only thing that has
 * happened on the file system is a dirtytime inode caused by an atime
 * update, we need this infrastructure below to make sure that inode
 * eventually gets pushed out to disk.
 */
static void wakeup_dirtytime_writeback(struct work_struct *w);
static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);

static void wakeup_dirtytime_writeback(struct work_struct *w)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();
	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
		if (list_empty(&bdi->wb.b_dirty_time))
			continue;
		bdi_wakeup_thread(bdi);
	}
	rcu_read_unlock();
	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
}

static int __init start_dirtytime_writeback(void)
{
	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
	return 0;
}
__initcall(start_dirtytime_writeback);

static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1326,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
			}

			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, dirtytime ?
				  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
			if (dirtytime)
				inode->dirtied_time_when = jiffies;
			if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
				list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			else
				list_move(&inode->i_wb_list,
					  &bdi->wb.b_dirty_time);
			spin_unlock(&bdi->wb.list_lock);
			trace_writeback_dirty_inode_enqueue(inode);

+1 −0
Original line number Diff line number Diff line
@@ -604,6 +604,7 @@ struct inode {
	struct mutex		i_mutex;

	unsigned long		dirtied_when;	/* jiffies of first dirtying */
	unsigned long		dirtied_time_when;

	struct hlist_node	i_hash;
	struct list_head	i_wb_list;	/* backing dev IO list */