
Commit 6d7f18f6 authored by Linus Torvalds

Merge branch 'writeback' of git://git.kernel.dk/linux-2.6-block

* 'writeback' of git://git.kernel.dk/linux-2.6-block:
  writeback: writeback_inodes_sb() should use bdi_start_writeback()
  writeback: don't delay inodes redirtied by a fast dirtier
  writeback: make the super_block pinning more efficient
  writeback: don't resort for a single super_block in move_expired_inodes()
  writeback: move inodes from one super_block together
  writeback: get rid of incorrect references to pdflush in comments
  writeback: improve readability of the wb_writeback() continue/break logic
  writeback: cleanup writeback_single_inode()
  writeback: kupdate writeback shall not stop when more io is possible
  writeback: stop background writeback when below background threshold
  writeback: balance_dirty_pages() shall write more than dirtied pages
  fs: Fix busyloop in wb_writeback()
parents 53cddfcc 56a131dc
fs/buffer.c +5 −5
@@ -280,7 +280,7 @@ void invalidate_bdev(struct block_device *bdev)
 EXPORT_SYMBOL(invalidate_bdev);
 
 /*
- * Kick pdflush then try to free up some ZONE_NORMAL memory.
+ * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
  */
 static void free_more_memory(void)
 {
@@ -1709,9 +1709,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		/*
 		 * If it's a fully non-blocking write attempt and we cannot
 		 * lock the buffer then redirty the page.  Note that this can
-		 * potentially cause a busy-wait loop from pdflush and kswapd
-		 * activity, but those code paths have their own higher-level
-		 * throttling.
+		 * potentially cause a busy-wait loop from writeback threads
+		 * and kswapd activity, but those code paths have their own
+		 * higher-level throttling.
 		 */
 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
 			lock_buffer(bh);
@@ -3208,7 +3208,7 @@ EXPORT_SYMBOL(block_sync_page);
  * still running obsolete flush daemons, so we terminate them here.
  *
  * Use of bdflush() is deprecated and will be removed in a future kernel.
- * The `pdflush' kernel threads fully replace bdflush daemons and this call.
+ * The `flush-X' kernel threads fully replace bdflush daemons and this call.
  */
 SYSCALL_DEFINE2(bdflush, int, func, long, data)
 {
fs/fs-writeback.c +113 −48
@@ -41,8 +41,9 @@ struct wb_writeback_args {
 	long nr_pages;
 	struct super_block *sb;
 	enum writeback_sync_modes sync_mode;
-	int for_kupdate;
-	int range_cyclic;
+	int for_kupdate:1;
+	int range_cyclic:1;
+	int for_background:1;
 };
 
 /*
@@ -257,6 +258,15 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 		.range_cyclic	= 1,
 	};
 
+	/*
+	 * We treat @nr_pages=0 as the special case to do background writeback,
+	 * ie. to sync pages until the background dirty threshold is reached.
+	 */
+	if (!nr_pages) {
+		args.nr_pages = LONG_MAX;
+		args.for_background = 1;
+	}
+
 	bdi_alloc_queue_work(bdi, &args);
 }
 
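The special case above means bdi_start_writeback() now serves two purposes from one entry point. A minimal sketch of two hypothetical call sites (illustrative only, not from this patch):

	bdi_start_writeback(bdi, 0);	/* background writeback: nr_pages becomes
					 * LONG_MAX and for_background is set, so
					 * flushing runs until the bdi drops below
					 * the background dirty threshold */

	bdi_start_writeback(bdi, 1024);	/* sized writeback: write out up to
					 * 1024 pages, then stop */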
@@ -310,7 +320,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 	 * For inodes being constantly redirtied, dirtied_when can get stuck.
 	 * It _appears_ to be in the future, but is actually in distant past.
 	 * This test is necessary to prevent such wrapped-around relative times
-	 * from permanently stopping the whole pdflush writeback.
+	 * from permanently stopping the whole bdi writeback.
 	 */
 	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
 #endif
@@ -324,15 +334,40 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 			       struct list_head *dispatch_queue,
 				unsigned long *older_than_this)
 {
+	LIST_HEAD(tmp);
+	struct list_head *pos, *node;
+	struct super_block *sb = NULL;
+	struct inode *inode;
+	int do_sb_sort = 0;
+
 	while (!list_empty(delaying_queue)) {
-		struct inode *inode = list_entry(delaying_queue->prev,
-						struct inode, i_list);
+		inode = list_entry(delaying_queue->prev, struct inode, i_list);
 		if (older_than_this &&
 		    inode_dirtied_after(inode, *older_than_this))
 			break;
-		list_move(&inode->i_list, dispatch_queue);
+		if (sb && sb != inode->i_sb)
+			do_sb_sort = 1;
+		sb = inode->i_sb;
+		list_move(&inode->i_list, &tmp);
+	}
+
+	/* just one sb in list, splice to dispatch_queue and we're done */
+	if (!do_sb_sort) {
+		list_splice(&tmp, dispatch_queue);
+		return;
+	}
+
+	/* Move inodes from one superblock together */
+	while (!list_empty(&tmp)) {
+		inode = list_entry(tmp.prev, struct inode, i_list);
+		sb = inode->i_sb;
+		list_for_each_prev_safe(pos, node, &tmp) {
+			inode = list_entry(pos, struct inode, i_list);
+			if (inode->i_sb == sb)
+				list_move(&inode->i_list, dispatch_queue);
+		}
 	}
 }
 
 /*
  * Queue all expired dirty inodes for io, eldest first.
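The rewritten move_expired_inodes() avoids a real sort: expired inodes are first drained onto a temporary list, and only when more than one super_block was seen does a second pass peel off all inodes of one sb at a time. A stand-alone user-space sketch of the same grouping idea, using an array of hypothetical (sb, ino) pairs in place of the kernel's list_heads (the kernel walks tail-first; this sketch walks head-first):

	#include <stdio.h>

	struct ent { int sb; int ino; };	/* stand-ins for inode->i_sb / inode */

	/* Group entries by sb, preserving relative order, without sorting. */
	static void group_by_sb(const struct ent *in, int n, struct ent *out)
	{
		int done[16] = {0};		/* which input slots were consumed */
		int k = 0;

		for (int i = 0; i < n; i++) {
			if (done[i])
				continue;
			int sb = in[i].sb;	/* next ungrouped sb */
			for (int j = i; j < n; j++)	/* sweep: take all on that sb */
				if (!done[j] && in[j].sb == sb) {
					out[k++] = in[j];
					done[j] = 1;
				}
		}
	}

	int main(void)
	{
		struct ent q[] = { {1,10}, {2,20}, {1,11}, {2,21}, {1,12} };
		struct ent o[5];

		group_by_sb(q, 5, o);
		for (int i = 0; i < 5; i++)	/* prints sb = 1,1,1,2,2: grouped */
			printf("sb=%d ino=%d\n", o[i].sb, o[i].ino);
		return 0;
	}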
@@ -439,8 +474,18 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-		if (!(inode->i_state & I_DIRTY) &&
-		    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
+			/*
+			 * More pages get dirtied by a fast dirtier.
+			 */
+			goto select_queue;
+		} else if (inode->i_state & I_DIRTY) {
+			/*
+			 * At least XFS will redirty the inode during the
+			 * writeback (delalloc) and on io completion (isize).
+			 */
+			redirty_tail(inode);
+		} else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 			/*
 			 * We didn't write back all the pages.  nfs_writepages()
 			 * sometimes bales out without doing anything. Redirty
@@ -462,6 +507,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 				 * soon as the queue becomes uncongested.
 				 */
 				inode->i_state |= I_DIRTY_PAGES;
+select_queue:
 				if (wbc->nr_to_write <= 0) {
 					/*
 					 * slice used up: queue for next turn
@@ -484,12 +530,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 				inode->i_state |= I_DIRTY_PAGES;
 				redirty_tail(inode);
 			}
-		} else if (inode->i_state & I_DIRTY) {
-			/*
-			 * Someone redirtied the inode while were writing back
-			 * the pages.
-			 */
-			redirty_tail(inode);
 		} else if (atomic_read(&inode->i_count)) {
 			/*
 			 * The inode is clean, inuse
@@ -506,6 +546,17 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
+static void unpin_sb_for_writeback(struct super_block **psb)
+{
+	struct super_block *sb = *psb;
+
+	if (sb) {
+		up_read(&sb->s_umount);
+		put_super(sb);
+		*psb = NULL;
+	}
+}
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
@@ -515,10 +566,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * 1 if we failed.
  */
 static int pin_sb_for_writeback(struct writeback_control *wbc,
-				   struct inode *inode)
+				struct inode *inode, struct super_block **psb)
 {
 	struct super_block *sb = inode->i_sb;
 
+	/*
+	 * If this sb is already pinned, nothing more to do. If not and
+	 * *psb is non-NULL, unpin the old one first
+	 */
+	if (sb == *psb)
+		return 0;
+	else if (*psb)
+		unpin_sb_for_writeback(psb);
+
 	/*
 	 * Caller must already hold the ref for this
 	 */
@@ -532,7 +592,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			return 0;
+			goto pinned;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
@@ -543,24 +603,15 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
 	sb->s_count--;
 	spin_unlock(&sb_lock);
 	return 1;
-}
-
-static void unpin_sb_for_writeback(struct writeback_control *wbc,
-				   struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		return;
 
-	up_read(&sb->s_umount);
-	put_super(sb);
+pinned:
+	*psb = sb;
+	return 0;
 }
 
 static void writeback_inodes_wb(struct bdi_writeback *wb,
 				struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb;
+	struct super_block *sb = wbc->sb, *pin_sb = NULL;
 	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
 	const unsigned long start = jiffies;	/* livelock avoidance */

@@ -619,7 +670,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, start))
 			break;
 
-		if (pin_sb_for_writeback(wbc, inode)) {
+		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
 			requeue_io(inode);
 			continue;
 		}
@@ -628,7 +679,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
-		unpin_sb_for_writeback(wbc, inode);
 		if (wbc->pages_skipped != pages_skipped) {
 			/*
 			 * writeback is not making progress due to locked
@@ -648,6 +698,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 			wbc->more_io = 1;
 	}
 
+	unpin_sb_for_writeback(&pin_sb);
+
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
@@ -706,6 +758,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
+	struct inode *inode;
 
 	if (wbc.for_kupdate) {
 		wbc.older_than_this = &oldest_jif;
@@ -719,20 +772,16 @@ static long wb_writeback(struct bdi_writeback *wb,

 	for (;;) {
 		/*
-		 * Don't flush anything for non-integrity writeback where
-		 * no nr_pages was given
+		 * Stop writeback when nr_pages has been consumed
 		 */
-		if (!args->for_kupdate && args->nr_pages <= 0 &&
-		     args->sync_mode == WB_SYNC_NONE)
+		if (args->nr_pages <= 0)
 			break;
 
 		/*
-		 * If no specific pages were given and this is just a
-		 * periodic background writeout and we are below the
-		 * background dirty threshold, don't do anything
+		 * For background writeout, stop when we are below the
+		 * background dirty threshold
 		 */
-		if (args->for_kupdate && args->nr_pages <= 0 &&
-		    !over_bground_thresh())
+		if (args->for_background && !over_bground_thresh())
 			break;
 
 		wbc.more_io = 0;
@@ -744,13 +793,32 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
 		/*
-		 * If we ran out of stuff to write, bail unless more_io got set
+		 * If we consumed everything, see if we have more
 		 */
-		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			if (wbc.more_io && !wbc.for_kupdate)
-				continue;
-			break;
-		}
+		if (wbc.nr_to_write <= 0)
+			continue;
+		/*
+		 * Didn't write everything and we don't have more IO, bail
+		 */
+		if (!wbc.more_io)
+			break;
+		/*
+		 * Did we write something? Try for more
+		 */
+		if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+			continue;
+		/*
+		 * Nothing written. Wait for some inode to
+		 * become available for writeback. Otherwise
+		 * we'll just busyloop.
+		 */
+		spin_lock(&inode_lock);
+		if (!list_empty(&wb->b_more_io))  {
+			inode = list_entry(wb->b_more_io.prev,
+						struct inode, i_list);
+			inode_wait_for_writeback(inode);
+		}
+		spin_unlock(&inode_lock);
 	}
 
 	return wrote;
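Stripped to its control flow, the new loop makes one decision per chunk written; a condensed paraphrase of the logic above (not the literal kernel code):

	for (;;) {
		if (args->nr_pages <= 0)
			break;			/* page budget consumed */
		if (args->for_background && !over_bground_thresh())
			break;			/* background goal reached */

		/* ... write one MAX_WRITEBACK_PAGES chunk ... */

		if (wbc.nr_to_write <= 0)
			continue;		/* wrote a full chunk: go again */
		if (!wbc.more_io)
			break;			/* nothing left anywhere: done */
		if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
			continue;		/* partial progress: try for more */

		/* no progress at all: sleep on an inode from b_more_io
		 * instead of spinning (the busyloop fix in this series) */
	}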
@@ -1060,9 +1128,6 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  * If older_than_this is non-NULL, then only write out inodes which
  * had their first dirtying at a time earlier than *older_than_this.
  *
- * If we're a pdlfush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
  * This function assumes that the blockdev superblock's inodes are backed by
  * a variety of queues, so all inodes are searched.  For other superblocks,
@@ -1141,7 +1206,7 @@ void writeback_inodes_sb(struct super_block *sb)
 	nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_writeback_all(sb, nr_to_write);
+	bdi_start_writeback(sb->s_bdi, nr_to_write);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
mm/page-writeback.c +17 −13
@@ -44,18 +44,21 @@ static long ratelimit_pages = 32;
 /*
  * When balance_dirty_pages decides that the caller needs to perform some
  * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably
+ * It should be somewhat larger than dirtied pages to ensure that reasonably
  * large amounts of I/O are submitted.
  */
-static inline long sync_writeback_pages(void)
+static inline long sync_writeback_pages(unsigned long dirtied)
 {
-	return ratelimit_pages + ratelimit_pages / 2;
+	if (dirtied < ratelimit_pages)
+		dirtied = ratelimit_pages;
+
+	return dirtied + dirtied / 2;
 }
 
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
- * Start background writeback (via pdflush) at this percentage
+ * Start background writeback (via writeback threads) at this percentage
  */
 int dirty_background_ratio = 10;
 
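With the static default ratelimit_pages = 32 shown in the hunk header (it is recalculated at runtime from the machine's memory size), the new scaling works out as, for example:

	/*
	 * sync_writeback_pages(8)    -> 48	(floored at ratelimit_pages,
	 *					 then scaled by 1.5)
	 * sync_writeback_pages(1000) -> 1500	(1.5x the pages actually dirtied)
	 */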
@@ -474,10 +477,11 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
  * the caller to perform writeback if the system is over `vm_dirty_ratio'.
- * If we're over `background_thresh' then pdflush is woken to perform some
- * writeout.
+ * If we're over `background_thresh' then the writeback threads are woken to
+ * perform some writeout.
  */
-static void balance_dirty_pages(struct address_space *mapping)
+static void balance_dirty_pages(struct address_space *mapping,
+				unsigned long write_chunk)
 {
 	long nr_reclaimable, bdi_nr_reclaimable;
 	long nr_writeback, bdi_nr_writeback;
@@ -485,7 +489,6 @@ static void balance_dirty_pages(struct address_space *mapping)
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
 	unsigned long pages_written = 0;
-	unsigned long write_chunk = sync_writeback_pages();
 	unsigned long pause = 1;
 
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
@@ -579,7 +582,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 		bdi->dirty_exceeded = 0;
 
 	if (writeback_in_progress(bdi))
-		return;		/* pdflush is already working this queue */
+		return;
 
 	/*
 	 * In laptop mode, we wait until hitting the higher threshold before
@@ -590,10 +593,10 @@ static void balance_dirty_pages(struct address_space *mapping)
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if ((laptop_mode && pages_written) ||
-	    (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
 			       + global_page_state(NR_UNSTABLE_NFS))
 					  > background_thresh)))
-		bdi_start_writeback(bdi, nr_writeback);
+		bdi_start_writeback(bdi, 0);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -640,9 +643,10 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 	p =  &__get_cpu_var(bdp_ratelimits);
 	*p += nr_pages_dirtied;
 	if (unlikely(*p >= ratelimit)) {
+		ratelimit = sync_writeback_pages(*p);
 		*p = 0;
 		preempt_enable();
-		balance_dirty_pages(mapping);
+		balance_dirty_pages(mapping, ratelimit);
 		return;
 	}
 	preempt_enable();
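Here *p is this CPU's count of pages dirtied since it last triggered balancing, so the write chunk now tracks the actual dirtying rate instead of a fixed constant. An illustrative trace, assuming ratelimit == 32:

	/*
	 * task dirties 8 pages per call:  *p = 8, 16, 24, 32
	 * 4th call crosses the limit:     sync_writeback_pages(32) = 48,
	 *                                 balance_dirty_pages(mapping, 48)
	 *
	 * task dirties 500 pages at once: sync_writeback_pages(500) = 750,
	 *                                 balance_dirty_pages(mapping, 750)
	 */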
mm/shmem.c +3 −2
@@ -1046,8 +1046,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	 * sync from ever calling shmem_writepage; but a stacking filesystem
 	 * may use the ->writepage of its underlying filesystem, in which case
 	 * tmpfs should write out to swap only in response to memory pressure,
-	 * and not for pdflush or sync.  However, in those cases, we do still
-	 * want to check if there's a redundant swappage to be discarded.
+	 * and not for the writeback threads or sync.  However, in those cases,
+	 * we do still want to check if there's a redundant swappage to be
+	 * discarded.
 	 */
 	if (wbc->for_reclaim)
 		swap = get_swap_page();
mm/vmscan.c +4 −4
@@ -1709,10 +1709,10 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
  *
  * If the caller is !__GFP_FS then the probability of a failure is reasonably
  * high - the zone may be full of dirty or under-writeback pages, which this
- * caller can't do much about.  We kick pdflush and take explicit naps in the
- * hope that some of these pages can be written.  But if the allocating task
- * holds filesystem locks which prevent writeout this might not work, and the
- * allocation attempt will fail.
+ * caller can't do much about.  We kick the writeback threads and take explicit
+ * naps in the hope that some of these pages can be written.  But if the
+ * allocating task holds filesystem locks which prevent writeout this might not
+ * work, and the allocation attempt will fail.
  *
  * returns:	0, if no pages reclaimed
  * 		else, the number of pages reclaimed