Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5b41d924, authored by Eric Sandeen, committed by Theodore Ts'o
Browse files

ext4: implement writeback livelock avoidance using page tagging



This is analogous to Jan Kara's commit f446daae
("mm: implement writeback livelock avoidance using page tagging"),

but since we forked write_cache_pages, we need to reimplement
it there (and in ext4_da_writepages, since the range_cyclic
handling was moved there).

If you start a large buffered IO to a file, and then issue
an fsync on that file, you'll find that the fsync does not
complete until the other IO stops.

If you continue re-dirtying the file (say, putting dd
with conv=notrunc in a loop), when fsync finally completes
(after all IO is done), it reports via tracing that
it has written many more pages than the file contains;
in other words it has synced and re-synced pages in
the file multiple times.

This then leads to problems with our writeback_index
update: the update advances the index by the number of
pages written, which essentially sets writeback_index
off the end of the file.

With the following patch, we only sync as much as was
dirty at the time of the sync.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent bbd08344
Loading
Loading
Loading
Loading
+15 −3
Original line number Original line Diff line number Diff line
@@ -2809,16 +2809,21 @@ static int write_cache_pages_da(struct address_space *mapping,
	pgoff_t index;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t end;		/* Inclusive */
	long nr_to_write = wbc->nr_to_write;
	long nr_to_write = wbc->nr_to_write;
	int tag;


	pagevec_init(&pvec, 0);
	pagevec_init(&pvec, 0);
	index = wbc->range_start >> PAGE_CACHE_SHIFT;
	index = wbc->range_start >> PAGE_CACHE_SHIFT;
	end = wbc->range_end >> PAGE_CACHE_SHIFT;
	end = wbc->range_end >> PAGE_CACHE_SHIFT;


	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;

	while (!done && (index <= end)) {
	while (!done && (index <= end)) {
		int i;
		int i;


		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      PAGECACHE_TAG_DIRTY,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
		if (nr_pages == 0)
			break;
			break;
@@ -2923,6 +2928,7 @@ static int ext4_da_writepages(struct address_space *mapping,
	long desired_nr_to_write, nr_to_writebump = 0;
	long desired_nr_to_write, nr_to_writebump = 0;
	loff_t range_start = wbc->range_start;
	loff_t range_start = wbc->range_start;
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
	pgoff_t end;


	trace_ext4_da_writepages(inode, wbc);
	trace_ext4_da_writepages(inode, wbc);


@@ -2958,8 +2964,11 @@ static int ext4_da_writepages(struct address_space *mapping,
		wbc->range_start = index << PAGE_CACHE_SHIFT;
		wbc->range_start = index << PAGE_CACHE_SHIFT;
		wbc->range_end  = LLONG_MAX;
		wbc->range_end  = LLONG_MAX;
		wbc->range_cyclic = 0;
		wbc->range_cyclic = 0;
	} else
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
	}


	/*
	/*
	 * This works around two forms of stupidity.  The first is in
	 * This works around two forms of stupidity.  The first is in
@@ -3000,6 +3009,9 @@ static int ext4_da_writepages(struct address_space *mapping,
	pages_skipped = wbc->pages_skipped;
	pages_skipped = wbc->pages_skipped;


retry:
retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);

	while (!ret && wbc->nr_to_write > 0) {
	while (!ret && wbc->nr_to_write > 0) {


		/*
		/*
+2 −0
Original line number Original line Diff line number Diff line
@@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,


int generic_writepages(struct address_space *mapping,
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc);
		       struct writeback_control *wbc);
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end);
int write_cache_pages(struct address_space *mapping,
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data);
		      void *data);