
Commit c8e0b00e authored by Linus Torvalds
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: call __jbd2_log_start_commit with j_state_lock write locked
  ext4: serialize unaligned asynchronous DIO
  ext4: make grpinfo slab cache names static
  ext4: Fix data corruption with multi-block writepages support
  ext4: fix up ext4 error handling
  ext4: unregister features interface on module unload
  ext4: fix panic on module unload when stopping lazyinit thread
parents 3c6c0d6c e4471831
fs/ext4/ext4.h  +10 −0
@@ -848,6 +848,7 @@ struct ext4_inode_info {
	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
	/* current io_end structure for async DIO write*/
	ext4_io_end_t *cur_aio_dio;
	atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */

	spinlock_t i_block_reservation_lock;

@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)

#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)

/* For ioend & aio unwritten conversion wait queues */
#define EXT4_WQ_HASH_SZ		37
#define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
					    EXT4_WQ_HASH_SZ])
#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
					     EXT4_WQ_HASH_SZ])
extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];

#endif	/* __KERNEL__ */

#endif	/* _EXT4_H */
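The additions above give each inode an i_aiodio_unwritten counter and route waiters through small fixed-size arrays of wait queues and mutexes, indexed by hashing the inode pointer. As a rough illustration only (a userspace sketch, not part of the patch; wq_hash() and the dummy inodes are made up for the example), this is the slot selection the ext4_ioend_wq()/ext4_aio_mutex() macros perform:

/*
 * Illustration only: how a pointer is reduced to one of EXT4_WQ_HASH_SZ slots.
 * The pointer value is taken as an integer and reduced modulo a small prime,
 * so unrelated inodes usually land on different wait queues / mutexes while
 * the arrays themselves stay static and tiny.
 */
#include <stdio.h>

#define EXT4_WQ_HASH_SZ 37

static unsigned long wq_hash(const void *inode)
{
	return (unsigned long)inode % EXT4_WQ_HASH_SZ;
}

int main(void)
{
	int dummy_inodes[4];	/* stand-ins for struct inode addresses */

	for (int i = 0; i < 4; i++)
		printf("inode %p -> slot %lu\n",
		       (void *)&dummy_inodes[i], wq_hash(&dummy_inodes[i]));
	return 0;
}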
fs/ext4/extents.c  +6 −4
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
		 * that this IO needs conversion to written when IO is
		 * completed
		 */
-		if (io)
		if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
			io->flag = EXT4_IO_END_UNWRITTEN;
-		else
			atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
		} else
			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
		if (ext4_should_dioread_nolock(inode))
			map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		 * that we need to perform conversion when IO is done.
		 */
		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-			if (io)
			if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
				io->flag = EXT4_IO_END_UNWRITTEN;
-			else
				atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
			} else
				ext4_set_inode_state(inode,
						     EXT4_STATE_DIO_UNWRITTEN);
		}
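Both hunks above adopt the same guard: the i_aiodio_unwritten counter is bumped only when the io_end actually transitions to the unwritten state, so the single decrement done at completion time stays balanced. A rough illustration of that idiom (userspace sketch; the demo_io_end type, mark_unwritten()/complete_unwritten() helpers and the flag value are made up for the example, and |= is used where the kernel code assigns the flag directly):

/*
 * Illustration only: count a state transition exactly once, so one decrement
 * on completion balances it even if the marking path is reached repeatedly.
 */
#include <stdio.h>

#define IO_END_UNWRITTEN 0x1

struct demo_io_end { unsigned int flag; };

static int pending_unwritten;	/* stands in for i_aiodio_unwritten */

static void mark_unwritten(struct demo_io_end *io)
{
	if (!(io->flag & IO_END_UNWRITTEN)) {
		io->flag |= IO_END_UNWRITTEN;
		pending_unwritten++;	/* counted once per io_end */
	}
}

static void complete_unwritten(struct demo_io_end *io)
{
	if (io->flag & IO_END_UNWRITTEN) {
		io->flag &= ~IO_END_UNWRITTEN;
		pending_unwritten--;	/* balances the single increment */
	}
}

int main(void)
{
	struct demo_io_end io = { 0 };

	mark_unwritten(&io);
	mark_unwritten(&io);				/* second call is a no-op */
	printf("pending = %d\n", pending_unwritten);	/* 1 */
	complete_unwritten(&io);
	printf("pending = %d\n", pending_unwritten);	/* 0 */
	return 0;
}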
fs/ext4/file.c  +59 −1
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
	return 0;
}

static void ext4_aiodio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = ext4_ioend_wq(inode);

	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
		   unsigned long nr_segs, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;
	size_t count = iov_length(iov, nr_segs);
	loff_t final_size = pos + count;

	if (pos >= inode->i_size)
		return 0;

	if ((pos & blockmask) || (final_size & blockmask))
		return 1;

	return 0;
}

static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
	int unaligned_aio = 0;
	int ret;

	/*
	 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
					      sbi->s_bitmap_maxbytes - pos);
		}
	} else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
		   !is_sync_kiocb(iocb))) {
		unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
	}

-	return generic_file_aio_write(iocb, iov, nr_segs, pos);
	/* Unaligned direct AIO must be serialized; see comment above */
	if (unaligned_aio) {
		static unsigned long unaligned_warn_time;

		/* Warn about this once per day */
		if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
			ext4_msg(inode->i_sb, KERN_WARNING,
				 "Unaligned AIO/DIO on inode %ld by %s; "
				 "performance will be poor.",
				 inode->i_ino, current->comm);
		mutex_lock(ext4_aio_mutex(inode));
		ext4_aiodio_wait(inode);
	}

	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

	if (unaligned_aio)
		mutex_unlock(ext4_aio_mutex(inode));

	return ret;
}

static const struct vm_operations_struct ext4_file_vm_ops = {
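The write path above only serializes when ext4_unaligned_aio() fires: writes starting at or past i_size are exempt, and otherwise an I/O counts as unaligned if either its start offset or its end offset falls inside a filesystem block. A standalone illustration of that test (userspace sketch; is_unaligned() is made up for the example and omits the i_size check):

/*
 * Illustration only: the block-alignment test, outside the kernel.  With a
 * 4096-byte block size, a write starting at offset 512, or one whose end
 * lands mid-block, would take the serialized (per-inode mutex + wait) path
 * added in this patch.
 */
#include <stdio.h>

static int is_unaligned(long long pos, long long count, int blocksize)
{
	int blockmask = blocksize - 1;		/* blocksize is a power of two */
	long long final_size = pos + count;

	return (pos & blockmask) || (final_size & blockmask);
}

int main(void)
{
	printf("%d\n", is_unaligned(0, 4096, 4096));	/* 0: fully block-aligned */
	printf("%d\n", is_unaligned(512, 4096, 4096));	/* 1: unaligned start     */
	printf("%d\n", is_unaligned(0, 6144, 4096));	/* 1: unaligned end       */
	return 0;
}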
fs/ext4/mballoc.c  +60 −40
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
/* We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each unique s_blocksize_bits */
-#define NR_GRPINFO_CACHES	\
-	(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ static int ext4_mb_init_backend(struct super_block *sb)
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		if (ext4_groupinfo_caches[i])
			kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	ext4_groupinfo_caches[cache_index] = cachep;

	return 0;
}

int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
	unsigned offset;
	unsigned max;
	int ret;
-	int cache_index;
-	struct kmem_cache *cachep;
-	char *namep = NULL;

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
		goto out;
	}

-	cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
-	cachep = ext4_groupinfo_caches[cache_index];
-	if (!cachep) {
-		char name[32];
-		int len = offsetof(struct ext4_group_info,
-					bb_counters[sb->s_blocksize_bits + 2]);
-
-		sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
-		namep = kstrdup(name, GFP_KERNEL);
-		if (!namep) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		/* Need to free the kmem_cache_name() when we
-		 * destroy the slab */
-		cachep = kmem_cache_create(namep, len, 0,
-					     SLAB_RECLAIM_ACCOUNT, NULL);
-		if (!cachep) {
-			ret = -ENOMEM;
	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;
-		}
-		ext4_groupinfo_caches[cache_index] = cachep;
-	}

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
	if (ret) {
		kfree(sbi->s_mb_offsets);
		kfree(sbi->s_mb_maxs);
-		kfree(namep);
	}
	return ret;
}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)

void ext4_exit_mballoc(void)
{
-	int i;
	/*
	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
	 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
	kmem_cache_destroy(ext4_pspace_cachep);
	kmem_cache_destroy(ext4_ac_cachep);
	kmem_cache_destroy(ext4_free_ext_cachep);

-	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
-		struct kmem_cache *cachep = ext4_groupinfo_caches[i];
-		if (cachep) {
-			char *name = (char *)kmem_cache_name(cachep);
-			kmem_cache_destroy(cachep);
-			kfree(name);
-		}
-	}
	ext4_groupinfo_destroy_slabs();
	ext4_remove_debugfs_entry();
}
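The new ext4_groupinfo_create_slab() picks one of the eight static cache names by block-size order and sizes the objects with the same offsetof(..., bb_counters[bits + 2]) expression the old inline code used, so nothing is kstrdup()'d or freed at unload time any more. A rough illustration of that mapping (userspace sketch; demo_group_info, log2_of() and MIN_BLOCK_LOG_SIZE are simplified stand-ins for the kernel's ext4_group_info, order_base_2() and EXT4_MIN_BLOCK_LOG_SIZE, which is 10 for ext4's 1k minimum block size):

/*
 * Illustration only: map a block size to a groupinfo slab name and object
 * size.  4096-byte blocks give log2 = 12, index 12 - 10 = 2, i.e.
 * "ext4_groupinfo_4k", and an object big enough for bb_counters[0..13].
 */
#include <stdio.h>
#include <stddef.h>

#define MIN_BLOCK_LOG_SIZE 10	/* stand-in for EXT4_MIN_BLOCK_LOG_SIZE */

static const char *slab_names[] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

struct demo_group_info {		/* simplified stand-in for ext4_group_info */
	unsigned long bb_state;
	unsigned short bb_counters[];	/* flexible array, sized per block size */
};

static int log2_of(size_t size)		/* order_base_2() for exact powers of two */
{
	int bits = 0;

	while ((1UL << bits) < size)
		bits++;
	return bits;
}

int main(void)
{
	size_t blocksize = 4096;
	int bits = log2_of(blocksize);
	int index = bits - MIN_BLOCK_LOG_SIZE;
	size_t size = offsetof(struct demo_group_info, bb_counters[bits + 2]);

	printf("blocksize %zu -> cache \"%s\", object size %zu bytes\n",
	       blocksize, slab_names[index], size);
	return 0;
}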

fs/ext4/page-io.c  +19 −17
@@ -32,14 +32,8 @@

static struct kmem_cache *io_page_cachep, *io_end_cachep;

-#define WQ_HASH_SZ		37
-#define to_ioend_wq(v)	(&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
-static wait_queue_head_t ioend_wq[WQ_HASH_SZ];

int __init ext4_init_pageio(void)
{
-	int i;

	io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
	if (io_page_cachep == NULL)
		return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
		kmem_cache_destroy(io_page_cachep);
		return -ENOMEM;
	}
-	for (i = 0; i < WQ_HASH_SZ; i++)
-		init_waitqueue_head(&ioend_wq[i]);

	return 0;
}

@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)

void ext4_ioend_wait(struct inode *inode)
{
-	wait_queue_head_t *wq = to_ioend_wq(inode);
	wait_queue_head_t *wq = ext4_ioend_wq(inode);

	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
	for (i = 0; i < io->num_io_pages; i++)
		put_io_page(io->pages[i]);
	io->num_io_pages = 0;
-	wq = to_ioend_wq(io->inode);
	wq = ext4_ioend_wq(io->inode);
	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
	    waitqueue_active(wq))
		wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
	struct inode *inode = io->inode;
	loff_t offset = io->offset;
	ssize_t size = io->size;
	wait_queue_head_t *wq;
	int ret = 0;

	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
	if (io->iocb)
		aio_complete(io->iocb, io->result, 0);
	/* clear the DIO AIO unwritten flag */
	if (io->flag & EXT4_IO_END_UNWRITTEN) {
		io->flag &= ~EXT4_IO_END_UNWRITTEN;
		/* Wake up anyone waiting on unwritten extent conversion */
		wq = ext4_ioend_wq(io->inode);
		if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
		    waitqueue_active(wq)) {
			wake_up_all(wq);
		}
	}

	return ret;
}

@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
	struct inode *inode;
	unsigned long flags;
	int i;
	sector_t bi_sector = bio->bi_sector;

	BUG_ON(!io_end);
	bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
		if (error)
			SetPageError(page);
		BUG_ON(!head);
-		if (head->b_size == PAGE_CACHE_SIZE)
-			clear_buffer_dirty(head);
-		else {
		if (head->b_size != PAGE_CACHE_SIZE) {
			loff_t offset;
			loff_t io_end_offset = io_end->offset + io_end->size;

@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
					if (error)
						buffer_io_error(bh);

-					clear_buffer_dirty(bh);
				}
				if (buffer_delay(bh))
					partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
			     (unsigned long long) io_end->offset,
			     (long) io_end->size,
			     (unsigned long long)
-			     bio->bi_sector >> (inode->i_blkbits - 9));
			     bi_sector >> (inode->i_blkbits - 9));
	}

	/* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {

		block_end = block_start + blocksize;
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		clear_buffer_dirty(bh);
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*