
Commit 391f2a16 authored by Linus Torvalds
Pull ext4 updates from Ted Ts'o:
 "Some locking and page fault bug fixes from Jan Kara, some ext4
  encryption fixes from me, and Li Xi's Project Quota commits"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  fs: clean up the flags definition in uapi/linux/fs.h
  ext4: add FS_IOC_FSSETXATTR/FS_IOC_FSGETXATTR interface support
  ext4: add project quota support
  ext4: adds project ID support
  ext4 crypto: simplify interfaces to directory entry insert functions
  ext4 crypto: add missing locking for keyring_key access
  ext4: use pre-zeroed blocks for DAX page faults
  ext4: implement allocation of pre-zeroed blocks
  ext4: provide ext4_issue_zeroout()
  ext4: get rid of EXT4_GET_BLOCKS_NO_LOCK flag
  ext4: document lock ordering
  ext4: fix races of writeback with punch hole and zero range
  ext4: fix races between buffered IO and collapse / insert range
  ext4: move unlocked dio protection from ext4_alloc_file_blocks()
  ext4: fix races between page faults and hole punching
parents d5ffdf8b 68ce7bfc
fs/ext4/crypto.c (+2 −4)
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
 				EXT4_DECRYPT, page->index, page, page);
 }
 
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+			   ext4_fsblk_t pblk, ext4_lblk_t len)
 {
 	struct ext4_crypto_ctx	*ctx;
 	struct page		*ciphertext_page = NULL;
 	struct bio		*bio;
-	ext4_lblk_t		lblk = le32_to_cpu(ex->ee_block);
-	ext4_fsblk_t		pblk = ext4_ext_pblock(ex);
-	unsigned int		len = ext4_ext_get_actual_len(ex);
 	int			ret, err = 0;
 
 #if 0
fs/ext4/crypto_key.c (+4 −0)
@@ -213,9 +213,11 @@ int _ext4_get_encryption_info(struct inode *inode)
 		res = -ENOKEY;
 		goto out;
 	}
+	down_read(&keyring_key->sem);
 	ukp = user_key_payload(keyring_key);
 	if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
 		res = -EINVAL;
+		up_read(&keyring_key->sem);
 		goto out;
 	}
 	master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ int _ext4_get_encryption_info(struct inode *inode)
 			    "ext4: key size incorrect: %d\n",
 			    master_key->size);
 		res = -ENOKEY;
+		up_read(&keyring_key->sem);
 		goto out;
 	}
 	res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
 				  raw_key);
+	up_read(&keyring_key->sem);
 	if (res)
 		goto out;
 got_key:
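The two hunks above only add locking around an existing access pattern. As a condensed illustration of the rule they enforce (not code from this commit; the helper name is made up), the payload returned by user_key_payload() is only stable while the key's semaphore is held, so every exit path has to drop it:

/*
 * Hypothetical helper sketching the pattern _ext4_get_encryption_info()
 * now follows: hold keyring_key->sem across every dereference of the
 * payload, including the early-error exits.
 */
static int ext4_copy_master_key(struct key *keyring_key,
				struct ext4_encryption_key *master_key)
{
	const struct user_key_payload *ukp;
	int res = 0;

	down_read(&keyring_key->sem);
	ukp = user_key_payload(keyring_key);
	if (ukp->datalen != sizeof(struct ext4_encryption_key))
		res = -EINVAL;
	else
		memcpy(master_key, ukp->data, sizeof(*master_key));
	up_read(&keyring_key->sem);
	return res;
}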
fs/ext4/ext4.h (+86 −13)
@@ -378,14 +378,22 @@ struct flex_groups {
 #define EXT4_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE		0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE		0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE		0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE		(EXT4_SYNC_FL | \
+					 EXT4_IMMUTABLE_FL | \
+					 EXT4_APPEND_FL | \
+					 EXT4_NODUMP_FL | \
+					 EXT4_NOATIME_FL | \
+					 EXT4_PROJINHERIT_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
 			   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
 			   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
-			   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+			   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+			   EXT4_PROJINHERIT_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
 #define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
 	/* Request will not result in inode size update (user for fallocate) */
 #define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
-	/* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK			0x0100
 	/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0100
+	/* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO			0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO		(EXT4_GET_BLOCKS_CREATE |\
+					EXT4_GET_BLOCKS_ZERO)
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
 #define EXT4_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
 #define EXT4_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct ext4_encryption_policy)
 
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR		_IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+	__u32		fsx_xflags;	/* xflags field value (get/set) */
+	__u32		fsx_extsize;	/* extsize field value (get/set)*/
+	__u32		fsx_nextents;	/* nextents field value (get)	*/
+	__u32		fsx_projid;	/* project identifier (get/set) */
+	unsigned char	fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME	0x00000001	/* data in realtime volume */
+#define FS_XFLAG_PREALLOC	0x00000002	/* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE	0x00000008	/* file cannot be modified */
+#define FS_XFLAG_APPEND		0x00000010	/* all writes append */
+#define FS_XFLAG_SYNC		0x00000020	/* all writes synchronous */
+#define FS_XFLAG_NOATIME	0x00000040	/* do not update access time */
+#define FS_XFLAG_NODUMP		0x00000080	/* do not include in backups */
+#define FS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
+#define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
+#define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR		FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR		FS_IOC_FSSETXATTR
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
 	 * by other means, so we have i_data_sem.
 	 */
 	struct rw_semaphore i_data_sem;
+	/*
+	 * i_mmap_sem is for serializing page faults with truncate / punch hole
+	 * operations. We have to make sure that new page cannot be faulted in
+	 * a section of the inode that is being punched. We cannot easily use
+	 * i_data_sem for this since we need protection for the whole punch
+	 * operation and i_data_sem ranks below transaction start so we have
+	 * to occasionally drop it.
+	 */
+	struct rw_semaphore i_mmap_sem;
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
 
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
 	/* Encryption params */
 	struct ext4_crypt_info *i_crypt_info;
 #endif
+	kprojid_t i_projid;
 };
 
 /*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
 #endif
 
 /* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
 
 /*
  * fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
 					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
 					 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
 					 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
-					 EXT4_FEATURE_RO_COMPAT_QUOTA)
+					 EXT4_FEATURE_RO_COMPAT_QUOTA |\
+					 EXT4_FEATURE_RO_COMPAT_PROJECT)
 
 #define EXTN_FEATURE_FUNCS(ver) \
 static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
 #define	EXT4_DEF_RESUID		0
 #define	EXT4_DEF_RESGID		0
 
+/*
+ * Default project ID
+ */
+#define	EXT4_DEF_PROJID		0
+
 #define EXT4_DEF_INODE_READAHEAD_BLKS	32
 
 /*
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
 struct page *ext4_encrypt(struct inode *inode,
 			  struct page *plaintext_page);
 int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+			   ext4_fsblk_t pblk, ext4_lblk_t len);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
@@ -2440,7 +2507,7 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
 			 struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-			 struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+			    struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
 				struct buffer_head *bh_result, int create);
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+			      ext4_fsblk_t pblk, ext4_lblk_t len);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
 	return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len);
+
 struct ext4_group_info {
 	unsigned long   bb_state;
 	struct rb_root  bb_free_root;
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 					 struct page *page);
 extern int ext4_try_add_inline_entry(handle_t *handle,
 				     struct ext4_filename *fname,
-				     struct dentry *dentry,
-				     struct inode *inode);
+				     struct inode *dir, struct inode *inode);
 extern int ext4_try_create_inline_dir(handle_t *handle,
 				      struct inode *parent,
 				      struct inode *inode);
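The hunks above add the fsxattr structure, the FS_XFLAG_* bits, and the EXT4_IOC_FSGETXATTR/EXT4_IOC_FSSETXATTR aliases behind the new project-quota interface. Purely as an illustration (not part of this series), a userspace program could tag a file with a project ID roughly as follows; it assumes a <linux/fs.h> recent enough to export struct fsxattr and the FS_IOC_FS*XATTR names, otherwise the definitions would have to be copied locally, exactly as the hunk above does on the kernel side:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* struct fsxattr, FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR */

int main(int argc, char **argv)
{
	struct fsxattr fsx;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <file> <projid>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Read-modify-write: fetch the current attributes, change only projid. */
	if (ioctl(fd, FS_IOC_FSGETXATTR, &fsx) < 0) {
		perror("FS_IOC_FSGETXATTR");
		return 1;
	}
	fsx.fsx_projid = (__u32)strtoul(argv[2], NULL, 0);
	/* On a directory, FS_XFLAG_PROJINHERIT could additionally be set in
	 * fsx_xflags so that newly created children inherit this project ID. */
	if (ioctl(fd, FS_IOC_FSSETXATTR, &fsx) < 0) {
		perror("FS_IOC_FSSETXATTR");
		return 1;
	}
	close(fd);
	return 0;
}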
fs/ext4/extents.c (+83 −70)
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
 	ext4_fsblk_t ee_pblock;
 	unsigned int ee_len;
-	int ret;
 
 	ee_len    = ext4_ext_get_actual_len(ex);
 	ee_pblock = ext4_ext_pblock(ex);
-
-	if (ext4_encrypted_inode(inode))
-		return ext4_encrypted_zeroout(inode, ex);
-
-	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
-	if (ret > 0)
-		ret = 0;
-
-	return ret;
+	return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+				  ee_len);
 }
 
 /*
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 	}
 	/* IO end_io complete, convert the filled extent to written */
 	if (flags & EXT4_GET_BLOCKS_CONVERT) {
+		if (flags & EXT4_GET_BLOCKS_ZERO) {
+			if (allocated > map->m_len)
+				allocated = map->m_len;
+			err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+						 allocated);
+			if (err < 0)
+				goto out2;
+		}
 		ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
 							   ppath);
 		if (ret >= 0) {
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 	if (len <= EXT_UNWRITTEN_MAX_LEN)
 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	inode_dio_wait(inode);
-
 	/*
 	 * credits to insert 1 extent into extent tree
 	 */
@@ -4752,8 +4748,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 		goto retry;
 	}
 
-	ext4_inode_resume_unlocked_dio(inode);
-
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	int partial_begin, partial_end;
 	loff_t start, end;
 	ext4_lblk_t lblk;
-	struct address_space *mapping = inode->i_mapping;
 	unsigned int blkbits = inode->i_blkbits;
 
 	trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 			return ret;
 	}
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		ret = filemap_write_and_wait_range(mapping, offset,
-						   offset + len - 1);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * Round up offset. This is not fallocate, we neet to zero out
 	 * blocks, so convert interior block aligned part of the range to
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	/* Preallocate the range including the unaligned edges */
 	if (partial_begin || partial_end) {
 		ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 				 round_down(offset, 1 << blkbits)) >> blkbits,
 				new_size, flags, mode);
 		if (ret)
-			goto out_mutex;
+			goto out_dio;
 
 	}
 
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 			  EXT4_EX_NOCACHE);
 
+		/*
+		 * Prevent page faults from reinstantiating pages we have
+		 * released from page cache.
+		 */
+		down_write(&EXT4_I(inode)->i_mmap_sem);
+		ret = ext4_update_disksize_before_punch(inode, offset, len);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			goto out_dio;
+		}
 		/* Now release the pages and zero block aligned part of pages */
 		truncate_pagecache_range(inode, start, end - 1);
 		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
-		/* Wait all existing dio workers, newcomers will block on i_mutex */
-		ext4_inode_block_unlocked_dio(inode);
-		inode_dio_wait(inode);
-
 		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 					     flags, mode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
 		if (ret)
 			goto out_dio;
 	}
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 			goto out;
 	}
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 				     flags, mode);
+	ext4_inode_resume_unlocked_dio(inode);
 	if (ret)
 		goto out;
 
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	/*
-	 * Need to round down offset to be aligned with page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-					   LLONG_MAX);
-	if (ret)
-		return ret;
-
-	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
 
 	/*
	 * There is no need to overlap collapse range with EOF, in which case
	 * it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, ioffset);
-
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down offset to be aligned with page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/*
+	 * Write tail of the last page before removed range since it will get
+	 * removed from the page cache below.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+	if (ret)
+		goto out_mmap;
+	/*
+	 * Write data that will be shifted to preserve them when discarding
+	 * page cache below. We are also protected from pages becoming dirty
+	 * by i_mmap_sem.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+					   LLONG_MAX);
+	if (ret)
+		goto out_mmap;
+	truncate_pagecache(inode, ioffset);
+
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
-		goto out_dio;
+		goto out_mmap;
 	}
 
 	down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
 	ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	/*
-	 * Need to round down to align start offset to page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-			LLONG_MAX);
-	if (ret)
-		return ret;
-
-	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
 
 	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, ioffset);
-
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down to align start offset to page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/* Write out all dirty pages */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+			LLONG_MAX);
+	if (ret)
+		goto out_mmap;
+	truncate_pagecache(inode, ioffset);
+
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
-		goto out_dio;
+		goto out_mmap;
 	}
 
 	/* Expand file to avoid data loss if there is error while shifting */
@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
 	ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
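Taken together, the changes above converge on one locking order for the collapse range and insert range paths (zero range takes the same ranks from inside its branch). The following is a condensed summary paraphrased from the hunks above, not a literal excerpt:

/*
 * Lock ordering in ext4_collapse_range() / ext4_insert_range() after this
 * merge (i_mmap_sem ranks above a transaction start, i_data_sem below it):
 *
 *   mutex_lock(&inode->i_mutex);               serialize against other fops
 *   ext4_inode_block_unlocked_dio(inode);      stop new unlocked DIO
 *   inode_dio_wait(inode);                     drain in-flight DIO
 *   down_write(&EXT4_I(inode)->i_mmap_sem);    block page faults
 *     filemap_write_and_wait_range(...);       flush the affected page cache
 *     truncate_pagecache(inode, ioffset);      ... then drop it
 *     handle = ext4_journal_start(...);
 *     down_write(&EXT4_I(inode)->i_data_sem);
 *       ... remove / shift extents ...
 *     up_write(&EXT4_I(inode)->i_data_sem);
 *     ext4_journal_stop(handle);
 *   up_write(&EXT4_I(inode)->i_mmap_sem);
 *   ext4_inode_resume_unlocked_dio(inode);
 *   mutex_unlock(&inode->i_mutex);
 */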
fs/ext4/file.c (+58 −24)
@@ -193,43 +193,35 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 }
 
 #ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-	struct inode *inode = bh->b_assoc_map->host;
-	/* XXX: breaks on 32-bit > 16TB. Is that even supported? */
-	loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-	int err;
-	if (!uptodate)
-		return;
-	WARN_ON(!buffer_unwritten(bh));
-	err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int result;
 	handle_t *handle = NULL;
-	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 						EXT4_DATA_TRANS_BLOCKS(sb));
-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
 	else
-		result = __dax_fault(vma, vmf, ext4_get_block_dax,
-						ext4_end_io_unwritten);
+		result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
 
 	if (write) {
 		if (!IS_ERR(handle))
 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 				ext4_chunk_trans_blocks(inode,
 							PMD_SIZE / PAGE_SIZE));
-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
 	else
 		result = __dax_pmd_fault(vma, addr, pmd, flags,
-				ext4_get_block_dax, ext4_end_io_unwritten);
+				ext4_dax_mmap_get_block, NULL);
 
 	if (write) {
 		if (!IS_ERR(handle))
 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-				ext4_end_io_unwritten);
+	int err;
+	struct inode *inode = file_inode(vma->vm_file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(inode->i_sb);
+
+	return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+	int ret = VM_FAULT_NOPAGE;
+	loff_t size;
+
+	sb_start_pagefault(sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (vmf->pgoff >= size)
+		ret = VM_FAULT_SIGBUS;
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(sb);
+
+	return ret;
+}
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
 	.fault		= ext4_dax_fault,
 	.pmd_fault	= ext4_dax_pmd_fault,
 	.page_mkwrite	= ext4_dax_mkwrite,
-	.pfn_mkwrite	= dax_pfn_mkwrite,
+	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops	ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-	.fault		= filemap_fault,
+	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite   = ext4_page_mkwrite,
 };