Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5ed7b76f authored by Eric Biggers's avatar Eric Biggers Committed by Jaegeuk Kim
Browse files

ext4: add basic fs-verity support



Add most of fs-verity support to ext4.  fs-verity is a filesystem
feature that enables transparent integrity protection and authentication
of read-only files.  It uses a dm-verity like mechanism at the file
level: a Merkle tree is used to verify any block in the file in
log(filesize) time.  It is implemented mainly by helper functions in
fs/verity/.  See Documentation/filesystems/fsverity.rst for the full
documentation.

This commit adds all of ext4 fs-verity support except for the actual
data verification, including:

- Adding a filesystem feature flag and an inode flag for fs-verity.

- Implementing the fsverity_operations to support enabling verity on an
  inode and reading/writing the verity metadata.

- Updating ->write_begin(), ->write_end(), and ->writepages() to support
  writing verity metadata pages.

- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().

ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
past the end of the file, starting at the first 64K boundary beyond
i_size.  This approach works because (a) verity files are readonly, and
(b) pages fully beyond i_size aren't visible to userspace but can be
read/written internally by ext4 with only some relatively small changes
to ext4.  This approach avoids having to depend on the EA_INODE feature
and on rearchitecturing ext4's xattr support to support paging
multi-gigabyte xattrs into memory, and to support encrypting xattrs.
Note that the verity metadata *must* be encrypted when the file is,
since it contains hashes of the plaintext data.

This patch incorporates work by Theodore Ts'o and Chandan Rajendra.

Reviewed-by: default avatarTheodore Ts'o <tytso@mit.edu>
Signed-off-by: default avatarEric Biggers <ebiggers@google.com>
parent 0c22f68f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \

ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
ext4-$(CONFIG_FS_VERITY)		+= verity.o
+19 −5
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#endif

#include <linux/fscrypt.h>
#include <linux/fsverity.h>

#include <linux/compiler.h>

@@ -404,6 +405,7 @@ struct flex_groups {
#define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
#define EXT4_VERITY_FL			0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL		0x10000000 /* Inode has inline data. */
@@ -411,7 +413,7 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL		0x40000000 /* Casefolded file */
#define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */

#define EXT4_FL_USER_VISIBLE		0x704BDFFF /* User visible flags */
#define EXT4_FL_USER_VISIBLE		0x705BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE		0x604BC0FF /* User modifiable flags */

/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
@@ -472,6 +474,7 @@ enum {
	EXT4_INODE_TOPDIR	= 17,	/* Top of directory hierarchies*/
	EXT4_INODE_HUGE_FILE	= 18,	/* Set to each huge file */
	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
	EXT4_INODE_VERITY	= 20,	/* Verity protected inode */
	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
	EXT4_INODE_INLINE_DATA	= 28,	/* Data in inode. */
@@ -517,6 +520,7 @@ static inline void ext4_check_flag_values(void)
	CHECK_FLAG_VALUE(TOPDIR);
	CHECK_FLAG_VALUE(HUGE_FILE);
	CHECK_FLAG_VALUE(EXTENTS);
	CHECK_FLAG_VALUE(VERITY);
	CHECK_FLAG_VALUE(EA_INODE);
	CHECK_FLAG_VALUE(EOFBLOCKS);
	CHECK_FLAG_VALUE(INLINE_DATA);
@@ -1560,6 +1564,7 @@ enum {
	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
	EXT4_STATE_LUSTRE_EA_INODE,	/* Lustre-style ea_inode */
	EXT4_STATE_VERITY_IN_PROGRESS,	/* building fs-verity Merkle tree */
};

#define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -1610,9 +1615,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_SB(sb)	(sb)
#endif

/*
 * Returns true if the inode is inode is encrypted
 */
static inline bool ext4_verity_in_progress(struct inode *inode)
{
	return IS_ENABLED(CONFIG_FS_VERITY) &&
	       ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
}

#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime

/*
@@ -1665,6 +1673,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM	0x0400
#define EXT4_FEATURE_RO_COMPAT_READONLY		0x1000
#define EXT4_FEATURE_RO_COMPAT_PROJECT		0x2000
#define EXT4_FEATURE_RO_COMPAT_VERITY		0x8000

#define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1754,6 +1763,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC)
EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum,	METADATA_CSUM)
EXT4_FEATURE_RO_COMPAT_FUNCS(readonly,		READONLY)
EXT4_FEATURE_RO_COMPAT_FUNCS(project,		PROJECT)
EXT4_FEATURE_RO_COMPAT_FUNCS(verity,		VERITY)

EXT4_FEATURE_INCOMPAT_FUNCS(compression,	COMPRESSION)
EXT4_FEATURE_INCOMPAT_FUNCS(filetype,		FILETYPE)
@@ -1811,7 +1821,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
					 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
					 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
					 EXT4_FEATURE_RO_COMPAT_QUOTA |\
					 EXT4_FEATURE_RO_COMPAT_PROJECT)
					 EXT4_FEATURE_RO_COMPAT_PROJECT |\
					 EXT4_FEATURE_RO_COMPAT_VERITY)

#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -3273,6 +3284,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);

/* verity.c */
extern const struct fsverity_operations ext4_verityops;

/*
 * Add new method to test whether block and inode bitmaps are properly
 * initialized. With uninit_bg reading the block from disk is not enough
+4 −0
Original line number Diff line number Diff line
@@ -444,6 +444,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
	if (ret)
		return ret;

	ret = fsverity_file_open(inode, filp);
	if (ret)
		return ret;

	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
+38 −15
Original line number Diff line number Diff line
@@ -1322,6 +1322,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
	}

	if (ret) {
		bool extended = (pos + len > inode->i_size) &&
				!ext4_verity_in_progress(inode);

		unlock_page(page);
		/*
		 * __block_write_begin may have instantiated a few blocks
@@ -1331,11 +1334,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
		 * Add inode to orphan list in case we crash before
		 * truncate finishes
		 */
		if (pos + len > inode->i_size && ext4_can_truncate(inode))
		if (extended && ext4_can_truncate(inode))
			ext4_orphan_add(handle, inode);

		ext4_journal_stop(handle);
		if (pos + len > inode->i_size) {
		if (extended) {
			ext4_truncate_failed_write(inode);
			/*
			 * If truncate failed early the inode might
@@ -1388,6 +1391,7 @@ static int ext4_write_end(struct file *file,
	int ret = 0, ret2;
	int i_size_changed = 0;
	int inline_data = ext4_has_inline_data(inode);
	bool verity = ext4_verity_in_progress(inode);

	trace_ext4_write_end(inode, pos, len, copied);
	if (inline_data) {
@@ -1405,12 +1409,16 @@ static int ext4_write_end(struct file *file,
	/*
	 * it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 *
	 * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
	 * blocks are being written past EOF, so skip the i_size update.
	 */
	if (!verity)
		i_size_changed = ext4_update_inode_size(inode, pos + copied);
	unlock_page(page);
	put_page(page);

	if (old_size < pos)
	if (old_size < pos && !verity)
		pagecache_isize_extended(inode, old_size, pos);
	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1421,7 +1429,7 @@ static int ext4_write_end(struct file *file,
	if (i_size_changed || inline_data)
		ext4_mark_inode_dirty(handle, inode);

	if (pos + len > inode->i_size && ext4_can_truncate(inode))
	if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
		/* if we have allocated more blocks and copied
		 * less. We will have blocks allocated outside
		 * inode->i_size. So truncate them
@@ -1432,7 +1440,7 @@ static int ext4_write_end(struct file *file,
	if (!ret)
		ret = ret2;

	if (pos + len > inode->i_size) {
	if (pos + len > inode->i_size && !verity) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
@@ -1493,6 +1501,7 @@ static int ext4_journalled_write_end(struct file *file,
	unsigned from, to;
	int size_changed = 0;
	int inline_data = ext4_has_inline_data(inode);
	bool verity = ext4_verity_in_progress(inode);

	trace_ext4_journalled_write_end(inode, pos, len, copied);
	from = pos & (PAGE_SIZE - 1);
@@ -1522,13 +1531,14 @@ static int ext4_journalled_write_end(struct file *file,
		if (!partial)
			SetPageUptodate(page);
	}
	if (!verity)
		size_changed = ext4_update_inode_size(inode, pos + copied);
	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
	unlock_page(page);
	put_page(page);

	if (old_size < pos)
	if (old_size < pos && !verity)
		pagecache_isize_extended(inode, old_size, pos);

	if (size_changed || inline_data) {
@@ -1537,7 +1547,7 @@ static int ext4_journalled_write_end(struct file *file,
			ret = ret2;
	}

	if (pos + len > inode->i_size && ext4_can_truncate(inode))
	if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
		/* if we have allocated more blocks and copied
		 * less. We will have blocks allocated outside
		 * inode->i_size. So truncate them
@@ -1548,7 +1558,7 @@ static int ext4_journalled_write_end(struct file *file,
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	if (pos + len > inode->i_size) {
	if (pos + len > inode->i_size && !verity) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
@@ -2114,7 +2124,8 @@ static int ext4_writepage(struct page *page,

	trace_ext4_writepage(page);
	size = i_size_read(inode);
	if (page->index == size >> PAGE_SHIFT)
	if (page->index == size >> PAGE_SHIFT &&
	    !ext4_verity_in_progress(inode))
		len = size & ~PAGE_MASK;
	else
		len = PAGE_SIZE;
@@ -2198,7 +2209,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
	 * after page tables are updated.
	 */
	size = i_size_read(mpd->inode);
	if (page->index == size >> PAGE_SHIFT)
	if (page->index == size >> PAGE_SHIFT &&
	    !ext4_verity_in_progress(mpd->inode))
		len = size & ~PAGE_MASK;
	else
		len = PAGE_SIZE;
@@ -2297,6 +2309,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
	ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
							>> inode->i_blkbits;

	if (ext4_verity_in_progress(inode))
		blocks = EXT_MAX_BLOCKS;

	do {
		BUG_ON(buffer_locked(bh));

@@ -3015,8 +3030,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,

	index = pos >> PAGE_SHIFT;

	if (ext4_nonda_switch(inode->i_sb) ||
	    S_ISLNK(inode->i_mode)) {
	if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
	    ext4_verity_in_progress(inode)) {
		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
		return ext4_write_begin(file, mapping, pos,
					len, flags, pagep, fsdata);
@@ -4689,6 +4704,8 @@ static bool ext4_should_use_dax(struct inode *inode)
		return false;
	if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
		return false;
	if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
		return false;
	return true;
}

@@ -4713,9 +4730,11 @@ void ext4_set_inode_flags(struct inode *inode)
		new_fl |= S_ENCRYPTED;
	if (flags & EXT4_CASEFOLD_FL)
		new_fl |= S_CASEFOLD;
	if (flags & EXT4_VERITY_FL)
		new_fl |= S_VERITY;
	inode_set_flags(inode, new_fl,
			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
			S_ENCRYPTED|S_CASEFOLD);
			S_ENCRYPTED|S_CASEFOLD|S_VERITY);
}

static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5476,6 +5495,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
	if (error)
		return error;

	error = fsverity_prepare_setattr(dentry, attr);
	if (error)
		return error;

	if (is_quota_modification(inode, attr)) {
		error = dquot_initialize(inode);
		if (error)
+13 −0
Original line number Diff line number Diff line
@@ -1086,6 +1086,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	}
	case EXT4_IOC_SHUTDOWN:
		return ext4_shutdown(sb, arg);

	case FS_IOC_ENABLE_VERITY:
		if (!ext4_has_feature_verity(sb))
			return -EOPNOTSUPP;
		return fsverity_ioctl_enable(filp, (const void __user *)arg);

	case FS_IOC_MEASURE_VERITY:
		if (!ext4_has_feature_verity(sb))
			return -EOPNOTSUPP;
		return fsverity_ioctl_measure(filp, (void __user *)arg);

	default:
		return -ENOTTY;
	}
@@ -1153,6 +1164,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
	case EXT4_IOC_SHUTDOWN:
	case FS_IOC_GETFSMAP:
	case FS_IOC_ENABLE_VERITY:
	case FS_IOC_MEASURE_VERITY:
		break;
	default:
		return -ENOIOCTLCMD;
Loading