Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2058f83a authored by Michael Halcrow, committed by Theodore Ts'o
Browse files

ext4 crypto: implement the ext4 encryption write path



Pulls block_write_begin() into fs/ext4/inode.c because it might need
to do a low-level read of the existing data, in which case we need to
decrypt it.

Signed-off-by: Michael Halcrow <mhalcrow@google.com>
Signed-off-by: Ildar Muslukhov <ildarm@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent dde680ce
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -3122,6 +3122,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
	ee_len    = ext4_ext_get_actual_len(ex);
	ee_pblock = ext4_ext_pblock(ex);

	if (ext4_encrypted_inode(inode))
		return ext4_encrypted_zeroout(inode, ex);

	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
	if (ret > 0)
		ret = 0;
@@ -4898,6 +4901,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
	ext4_lblk_t lblk;
	unsigned int blkbits = inode->i_blkbits;

	/*
	 * Encrypted inodes can't handle collapse range or insert
	 * range since we would need to re-encrypt blocks with a
	 * different IV or XTS tweak (which are based on the logical
	 * block number).
	 *
	 * XXX It's not clear why zero range isn't working, but we'll
	 * leave it disabled for encrypted inodes for now.  This is a
	 * bug we should fix....
	 */
	if (ext4_encrypted_inode(inode) &&
	    (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
		return -EOPNOTSUPP;

	/* Return error if mode is not supported */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+5 −0
Original line number Diff line number Diff line
@@ -996,6 +996,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
	ei->i_block_group = group;
	ei->i_last_alloc_group = ~0;

	/* If the directory is encrypted, then we should encrypt the inode. */
	if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
	    ext4_encrypted_inode(dir))
		ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);

	ext4_set_inode_flags(inode);
	if (IS_DIRSYNC(inode))
		ext4_handle_sync(handle);
+111 −1
Original line number Diff line number Diff line
@@ -886,6 +886,95 @@ int do_journal_get_write_access(handle_t *handle,

static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create);

#ifdef CONFIG_EXT4_FS_ENCRYPTION
/*
 * ext4_block_write_begin - prepare a page's buffers for a write
 * @page:	locked page that will receive the write
 * @pos:	file position of the write; only its offset within the page
 *		is used here
 * @len:	number of bytes to be written, must not run past the page end
 * @get_block:	callback used to map (and, with create == 1, allocate) the
 *		block backing each buffer touched by the write
 *
 * Walks every buffer head attached to @page.  Buffers that overlap the
 * byte range [from, to) are mapped through @get_block if they are not
 * mapped yet; buffers that are only partially covered by the write and are
 * not up to date are read from disk.  If such a read was issued for an
 * encrypted regular file, the page is passed to ext4_decrypt_one() once
 * all reads have completed successfully.
 *
 * Returns 0 on success, the error returned by @get_block, -EIO if a buffer
 * is still not up to date after its read completed, or the result of
 * ext4_decrypt_one().
 */
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
				  get_block_t *get_block)
{
	/* Byte range [from, to) of the write, relative to the page start. */
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end;
	sector_t block;
	int err = 0;
	unsigned blocksize = inode->i_sb->s_blocksize;
	unsigned bbits;
	/*
	 * wait[] collects the buffers we issue reads for.  Only a buffer that
	 * overlaps the write but sticks out before 'from' or after 'to' is
	 * read, so at most two entries (first and last block of the range)
	 * are ever needed.
	 */
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
	/* Set once a read has been issued on an encrypted regular file. */
	bool decrypt = false;

	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	head = page_buffers(page);
	bbits = ilog2(blocksize);
	/* Logical block number of the first buffer in this page. */
	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

	/*
	 * The buffer list is circular: stop when we are back at 'head'.  The
	 * '!block_start' term lets the first iteration (bh == head,
	 * block_start == 0) through.
	 */
	for (bh = head, block_start = 0; bh != head || !block_start;
	    block++, block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		/* Buffer lies entirely outside the range being written. */
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		/* Drop a stale "new" flag before (re)mapping the buffer. */
		if (buffer_new(bh))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			/* get_block() reported a newly allocated block. */
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							  bh->b_blocknr);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
					continue;
				}
				/*
				 * Nothing to read for a new block: zero the
				 * parts of it that fall outside [from, to).
				 */
				if (block_end > to || block_start < from)
					zero_user_segments(page, to, block_end,
							   block_start, from);
				continue;
			}
		}
		/* An up-to-date page implies up-to-date buffers. */
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		/*
		 * The write only partially covers this buffer and its
		 * contents are not in memory yet: read the block so the
		 * bytes outside [from, to) are preserved.  Delayed and
		 * unwritten buffers are not read.
		 */
		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
		    !buffer_unwritten(bh) &&
		    (block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++ = bh;
			/* Data read from disk needs decrypting afterwards. */
			decrypt = ext4_encrypted_inode(inode) &&
				S_ISREG(inode->i_mode);
		}
	}
	/*
	 * If we issued read requests, let them complete.  A buffer that is
	 * still not up to date afterwards turns the result into -EIO.
	 */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			err = -EIO;
	}
	if (unlikely(err))
		/* Failure: hand the write range to page_zero_new_buffers(). */
		page_zero_new_buffers(page, from, to);
	else if (decrypt)
		/*
		 * NOTE(review): the whole page is passed here, not just the
		 * buffers that were read; presumably this relies on
		 * blocksize == page size for encrypted files -- confirm.
		 */
		err = ext4_decrypt_one(inode, page);
	return err;
}
#endif

static int ext4_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
@@ -948,11 +1037,19 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
	/* In case writeback began while the page was unlocked */
	wait_for_stable_page(page);

#ifdef CONFIG_EXT4_FS_ENCRYPTION
	if (ext4_should_dioread_nolock(inode))
		ret = ext4_block_write_begin(page, pos, len,
					     ext4_get_block_write);
	else
		ret = ext4_block_write_begin(page, pos, len,
					     ext4_get_block);
#else
	if (ext4_should_dioread_nolock(inode))
		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
	else
		ret = __block_write_begin(page, pos, len, ext4_get_block);

#endif
	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, page_buffers(page),
					     from, to, NULL,
@@ -2574,7 +2671,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
	/* In case writeback began while the page was unlocked */
	wait_for_stable_page(page);

#ifdef CONFIG_EXT4_FS_ENCRYPTION
	ret = ext4_block_write_begin(page, pos, len,
				     ext4_da_get_block_prep);
#else
	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
#endif
	if (ret < 0) {
		unlock_page(page);
		ext4_journal_stop(handle);
@@ -3032,6 +3134,9 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
		get_block_func = ext4_get_block_write;
		dio_flags = DIO_LOCKING;
	}
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
	if (IS_DAX(inode))
		ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
				ext4_end_io_dio, dio_flags);
@@ -3096,6 +3201,11 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
	size_t count = iov_iter_count(iter);
	ssize_t ret;

#ifdef CONFIG_EXT4_FS_ENCRYPTION
	if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;
#endif

	/*
	 * If we are doing data journalling we don't support O_DIRECT
	 */
+40 −5
Original line number Diff line number Diff line
@@ -67,6 +67,10 @@ static void ext4_finish_bio(struct bio *bio)

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
		struct page *data_page = NULL;
		struct ext4_crypto_ctx *ctx = NULL;
#endif
		struct buffer_head *bh, *head;
		unsigned bio_start = bvec->bv_offset;
		unsigned bio_end = bio_start + bvec->bv_len;
@@ -76,6 +80,15 @@ static void ext4_finish_bio(struct bio *bio)
		if (!page)
			continue;

#ifdef CONFIG_EXT4_FS_ENCRYPTION
		if (!page->mapping) {
			/* The bounce data pages are unmapped. */
			data_page = page;
			ctx = (struct ext4_crypto_ctx *)page_private(data_page);
			page = ctx->control_page;
		}
#endif

		if (error) {
			SetPageError(page);
			set_bit(AS_EIO, &page->mapping->flags);
@@ -100,10 +113,15 @@ static void ext4_finish_bio(struct bio *bio)
		} while ((bh = bh->b_this_page) != head);
		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
		local_irq_restore(flags);
		if (!under_io)
		if (!under_io) {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
			if (ctx)
				ext4_restore_control_page(data_page);
#endif
			end_page_writeback(page);
		}
	}
}

static void ext4_release_io_end(ext4_io_end_t *io_end)
{
@@ -376,6 +394,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,

static int io_submit_add_bh(struct ext4_io_submit *io,
			    struct inode *inode,
			    struct page *page,
			    struct buffer_head *bh)
{
	int ret;
@@ -389,7 +408,7 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
		if (ret)
			return ret;
	}
	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
	if (ret != bh->b_size)
		goto submit_and_retry;
	io->io_next_block++;
@@ -402,6 +421,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
			struct writeback_control *wbc,
			bool keep_towrite)
{
	struct page *data_page = NULL;
	struct inode *inode = page->mapping->host;
	unsigned block_start, blocksize;
	struct buffer_head *bh, *head;
@@ -461,19 +481,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
		set_buffer_async_write(bh);
	} while ((bh = bh->b_this_page) != head);

	/* Now submit buffers to write */
	bh = head = page_buffers(page);

	if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
		data_page = ext4_encrypt(inode, page);
		if (IS_ERR(data_page)) {
			ret = PTR_ERR(data_page);
			data_page = NULL;
			goto out;
		}
	}

	/* Now submit buffers to write */
	do {
		if (!buffer_async_write(bh))
			continue;
		ret = io_submit_add_bh(io, inode, bh);
		ret = io_submit_add_bh(io, inode,
				       data_page ? data_page : page, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			redirty_page_for_writepage(wbc, page);
			break;
		}
		nr_submitted++;
@@ -482,6 +512,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,

	/* Error stopped previous loop? Clean up buffers... */
	if (ret) {
	out:
		if (data_page)
			ext4_restore_control_page(data_page);
		printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
		redirty_page_for_writepage(wbc, page);
		do {
			clear_buffer_async_write(bh);
			bh = bh->b_this_page;