Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 18ddf074 authored by Eric Biggers, committed by Jaegeuk Kim
Browse files

fs-verity: implement readahead of Merkle tree pages

When fs-verity verifies data pages, currently it reads each Merkle tree
page synchronously using read_mapping_page().

Therefore, when the Merkle tree pages aren't already cached, fs-verity
causes an extra 4 KiB I/O request for every 512 KiB of data (assuming
that the Merkle tree uses SHA-256 and 4 KiB blocks).  This results in
more I/O requests and performance loss than is strictly necessary.

Therefore, implement readahead of the Merkle tree pages.

For simplicity, we take advantage of the fact that the kernel already
does readahead of the file's *data*, just like it does for any other
file.  Due to this, we don't really need a separate readahead state
(struct file_ra_state) just for the Merkle tree, but rather we just need
to piggy-back on the existing data readahead requests.

We also only really need to bother with the first level of the Merkle
tree, since the usual fan-out factor is 128, so normally over 99% of
Merkle tree I/O requests are for the first level.

Therefore, make fsverity_verify_bio() enable readahead of the first
Merkle tree level, for up to 1/4 the number of pages in the bio, when it
sees that the REQ_RAHEAD flag is set on the bio.  The readahead size is
then passed down to ->read_merkle_tree_page() for the filesystem to
(optionally) implement if it sees that the requested page is uncached.

While we're at it, also make build_merkle_tree_level() set the Merkle
tree readahead size, since it's easy to do there.

However, for now don't set the readahead size in fsverity_verify_page(),
since currently it's only used to verify holes on ext4 and f2fs, and it
would need parameters added to know how much to read ahead.

This patch significantly improves fs-verity sequential read performance.
Some quick benchmarks with 'cat'-ing a 250MB file after dropping caches:

    On an ARM64 phone (using sha256-ce):
        Before: 217 MB/s
        After: 263 MB/s
        (compare to sha256sum of non-verity file: 357 MB/s)

    In an x86_64 VM (using sha256-avx2):
        Before: 173 MB/s
        After: 215 MB/s
        (compare to sha256sum of non-verity file: 223 MB/s)

Link: https://lore.kernel.org/r/20200106205533.137005-1-ebiggers@kernel.org


Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
parent 2ff972ed
Loading
Loading
Loading
Loading
+47 −2
Original line number Original line Diff line number Diff line
@@ -342,12 +342,57 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
	return desc_size;
	return desc_size;
}
}


/*
 * Start reads for up to @count Merkle tree pages, beginning at page-cache
 * index @start_index of @mapping.
 *
 * Each index in the range whose page-cache slot is empty or holds an
 * exceptional entry gets a newly allocated page.  The collected pages are
 * then passed to ext4_mpage_readpages() as one plugged batch.  If a page
 * allocation fails, scanning stops and only the pages gathered so far are
 * submitted.
 *
 * This is a cut-down counterpart of __do_page_cache_readahead() that also
 * works on pages past i_size.
 */
static void ext4_merkle_tree_readahead(struct address_space *mapping,
				       pgoff_t start_index, unsigned long count)
{
	const pgoff_t end_index = start_index + count;
	struct blk_plug plug;
	struct page *entry;
	unsigned int nr_queued = 0;
	pgoff_t idx;
	LIST_HEAD(pool);

	for (idx = start_index; idx < end_index; idx++) {
		rcu_read_lock();
		entry = radix_tree_lookup(&mapping->page_tree, idx);
		rcu_read_unlock();

		/* A real page already sits at this index; nothing to queue. */
		if (entry && !radix_tree_exceptional_entry(entry))
			continue;

		entry = __page_cache_alloc(readahead_gfp_mask(mapping));
		if (!entry)
			break;	/* allocation failed: submit what we have */

		entry->index = idx;
		list_add(&entry->lru, &pool);
		nr_queued++;
	}

	blk_start_plug(&plug);
	ext4_mpage_readpages(mapping, &pool, NULL, nr_queued, true);
	blk_finish_plug(&plug);
}

/*
 * Return the Merkle tree page of @inode at page offset @index.  @index is
 * rebased below by ext4_verity_metadata_pos(inode) >> PAGE_SHIFT before it is
 * used as a page-cache index.
 *
 * The page cache is tried first (the lookup passes FGP_ACCESSED).  When no
 * page is cached at that index and @num_ra_pages > 1, readahead of
 * @num_ra_pages pages starting at the requested index is issued via
 * ext4_merkle_tree_readahead() before the page is fetched with
 * read_mapping_page().  When a page is cached but not uptodate, the reference
 * from the lookup is dropped and the page is fetched with read_mapping_page()
 * without starting readahead.
 *
 * The return value is either the cached page or whatever read_mapping_page()
 * returns; the caller visible in build_merkle_tree_level() tests the result
 * with IS_ERR().
 *
 * NOTE(review): this span is taken from a side-by-side diff rendering, so
 * most lines appear twice and the pre-patch lines (two-argument signature,
 * bare "return read_mapping_page(...)") are still interleaved with the new
 * ones -- confirm against the real file before relying on it.
 */
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index)
					       pgoff_t index,
					       unsigned long num_ra_pages)
{
{
	struct page *page;

	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;


	return read_mapping_page(inode->i_mapping, index, NULL);
	/* Fast path: use the page if it is already cached and uptodate. */
	page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
	if (!page || !PageUptodate(page)) {
		if (page)
			put_page(page);
		else if (num_ra_pages > 1)
			ext4_merkle_tree_readahead(inode->i_mapping, index,
						   num_ra_pages);
		page = read_mapping_page(inode->i_mapping, index, NULL);
	}
	return page;
}
}


static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
+47 −2
Original line number Original line Diff line number Diff line
@@ -222,12 +222,57 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
	return size;
	return size;
}
}


/*
 * Start reads for up to @count Merkle tree pages, beginning at page-cache
 * index @start_index of @mapping.
 *
 * Each index in the range whose page-cache slot is empty or holds an
 * exceptional entry gets a newly allocated page.  The collected pages are
 * then passed to f2fs_mpage_readpages() as one plugged batch.  If a page
 * allocation fails, scanning stops and only the pages gathered so far are
 * submitted.
 *
 * This is a cut-down counterpart of __do_page_cache_readahead() that also
 * works on pages past i_size.
 */
static void f2fs_merkle_tree_readahead(struct address_space *mapping,
				       pgoff_t start_index, unsigned long count)
{
	const pgoff_t end_index = start_index + count;
	struct blk_plug plug;
	struct page *entry;
	unsigned int nr_queued = 0;
	pgoff_t idx;
	LIST_HEAD(pool);

	for (idx = start_index; idx < end_index; idx++) {
		rcu_read_lock();
		entry = radix_tree_lookup(&mapping->page_tree, idx);
		rcu_read_unlock();

		/* A real page already sits at this index; nothing to queue. */
		if (entry && !radix_tree_exceptional_entry(entry))
			continue;

		entry = __page_cache_alloc(readahead_gfp_mask(mapping));
		if (!entry)
			break;	/* allocation failed: submit what we have */

		entry->index = idx;
		list_add(&entry->lru, &pool);
		nr_queued++;
	}

	blk_start_plug(&plug);
	f2fs_mpage_readpages(mapping, &pool, NULL, nr_queued, true);
	blk_finish_plug(&plug);
}

/*
 * Return the Merkle tree page of @inode at page offset @index.  @index is
 * rebased below by f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT before it is
 * used as a page-cache index.
 *
 * The page cache is tried first (the lookup passes FGP_ACCESSED).  When no
 * page is cached at that index and @num_ra_pages > 1, readahead of
 * @num_ra_pages pages starting at the requested index is issued via
 * f2fs_merkle_tree_readahead() before the page is fetched with
 * read_mapping_page().  When a page is cached but not uptodate, the reference
 * from the lookup is dropped and the page is fetched with read_mapping_page()
 * without starting readahead.
 *
 * The return value is either the cached page or whatever read_mapping_page()
 * returns -- presumably an error pointer on failure; verify against callers.
 *
 * NOTE(review): this span is taken from a side-by-side diff rendering, so
 * most lines appear twice and the pre-patch lines (two-argument signature,
 * bare "return read_mapping_page(...)") are still interleaved with the new
 * ones -- confirm against the real file before relying on it.
 */
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index)
					       pgoff_t index,
					       unsigned long num_ra_pages)
{
{
	struct page *page;

	index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
	index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;


	return read_mapping_page(inode->i_mapping, index, NULL);
	/* Fast path: use the page if it is already cached and uptodate. */
	page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
	if (!page || !PageUptodate(page)) {
		if (page)
			put_page(page);
		else if (num_ra_pages > 1)
			f2fs_merkle_tree_readahead(inode->i_mapping, index,
						   num_ra_pages);
		page = read_mapping_page(inode->i_mapping, index, NULL);
	}
	return page;
}
}


static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
+7 −1
Original line number Original line Diff line number Diff line
@@ -8,6 +8,7 @@
#include "fsverity_private.h"
#include "fsverity_private.h"


#include <crypto/hash.h>
#include <crypto/hash.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <linux/sched/signal.h>
@@ -86,9 +87,14 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level,
				return err;
				return err;
			}
			}
		} else {
		} else {
			unsigned long num_ra_pages =
				min_t(unsigned long, num_blocks_to_hash - i,
				      inode->i_sb->s_bdi->io_pages);

			/* Non-leaf: hashing hash block from level below */
			/* Non-leaf: hashing hash block from level below */
			src_page = vops->read_merkle_tree_page(inode,
			src_page = vops->read_merkle_tree_page(inode,
					params->level_start[level - 1] + i);
					params->level_start[level - 1] + i,
					num_ra_pages);
			if (IS_ERR(src_page)) {
			if (IS_ERR(src_page)) {
				err = PTR_ERR(src_page);
				err = PTR_ERR(src_page);
				fsverity_err(inode,
				fsverity_err(inode,
+1 −0
Original line number Original line Diff line number Diff line
@@ -50,6 +50,7 @@ struct merkle_tree_params {
	unsigned int log_arity;		/* log2(hashes_per_block) */
	unsigned int log_arity;		/* log2(hashes_per_block) */
	unsigned int num_levels;	/* number of levels in Merkle tree */
	unsigned int num_levels;	/* number of levels in Merkle tree */
	u64 tree_size;			/* Merkle tree size in bytes */
	u64 tree_size;			/* Merkle tree size in bytes */
	unsigned long level0_blocks;	/* number of blocks in tree level 0 */


	/*
	/*
	 * Starting block index for each tree level, ordered from leaf level (0)
	 * Starting block index for each tree level, ordered from leaf level (0)
+1 −0
Original line number Original line Diff line number Diff line
@@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
		/* temporarily using level_start[] to store blocks in level */
		/* temporarily using level_start[] to store blocks in level */
		params->level_start[params->num_levels++] = blocks;
		params->level_start[params->num_levels++] = blocks;
	}
	}
	params->level0_blocks = params->level_start[0];


	/* Compute the starting block of each level */
	/* Compute the starting block of each level */
	offset = 0;
	offset = 0;
Loading