From fd39073dba8632575b920edefba2577e1b84262a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jan 2020 12:55:33 -0800 Subject: fs-verity: implement readahead of Merkle tree pages When fs-verity verifies data pages, currently it reads each Merkle tree page synchronously using read_mapping_page(). Therefore, when the Merkle tree pages aren't already cached, fs-verity causes an extra 4 KiB I/O request for every 512 KiB of data (assuming that the Merkle tree uses SHA-256 and 4 KiB blocks). This results in more I/O requests and performance loss than is strictly necessary. Therefore, implement readahead of the Merkle tree pages. For simplicity, we take advantage of the fact that the kernel already does readahead of the file's *data*, just like it does for any other file. Due to this, we don't really need a separate readahead state (struct file_ra_state) just for the Merkle tree, but rather we just need to piggy-back on the existing data readahead requests. We also only really need to bother with the first level of the Merkle tree, since the usual fan-out factor is 128, so normally over 99% of Merkle tree I/O requests are for the first level. Therefore, make fsverity_verify_bio() enable readahead of the first Merkle tree level, for up to 1/4 the number of pages in the bio, when it sees that the REQ_RAHEAD flag is set on the bio. The readahead size is then passed down to ->read_merkle_tree_page() for the filesystem to (optionally) implement if it sees that the requested page is uncached. While we're at it, also make build_merkle_tree_level() set the Merkle tree readahead size, since it's easy to do there. However, for now don't set the readahead size in fsverity_verify_page(), since currently it's only used to verify holes on ext4 and f2fs, and it would need parameters added to know how much to read ahead. This patch significantly improves fs-verity sequential read performance. Some quick benchmarks with 'cat'-ing a 250MB file after dropping caches: On an ARM64 phone (using sha256-ce): Before: 217 MB/s After: 263 MB/s (compare to sha256sum of non-verity file: 357 MB/s) In an x86_64 VM (using sha256-avx2): Before: 173 MB/s After: 215 MB/s (compare to sha256sum of non-verity file: 223 MB/s) Link: https://lore.kernel.org/r/20200106205533.137005-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/verity/enable.c | 8 +++++++- fs/verity/fsverity_private.h | 1 + fs/verity/open.c | 1 + fs/verity/verify.c | 34 ++++++++++++++++++++++++++++------ 4 files changed, 37 insertions(+), 7 deletions(-) (limited to 'fs/verity') diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 9c93c17f1c1c..efc79a2cedf2 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,6 +8,7 @@ #include "fsverity_private.h" #include +#include #include #include #include @@ -86,9 +87,14 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level, return err; } } else { + unsigned long num_ra_pages = + min_t(unsigned long, num_blocks_to_hash - i, + inode->i_sb->s_bdi->io_pages); + /* Non-leaf: hashing hash block from level below */ src_page = vops->read_merkle_tree_page(inode, - params->level_start[level - 1] + i); + params->level_start[level - 1] + i, + num_ra_pages); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index e74c79b64d88..ab9cfdd8f965 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -50,6 +50,7 @@ struct merkle_tree_params { unsigned int log_arity; /* log2(hashes_per_block) */ unsigned int num_levels; /* number of levels in Merkle tree */ u64 tree_size; /* Merkle tree size in bytes */ + unsigned long level0_blocks; /* number of blocks in tree level 0 */ /* * Starting block index for each tree level, ordered from leaf level (0) diff --git a/fs/verity/open.c b/fs/verity/open.c index 63d1004b688c..e9cdf7d00ed2 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, /* temporarily using level_start[] to store blocks in level */ params->level_start[params->num_levels++] = blocks; } + params->level0_blocks = params->level_start[0]; /* Compute the starting block of each level */ offset = 0; diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 3e8f2de44667..7fa561c343c2 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -84,7 +84,8 @@ static inline int cmp_hashes(const struct fsverity_info *vi, * Return: true if the page is valid, else false. */ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, - struct ahash_request *req, struct page *data_page) + struct ahash_request *req, struct page *data_page, + unsigned long level0_ra_pages) { const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; @@ -117,8 +118,8 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", level, hindex, hoffset); - hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, - hindex); + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex, + level == 0 ? level0_ra_pages : 0); if (IS_ERR(hpage)) { err = PTR_ERR(hpage); fsverity_err(inode, @@ -195,7 +196,7 @@ bool fsverity_verify_page(struct page *page) if (unlikely(!req)) return false; - valid = verify_page(inode, vi, req, page); + valid = verify_page(inode, vi, req, page, 0); ahash_request_free(req); @@ -222,21 +223,42 @@ void fsverity_verify_bio(struct bio *bio) { struct inode *inode = bio_first_page_all(bio)->mapping->host; const struct fsverity_info *vi = inode->i_verity_info; + const struct merkle_tree_params *params = &vi->tree_params; struct ahash_request *req; struct bio_vec *bv; struct bvec_iter_all iter_all; + unsigned long max_ra_pages = 0; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + req = ahash_request_alloc(params->hash_alg->tfm, GFP_NOFS); if (unlikely(!req)) { bio_for_each_segment_all(bv, bio, iter_all) SetPageError(bv->bv_page); return; } + if (bio->bi_opf & REQ_RAHEAD) { + /* + * If this bio is for data readahead, then we also do readahead + * of the first (largest) level of the Merkle tree. Namely, + * when a Merkle tree page is read, we also try to piggy-back on + * some additional pages -- up to 1/4 the number of data pages. + * + * This improves sequential read performance, as it greatly + * reduces the number of I/O requests made to the Merkle tree. + */ + bio_for_each_segment_all(bv, bio, iter_all) + max_ra_pages++; + max_ra_pages /= 4; + } + bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; + unsigned long level0_index = page->index >> params->log_arity; + unsigned long level0_ra_pages = + min(max_ra_pages, params->level0_blocks - level0_index); - if (!PageError(page) && !verify_page(inode, vi, req, page)) + if (!PageError(page) && + !verify_page(inode, vi, req, page, level0_ra_pages)) SetPageError(page); } -- cgit v1.2.3