*
* @inode: inode to read a merkle tree page for
* @index: page index relative to the start of the merkle tree
- * @num_ra_pages: number of pages to readahead. Optional, we ignore it
*
* The Merkle tree is stored in the filesystem btree, but its pages are cached
* with a logical position past EOF in the inode's mapping.
* Returns the page we read, or an ERR_PTR on error.
*/
static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
- pgoff_t index,
- unsigned long num_ra_pages)
+ pgoff_t index)
{
struct folio *folio;
u64 off = (u64)index << PAGE_SHIFT;
int ext4_read_folio(struct file *file, struct folio *folio)
{
- int ret = -EAGAIN;
struct inode *inode = folio->mapping->host;
+ int ret;
trace_ext4_read_folio(inode, folio);
- if (ext4_has_inline_data(inode))
+ if (ext4_has_inline_data(inode)) {
ret = ext4_readpage_inline(inode, folio);
+ if (ret != -EAGAIN)
+ return ret;
+ }
- if (ret == -EAGAIN)
- return ext4_mpage_readpages(inode, NULL, folio);
-
- return ret;
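+	/*
+	 * Kick off readahead of the Merkle tree blocks needed to verify this
+	 * data, so the hashes are hopefully already cached by the time the
+	 * data read completes.
+	 */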
+ if (ext4_need_verity(inode, folio->index))
+ fsverity_readahead(inode, folio->index, folio_nr_pages(folio));
+ return ext4_mpage_readpages(inode, NULL, folio);
}
void ext4_readahead(struct readahead_control *rac)
if (ext4_has_inline_data(inode))
return;
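+	/* Also read ahead the Merkle tree blocks covering this data range. */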
+ if (ext4_need_verity(inode, readahead_index(rac)))
+ fsverity_readahead(inode, readahead_index(rac),
+ readahead_count(rac));
ext4_mpage_readpages(inode, rac, NULL);
}
}
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
- pgoff_t index,
- unsigned long num_ra_pages)
+ pgoff_t index)
{
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
- return generic_read_merkle_tree_page(inode, index, num_ra_pages);
+ return generic_read_merkle_tree_page(inode, index);
+}
+
+static void ext4_readahead_merkle_tree(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages)
+{
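+	/*
+	 * The Merkle tree lives in the verity metadata area past EOF;
+	 * translate the tree-relative index to a page cache index.
+	 */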
+ index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
+ generic_readahead_merkle_tree(inode, index, nr_pages);
}
static int ext4_write_merkle_tree_block(struct file *file, const void *buf,
.end_enable_verity = ext4_end_enable_verity,
.get_verity_descriptor = ext4_get_verity_descriptor,
.read_merkle_tree_page = ext4_read_merkle_tree_page,
+ .readahead_merkle_tree = ext4_readahead_merkle_tree,
.write_merkle_tree_block = ext4_write_merkle_tree_block,
};
static int f2fs_read_data_folio(struct file *file, struct folio *folio)
{
struct inode *inode = folio->mapping->host;
- int ret = -EAGAIN;
+ int ret;
trace_f2fs_readpage(folio, DATA);
}
/* If the file has inline data, try to read it directly */
- if (f2fs_has_inline_data(inode))
+ if (f2fs_has_inline_data(inode)) {
ret = f2fs_read_inline_data(inode, folio);
- if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(inode, NULL, folio);
- return ret;
+ if (ret != -EAGAIN)
+ return ret;
+ }
+
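+	/* Read ahead the Merkle tree blocks needed to verify this data. */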
+ if (f2fs_need_verity(inode, folio->index))
+ fsverity_readahead(inode, folio->index, folio_nr_pages(folio));
+ return f2fs_mpage_readpages(inode, NULL, folio);
}
static void f2fs_readahead(struct readahead_control *rac)
if (f2fs_has_inline_data(inode))
return;
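+	/* Read ahead the Merkle tree blocks for the range being read. */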
+ if (f2fs_need_verity(inode, readahead_index(rac)))
+ fsverity_readahead(inode, readahead_index(rac),
+ readahead_count(rac));
f2fs_mpage_readpages(inode, rac, NULL);
}
}
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
- pgoff_t index,
- unsigned long num_ra_pages)
+ pgoff_t index)
{
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
- return generic_read_merkle_tree_page(inode, index, num_ra_pages);
+ return generic_read_merkle_tree_page(inode, index);
+}
+
+static void f2fs_readahead_merkle_tree(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages)
+{
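+	/* The tree is stored past f2fs_verity_metadata_pos() in the mapping. */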
+ index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
+ generic_readahead_merkle_tree(inode, index, nr_pages);
}
static int f2fs_write_merkle_tree_block(struct file *file, const void *buf,
.end_enable_verity = f2fs_end_enable_verity,
.get_verity_descriptor = f2fs_get_verity_descriptor,
.read_merkle_tree_page = f2fs_read_merkle_tree_page,
+ .readahead_merkle_tree = f2fs_readahead_merkle_tree,
.write_merkle_tree_block = f2fs_write_merkle_tree_block,
};
* Copyright 2019 Google LLC
*/
+#include <linux/export.h>
#include <linux/fsverity.h>
#include <linux/pagemap.h>
* generic_read_merkle_tree_page - generic ->read_merkle_tree_page helper
* @inode: inode containing the Merkle tree
* @index: 0-based index of the Merkle tree page in the inode
- * @num_ra_pages: The number of Merkle tree pages that should be prefetched.
*
* The caller needs to adjust @index from the Merkle-tree relative index passed
* to ->read_merkle_tree_page to the actual index where the Merkle tree is
* stored in the page cache for @inode.
*/
-struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index,
- unsigned long num_ra_pages)
+struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index)
{
struct folio *folio;
+ folio = read_mapping_folio(inode->i_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
+ return folio_file_page(folio, index);
+}
+EXPORT_SYMBOL_GPL(generic_read_merkle_tree_page);
+
+/**
+ * generic_readahead_merkle_tree() - generic ->readahead_merkle_tree helper
+ * @inode: inode containing the Merkle tree
+ * @index: 0-based index of the first Merkle tree page to read ahead in the
+ * inode
+ * @nr_pages: the number of Merkle tree pages that should be read ahead
+ *
+ * The caller needs to adjust @index from the Merkle-tree relative index passed
+ * to ->readahead_merkle_tree to the actual index where the Merkle tree is
+ * stored in the page cache for @inode.
+ */
+void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages)
+{
+ struct folio *folio;
+
+ lockdep_assert_held(&inode->i_mapping->invalidate_lock);
+
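+	/*
+	 * Peek at the first folio: only start readahead if it is absent or
+	 * not yet uptodate, as otherwise the range is likely cached already.
+	 */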
folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
if (folio == ERR_PTR(-ENOENT) ||
(!IS_ERR(folio) && !folio_test_uptodate(folio))) {
DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
- if (!IS_ERR(folio)) {
- folio_put(folio);
- } else if (num_ra_pages > 1) {
- filemap_invalidate_lock_shared(inode->i_mapping);
- page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
- filemap_invalidate_unlock_shared(inode->i_mapping);
- }
- folio = read_mapping_folio(inode->i_mapping, index, NULL);
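+		/*
+		 * Merkle tree pages are cached past EOF, so the regular
+		 * readahead code that stops at i_size can't be used here.
+		 */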
+ page_cache_ra_unbounded(&ractl, nr_pages, 0);
}
- if (IS_ERR(folio))
- return ERR_CAST(folio);
- return folio_file_page(folio, index);
+ if (!IS_ERR(folio))
+ folio_put(folio);
}
-EXPORT_SYMBOL_GPL(generic_read_merkle_tree_page);
+EXPORT_SYMBOL_GPL(generic_readahead_merkle_tree);
if (offset >= end_offset)
return 0;
offs_in_page = offset_in_page(offset);
+ index = offset >> PAGE_SHIFT;
last_index = (end_offset - 1) >> PAGE_SHIFT;
+ /*
+ * Kick off readahead for the range we are going to read to ensure a
+ * single large sequential read instead of lots of small ones.
+ */
+ if (inode->i_sb->s_vop->readahead_merkle_tree) {
+ filemap_invalidate_lock_shared(inode->i_mapping);
+ inode->i_sb->s_vop->readahead_merkle_tree(
+ inode, index, last_index - index + 1);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
+ }
+
/*
* Iterate through each Merkle tree page in the requested range and copy
* the requested portion to userspace. Note that the Merkle tree block
* size isn't important here, as we are returning a byte stream; i.e.,
* we can just work with pages even if the tree block size != PAGE_SIZE.
*/
- for (index = offset >> PAGE_SHIFT; index <= last_index; index++) {
- unsigned long num_ra_pages =
- min_t(unsigned long, last_index - index + 1,
- inode->i_sb->s_bdi->io_pages);
+ for (; index <= last_index; index++) {
unsigned int bytes_to_copy = min_t(u64, end_offset - offset,
PAGE_SIZE - offs_in_page);
struct page *page;
const void *virt;
- page = vops->read_merkle_tree_page(inode, index, num_ra_pages);
+ page = vops->read_merkle_tree_page(inode, index);
if (IS_ERR(page)) {
err = PTR_ERR(page);
fsverity_err(inode,
struct fsverity_verification_context {
struct inode *inode;
struct fsverity_info *vi;
- unsigned long max_ra_pages;
/*
* This is the queue of data blocks that are pending verification. When
static struct workqueue_struct *fsverity_read_workqueue;
+/**
+ * fsverity_readahead() - kick off readahead on fsverity hashes
+ * @inode: inode that is being read
+ * @index: first file data page index that is being read
+ * @nr_pages: number of file data pages to be read
+ *
+ * Start readahead on the fsverity hashes that are needed to verify the file
+ * data in the range from @index to @index + @nr_pages (exclusive upper bound).
+ *
+ * To be called from a filesystem's ->read_folio and ->readahead methods so
+ * that, if possible, the hashes are already cached by the time the file data
+ * read completes.
+ */
+void fsverity_readahead(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages)
+{
+ const struct fsverity_info *vi = *fsverity_info_addr(inode);
+ const struct merkle_tree_params *params = &vi->tree_params;
+ u64 start_hidx = (u64)index << params->log_blocks_per_page;
+ u64 end_hidx =
+ (((u64)index + nr_pages) << params->log_blocks_per_page) - 1;
+ int level;
+
+ if (!inode->i_sb->s_vop->readahead_merkle_tree)
+ return;
+
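+	/*
+	 * start_hidx and end_hidx initially hold the first and last data
+	 * block index.  Walk the tree from level 0 (the hash blocks directly
+	 * covering the data) towards the root: shifting by ->log_arity maps
+	 * a block index in the level below to the hash block covering it,
+	 * which is then offset by the start of the level and converted to a
+	 * page index in the inode's mapping.
+	 */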
+ for (level = 0; level < params->num_levels; level++) {
+ unsigned long level_start = params->level_start[level];
+ unsigned long next_start_hidx = start_hidx >> params->log_arity;
+ unsigned long next_end_hidx = end_hidx >> params->log_arity;
+ pgoff_t start_idx = (level_start + next_start_hidx) >>
+ params->log_blocks_per_page;
+ pgoff_t end_idx = (level_start + next_end_hidx) >>
+ params->log_blocks_per_page;
+
+ inode->i_sb->s_vop->readahead_merkle_tree(
+ inode, start_idx, end_idx - start_idx + 1);
+
+ start_hidx = next_start_hidx;
+ end_hidx = next_end_hidx;
+ }
+}
+EXPORT_SYMBOL_GPL(fsverity_readahead);
+
/*
* Returns true if the hash block with index @hblock_idx in the tree, located in
* @hpage, has already been verified.
* Return: %true if the data block is valid, else %false.
*/
static bool verify_data_block(struct inode *inode, struct fsverity_info *vi,
- const struct fsverity_pending_block *dblock,
- unsigned long max_ra_pages)
+ const struct fsverity_pending_block *dblock)
{
const u64 data_pos = dblock->pos;
const struct merkle_tree_params *params = &vi->tree_params;
(params->block_size - 1);
hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
- hpage_idx, level == 0 ? min(max_ra_pages,
- params->tree_pages - hpage_idx) : 0);
+ hpage_idx);
if (IS_ERR(hpage)) {
fsverity_err(inode,
"Error %ld reading Merkle tree page %lu",
static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
- struct inode *inode,
- unsigned long max_ra_pages)
+ struct inode *inode)
{
struct fsverity_info *vi = *fsverity_info_addr(inode);
ctx->inode = inode;
ctx->vi = vi;
- ctx->max_ra_pages = max_ra_pages;
ctx->num_pending = 0;
if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
sha256_finup_2x_is_optimized())
}
for (i = 0; i < ctx->num_pending; i++) {
- if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i],
- ctx->max_ra_pages))
+ if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i]))
return false;
}
fsverity_clear_pending_blocks(ctx);
{
struct fsverity_verification_context ctx;
- fsverity_init_verification_context(&ctx, folio->mapping->host, 0);
+ fsverity_init_verification_context(&ctx, folio->mapping->host);
if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
fsverity_verify_pending_blocks(&ctx))
struct inode *inode = bio_first_folio_all(bio)->mapping->host;
struct fsverity_verification_context ctx;
struct folio_iter fi;
- unsigned long max_ra_pages = 0;
-
- if (bio->bi_opf & REQ_RAHEAD) {
- /*
- * If this bio is for data readahead, then we also do readahead
- * of the first (largest) level of the Merkle tree. Namely,
- * when a Merkle tree page is read, we also try to piggy-back on
- * some additional pages -- up to 1/4 the number of data pages.
- *
- * This improves sequential read performance, as it greatly
- * reduces the number of I/O requests made to the Merkle tree.
- */
- max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
- }
- fsverity_init_verification_context(&ctx, inode, max_ra_pages);
+ fsverity_init_verification_context(&ctx, inode);
bio_for_each_folio_all(fi, bio) {
if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
*
* @inode: the inode
* @index: 0-based index of the page within the Merkle tree
- * @num_ra_pages: The number of Merkle tree pages that should be
- * prefetched starting at @index if the page at @index
- * isn't already cached. Implementations may ignore this
- * argument; it's only a performance optimization.
*
* This can be called at any time on an open verity file. It may be
* called by multiple processes concurrently, even with the same page.
* Return: the page on success, ERR_PTR() on failure
*/
struct page *(*read_merkle_tree_page)(struct inode *inode,
- pgoff_t index,
- unsigned long num_ra_pages);
+ pgoff_t index);
+
+ /**
+ * Perform readahead of a Merkle tree for the given inode.
+ *
+ * @inode: the inode
+ * @index: 0-based index of the first page within the Merkle tree
+	 * @nr_pages: number of pages to be read ahead
+ *
+ * This can be called at any time on an open verity file. It may be
+ * called by multiple processes concurrently, even with the same range.
+ *
+	 * This method is optional.  Implementing it allows
+	 * ->read_merkle_tree_page to usually find its data already cached,
+	 * instead of having to issue small dependent reads.
+ */
+ void (*readahead_merkle_tree)(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages);
/**
* Write a Merkle tree block to the given file.
}
void fsverity_cleanup_inode(struct inode *inode);
+void fsverity_readahead(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages);
-struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index,
- unsigned long num_ra_pages);
+struct page *generic_read_merkle_tree_page(struct inode *inode, pgoff_t index);
+void generic_readahead_merkle_tree(struct inode *inode, pgoff_t index,
+ unsigned long nr_pages);
#endif /* _LINUX_FSVERITY_H */