From: Eric Biggers Date: Tue, 14 Oct 2025 21:16:57 +0000 (-0700) Subject: dm-verity: use 2-way interleaved SHA-256 hashing when supported X-Git-Tag: v6.19-rc1~28^2~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=23f57ed9d26e309010996a6809e410ed59c7ec7c;p=thirdparty%2Flinux.git dm-verity: use 2-way interleaved SHA-256 hashing when supported When the crypto library provides an optimized implementation of sha256_finup_2x(), use it to interleave the hashing of pairs of data blocks. On some CPUs this nearly doubles hashing performance. The increase in overall throughput of cold-cache dm-verity reads that I'm seeing on arm64 and x86_64 is roughly 35% (though this metric is hard to measure as it jumps around a lot). For now this is done only on data blocks, not Merkle tree blocks. We could use sha256_finup_2x() on Merkle tree blocks too, but that is less important as there aren't as many Merkle tree blocks as data blocks, and that would require some additional code restructuring. Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka --- diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index af9f1544af3e..bf0aee73b074 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -417,9 +417,12 @@ free_ret: static int verity_handle_data_hash_mismatch(struct dm_verity *v, struct dm_verity_io *io, struct bio *bio, - const u8 *want_digest, - sector_t blkno, u8 *data) + struct pending_block *block) { + const u8 *want_digest = block->want_digest; + sector_t blkno = block->blkno; + u8 *data = block->data; + if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { /* * Error handling code (FEC included) cannot be run in the @@ -448,6 +451,58 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v, return 0; } +static void verity_clear_pending_blocks(struct dm_verity_io *io) +{ + int i; + + for (i = io->num_pending - 1; i >= 0; i--) { + kunmap_local(io->pending_blocks[i].data); + io->pending_blocks[i].data = NULL; + } + io->num_pending = 0; +} + +static int verity_verify_pending_blocks(struct dm_verity *v, + struct dm_verity_io *io, + struct bio *bio) +{ + const unsigned int block_size = 1 << v->data_dev_block_bits; + int i, r; + + if (io->num_pending == 2) { + /* num_pending == 2 implies that the algorithm is SHA-256 */ + sha256_finup_2x(v->initial_hashstate.sha256, + io->pending_blocks[0].data, + io->pending_blocks[1].data, block_size, + io->pending_blocks[0].real_digest, + io->pending_blocks[1].real_digest); + } else { + for (i = 0; i < io->num_pending; i++) { + r = verity_hash(v, io, io->pending_blocks[i].data, + block_size, + io->pending_blocks[i].real_digest); + if (unlikely(r)) + return r; + } + } + + for (i = 0; i < io->num_pending; i++) { + struct pending_block *block = &io->pending_blocks[i]; + + if (likely(memcmp(block->real_digest, block->want_digest, + v->digest_size) == 0)) { + if (v->validated_blocks) + set_bit(block->blkno, v->validated_blocks); + } else { + r = verity_handle_data_hash_mismatch(v, io, bio, block); + if (unlikely(r)) + return r; + } + } + verity_clear_pending_blocks(io); + return 0; +} + /* * Verify one "dm_verity_io" structure. */ @@ -455,10 +510,14 @@ static int verity_verify_io(struct dm_verity_io *io) { struct dm_verity *v = io->v; const unsigned int block_size = 1 << v->data_dev_block_bits; + const int max_pending = v->use_sha256_finup_2x ? 2 : 1; struct bvec_iter iter_copy; struct bvec_iter *iter; struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); unsigned int b; + int r; + + io->num_pending = 0; if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { /* @@ -472,21 +531,22 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++, bio_advance_iter(bio, iter, block_size)) { - int r; - sector_t cur_block = io->block + b; + sector_t blkno = io->block + b; + struct pending_block *block; bool is_zero; struct bio_vec bv; void *data; if (v->validated_blocks && bio->bi_status == BLK_STS_OK && - likely(test_bit(cur_block, v->validated_blocks))) + likely(test_bit(blkno, v->validated_blocks))) continue; - r = verity_hash_for_block(v, io, cur_block, - verity_io_want_digest(v, io), + block = &io->pending_blocks[io->num_pending]; + + r = verity_hash_for_block(v, io, blkno, block->want_digest, &is_zero); if (unlikely(r < 0)) - return r; + goto error; bv = bio_iter_iovec(bio, *iter); if (unlikely(bv.bv_len < block_size)) { @@ -497,7 +557,8 @@ static int verity_verify_io(struct dm_verity_io *io) * data block size to be greater than PAGE_SIZE. */ DMERR_LIMIT("unaligned io (data block spans pages)"); - return -EIO; + r = -EIO; + goto error; } data = bvec_kmap_local(&bv); @@ -511,30 +572,26 @@ static int verity_verify_io(struct dm_verity_io *io) kunmap_local(data); continue; } - - r = verity_hash(v, io, data, block_size, - verity_io_real_digest(v, io)); - if (unlikely(r < 0)) { - kunmap_local(data); - return r; + block->data = data; + block->blkno = blkno; + if (++io->num_pending == max_pending) { + r = verity_verify_pending_blocks(v, io, bio); + if (unlikely(r)) + goto error; } + } - if (likely(memcmp(verity_io_real_digest(v, io), - verity_io_want_digest(v, io), v->digest_size) == 0)) { - if (v->validated_blocks) - set_bit(cur_block, v->validated_blocks); - kunmap_local(data); - continue; - } - r = verity_handle_data_hash_mismatch(v, io, bio, - verity_io_want_digest(v, io), - cur_block, data); - kunmap_local(data); + if (io->num_pending) { + r = verity_verify_pending_blocks(v, io, bio); if (unlikely(r)) - return r; + goto error; } return 0; + +error: + verity_clear_pending_blocks(io); + return r; } /* @@ -1277,6 +1334,8 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name) * interleaved hashing support. */ v->use_sha256_lib = true; + if (sha256_finup_2x_is_optimized()) + v->use_sha256_finup_2x = true; ti->per_io_data_size = offsetofend(struct dm_verity_io, hash_ctx.sha256); } else { diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index cf7973ed3059..f975a9e5c5d6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -64,6 +64,7 @@ struct dm_verity { bool hash_failed:1; /* set if hash of any block failed */ bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */ bool use_sha256_lib:1; /* use SHA-256 library instead of generic crypto API */ + bool use_sha256_finup_2x:1; /* use interleaved hashing optimization */ unsigned int digest_size; /* digest size for the current hash algorithm */ enum verity_mode mode; /* mode for handling verification errors */ enum verity_mode error_mode;/* mode for handling I/O errors */ @@ -83,6 +84,13 @@ struct dm_verity { mempool_t recheck_pool; }; +struct pending_block { + void *data; + sector_t blkno; + u8 want_digest[HASH_MAX_DIGESTSIZE]; + u8 real_digest[HASH_MAX_DIGESTSIZE]; +}; + struct dm_verity_io { struct dm_verity *v; @@ -100,8 +108,15 @@ struct dm_verity_io { struct work_struct bh_work; u8 tmp_digest[HASH_MAX_DIGESTSIZE]; - u8 real_digest[HASH_MAX_DIGESTSIZE]; - u8 want_digest[HASH_MAX_DIGESTSIZE]; + + /* + * This is the queue of data blocks that are pending verification. When + * the crypto layer supports interleaved hashing, we allow multiple + * blocks to be queued up in order to utilize it. This can improve + * performance significantly vs. sequential hashing of each block. + */ + int num_pending; + struct pending_block pending_blocks[2]; /* * Temporary space for hashing. Either sha256 or shash is used, @@ -116,18 +131,6 @@ struct dm_verity_io { } hash_ctx; }; -static inline u8 *verity_io_real_digest(struct dm_verity *v, - struct dm_verity_io *io) -{ - return io->real_digest; -} - -static inline u8 *verity_io_want_digest(struct dm_verity *v, - struct dm_verity_io *io) -{ - return io->want_digest; -} - extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io, const u8 *data, size_t len, u8 *digest);