dm-verity: use 2-way interleaved SHA-256 hashing when supported
author Eric Biggers <ebiggers@kernel.org>
Tue, 14 Oct 2025 21:16:57 +0000 (14:16 -0700)
committer Mikulas Patocka <mpatocka@redhat.com>
Mon, 20 Oct 2025 13:47:42 +0000 (15:47 +0200)
When the crypto library provides an optimized implementation of
sha256_finup_2x(), use it to interleave the hashing of pairs of data
blocks.  On some CPUs this nearly doubles hashing performance.  The
increase in overall throughput of cold-cache dm-verity reads that I'm
seeing on arm64 and x86_64 is roughly 35% (though this metric is hard to
measure as it jumps around a lot).
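
For illustration, here is a hedged sketch (not part of the patch) of how a pair of queued data blocks is hashed: one interleaved pass when the library reports an optimized implementation, and a sequential fallback otherwise.  The sha256_finup_2x() call shape, the pending_block fields, and verity_hash() mirror the hunks below; the helper function itself and its error handling are simplified assumptions.

	/*
	 * Hedged sketch, not part of the patch: produce both digests with
	 * one interleaved SHA-256 computation when possible, otherwise
	 * hash the two blocks one after the other.
	 */
	static int hash_block_pair(struct dm_verity *v, struct dm_verity_io *io,
				   struct pending_block *b0,
				   struct pending_block *b1, size_t block_size)
	{
		int r;

		if (sha256_finup_2x_is_optimized()) {
			/* One interleaved pass over both blocks. */
			sha256_finup_2x(v->initial_hashstate.sha256,
					b0->data, b1->data, block_size,
					b0->real_digest, b1->real_digest);
			return 0;
		}

		/* Sequential fallback: two independent hashes. */
		r = verity_hash(v, io, b0->data, block_size, b0->real_digest);
		if (r)
			return r;
		return verity_hash(v, io, b1->data, block_size, b1->real_digest);
	}

In the patch itself, the sha256_finup_2x_is_optimized() check is done once at table setup and latched into v->use_sha256_finup_2x, rather than per hash call.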

For now this is done only on data blocks, not Merkle tree blocks.  We
could use sha256_finup_2x() on Merkle tree blocks too, but that is less
important as there aren't as many Merkle tree blocks as data blocks, and
that would require some additional code restructuring.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
drivers/md/dm-verity-target.c
drivers/md/dm-verity.h

drivers/md/dm-verity-target.c
index af9f1544af3eab7a63bfc67465eb50972e1b91b8..bf0aee73b074cd4f2a6afec771085193c3013ed6 100644
@@ -417,9 +417,12 @@ free_ret:
 static int verity_handle_data_hash_mismatch(struct dm_verity *v,
                                            struct dm_verity_io *io,
                                            struct bio *bio,
-                                           const u8 *want_digest,
-                                           sector_t blkno, u8 *data)
+                                           struct pending_block *block)
 {
+       const u8 *want_digest = block->want_digest;
+       sector_t blkno = block->blkno;
+       u8 *data = block->data;
+
        if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
                /*
                 * Error handling code (FEC included) cannot be run in the
@@ -448,6 +451,58 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
        return 0;
 }
 
+static void verity_clear_pending_blocks(struct dm_verity_io *io)
+{
+       int i;
+
+       for (i = io->num_pending - 1; i >= 0; i--) {
+               kunmap_local(io->pending_blocks[i].data);
+               io->pending_blocks[i].data = NULL;
+       }
+       io->num_pending = 0;
+}
+
+static int verity_verify_pending_blocks(struct dm_verity *v,
+                                       struct dm_verity_io *io,
+                                       struct bio *bio)
+{
+       const unsigned int block_size = 1 << v->data_dev_block_bits;
+       int i, r;
+
+       if (io->num_pending == 2) {
+               /* num_pending == 2 implies that the algorithm is SHA-256 */
+               sha256_finup_2x(v->initial_hashstate.sha256,
+                               io->pending_blocks[0].data,
+                               io->pending_blocks[1].data, block_size,
+                               io->pending_blocks[0].real_digest,
+                               io->pending_blocks[1].real_digest);
+       } else {
+               for (i = 0; i < io->num_pending; i++) {
+                       r = verity_hash(v, io, io->pending_blocks[i].data,
+                                       block_size,
+                                       io->pending_blocks[i].real_digest);
+                       if (unlikely(r))
+                               return r;
+               }
+       }
+
+       for (i = 0; i < io->num_pending; i++) {
+               struct pending_block *block = &io->pending_blocks[i];
+
+               if (likely(memcmp(block->real_digest, block->want_digest,
+                                 v->digest_size) == 0)) {
+                       if (v->validated_blocks)
+                               set_bit(block->blkno, v->validated_blocks);
+               } else {
+                       r = verity_handle_data_hash_mismatch(v, io, bio, block);
+                       if (unlikely(r))
+                               return r;
+               }
+       }
+       verity_clear_pending_blocks(io);
+       return 0;
+}
+
 /*
  * Verify one "dm_verity_io" structure.
  */
@@ -455,10 +510,14 @@ static int verity_verify_io(struct dm_verity_io *io)
 {
        struct dm_verity *v = io->v;
        const unsigned int block_size = 1 << v->data_dev_block_bits;
+       const int max_pending = v->use_sha256_finup_2x ? 2 : 1;
        struct bvec_iter iter_copy;
        struct bvec_iter *iter;
        struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
        unsigned int b;
+       int r;
+
+       io->num_pending = 0;
 
        if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
                /*
@@ -472,21 +531,22 @@ static int verity_verify_io(struct dm_verity_io *io)
 
        for (b = 0; b < io->n_blocks;
             b++, bio_advance_iter(bio, iter, block_size)) {
-               int r;
-               sector_t cur_block = io->block + b;
+               sector_t blkno = io->block + b;
+               struct pending_block *block;
                bool is_zero;
                struct bio_vec bv;
                void *data;
 
                if (v->validated_blocks && bio->bi_status == BLK_STS_OK &&
-                   likely(test_bit(cur_block, v->validated_blocks)))
+                   likely(test_bit(blkno, v->validated_blocks)))
                        continue;
 
-               r = verity_hash_for_block(v, io, cur_block,
-                                         verity_io_want_digest(v, io),
+               block = &io->pending_blocks[io->num_pending];
+
+               r = verity_hash_for_block(v, io, blkno, block->want_digest,
                                          &is_zero);
                if (unlikely(r < 0))
-                       return r;
+                       goto error;
 
                bv = bio_iter_iovec(bio, *iter);
                if (unlikely(bv.bv_len < block_size)) {
@@ -497,7 +557,8 @@ static int verity_verify_io(struct dm_verity_io *io)
                         * data block size to be greater than PAGE_SIZE.
                         */
                        DMERR_LIMIT("unaligned io (data block spans pages)");
-                       return -EIO;
+                       r = -EIO;
+                       goto error;
                }
 
                data = bvec_kmap_local(&bv);
@@ -511,30 +572,26 @@ static int verity_verify_io(struct dm_verity_io *io)
                        kunmap_local(data);
                        continue;
                }
-
-               r = verity_hash(v, io, data, block_size,
-                               verity_io_real_digest(v, io));
-               if (unlikely(r < 0)) {
-                       kunmap_local(data);
-                       return r;
+               block->data = data;
+               block->blkno = blkno;
+               if (++io->num_pending == max_pending) {
+                       r = verity_verify_pending_blocks(v, io, bio);
+                       if (unlikely(r))
+                               goto error;
                }
+       }
 
-               if (likely(memcmp(verity_io_real_digest(v, io),
-                                 verity_io_want_digest(v, io), v->digest_size) == 0)) {
-                       if (v->validated_blocks)
-                               set_bit(cur_block, v->validated_blocks);
-                       kunmap_local(data);
-                       continue;
-               }
-               r = verity_handle_data_hash_mismatch(v, io, bio,
-                                                    verity_io_want_digest(v, io),
-                                                    cur_block, data);
-               kunmap_local(data);
+       if (io->num_pending) {
+               r = verity_verify_pending_blocks(v, io, bio);
                if (unlikely(r))
-                       return r;
+                       goto error;
        }
 
        return 0;
+
+error:
+       verity_clear_pending_blocks(io);
+       return r;
 }
 
 /*
@@ -1277,6 +1334,8 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
                 * interleaved hashing support.
                 */
                v->use_sha256_lib = true;
+               if (sha256_finup_2x_is_optimized())
+                       v->use_sha256_finup_2x = true;
                ti->per_io_data_size =
                        offsetofend(struct dm_verity_io, hash_ctx.sha256);
        } else {
drivers/md/dm-verity.h
index cf7973ed30596cc1ca45e4e2a8bce08f22a5a54c..f975a9e5c5d6ba411766e090130aa8c7ff759781 100644
@@ -64,6 +64,7 @@ struct dm_verity {
        bool hash_failed:1;     /* set if hash of any block failed */
        bool use_bh_wq:1;       /* try to verify in BH wq before normal work-queue */
        bool use_sha256_lib:1;  /* use SHA-256 library instead of generic crypto API */
+       bool use_sha256_finup_2x:1; /* use interleaved hashing optimization */
        unsigned int digest_size;       /* digest size for the current hash algorithm */
        enum verity_mode mode;  /* mode for handling verification errors */
        enum verity_mode error_mode;/* mode for handling I/O errors */
@@ -83,6 +84,13 @@ struct dm_verity {
        mempool_t recheck_pool;
 };
 
+struct pending_block {
+       void *data;
+       sector_t blkno;
+       u8 want_digest[HASH_MAX_DIGESTSIZE];
+       u8 real_digest[HASH_MAX_DIGESTSIZE];
+};
+
 struct dm_verity_io {
        struct dm_verity *v;
 
@@ -100,8 +108,15 @@ struct dm_verity_io {
        struct work_struct bh_work;
 
        u8 tmp_digest[HASH_MAX_DIGESTSIZE];
-       u8 real_digest[HASH_MAX_DIGESTSIZE];
-       u8 want_digest[HASH_MAX_DIGESTSIZE];
+
+       /*
+        * This is the queue of data blocks that are pending verification.  When
+        * the crypto layer supports interleaved hashing, we allow multiple
+        * blocks to be queued up in order to utilize it.  This can improve
+        * performance significantly vs. sequential hashing of each block.
+        */
+       int num_pending;
+       struct pending_block pending_blocks[2];
 
        /*
         * Temporary space for hashing.  Either sha256 or shash is used,
@@ -116,18 +131,6 @@ struct dm_verity_io {
        } hash_ctx;
 };
 
-static inline u8 *verity_io_real_digest(struct dm_verity *v,
-                                       struct dm_verity_io *io)
-{
-       return io->real_digest;
-}
-
-static inline u8 *verity_io_want_digest(struct dm_verity *v,
-                                       struct dm_verity_io *io)
-{
-       return io->want_digest;
-}
-
 extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
                       const u8 *data, size_t len, u8 *digest);
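
For context, the queue/flush lifecycle that pending_blocks[] and num_pending support looks roughly like the following.  This is a simplified, hedged sketch of the verity_verify_io() hunks earlier in the diff; map_data_block() and look_up_want_digest() are hypothetical stand-ins for the bvec mapping and verity_hash_for_block() steps, and error handling is omitted.

	/*
	 * Hedged sketch of the per-io batching pattern, not part of the
	 * patch.  max_pending is 2 when the optimized sha256_finup_2x()
	 * is available, otherwise 1, so behaviour without the library
	 * optimization is unchanged.
	 */
	for (b = 0; b < io->n_blocks; b++) {
		struct pending_block *block = &io->pending_blocks[io->num_pending];

		block->blkno = io->block + b;
		block->data = map_data_block(io, b);   /* hypothetical helper */
		look_up_want_digest(v, io, block);     /* hypothetical helper */

		if (++io->num_pending == max_pending)
			verity_verify_pending_blocks(v, io, bio);
	}
	if (io->num_pending)	/* flush a final unpaired block */
		verity_verify_pending_blocks(v, io, bio);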