]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
block: add configurable error injection
authorChristoph Hellwig <hch@lst.de>
Thu, 11 Jun 2026 14:06:47 +0000 (16:06 +0200)
committerJens Axboe <axboe@kernel.dk>
Fri, 12 Jun 2026 16:40:35 +0000 (10:40 -0600)
Add a new block error injection interface that allows injecting specific
status codes for specific sector ranges.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Md Haris Iqbal <haris.iqbal@linux.dev>
Link: https://patch.msgid.link/20260611140703.2401204-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Documentation/block/error-injection.rst [new file with mode: 0644]
Documentation/block/index.rst
block/Kconfig
block/Makefile
block/blk-core.c
block/blk-sysfs.c
block/error-injection.c [new file with mode: 0644]
block/error-injection.h [new file with mode: 0644]
block/genhd.c
include/linux/blkdev.h

diff --git a/Documentation/block/error-injection.rst b/Documentation/block/error-injection.rst
new file mode 100644 (file)
index 0000000..81f31af
--- /dev/null
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Configurable Error Injection
+============================
+
+Overview
+--------
+
+Configurable error injection allows injecting specific block layer status codes
+for sector ranges of a block device.  Errors can be injected unconditionally, or
+with a given probability.
+
+To use configurable error injection, CONFIG_BLK_ERROR_INJECTION must be enabled.
+
+The only interface is the error_injection debugfs file, which is created for
+each registered gendisk.  Writes to this file are used to create or delete rules
+and reads return a list of the current error injection sites.
+
+Options
+-------
+
+The following options specify the operations:
+
+===================    =======================================================
+add                    add a new rule
+removeall              remove all existing rules
+===================    =======================================================
+
+The following options specify the details of the rule for the add operation:
+
+===================    =======================================================
+op=<string>            block layer operation this rule applies to.  This uses
+                       the XYZ for each REQ_OP_XYZ operation, e.g. READ, WRITE
+                       or DISCARD. Mandatory.
+status=<string>                Status to return.  This uses XYZ for each BLK_STS_XYZ
+                       code, e.g. IOERR or MEDIUM. Mandatory.
+start=<number>         First block layer sector the rule applies to.
+                       Optional, defaults to 0.
+nr_sectors=<number>    Number of sectors this rule applies to.
+                       Optional, defaults to the remainder of the device.
+chance=<number>                Only return a failure with a likelihood of 1/chance.
+                       Optional, defaults to 1 (always).
+===================    =======================================================
+
+Example
+-------
+
+Return BLK_STS_IOERR for one in 10 reads of sector 0 of /dev/nvme0n1:
+
+       $ echo 'add,op=READ,start=0,nr_sectors=1,status=IOERR,chance=10' > /sys/kernel/debug/block/nvme0n1/error_injection
+
+Return BLK_STS_MEDIUM for every write to /dev/nvme0n1:
+
+       $ echo 'add,op=WRITE,start=0,status=MEDIUM' > /sys/kernel/debug/block/nvme0n1/error_injection
+
+Remove all rules for /dev/nvme0n1:
+
+       $ echo 'removeall' > /sys/kernel/debug/block/nvme0n1/error_injection
index 9fea696f9daa0121a2e3e57368e2ebd44cbe0848..bfa1bbd31ddf31ea569d568e08f40e1ad6a484c0 100644 (file)
@@ -22,3 +22,4 @@ Block
    switching-sched
    writeback_cache_control
    ublk
+   error-injection
index 15027963472d7b40e27b9097a5993c457b5b3054..70e4a66d941ff66ecb0ec34f4f7eff7fd1e4be40 100644 (file)
@@ -221,6 +221,14 @@ config BLOCK_HOLDER_DEPRECATED
 config BLK_MQ_STACKING
        bool
 
+config BLK_ERROR_INJECTION
+       bool "Enable block layer error injection"
+       select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
+       help
+         Enable inserting arbitrary block errors through a debugfs interface.
+
+         See Documentation/block/error-injection.rst for details.
+
 source "block/Kconfig.iosched"
 
 endif # BLOCK
index 54130faacc21eeed19f6a0dfd03b7b4e70885a6e..e7bd320e3d6971a8abf584f5de42776bb54e8216 100644 (file)
@@ -13,6 +13,7 @@ obj-y         := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
                        genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
                        disk-events.o blk-ia-ranges.o early-lookup.o
 
+obj-$(CONFIG_BLK_ERROR_INJECTION) += error-injection.o
 obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)   += bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)       += blk-cgroup.o
index beaab7a71fba7c42635d5d9ed2518b4956b04c18..73a41df98c9a3ca18a19c2e4db0269ff8e0fe6be 100644 (file)
@@ -50,6 +50,7 @@
 #include "blk-cgroup.h"
 #include "blk-throttle.h"
 #include "blk-ioprio.h"
+#include "error-injection.h"
 
 struct dentry *blk_debugfs_root;
 
@@ -767,6 +768,9 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 
 void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 {
+       if (unlikely(blk_error_inject(bio)))
+               return;
+
        blk_cgroup_bio_start(bio);
 
        if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
index f22c1f253eb3a77c6329fa8aba89f97abe95e1bd..520972676ab46cd997996558f4507ec83731162d 100644 (file)
@@ -19,6 +19,7 @@
 #include "blk-wbt.h"
 #include "blk-cgroup.h"
 #include "blk-throttle.h"
+#include "error-injection.h"
 
 struct queue_sysfs_entry {
        struct attribute attr;
@@ -933,6 +934,8 @@ static void blk_debugfs_remove(struct gendisk *disk)
 
        blk_debugfs_lock_nomemsave(q);
        blk_trace_shutdown(q);
+       if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
+               blk_error_injection_exit(disk);
        debugfs_remove_recursive(q->debugfs_dir);
        q->debugfs_dir = NULL;
        q->sched_debugfs_dir = NULL;
@@ -963,6 +966,8 @@ int blk_register_queue(struct gendisk *disk)
 
        memflags = blk_debugfs_lock(q);
        q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
+       if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
+               blk_error_injection_init(disk);
        if (queue_is_mq(q))
                blk_mq_debugfs_register(q);
        blk_debugfs_unlock(q, memflags);
diff --git a/block/error-injection.c b/block/error-injection.c
new file mode 100644 (file)
index 0000000..d24c90e
--- /dev/null
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Christoph Hellwig.
+ */
+#include <linux/debugfs.h>
+#include <linux/blkdev.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include "blk.h"
+#include "error-injection.h"
+
+/*
+ * One error injection rule.  Rules are linked on the owning gendisk's
+ * error_injection_list.
+ */
+struct blk_error_inject {
+       struct list_head                entry;
+       /* inclusive sector range the rule covers */
+       sector_t                        start;
+       sector_t                        end;
+       /* bio operation the rule matches and the status injected for it */
+       enum req_op                     op;
+       blk_status_t                    status;
+
+       /* only inject every 1 / chance times */
+       unsigned int                    chance;
+};
+
+/*
+ * Global gate for the submission fast path.  Incremented by
+ * error_inject_add() when a disk gets its first rule and decremented by
+ * error_inject_removeall().
+ */
+DEFINE_STATIC_KEY_FALSE(blk_error_injection_enabled);
+
+/*
+ * Slow path of blk_error_inject(): walk the rules of the bio's disk under
+ * the RCU read lock and fail @bio with the status of the first rule that
+ * matches its operation and sector range (and wins the chance roll).
+ *
+ * Returns true if @bio was completed with an injected status, in which case
+ * the caller must not submit it, or false if no rule fired.
+ */
+bool __blk_error_inject(struct bio *bio)
+{
+       struct gendisk *disk = bio->bi_bdev->bd_disk;
+       struct blk_error_inject *inj;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(inj, &disk->error_injection_list, entry) {
+               if (bio_op(bio) != inj->op)
+                       continue;
+               /*
+                * This never matches 0-sized bios like empty WRITEs with
+                * REQ_PREFLUSH or ZONE_RESET_ALL.  While adding a special case
+                * for them would be trivial, that means any WRITE rule would
+                * trigger for flushes.  So before we can make this work
+                * properly, we'll need to start using REQ_OP_FLUSH for pure
+                * flushes at the bio level like we already do in blk-mq.
+                *
+                * NOTE(review): assuming bio_end_sector() is bi_sector +
+                * bio_sectors(), a 0-sized bio with start < bi_sector <= end
+                * is not skipped by the check below - confirm whether that is
+                * intended.
+                */
+               if (bio->bi_iter.bi_sector > inj->end ||
+                   bio_end_sector(bio) <= inj->start)
+                       continue;
+               /* chance <= 1 always fires, otherwise only on a zero remainder */
+               if (inj->chance > 1 && (get_random_u32() % inj->chance) != 0)
+                       continue;
+
+               pr_info_ratelimited("%pg: injecting %s error for %s at sector %llu:%u\n",
+                               disk->part0, blk_status_to_str(inj->status),
+                               blk_op_str(inj->op), bio->bi_iter.bi_sector,
+                               bio_sectors(bio));
+               bio->bi_status = inj->status;
+               /* inj is not touched past this point, only the bio is */
+               rcu_read_unlock();
+               bio_endio(bio);
+               return true;
+       }
+       rcu_read_unlock();
+       return false;
+}
+
+/*
+ * Create a new injection rule for @disk and publish it on the disk's rule
+ * list.
+ *
+ * @op:         operation to match; REQ_OP_LAST is rejected
+ * @start:      first sector covered by the rule
+ * @nr_sectors: number of sectors covered; 0 means up to U64_MAX
+ * @status:     status to inject; BLK_STS_OK is rejected
+ * @chance:     stored as is, see struct blk_error_inject
+ *
+ * Returns 0 on success, -EINVAL for an invalid op or status or a range that
+ * would overflow, -ENOMEM if the rule cannot be allocated and -ENODEV if the
+ * disk is not live.
+ */
+static int error_inject_add(struct gendisk *disk, enum req_op op,
+               sector_t start, u64 nr_sectors, blk_status_t status,
+               unsigned int chance)
+{
+       struct blk_error_inject *inj;
+       int error = -EINVAL;
+
+       if (op == REQ_OP_LAST)
+               return -EINVAL;
+       if (status == BLK_STS_OK)
+               return -EINVAL;
+
+       inj = kzalloc_obj(*inj);
+       if (!inj)
+               return -ENOMEM;
+
+       if (nr_sectors) {
+               /* reject ranges where start + nr_sectors would exceed U64_MAX */
+               if (U64_MAX - nr_sectors < start)
+                       goto out_free_inj;
+               inj->end = start + nr_sectors - 1;
+       } else {
+               /* no length given: cover everything from start onwards */
+               inj->end = U64_MAX;
+       }
+
+       inj->op = op;
+       inj->start = start;
+       inj->status = status;
+       inj->chance = chance;
+
+       pr_debug_ratelimited("%pg: adding %s injection for %s at sector %llu:%llu\n",
+                       disk->part0, blk_status_to_str(status),
+                       blk_op_str(op),
+                       start, nr_sectors);
+
+       /*
+        * Add to the front of the list so that newer entries can partially
+        * override other entries.  This also intentionally allows duplicate
+        * entries as there is no real reason to reject them.
+        */
+       mutex_lock(&disk->error_injection_lock);
+       if (!disk_live(disk)) {
+               mutex_unlock(&disk->error_injection_lock);
+               error = -ENODEV;
+               goto out_free_inj;
+       }
+       /* the first rule on this disk takes one reference on the static key */
+       if (list_empty(&disk->error_injection_list))
+               static_branch_inc(&blk_error_injection_enabled);
+       list_add_rcu(&inj->entry, &disk->error_injection_list);
+       /* per-disk flag tested by blk_error_inject() before walking the list */
+       set_bit(GD_ERROR_INJECT, &disk->state);
+       mutex_unlock(&disk->error_injection_lock);
+       return 0;
+
+out_free_inj:
+       kfree(inj);
+       return error;
+}
+
+/*
+ * Remove every error injection rule attached to @disk.
+ *
+ * Clears the per-disk GD_ERROR_INJECT flag, unlinks all rules and frees them
+ * after an RCU grace period so that concurrent list walkers stay safe.
+ * Calling this for a disk without any rules is a no-op apart from clearing
+ * the (already clear) flag.
+ */
+static void error_inject_removeall(struct gendisk *disk)
+{
+       struct blk_error_inject *inj;
+       bool had_rules;
+
+       mutex_lock(&disk->error_injection_lock);
+       had_rules = !list_empty(&disk->error_injection_list);
+       clear_bit(GD_ERROR_INJECT, &disk->state);
+       while ((inj = list_first_entry_or_null(&disk->error_injection_list,
+                       struct blk_error_inject, entry))) {
+               list_del_rcu(&inj->entry);
+               kfree_rcu_mightsleep(inj);
+       }
+       /*
+        * error_inject_add() only takes a static key reference when the list
+        * goes from empty to non-empty, so only drop one if this disk actually
+        * held it.  Dropping it unconditionally would underflow the key, or
+        * steal the reference of another disk that still has rules.
+        */
+       if (had_rules)
+               static_branch_dec(&blk_error_injection_enabled);
+       mutex_unlock(&disk->error_injection_lock);
+}
+
+/*
+ * Option tokens.  Each real option is a distinct bit so that
+ * blk_error_injection_parse_options() can OR the tokens it has seen into a
+ * mask.
+ */
+enum options {
+       /* actions */
+       Opt_add                 = (1u << 0),
+       Opt_removeall           = (1u << 1),
+
+       /* details of the rule for the add action */
+       Opt_op                  = (1u << 16),
+       Opt_start               = (1u << 17),
+       Opt_nr_sectors          = (1u << 18),
+       Opt_status              = (1u << 19),
+       Opt_chance              = (1u << 20),
+
+       /* terminator of opt_tokens; implicitly Opt_chance + 1, not a single bit */
+       Opt_invalid,
+};
+
+/*
+ * Patterns handed to match_token() for each comma or newline separated
+ * option written to the error_injection file.
+ */
+static const match_table_t opt_tokens = {
+       { Opt_add,                      "add",                  },
+       { Opt_removeall,                "removeall",            },
+       { Opt_op,                       "op=%s",                },
+       { Opt_start,                    "start=%u"              },
+       { Opt_nr_sectors,               "nr_sectors=%u"         },
+       { Opt_status,                   "status=%s"             },
+       { Opt_chance,                   "chance=%u"             },
+       /* NULL pattern terminates the table */
+       { Opt_invalid,                  NULL,                   },
+};
+
+/*
+ * Parse the argument of an "op=" option into *@op.
+ *
+ * Returns -ENOMEM if the argument cannot be duplicated and 0 otherwise.  An
+ * unrecognised name is only warned about here; *@op is then REQ_OP_LAST and
+ * it is up to the caller to reject it.
+ */
+static int match_op(substring_t *args, enum req_op *op)
+{
+       char *name = match_strdup(args);
+       enum req_op parsed;
+
+       if (!name)
+               return -ENOMEM;
+
+       parsed = str_to_blk_op(name);
+       if (parsed == REQ_OP_LAST)
+               pr_warn("invalid op '%s'\n", name);
+       kfree(name);
+
+       *op = parsed;
+       return 0;
+}
+
+/*
+ * Parse the argument of a "status=" option into *@status.
+ *
+ * Returns -ENOMEM if the argument cannot be duplicated and 0 otherwise.  A
+ * tag that maps to a zero status is only warned about here; *@status is
+ * still set to it and it is up to the caller to reject it.
+ */
+static int match_status(substring_t *args, blk_status_t *status)
+{
+       char *name = match_strdup(args);
+       blk_status_t parsed;
+
+       if (!name)
+               return -ENOMEM;
+
+       parsed = tag_to_blk_status(name);
+       if (!parsed)
+               pr_warn("invalid status '%s'\n", name);
+       kfree(name);
+
+       *status = parsed;
+       return 0;
+}
+
+/*
+ * Parse one command written to the error_injection file and execute it.
+ *
+ * @options is split in place on ',' and '\n'; empty tokens are skipped.
+ * Exactly one action ("add" or "removeall") must be given.  "removeall"
+ * accepts no further options.  For "add" the remaining options fill in the
+ * rule; an op or status that was not given or not recognised is rejected by
+ * error_inject_add().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static ssize_t blk_error_injection_parse_options(struct gendisk *disk,
+               char *options)
+{
+       enum { Unset, Add, Removeall } action = Unset;
+       /* defaults: chance 1, start 0, nr_sectors 0 (no explicit length) */
+       unsigned int option_mask = 0, chance = 1;
+       enum req_op op = REQ_OP_LAST;
+       u64 start = 0, nr_sectors = 0;
+       blk_status_t status = BLK_STS_OK;
+       substring_t args[MAX_OPT_ARGS];
+       char *p;
+
+       while ((p = strsep(&options, ",\n")) != NULL) {
+               int error = 0;
+               ssize_t token;
+
+               if (!*p)
+                       continue;
+               token = match_token(p, opt_tokens, args);
+               /* remember every option seen so removeall can reject extras */
+               option_mask |= token;
+               switch (token) {
+               case Opt_add:
+                       if (action != Unset)
+                               return -EINVAL;
+                       action = Add;
+                       break;
+               case Opt_removeall:
+                       if (action != Unset)
+                               return -EINVAL;
+                       action = Removeall;
+                       break;
+               case Opt_op:
+                       error = match_op(args, &op);
+                       break;
+               case Opt_start:
+                       error = match_u64(args, &start);
+                       break;
+               case Opt_nr_sectors:
+                       error = match_u64(args, &nr_sectors);
+                       break;
+               case Opt_status:
+                       error = match_status(args, &status);
+                       break;
+               case Opt_chance:
+                       error = match_uint(args, &chance);
+                       /* a chance of 0 is not a valid 1/chance likelihood */
+                       if (!error && chance == 0)
+                               error = -EINVAL;
+                       break;
+               default:
+                       pr_warn("unknown parameter or missing value '%s'\n", p);
+                       error = -EINVAL;
+               }
+               if (error)
+                       return error;
+       }
+
+       switch (action) {
+       case Add:
+               return error_inject_add(disk, op, start, nr_sectors, status,
+                               chance);
+       case Removeall:
+               /* removeall takes no rule details */
+               if (option_mask & ~Opt_removeall)
+                       return -EINVAL;
+               error_inject_removeall(disk);
+               return 0;
+       default:
+               /* neither add nor removeall was given */
+               return -EINVAL;
+       }
+}
+
+/*
+ * debugfs write handler: copy the user buffer into a NUL-terminated kernel
+ * string and run it through the option parser.
+ *
+ * Returns @count when the command was accepted, otherwise the negative errno
+ * from copying or parsing.
+ */
+static ssize_t blk_error_injection_write(struct file *file,
+               const char __user *ubuf, size_t count, loff_t *pos)
+{
+       struct gendisk *disk = file_inode(file)->i_private;
+       char *buf = memdup_user_nul(ubuf, count);
+       int ret;
+
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
+
+       ret = blk_error_injection_parse_options(disk, buf);
+       kfree(buf);
+
+       if (!ret)
+               return count;
+       return ret;
+}
+
+/*
+ * seq_file show callback: print one line per rule of the disk in the form
+ * "<start>:<end> status=<tag>,chance=<chance>", walking the rule list under
+ * the RCU read lock.
+ *
+ * NOTE(review): the rule's op is not part of the output, so rules that only
+ * differ in their op print identically - confirm whether that is intended.
+ */
+static int blk_error_injection_show(struct seq_file *s, void *private)
+{
+       struct gendisk *disk = s->private;
+       struct blk_error_inject *inj;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(inj, &disk->error_injection_list, entry) {
+               seq_printf(s, "%llu:%llu status=%s,chance=%u",
+                       inj->start, inj->end,
+                       blk_status_to_tag(inj->status), inj->chance);
+               seq_putc(s, '\n');
+       }
+       rcu_read_unlock();
+       return 0;
+}
+
+/*
+ * Open handler: set up a single-record seq_file whose private data is the
+ * gendisk stored in the inode.
+ */
+static int blk_error_injection_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, blk_error_injection_show, inode->i_private);
+}
+
+/* Release handler: undo the single_open() done at open time. */
+static int blk_error_injection_release(struct inode *inode, struct file *file)
+{
+       return single_release(inode, file);
+}
+
+/*
+ * File operations of the per-disk "error_injection" debugfs file: writes
+ * execute a command, reads list the current rules through seq_file.
+ */
+static const struct file_operations blk_error_injection_fops = {
+       .owner          = THIS_MODULE,
+       .write          = blk_error_injection_write,
+       .read           = seq_read,
+       .open           = blk_error_injection_open,
+       .release        = blk_error_injection_release,
+};
+
+/*
+ * Create the "error_injection" file (mode 0600) in the debugfs directory of
+ * the disk's queue, with @disk as the file's private data.
+ */
+void blk_error_injection_init(struct gendisk *disk)
+{
+       debugfs_create_file("error_injection", 0600, disk->queue->debugfs_dir,
+                       disk, &blk_error_injection_fops);
+}
+
+/* Tear down error injection for @disk by dropping all of its rules. */
+void blk_error_injection_exit(struct gendisk *disk)
+{
+       error_inject_removeall(disk);
+}
diff --git a/block/error-injection.h b/block/error-injection.h
new file mode 100644 (file)
index 0000000..9821d77
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BLK_ERROR_INJECTION_H
+#define _BLK_ERROR_INJECTION_H 1
+
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(blk_error_injection_enabled);
+
+void blk_error_injection_init(struct gendisk *disk);
+void blk_error_injection_exit(struct gendisk *disk);
+bool __blk_error_inject(struct bio *bio);
+/*
+ * Fast-path check used at bio submission time.
+ *
+ * Only calls into __blk_error_inject() when the feature is compiled in, the
+ * global static key is enabled and the bio's disk has GD_ERROR_INJECT set.
+ * Returns true if @bio was completed with an injected error.
+ */
+static inline bool blk_error_inject(struct bio *bio)
+{
+       if (!IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
+               return false;
+       if (!static_branch_unlikely(&blk_error_injection_enabled))
+               return false;
+       if (!test_bit(GD_ERROR_INJECT, &bio->bi_bdev->bd_disk->state))
+               return false;
+
+       return __blk_error_inject(bio);
+}
+
+#endif /* _BLK_ERROR_INJECTION_H */
index 7d6854fd28e95ae9134309679a7c6a937f5b7db8..f84b6a355b574af88c870938a8c69aeb9f8f44ad 100644 (file)
@@ -1485,6 +1485,10 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
        lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
        INIT_LIST_HEAD(&disk->slave_bdevs);
+#endif
+#ifdef CONFIG_BLK_ERROR_INJECTION
+       mutex_init(&disk->error_injection_lock);
+       INIT_LIST_HEAD(&disk->error_injection_list);
 #endif
        mutex_init(&disk->rqos_state_mutex);
        kobject_init(&disk->queue_kobj, &blk_queue_ktype);
index 57e84d59a642792e3397af3989e41fe035112b2b..5070851cf9249277eb973b1425bf993ed30cae21 100644 (file)
@@ -176,6 +176,7 @@ struct gendisk {
 #define GD_SUPPRESS_PART_SCAN          5
 #define GD_OWNS_QUEUE                  6
 #define GD_ZONE_APPEND_USED            7
+#define GD_ERROR_INJECT                        8
 
        struct mutex open_mutex;        /* open/close mutex */
        unsigned open_partitions;       /* number of open partitions */
@@ -227,6 +228,11 @@ struct gendisk {
         */
        struct blk_independent_access_ranges *ia_ranges;
 
+#ifdef CONFIG_BLK_ERROR_INJECTION
+       struct mutex            error_injection_lock;
+       struct list_head        error_injection_list;
+#endif
+
        struct mutex rqos_state_mutex;  /* rqos state change mutex */
 };