// SPDX-License-Identifier: GPL-2.0-only
/*
 * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
 * Shaohua Li <shli@fb.com>
 */
#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/init.h>
#include "null_blk.h"

#undef pr_fmt
#define pr_fmt(fmt)	"null_blk: " fmt

#define FREE_BATCH		16

#define TICKS_PER_SEC		50ULL
#define TIMER_INTERVAL		(NSEC_PER_SEC / TICKS_PER_SEC)

#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static DECLARE_FAULT_ATTR(null_timeout_attr);
static DECLARE_FAULT_ATTR(null_requeue_attr);
static DECLARE_FAULT_ATTR(null_init_hctx_attr);
#endif

static inline u64 mb_per_tick(int mbps)
{
	return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
}

/*
 * Status flags for nullb_device.
 *
 * CONFIGURED:	Device has been configured and turned on. Cannot reconfigure.
 * UP:		Device is currently on and visible in userspace.
 * THROTTLED:	Device is being throttled.
 * CACHE:	Device is using a write-back cache.
 */
enum nullb_device_flags {
	NULLB_DEV_FL_CONFIGURED	= 0,
	NULLB_DEV_FL_UP		= 1,
	NULLB_DEV_FL_THROTTLED	= 2,
	NULLB_DEV_FL_CACHE	= 3,
};

#define MAP_SZ		((PAGE_SIZE >> SECTOR_SHIFT) + 2)
/*
 * nullb_page is a page in memory for nullb devices.
 *
 * @page:	The page holding the data.
 * @bitmap:	The bitmap represents which sector in the page has data.
 *		Each bit represents one block size. For example, sector 8
 *		will use the 7th bit
 * The highest 2 bits of bitmap are for special purpose. LOCK means the cache
 * page is being flushed to storage. FREE means the cache page is freed and
 * should be skipped from flushing to storage. Please see
 * null_make_cache_space().
 */
struct nullb_page {
	struct page *page;
	DECLARE_BITMAP(bitmap, MAP_SZ);
};
#define NULLB_PAGE_LOCK (MAP_SZ - 1)
#define NULLB_PAGE_FREE (MAP_SZ - 2)

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static DEFINE_IDA(nullb_indexes);
static struct blk_mq_tag_set tag_set;

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

static bool g_virt_boundary = false;
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");

static int g_no_sched;
module_param_named(no_sched, g_no_sched, int, 0444);
MODULE_PARM_DESC(no_sched, "No io scheduler");

static int g_submit_queues = 1;
module_param_named(submit_queues, g_submit_queues, int, 0444);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int g_poll_queues = 1;
module_param_named(poll_queues, g_poll_queues, int, 0444);
MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues");

static int g_home_node = NUMA_NO_NODE;
module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device");

#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
/*
 * For more details about fault injection, please refer to
 * Documentation/fault-injection/fault-injection.rst.
 */
static char g_timeout_str[80];
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");

static char g_requeue_str[80];
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");

static char g_init_hctx_str[80];
module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
#endif

/*
 * Historic queue modes.
 *
 * These days nothing but NULL_Q_MQ is actually supported, but we keep the
 * enum for error reporting.
 */
enum {
	NULL_Q_BIO	= 0,
	NULL_Q_RQ	= 1,
	NULL_Q_MQ	= 2,
};

static int g_queue_mode = NULL_Q_MQ;

static int null_param_store_val(const char *str, int *val, int min, int max)
{
	int ret, new_val;

	ret = kstrtoint(str, 10, &new_val);
	if (ret)
		return -EINVAL;

	if (new_val < min || new_val > max)
		return -EINVAL;

	*val = new_val;
	return 0;
}

static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}

static const struct kernel_param_ops null_queue_mode_param_ops = {
	.set	= null_set_queue_mode,
	.get	= param_get_int,
};

device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");

static int g_gb = 250;
module_param_named(gb, g_gb, int, 0444);
MODULE_PARM_DESC(gb, "Size in GB");

static int g_bs = 512;
module_param_named(bs, g_bs, int, 0444);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int g_max_sectors;
module_param_named(max_sectors, g_max_sectors, int, 0444);
MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");

static unsigned int nr_devices = 1;
module_param(nr_devices, uint, 0444);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static bool g_blocking;
module_param_named(blocking, g_blocking, bool, 0444);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");

static bool g_shared_tags;
module_param_named(shared_tags, g_shared_tags, bool, 0444);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");

static bool g_shared_tag_bitmap;
module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");

static int g_irqmode = NULL_IRQ_SOFTIRQ;

static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
					NULL_IRQ_TIMER);
}

static const struct kernel_param_ops null_irqmode_param_ops = {
	.set	= null_set_irqmode,
	.get	= param_get_int,
};

device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static unsigned long g_completion_nsec = 10000;
module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int g_hw_queue_depth = 64;
module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool g_use_per_node_hctx;
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

static bool g_memory_backed;
module_param_named(memory_backed, g_memory_backed, bool, 0444);
MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");

static bool g_discard;
module_param_named(discard, g_discard, bool, 0444);
MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");

static unsigned long g_cache_size;
module_param_named(cache_size, g_cache_size, ulong, 0444);
MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");

static bool g_fua = true;
module_param_named(fua, g_fua, bool, 0444);
MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");

static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");

static bool g_zoned;
module_param_named(zoned, g_zoned, bool, S_IRUGO);
MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");

static unsigned long g_zone_size = 256;
module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");

static unsigned long g_zone_capacity;
module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");

static unsigned int g_zone_nr_conv;
module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");

static unsigned int g_zone_max_open;
module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");

static unsigned int g_zone_max_active;
module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");

static int g_zone_append_max_sectors = INT_MAX;
module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
MODULE_PARM_DESC(zone_append_max_sectors,
		 "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");

static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
static int null_add_dev(struct nullb_device *dev);
static struct nullb *null_find_dev_by_name(const char *name);
static void null_free_device_storage(struct nullb_device *dev, bool is_cache);

static inline struct nullb_device *to_nullb_device(struct config_item *item)
{
	return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
}

static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
{
	return snprintf(page, PAGE_SIZE, "%u\n", val);
}

static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
	char *page)
{
	return snprintf(page, PAGE_SIZE, "%lu\n", val);
}

static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
{
	return snprintf(page, PAGE_SIZE, "%u\n", val);
}

static ssize_t nullb_device_uint_attr_store(unsigned int *val,
	const char *page, size_t count)
{
	unsigned int tmp;
	int result;

	result = kstrtouint(page, 0, &tmp);
	if (result < 0)
		return result;

	*val = tmp;
	return count;
}

static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
	const char *page, size_t count)
{
	int result;
	unsigned long tmp;

	result = kstrtoul(page, 0, &tmp);
	if (result < 0)
		return result;

	*val = tmp;
	return count;
}

static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
	size_t count)
{
	bool tmp;
	int result;

	result = kstrtobool(page, &tmp);
	if (result < 0)
		return result;

	*val = tmp;
	return count;
}

/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY)				\
static ssize_t								\
nullb_device_##NAME##_show(struct config_item *item, char *page)	\
{									\
	return nullb_device_##TYPE##_attr_show(				\
				to_nullb_device(item)->NAME, page);	\
}									\
static ssize_t								\
nullb_device_##NAME##_store(struct config_item *item, const char *page, \
			    size_t count)				\
{									\
	int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
	struct nullb_device *dev = to_nullb_device(item);		\
	TYPE new_value = 0;						\
	int ret;							\
									\
	ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
	if (ret < 0)							\
		return ret;						\
	if (apply_fn)							\
		ret = apply_fn(dev, new_value);				\
	else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags))	\
		ret = -EBUSY;						\
	if (ret < 0)							\
		return ret;						\
	dev->NAME = new_value;						\
	return count;							\
}									\
CONFIGFS_ATTR(nullb_device_, NAME);

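/*
 * Resize the number of hardware queues of a live device. The previous queue
 * counts are kept in the device so that null_map_queues() can fall back to
 * them if the block layer fails to prepare the new queues; in that case the
 * new values are reverted and -ENOMEM is returned.
 */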
static int nullb_update_nr_hw_queues(struct nullb_device *dev,
				     unsigned int submit_queues,
				     unsigned int poll_queues)

{
	struct blk_mq_tag_set *set;
	int ret, nr_hw_queues;

	if (!dev->nullb)
		return 0;

	/*
	 * Make sure at least one submit queue exists.
	 */
	if (!submit_queues)
		return -EINVAL;

	/*
	 * Make sure that null_init_hctx() does not access nullb->queues[] past
	 * the end of that array.
	 */
	if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues)
		return -EINVAL;

	/*
	 * Keep previous and new queue numbers in nullb_device for reference in
	 * the call back function null_map_queues().
	 */
	dev->prev_submit_queues = dev->submit_queues;
	dev->prev_poll_queues = dev->poll_queues;
	dev->submit_queues = submit_queues;
	dev->poll_queues = poll_queues;

	set = dev->nullb->tag_set;
	nr_hw_queues = submit_queues + poll_queues;
	blk_mq_update_nr_hw_queues(set, nr_hw_queues);
	ret = set->nr_hw_queues == nr_hw_queues ? 0 : -ENOMEM;

	if (ret) {
		/* on error, revert the queue numbers */
		dev->submit_queues = dev->prev_submit_queues;
		dev->poll_queues = dev->prev_poll_queues;
	}

	return ret;
}

static int nullb_apply_submit_queues(struct nullb_device *dev,
				     unsigned int submit_queues)
{
	int ret;

	mutex_lock(&lock);
	ret = nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
	mutex_unlock(&lock);

	return ret;
}

static int nullb_apply_poll_queues(struct nullb_device *dev,
				   unsigned int poll_queues)
{
	int ret;

	mutex_lock(&lock);
	ret = nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
	mutex_unlock(&lock);

	return ret;
}

NULLB_DEVICE_ATTR(size, ulong, NULL);
NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues);
NULLB_DEVICE_ATTR(home_node, uint, NULL);
NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
NULLB_DEVICE_ATTR(blocksize, uint, NULL);
NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
NULLB_DEVICE_ATTR(irqmode, uint, NULL);
NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
NULLB_DEVICE_ATTR(index, uint, NULL);
NULLB_DEVICE_ATTR(blocking, bool, NULL);
NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
NULLB_DEVICE_ATTR(discard, bool, NULL);
NULLB_DEVICE_ATTR(mbps, uint, NULL);
NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
NULLB_DEVICE_ATTR(zoned, bool, NULL);
NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
NULLB_DEVICE_ATTR(fua, bool, NULL);

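/*
 * "power" is the configfs attribute that actually creates or destroys the
 * null_blk disk: writing 1 brings the device up via null_add_dev() and marks
 * it configured, writing 0 tears it down with null_del_dev(). All of this is
 * serialized by the global "lock" mutex.
 */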
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
	return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
}

static ssize_t nullb_device_power_store(struct config_item *item,
				     const char *page, size_t count)
{
	struct nullb_device *dev = to_nullb_device(item);
	bool newp = false;
	ssize_t ret;

	ret = nullb_device_bool_attr_store(&newp, page, count);
	if (ret < 0)
		return ret;

	ret = count;
	mutex_lock(&lock);
	if (!dev->power && newp) {
		if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
			goto out;

		ret = null_add_dev(dev);
		if (ret) {
			clear_bit(NULLB_DEV_FL_UP, &dev->flags);
			goto out;
		}

		set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
		dev->power = newp;
	} else if (dev->power && !newp) {
		if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
			dev->power = newp;
			null_del_dev(dev->nullb);
		}
		clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
	}

out:
	mutex_unlock(&lock);
	return ret;
}

CONFIGFS_ATTR(nullb_device_, power);

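/*
 * The "badblocks" attribute takes ranges of the form "+<start>-<end>" to add
 * bad sectors and "-<start>-<end>" to clear them again; reads report the
 * current list via badblocks_show().
 */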
static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
{
	struct nullb_device *t_dev = to_nullb_device(item);

	return badblocks_show(&t_dev->badblocks, page, 0);
}

static ssize_t nullb_device_badblocks_store(struct config_item *item,
				     const char *page, size_t count)
{
	struct nullb_device *t_dev = to_nullb_device(item);
	char *orig, *buf, *tmp;
	u64 start, end;
	int ret;

	orig = kstrndup(page, count, GFP_KERNEL);
	if (!orig)
		return -ENOMEM;

	buf = strstrip(orig);

	ret = -EINVAL;
	if (buf[0] != '+' && buf[0] != '-')
		goto out;
	tmp = strchr(&buf[1], '-');
	if (!tmp)
		goto out;
	*tmp = '\0';
	ret = kstrtoull(buf + 1, 0, &start);
	if (ret)
		goto out;
	ret = kstrtoull(tmp + 1, 0, &end);
	if (ret)
		goto out;
	ret = -EINVAL;
	if (start > end)
		goto out;
	/* enable badblocks */
	cmpxchg(&t_dev->badblocks.shift, -1, 0);
	if (buf[0] == '+')
		ret = badblocks_set(&t_dev->badblocks, start,
			end - start + 1, 1);
	else
		ret = badblocks_clear(&t_dev->badblocks, start,
			end - start + 1);
	if (ret == 0)
		ret = count;
out:
	kfree(orig);
	return ret;
}
CONFIGFS_ATTR(nullb_device_, badblocks);

static ssize_t nullb_device_zone_readonly_store(struct config_item *item,
						const char *page, size_t count)
{
	struct nullb_device *dev = to_nullb_device(item);

	return zone_cond_store(dev, page, count, BLK_ZONE_COND_READONLY);
}
CONFIGFS_ATTR_WO(nullb_device_, zone_readonly);

static ssize_t nullb_device_zone_offline_store(struct config_item *item,
					       const char *page, size_t count)
{
	struct nullb_device *dev = to_nullb_device(item);

	return zone_cond_store(dev, page, count, BLK_ZONE_COND_OFFLINE);
}
CONFIGFS_ATTR_WO(nullb_device_, zone_offline);

static struct configfs_attribute *nullb_device_attrs[] = {
	&nullb_device_attr_size,
	&nullb_device_attr_completion_nsec,
	&nullb_device_attr_submit_queues,
	&nullb_device_attr_poll_queues,
	&nullb_device_attr_home_node,
	&nullb_device_attr_queue_mode,
	&nullb_device_attr_blocksize,
	&nullb_device_attr_max_sectors,
	&nullb_device_attr_irqmode,
	&nullb_device_attr_hw_queue_depth,
	&nullb_device_attr_index,
	&nullb_device_attr_blocking,
	&nullb_device_attr_use_per_node_hctx,
	&nullb_device_attr_power,
	&nullb_device_attr_memory_backed,
	&nullb_device_attr_discard,
	&nullb_device_attr_mbps,
	&nullb_device_attr_cache_size,
	&nullb_device_attr_badblocks,
	&nullb_device_attr_zoned,
	&nullb_device_attr_zone_size,
	&nullb_device_attr_zone_capacity,
	&nullb_device_attr_zone_nr_conv,
	&nullb_device_attr_zone_max_open,
	&nullb_device_attr_zone_max_active,
	&nullb_device_attr_zone_append_max_sectors,
	&nullb_device_attr_zone_readonly,
	&nullb_device_attr_zone_offline,
	&nullb_device_attr_virt_boundary,
	&nullb_device_attr_no_sched,
	&nullb_device_attr_shared_tags,
	&nullb_device_attr_shared_tag_bitmap,
	&nullb_device_attr_fua,
	NULL,
};

static void nullb_device_release(struct config_item *item)
{
	struct nullb_device *dev = to_nullb_device(item);

	null_free_device_storage(dev, false);
	null_free_dev(dev);
}

static struct configfs_item_operations nullb_device_ops = {
	.release	= nullb_device_release,
};

static const struct config_item_type nullb_device_type = {
	.ct_item_ops	= &nullb_device_ops,
	.ct_attrs	= nullb_device_attrs,
	.ct_owner	= THIS_MODULE,
};

#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION

static void nullb_add_fault_config(struct nullb_device *dev)
{
	fault_config_init(&dev->timeout_config, "timeout_inject");
	fault_config_init(&dev->requeue_config, "requeue_inject");
	fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject");

	configfs_add_default_group(&dev->timeout_config.group, &dev->group);
	configfs_add_default_group(&dev->requeue_config.group, &dev->group);
	configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group);
}

#else

static void nullb_add_fault_config(struct nullb_device *dev)
{
}

#endif

static struct
config_group *nullb_group_make_group(struct config_group *group, const char *name)
{
	struct nullb_device *dev;

	if (null_find_dev_by_name(name))
		return ERR_PTR(-EEXIST);

	dev = null_alloc_dev();
	if (!dev)
		return ERR_PTR(-ENOMEM);

	config_group_init_type_name(&dev->group, name, &nullb_device_type);
	nullb_add_fault_config(dev);

	return &dev->group;
}

static void
nullb_group_drop_item(struct config_group *group, struct config_item *item)
{
	struct nullb_device *dev = to_nullb_device(item);

	if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
		mutex_lock(&lock);
		dev->power = false;
		null_del_dev(dev->nullb);
		mutex_unlock(&lock);
	}

	config_item_put(item);
}

static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
	return snprintf(page, PAGE_SIZE,
			"badblocks,blocking,blocksize,cache_size,fua,"
			"completion_nsec,discard,home_node,hw_queue_depth,"
			"irqmode,max_sectors,mbps,memory_backed,no_sched,"
			"poll_queues,power,queue_mode,shared_tag_bitmap,"
			"shared_tags,size,submit_queues,use_per_node_hctx,"
			"virt_boundary,zoned,zone_capacity,zone_max_active,"
			"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
			"zone_size,zone_append_max_sectors\n");
}

CONFIGFS_ATTR_RO(memb_group_, features);

static struct configfs_attribute *nullb_group_attrs[] = {
	&memb_group_attr_features,
	NULL,
};

static struct configfs_group_operations nullb_group_ops = {
	.make_group	= nullb_group_make_group,
	.drop_item	= nullb_group_drop_item,
};

static const struct config_item_type nullb_group_type = {
	.ct_group_ops	= &nullb_group_ops,
	.ct_attrs	= nullb_group_attrs,
	.ct_owner	= THIS_MODULE,
};

static struct configfs_subsystem nullb_subsys = {
	.su_group = {
		.cg_item = {
			.ci_namebuf = "nullb",
			.ci_type = &nullb_group_type,
		},
	},
};

static inline int null_cache_active(struct nullb *nullb)
{
	return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
}

static struct nullb_device *null_alloc_dev(void)
{
	struct nullb_device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return NULL;

#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
	dev->timeout_config.attr = null_timeout_attr;
	dev->requeue_config.attr = null_requeue_attr;
	dev->init_hctx_fault_config.attr = null_init_hctx_attr;
#endif

	INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
	INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
	if (badblocks_init(&dev->badblocks, 0)) {
		kfree(dev);
		return NULL;
	}

	dev->size = g_gb * 1024;
	dev->completion_nsec = g_completion_nsec;
	dev->submit_queues = g_submit_queues;
	dev->prev_submit_queues = g_submit_queues;
	dev->poll_queues = g_poll_queues;
	dev->prev_poll_queues = g_poll_queues;
	dev->home_node = g_home_node;
	dev->queue_mode = g_queue_mode;
	dev->blocksize = g_bs;
	dev->max_sectors = g_max_sectors;
	dev->irqmode = g_irqmode;
	dev->hw_queue_depth = g_hw_queue_depth;
	dev->blocking = g_blocking;
	dev->memory_backed = g_memory_backed;
	dev->discard = g_discard;
	dev->cache_size = g_cache_size;
	dev->mbps = g_mbps;
	dev->use_per_node_hctx = g_use_per_node_hctx;
	dev->zoned = g_zoned;
	dev->zone_size = g_zone_size;
	dev->zone_capacity = g_zone_capacity;
	dev->zone_nr_conv = g_zone_nr_conv;
	dev->zone_max_open = g_zone_max_open;
	dev->zone_max_active = g_zone_max_active;
	dev->zone_append_max_sectors = g_zone_append_max_sectors;
	dev->virt_boundary = g_virt_boundary;
	dev->no_sched = g_no_sched;
	dev->shared_tags = g_shared_tags;
	dev->shared_tag_bitmap = g_shared_tag_bitmap;
	dev->fua = g_fua;

	return dev;
}

static void null_free_dev(struct nullb_device *dev)
{
	if (!dev)
		return;

	null_free_zoned_dev(dev);
	badblocks_exit(&dev->badblocks);
	kfree(dev);
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);

	blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
	return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	ktime_t kt = cmd->nq->dev->completion_nsec;

	hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}

static void null_complete_rq(struct request *rq)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);

	blk_mq_end_request(rq, cmd->error);
}

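/*
 * Memory backing: data is kept in pages hanging off a radix tree, allocated
 * lazily on first write. Each page tracks which sectors actually hold data
 * in its bitmap, so reads of never-written sectors can be detected.
 */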
static struct nullb_page *null_alloc_page(void)
{
	struct nullb_page *t_page;

	t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
	if (!t_page)
		return NULL;

	t_page->page = alloc_pages(GFP_NOIO, 0);
	if (!t_page->page) {
		kfree(t_page);
		return NULL;
	}

	memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
	return t_page;
}

static void null_free_page(struct nullb_page *t_page)
{
	__set_bit(NULLB_PAGE_FREE, t_page->bitmap);
	if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
		return;
	__free_page(t_page->page);
	kfree(t_page);
}

static bool null_page_empty(struct nullb_page *page)
{
	int size = MAP_SZ - 2;

	return find_first_bit(page->bitmap, size) == size;
}

static void null_free_sector(struct nullb *nullb, sector_t sector,
	bool is_cache)
{
	unsigned int sector_bit;
	u64 idx;
	struct nullb_page *t_page, *ret;
	struct radix_tree_root *root;

	root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
	idx = sector >> PAGE_SECTORS_SHIFT;
	sector_bit = (sector & SECTOR_MASK);

	t_page = radix_tree_lookup(root, idx);
	if (t_page) {
		__clear_bit(sector_bit, t_page->bitmap);

		if (null_page_empty(t_page)) {
			ret = radix_tree_delete_item(root, idx, t_page);
			WARN_ON(ret != t_page);
			null_free_page(ret);
			if (is_cache)
				nullb->dev->curr_cache -= PAGE_SIZE;
		}
	}
}

static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
	struct nullb_page *t_page, bool is_cache)
{
	struct radix_tree_root *root;

	root = is_cache ? &nullb->dev->cache : &nullb->dev->data;

	if (radix_tree_insert(root, idx, t_page)) {
		null_free_page(t_page);
		t_page = radix_tree_lookup(root, idx);
		WARN_ON(!t_page || t_page->page->index != idx);
	} else if (is_cache)
		nullb->dev->curr_cache += PAGE_SIZE;

	return t_page;
}

static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
{
	unsigned long pos = 0;
	int nr_pages;
	struct nullb_page *ret, *t_pages[FREE_BATCH];
	struct radix_tree_root *root;

	root = is_cache ? &dev->cache : &dev->data;

	do {
		int i;

		nr_pages = radix_tree_gang_lookup(root,
				(void **)t_pages, pos, FREE_BATCH);

		for (i = 0; i < nr_pages; i++) {
			pos = t_pages[i]->page->index;
			ret = radix_tree_delete_item(root, pos, t_pages[i]);
			WARN_ON(ret != t_pages[i]);
			null_free_page(ret);
		}

		pos++;
	} while (nr_pages == FREE_BATCH);

	if (is_cache)
		dev->curr_cache = 0;
}

static struct nullb_page *__null_lookup_page(struct nullb *nullb,
	sector_t sector, bool for_write, bool is_cache)
{
	unsigned int sector_bit;
	u64 idx;
	struct nullb_page *t_page;
	struct radix_tree_root *root;

	idx = sector >> PAGE_SECTORS_SHIFT;
	sector_bit = (sector & SECTOR_MASK);

	root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
	t_page = radix_tree_lookup(root, idx);
	WARN_ON(t_page && t_page->page->index != idx);

	if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
		return t_page;

	return NULL;
}

static struct nullb_page *null_lookup_page(struct nullb *nullb,
	sector_t sector, bool for_write, bool ignore_cache)
{
	struct nullb_page *page = NULL;

	if (!ignore_cache)
		page = __null_lookup_page(nullb, sector, for_write, true);
	if (page)
		return page;
	return __null_lookup_page(nullb, sector, for_write, false);
}

static struct nullb_page *null_insert_page(struct nullb *nullb,
					   sector_t sector, bool ignore_cache)
	__releases(&nullb->lock)
	__acquires(&nullb->lock)
{
	u64 idx;
	struct nullb_page *t_page;

	t_page = null_lookup_page(nullb, sector, true, ignore_cache);
	if (t_page)
		return t_page;

	spin_unlock_irq(&nullb->lock);

	t_page = null_alloc_page();
	if (!t_page)
		goto out_lock;

	if (radix_tree_preload(GFP_NOIO))
		goto out_freepage;

	spin_lock_irq(&nullb->lock);
	idx = sector >> PAGE_SECTORS_SHIFT;
	t_page->page->index = idx;
	t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
	radix_tree_preload_end();

	return t_page;
out_freepage:
	null_free_page(t_page);
out_lock:
	spin_lock_irq(&nullb->lock);
	return null_lookup_page(nullb, sector, true, ignore_cache);
}

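/*
 * Write back a single cache page into the data radix tree. Only sectors whose
 * bits are set in the cache page are copied; a page that was marked
 * NULLB_PAGE_FREE while the lock was dropped is simply discarded.
 */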
static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
{
	int i;
	unsigned int offset;
	u64 idx;
	struct nullb_page *t_page, *ret;
	void *dst, *src;

	idx = c_page->page->index;

	t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);

	__clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
	if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
		null_free_page(c_page);
		if (t_page && null_page_empty(t_page)) {
			ret = radix_tree_delete_item(&nullb->dev->data,
				idx, t_page);
			null_free_page(t_page);
		}
		return 0;
	}

	if (!t_page)
		return -ENOMEM;

	src = kmap_local_page(c_page->page);
	dst = kmap_local_page(t_page->page);

	for (i = 0; i < PAGE_SECTORS;
			i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
		if (test_bit(i, c_page->bitmap)) {
			offset = (i << SECTOR_SHIFT);
			memcpy(dst + offset, src + offset,
				nullb->dev->blocksize);
			__set_bit(i, t_page->bitmap);
		}
	}

	kunmap_local(dst);
	kunmap_local(src);

	ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
	null_free_page(ret);
	nullb->dev->curr_cache -= PAGE_SIZE;

	return 0;
}

static int null_make_cache_space(struct nullb *nullb, unsigned long n)
{
	int i, err, nr_pages;
	struct nullb_page *c_pages[FREE_BATCH];
	unsigned long flushed = 0, one_round;

again:
	if ((nullb->dev->cache_size * 1024 * 1024) >
	     nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
		return 0;

	nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
			(void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
	/*
	 * null_flush_cache_page() could unlock before using the c_pages. To
	 * avoid a race, we don't allow page free here.
	 */
	for (i = 0; i < nr_pages; i++) {
		nullb->cache_flush_pos = c_pages[i]->page->index;
		/*
		 * We found the page which is being flushed to disk by other
		 * threads
		 */
		if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
			c_pages[i] = NULL;
		else
			__set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
	}

	one_round = 0;
	for (i = 0; i < nr_pages; i++) {
		if (c_pages[i] == NULL)
			continue;
		err = null_flush_cache_page(nullb, c_pages[i]);
		if (err)
			return err;
		one_round++;
	}
	flushed += one_round << PAGE_SHIFT;

	if (n > flushed) {
		if (nr_pages == 0)
			nullb->cache_flush_pos = 0;
		if (one_round == 0) {
			/* give other threads a chance */
			spin_unlock_irq(&nullb->lock);
			spin_lock_irq(&nullb->lock);
		}
		goto again;
	}
	return 0;
}

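/*
 * Copy data between bio pages and the backing store, one block at a time.
 * Writes go through the cache unless it is disabled or the request is FUA,
 * in which case the cached copy of the sector is dropped as well.
 */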
static int copy_to_nullb(struct nullb *nullb, struct page *source,
	unsigned int off, sector_t sector, size_t n, bool is_fua)
{
	size_t temp, count = 0;
	unsigned int offset;
	struct nullb_page *t_page;

	while (count < n) {
		temp = min_t(size_t, nullb->dev->blocksize, n - count);

		if (null_cache_active(nullb) && !is_fua)
			null_make_cache_space(nullb, PAGE_SIZE);

		offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
		t_page = null_insert_page(nullb, sector,
			!null_cache_active(nullb) || is_fua);
		if (!t_page)
			return -ENOSPC;

		memcpy_page(t_page->page, offset, source, off + count, temp);

		__set_bit(sector & SECTOR_MASK, t_page->bitmap);

		if (is_fua)
			null_free_sector(nullb, sector, true);

		count += temp;
		sector += temp >> SECTOR_SHIFT;
	}
	return 0;
}

static int copy_from_nullb(struct nullb *nullb, struct page *dest,
	unsigned int off, sector_t sector, size_t n)
{
	size_t temp, count = 0;
	unsigned int offset;
	struct nullb_page *t_page;

	while (count < n) {
		temp = min_t(size_t, nullb->dev->blocksize, n - count);

		offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
		t_page = null_lookup_page(nullb, sector, false,
			!null_cache_active(nullb));

		if (t_page)
			memcpy_page(dest, off + count, t_page->page, offset,
				    temp);
		else
			zero_user(dest, off + count, temp);

		count += temp;
		sector += temp >> SECTOR_SHIFT;
	}
	return 0;
}

static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
			       unsigned int len, unsigned int off)
{
	memset_page(page, off, 0xff, len);
}

blk_status_t null_handle_discard(struct nullb_device *dev,
				 sector_t sector, sector_t nr_sectors)
{
	struct nullb *nullb = dev->nullb;
	size_t n = nr_sectors << SECTOR_SHIFT;
	size_t temp;

	spin_lock_irq(&nullb->lock);
	while (n > 0) {
		temp = min_t(size_t, n, dev->blocksize);
		null_free_sector(nullb, sector, false);
		if (null_cache_active(nullb))
			null_free_sector(nullb, sector, true);
		sector += temp >> SECTOR_SHIFT;
		n -= temp;
	}
	spin_unlock_irq(&nullb->lock);

	return BLK_STS_OK;
}

static blk_status_t null_handle_flush(struct nullb *nullb)
{
	int err;

	if (!null_cache_active(nullb))
		return 0;

	spin_lock_irq(&nullb->lock);
	while (true) {
		err = null_make_cache_space(nullb,
			nullb->dev->cache_size * 1024 * 1024);
		if (err || nullb->dev->curr_cache == 0)
			break;
	}

	WARN_ON(!radix_tree_empty(&nullb->dev->cache));
	spin_unlock_irq(&nullb->lock);
	return errno_to_blk_status(err);
}

static int null_transfer(struct nullb *nullb, struct page *page,
	unsigned int len, unsigned int off, bool is_write, sector_t sector,
	bool is_fua)
{
	struct nullb_device *dev = nullb->dev;
	unsigned int valid_len = len;
	int err = 0;

	if (!is_write) {
		if (dev->zoned)
			valid_len = null_zone_valid_read_len(nullb,
				sector, len);

		if (valid_len) {
			err = copy_from_nullb(nullb, page, off,
				sector, valid_len);
			off += valid_len;
			len -= valid_len;
		}

		if (len)
			nullb_fill_pattern(nullb, page, len, off);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
	}

	return err;
}

static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct nullb *nullb = cmd->nq->dev->nullb;
	int err = 0;
	unsigned int len;
	sector_t sector = blk_rq_pos(rq);
	struct req_iterator iter;
	struct bio_vec bvec;

	spin_lock_irq(&nullb->lock);
	rq_for_each_segment(bvec, rq, iter) {
		len = bvec.bv_len;
		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
				     op_is_write(req_op(rq)), sector,
				     rq->cmd_flags & REQ_FUA);
		if (err)
			break;
		sector += len >> SECTOR_SHIFT;
	}
	spin_unlock_irq(&nullb->lock);

	return errno_to_blk_status(err);
}

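/*
 * Bandwidth throttling: every request is charged against a per-tick byte
 * budget. Once the budget is exhausted the hardware queues are stopped and
 * the request is returned with BLK_STS_DEV_RESOURCE so that it is retried
 * after the bandwidth timer refills cur_bytes.
 */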
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
{
	struct nullb_device *dev = cmd->nq->dev;
	struct nullb *nullb = dev->nullb;
	blk_status_t sts = BLK_STS_OK;
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	if (!hrtimer_active(&nullb->bw_timer))
		hrtimer_restart(&nullb->bw_timer);

	if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
		blk_mq_stop_hw_queues(nullb->q);
		/* race with timer */
		if (atomic_long_read(&nullb->cur_bytes) > 0)
			blk_mq_start_stopped_hw_queues(nullb->q, true);
		/* requeue request */
		sts = BLK_STS_DEV_RESOURCE;
	}
	return sts;
}

static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
						 sector_t sector,
						 sector_t nr_sectors)
{
	struct badblocks *bb = &cmd->nq->dev->badblocks;
	sector_t first_bad;
	int bad_sectors;

	if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
		return BLK_STS_IOERR;

	return BLK_STS_OK;
}

static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
						     enum req_op op,
						     sector_t sector,
						     sector_t nr_sectors)
{
	struct nullb_device *dev = cmd->nq->dev;

	if (op == REQ_OP_DISCARD)
		return null_handle_discard(dev, sector, nr_sectors);

	return null_handle_rq(cmd);
}

static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct nullb_device *dev = cmd->nq->dev;
	struct bio *bio;

	if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
		__rq_for_each_bio(bio, rq)
			zero_fill_bio(bio);
	}
}

static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	/*
	 * Since root privileges are required to configure the null_blk
	 * driver, it is fine that this driver does not initialize the
	 * data buffers of read commands. Zero-initialize these buffers
	 * anyway if KMSAN is enabled to prevent that KMSAN complains
	 * about null_blk not initializing read data buffers.
	 */
	if (IS_ENABLED(CONFIG_KMSAN))
		nullb_zero_read_cmd_buffer(cmd);

	/* Complete IO by inline, softirq or timer */
	switch (cmd->nq->dev->irqmode) {
	case NULL_IRQ_SOFTIRQ:
		blk_mq_complete_request(rq);
		break;
	case NULL_IRQ_NONE:
		blk_mq_end_request(rq, cmd->error);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}

blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
			      sector_t sector, unsigned int nr_sectors)
{
	struct nullb_device *dev = cmd->nq->dev;
	blk_status_t ret;

	if (dev->badblocks.shift != -1) {
		ret = null_handle_badblocks(cmd, sector, nr_sectors);
		if (ret != BLK_STS_OK)
			return ret;
	}

	if (dev->memory_backed)
		return null_handle_memory_backed(cmd, op, sector, nr_sectors);

	return BLK_STS_OK;
}

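/*
 * Top-level command handler: flushes are handled directly, zoned devices go
 * through null_process_zoned_cmd(), everything else through
 * null_process_cmd(). The result is completed according to the configured
 * irqmode, taking care not to overwrite an earlier (e.g. timeout) error.
 */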
static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
			    sector_t nr_sectors, enum req_op op)
{
	struct nullb_device *dev = cmd->nq->dev;
	struct nullb *nullb = dev->nullb;
	blk_status_t sts;

	if (op == REQ_OP_FLUSH) {
		cmd->error = null_handle_flush(nullb);
		goto out;
	}

	if (dev->zoned)
		sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
	else
		sts = null_process_cmd(cmd, op, sector, nr_sectors);

	/* Do not overwrite errors (e.g. timeout errors) */
	if (cmd->error == BLK_STS_OK)
		cmd->error = sts;

out:
	nullb_complete_cmd(cmd);
}

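/*
 * The bandwidth timer fires every TIMER_INTERVAL, refills the per-tick byte
 * budget to mb_per_tick(mbps) and restarts any hardware queues that were
 * stopped because the previous budget ran out.
 */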
static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
{
	struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
	ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
	unsigned int mbps = nullb->dev->mbps;

	if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
		return HRTIMER_NORESTART;

	atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
	blk_mq_start_stopped_hw_queues(nullb->q, true);

	hrtimer_forward_now(&nullb->bw_timer, timer_interval);

	return HRTIMER_RESTART;
}

static void nullb_setup_bwtimer(struct nullb *nullb)
{
	ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);

	hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	nullb->bw_timer.function = nullb_bwtimer_fn;
	atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
	hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
}

#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION

static bool should_timeout_request(struct request *rq)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct nullb_device *dev = cmd->nq->dev;

	return should_fail(&dev->timeout_config.attr, 1);
}

static bool should_requeue_request(struct request *rq)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct nullb_device *dev = cmd->nq->dev;

	return should_fail(&dev->requeue_config.attr, 1);
}

static bool should_init_hctx_fail(struct nullb_device *dev)
{
	return should_fail(&dev->init_hctx_fault_config.attr, 1);
}

#else

static bool should_timeout_request(struct request *rq)
{
	return false;
}

static bool should_requeue_request(struct request *rq)
{
	return false;
}

static bool should_init_hctx_fail(struct nullb_device *dev)
{
	return false;
}

#endif

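/*
 * Map the default (submit) and poll hardware queues; read queues are not
 * used. If blk_mq_update_nr_hw_queues() could not allocate the requested
 * number of hctxs, fall back to the previous submit/poll queue counts.
 */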
static void null_map_queues(struct blk_mq_tag_set *set)
{
	struct nullb *nullb = set->driver_data;
	int i, qoff;
	unsigned int submit_queues = g_submit_queues;
	unsigned int poll_queues = g_poll_queues;

	if (nullb) {
		struct nullb_device *dev = nullb->dev;

		/*
		 * Refer nr_hw_queues of the tag set to check if the expected
		 * number of hardware queues are prepared. If block layer failed
		 * to prepare them, use previous numbers of submit queues and
		 * poll queues to map queues.
		 */
		if (set->nr_hw_queues ==
		    dev->submit_queues + dev->poll_queues) {
			submit_queues = dev->submit_queues;
			poll_queues = dev->poll_queues;
		} else if (set->nr_hw_queues ==
			   dev->prev_submit_queues + dev->prev_poll_queues) {
			submit_queues = dev->prev_submit_queues;
			poll_queues = dev->prev_poll_queues;
		} else {
			pr_warn("tag set has unexpected nr_hw_queues: %d\n",
				set->nr_hw_queues);
			WARN_ON_ONCE(true);
			submit_queues = 1;
			poll_queues = 0;
		}
	}

	for (i = 0, qoff = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];

		switch (i) {
		case HCTX_TYPE_DEFAULT:
			map->nr_queues = submit_queues;
			break;
		case HCTX_TYPE_READ:
			map->nr_queues = 0;
			continue;
		case HCTX_TYPE_POLL:
			map->nr_queues = poll_queues;
			break;
		}
		map->queue_offset = qoff;
		qoff += map->nr_queues;
		blk_mq_map_queues(map);
	}
}

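/*
 * Poll handler for HCTX_TYPE_POLL queues: drain the per-queue poll list,
 * process each request and complete it (batched when possible), returning
 * the number of requests handled.
 */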
static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct nullb_queue *nq = hctx->driver_data;
	LIST_HEAD(list);
	int nr = 0;
	struct request *rq;

	spin_lock(&nq->poll_lock);
	list_splice_init(&nq->poll_list, &list);
	list_for_each_entry(rq, &list, queuelist)
		blk_mq_set_request_complete(rq);
	spin_unlock(&nq->poll_lock);

	while (!list_empty(&list)) {
		struct nullb_cmd *cmd;
		struct request *req;

		req = list_first_entry(&list, struct request, queuelist);
		list_del_init(&req->queuelist);
		cmd = blk_mq_rq_to_pdu(req);
		cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
						blk_rq_sectors(req));
		if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
					blk_mq_end_request_batch))
			blk_mq_end_request(req, cmd->error);
		nr++;
	}

	return nr;
}

static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
{
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);

	if (hctx->type == HCTX_TYPE_POLL) {
		struct nullb_queue *nq = hctx->driver_data;

		spin_lock(&nq->poll_lock);
		/* The request may have completed meanwhile. */
		if (blk_mq_request_completed(rq)) {
			spin_unlock(&nq->poll_lock);
			return BLK_EH_DONE;
		}
		list_del_init(&rq->queuelist);
		spin_unlock(&nq->poll_lock);
	}

	pr_info("rq %p timed out\n", rq);

	/*
	 * If the device is marked as blocking (i.e. memory backed or zoned
	 * device), the submission path may be blocked waiting for resources
	 * and cause real timeouts. For these real timeouts, the submission
	 * path will complete the request using blk_mq_complete_request().
	 * Only fake timeouts need to execute blk_mq_complete_request() here.
	 */
	cmd->error = BLK_STS_TIMEOUT;
	if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL)
		blk_mq_complete_request(rq);
	return BLK_EH_DONE;
}

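/*
 * ->queue_rq() entry point. Fault injection may fake a requeue or a timeout,
 * throttling may push back with BLK_STS_DEV_RESOURCE, and requests on poll
 * queues are parked on the poll list instead of being handled inline.
 */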
static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
				  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct nullb_queue *nq = hctx->driver_data;
	sector_t nr_sectors = blk_rq_sectors(rq);
	sector_t sector = blk_rq_pos(rq);
	const bool is_poll = hctx->type == HCTX_TYPE_POLL;

	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);

	if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cmd->timer.function = null_cmd_timer_expired;
	}
	cmd->error = BLK_STS_OK;
	cmd->nq = nq;
	cmd->fake_timeout = should_timeout_request(rq) ||
		blk_should_fake_timeout(rq->q);

	if (should_requeue_request(rq)) {
		/*
		 * Alternate between hitting the core BUSY path, and the
		 * driver driven requeue path
		 */
		nq->requeue_selection++;
		if (nq->requeue_selection & 1)
			return BLK_STS_RESOURCE;
		blk_mq_requeue_request(rq, true);
		return BLK_STS_OK;
	}

	if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
		blk_status_t sts = null_handle_throttled(cmd);

		if (sts != BLK_STS_OK)
			return sts;
	}

	blk_mq_start_request(rq);

	if (is_poll) {
		spin_lock(&nq->poll_lock);
		list_add_tail(&rq->queuelist, &nq->poll_list);
		spin_unlock(&nq->poll_lock);
		return BLK_STS_OK;
	}
	if (cmd->fake_timeout)
		return BLK_STS_OK;

	null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
	return BLK_STS_OK;
}

static void null_queue_rqs(struct request **rqlist)
{
	struct request *requeue_list = NULL;
	struct request **requeue_lastp = &requeue_list;
	struct blk_mq_queue_data bd = { };
	blk_status_t ret;

	do {
		struct request *rq = rq_list_pop(rqlist);

		bd.rq = rq;
		ret = null_queue_rq(rq->mq_hctx, &bd);
		if (ret != BLK_STS_OK)
			rq_list_add_tail(&requeue_lastp, rq);
	} while (!rq_list_empty(*rqlist));

	*rqlist = requeue_list;
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	nq->dev = nullb->dev;
	INIT_LIST_HEAD(&nq->poll_list);
	spin_lock_init(&nq->poll_lock);
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
			  unsigned int hctx_idx)
{
	struct nullb *nullb = hctx->queue->queuedata;
	struct nullb_queue *nq;

	if (should_init_hctx_fail(nullb->dev))
		return -EFAULT;

	nq = &nullb->queues[hctx_idx];
	hctx->driver_data = nq;
	null_init_queue(nullb, nq);

	return 0;
}

static const struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.queue_rqs	= null_queue_rqs,
	.complete	= null_complete_rq,
	.timeout	= null_timeout_rq,
	.poll		= null_poll,
	.map_queues	= null_map_queues,
	.init_hctx	= null_init_hctx,
};

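/*
 * Tear down a nullb device: release its index, remove the gendisk, stop the
 * bandwidth timer if throttling was active, and free the queues, the private
 * tag set and any cached pages.
 */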
9ae2d0aa
MB
1685static void null_del_dev(struct nullb *nullb)
1686{
9b03b713
BVA
1687 struct nullb_device *dev;
1688
1689 if (!nullb)
1690 return;
1691
1692 dev = nullb->dev;
2984c868 1693
95931a24 1694 ida_free(&nullb_indexes, nullb->index);
94bc02e3 1695
9ae2d0aa
MB
1696 list_del_init(&nullb->list);
1697
74ede5af 1698 del_gendisk(nullb->disk);
eff2c4f1
SL
1699
1700 if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
1701 hrtimer_cancel(&nullb->bw_timer);
1702 atomic_long_set(&nullb->cur_bytes, LONG_MAX);
8b631f9c 1703 blk_mq_start_stopped_hw_queues(nullb->q, true);
eff2c4f1
SL
1704 }
1705
8b9ab626 1706 put_disk(nullb->disk);
8b631f9c 1707 if (nullb->tag_set == &nullb->__tag_set)
82f402fe 1708 blk_mq_free_tag_set(nullb->tag_set);
8b631f9c 1709 kfree(nullb->queues);
deb78b41
SL
1710 if (null_cache_active(nullb))
1711 null_free_device_storage(nullb->dev, true);
9ae2d0aa 1712 kfree(nullb);
2984c868 1713 dev->nullb = NULL;
9ae2d0aa
MB
1714}
1715
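/*
 * Discard is only honoured for memory-backed, non-zoned devices; in any
 * other configuration the option is cleared and a note is logged. When
 * kept, the discard limit is advertised through the caller's queue_limits.
 */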
e440626b 1716static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
306eb6b4
SL
1717{
1718	if (!nullb->dev->discard)
1719 return;
1592cd15 1720
49c7089f
DLM
1721 if (!nullb->dev->memory_backed) {
1722 nullb->dev->discard = false;
1723 pr_info("discard option is ignored without memory backing\n");
1724 return;
1725 }
1726
1592cd15
CK
1727 if (nullb->dev->zoned) {
1728 nullb->dev->discard = false;
1729 pr_info("discard option is ignored in zoned mode\n");
1730 return;
1731 }
1732
e440626b 1733 lim->max_hw_discard_sectors = UINT_MAX >> 9;
9ae2d0aa
MB
1734}
1735
8b631f9c 1736static const struct block_device_operations null_ops = {
c62b37d9 1737 .owner = THIS_MODULE,
c62b37d9
CH
1738 .report_zones = null_report_zones,
1739};
1740
f2298c04
JA
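/*
 * Allocate the nullb_queue array, sized for every possible CPU plus any
 * configured poll queues so that hardware-context indexes from either
 * queue map stay in range.
 */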
1741static int setup_queues(struct nullb *nullb)
1742{
0a593fbb
JA
1743 int nqueues = nr_cpu_ids;
1744
1745 if (g_poll_queues)
1746 nqueues += g_poll_queues;
1747
1748 nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue),
6396bb22 1749 GFP_KERNEL);
f2298c04 1750 if (!nullb->queues)
2d263a78 1751 return -ENOMEM;
f2298c04 1752
2d263a78
MB
1753 return 0;
1754}
1755
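/*
 * Common tag-set setup: shared ops, per-command payload size, a 5 second
 * request timeout, and, when poll queues are requested, extra hardware
 * queues plus two additional queue maps.
 */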
72ca2876 1756static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
82f402fe
JA
1757{
1758 set->ops = &null_mq_ops;
72ca2876 1759 set->cmd_size = sizeof(struct nullb_cmd);
e32b0855 1760 set->timeout = 5 * HZ;
72ca2876 1761 set->nr_maps = 1;
7012eef5
VF
1762 if (poll_queues) {
1763 set->nr_hw_queues += poll_queues;
72ca2876 1764 set->nr_maps += 2;
7012eef5 1765 }
82f402fe
JA
1766 return blk_mq_alloc_tag_set(set);
1767}
1768
72ca2876
CH
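/*
 * Lazily initialise the tag set shared by all devices created with
 * shared_tags, using the module-level parameters for its geometry and
 * flags. The ops pointer doubles as the "already initialised" marker and
 * is cleared again on failure.
 */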
1769static int null_init_global_tag_set(void)
1770{
1771 int error;
1772
1773 if (tag_set.ops)
1774 return 0;
1775
1776 tag_set.nr_hw_queues = g_submit_queues;
1777 tag_set.queue_depth = g_hw_queue_depth;
1778 tag_set.numa_node = g_home_node;
1779 tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
1780 if (g_no_sched)
1781 tag_set.flags |= BLK_MQ_F_NO_SCHED;
1782 if (g_shared_tag_bitmap)
1783 tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
1784 if (g_blocking)
1785 tag_set.flags |= BLK_MQ_F_BLOCKING;
1786
1787 error = null_init_tag_set(&tag_set, g_poll_queues);
1788 if (error)
1789 tag_set.ops = NULL;
1790 return error;
1791}
1792
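/*
 * Pick between the global shared tag set and the device's private one,
 * copying the per-device settings (queue counts, depth, NUMA node,
 * scheduler, shared bitmap, blocking) into the tag set before allocating it.
 */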
1793static int null_setup_tagset(struct nullb *nullb)
1794{
1795 if (nullb->dev->shared_tags) {
1796 nullb->tag_set = &tag_set;
1797 return null_init_global_tag_set();
1798 }
1799
1800 nullb->tag_set = &nullb->__tag_set;
1801 nullb->tag_set->driver_data = nullb;
1802 nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
1803 nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
1804 nullb->tag_set->numa_node = nullb->dev->home_node;
1805 nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
1806 if (nullb->dev->no_sched)
1807 nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED;
1808 if (nullb->dev->shared_tag_bitmap)
1809 nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
1810 if (nullb->dev->blocking)
1811 nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
1812 return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
1813}
1814
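/*
 * Sanitise a device configuration before it is brought up: reject the
 * removed legacy request path, fall back from BIO mode to blk-mq, clamp
 * the block size, queue counts, cache size and bandwidth limit, force
 * blocking mode for memory-backed devices, and require a power-of-two
 * zone size for zoned devices.
 */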
5c4bd1f4 1815static int null_validate_conf(struct nullb_device *dev)
cedcafad 1816{
63f8793e
CK
1817 if (dev->queue_mode == NULL_Q_RQ) {
1818 pr_err("legacy IO path is no longer available\n");
1819 return -EINVAL;
1820 }
8b631f9c
CH
1821 if (dev->queue_mode == NULL_Q_BIO) {
1822 pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
1823 dev->queue_mode = NULL_Q_MQ;
1824 }
63f8793e 1825
cedcafad
SL
1826 dev->blocksize = round_down(dev->blocksize, 512);
1827 dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
cedcafad 1828
8b631f9c 1829 if (dev->use_per_node_hctx) {
cedcafad
SL
1830 if (dev->submit_queues != nr_online_nodes)
1831 dev->submit_queues = nr_online_nodes;
1832 } else if (dev->submit_queues > nr_cpu_ids)
1833 dev->submit_queues = nr_cpu_ids;
1834 else if (dev->submit_queues == 0)
1835 dev->submit_queues = 1;
15dfc662
SK
1836 dev->prev_submit_queues = dev->submit_queues;
1837
1838 if (dev->poll_queues > g_poll_queues)
1839 dev->poll_queues = g_poll_queues;
15dfc662 1840 dev->prev_poll_queues = dev->poll_queues;
cedcafad 1841 dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
5bcd0e0c
SL
1842
1843	/* Memory-backed devices allocate pages in the I/O path, so force blocking mode */
1844 if (dev->memory_backed)
1845 dev->blocking = true;
deb78b41
SL
1846	else /* cache is meaningless without memory backing */
1847 dev->cache_size = 0;
1848 dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
1849 dev->cache_size);
eff2c4f1 1850 dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
5c4bd1f4
DLM
1851
1852 if (dev->zoned &&
1853 (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
1854	pr_err("zone_size must be a power of two\n");
1855 return -EINVAL;
1856 }
1857
1858 return 0;
cedcafad
SL
1859}
1860
33f782c4 1861#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
24941b90
JA
1862static bool __null_setup_fault(struct fault_attr *attr, char *str)
1863{
1864 if (!str[0])
93b57046
JA
1865 return true;
1866
24941b90 1867 if (!setup_fault_attr(attr, str))
93b57046
JA
1868 return false;
1869
24941b90
JA
1870 attr->verbose = 0;
1871 return true;
1872}
1873#endif
1874
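/*
 * Parse the timeout/requeue/init_hctx fault-injection parameters when the
 * driver is built with CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION; returns
 * false if any of the strings is malformed.
 */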
1875static bool null_setup_fault(void)
1876{
1877#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1878 if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
1879 return false;
1880 if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
1881 return false;
596444e7
BVA
1882 if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
1883 return false;
33f782c4 1884#endif
93b57046
JA
1885 return true;
1886}
1887
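/*
 * Create one nullb device: validate the configuration, allocate the nullb,
 * set up its queues and tag set, build the queue limits (virt boundary,
 * discard, zones), allocate the gendisk, apply throttling and write-back
 * cache settings, pick a name (the configfs item name, or nullb<index>)
 * and register the disk.
 */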
2984c868 1888static int null_add_dev(struct nullb_device *dev)
9ae2d0aa 1889{
e440626b
CH
1890 struct queue_limits lim = {
1891 .logical_block_size = dev->blocksize,
1892 .physical_block_size = dev->blocksize,
1893 .max_hw_sectors = dev->max_sectors,
1894 };
1895
9ae2d0aa 1896 struct nullb *nullb;
dc501dc0 1897 int rv;
f2298c04 1898
5c4bd1f4
DLM
1899 rv = null_validate_conf(dev);
1900 if (rv)
1901 return rv;
cedcafad 1902
2984c868 1903 nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
dc501dc0
RE
1904 if (!nullb) {
1905 rv = -ENOMEM;
24d2f903 1906 goto out;
dc501dc0 1907 }
2984c868
SL
1908 nullb->dev = dev;
1909 dev->nullb = nullb;
f2298c04
JA
1910
1911 spin_lock_init(&nullb->lock);
1912
dc501dc0
RE
1913 rv = setup_queues(nullb);
1914 if (rv)
24d2f903 1915 goto out_free_nullb;
f2298c04 1916
72ca2876 1917 rv = null_setup_tagset(nullb);
8b631f9c
CH
1918 if (rv)
1919 goto out_cleanup_queues;
132226b3 1920
e440626b
CH
1921 if (dev->virt_boundary)
1922 lim.virt_boundary_mask = PAGE_SIZE - 1;
1923 null_config_discard(nullb, &lim);
1924 if (dev->zoned) {
1925 rv = null_init_zoned_dev(dev, &lim);
1926 if (rv)
1927 goto out_cleanup_tags;
1928 }
1929
1930 nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
8b631f9c
CH
1931 if (IS_ERR(nullb->disk)) {
1932 rv = PTR_ERR(nullb->disk);
e440626b 1933 goto out_cleanup_zone;
f2298c04 1934 }
8b631f9c 1935 nullb->q = nullb->disk->queue;
f2298c04 1936
eff2c4f1
SL
1937 if (dev->mbps) {
1938 set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
1939 nullb_setup_bwtimer(nullb);
1940 }
1941
deb78b41
SL
1942 if (dev->cache_size > 0) {
1943 set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
f4f84586 1944 blk_queue_write_cache(nullb->q, true, dev->fua);
deb78b41
SL
1945 }
1946
f2298c04 1947 nullb->q->queuedata = nullb;
8b904b5b 1948 blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
f2298c04 1949
95931a24 1950 rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
a2db328b 1951 if (rv < 0)
e440626b 1952 goto out_cleanup_disk;
a2db328b 1953
ee452a8d
DC
1954 nullb->index = rv;
1955 dev->index = rv;
f2298c04 1956
bb4c19e0 1957 if (config_item_name(&dev->group.cg_item)) {
49c3b926
DLM
1958 /* Use configfs dir name as the device name */
1959 snprintf(nullb->disk_name, sizeof(nullb->disk_name),
bb4c19e0 1960 "%s", config_item_name(&dev->group.cg_item));
49c3b926
DLM
1961 } else {
1962		snprintf(nullb->disk_name, sizeof(nullb->disk_name), "nullb%d", nullb->index);
1963 }
b2b7e001 1964
0a39e550
CH
1965 set_capacity(nullb->disk,
1966 ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT);
1967 nullb->disk->major = null_major;
1968 nullb->disk->first_minor = nullb->index;
1969 nullb->disk->minors = 1;
1970 nullb->disk->fops = &null_ops;
1971 nullb->disk->private_data = nullb;
1972 strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
1973
1974 if (nullb->dev->zoned) {
1975 rv = null_register_zoned_dev(nullb);
1976 if (rv)
1977 goto out_ida_free;
1978 }
1979
1980 rv = add_disk(nullb->disk);
9ae2d0aa 1981 if (rv)
ee452a8d 1982 goto out_ida_free;
a514379b 1983
a514379b 1984 list_add_tail(&nullb->list, &nullb_list);
3681c85d 1985
db060f54
DLM
1986 pr_info("disk %s created\n", nullb->disk_name);
1987
f2298c04 1988 return 0;
ee452a8d
DC
1989
1990out_ida_free:
1991 ida_free(&nullb_indexes, nullb->index);
132226b3 1992out_cleanup_disk:
8b9ab626 1993 put_disk(nullb->disk);
fbbd5d3a
DLM
1994out_cleanup_zone:
1995 null_free_zoned_dev(dev);
24d2f903 1996out_cleanup_tags:
8b631f9c 1997 if (nullb->tag_set == &nullb->__tag_set)
82f402fe 1998 blk_mq_free_tag_set(nullb->tag_set);
24d2f903 1999out_cleanup_queues:
8b631f9c 2000 kfree(nullb->queues);
24d2f903
CH
2001out_free_nullb:
2002 kfree(nullb);
2004bfde 2003 dev->nullb = NULL;
24d2f903 2004out:
dc501dc0 2005 return rv;
f2298c04
JA
2006}
2007
49c3b926
DLM
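/*
 * Walk nullb_list under the global lock and return the device whose disk
 * has the given name, or NULL if no such device exists.
 */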
2008static struct nullb *null_find_dev_by_name(const char *name)
2009{
2010 struct nullb *nullb = NULL, *nb;
2011
2012 mutex_lock(&lock);
2013 list_for_each_entry(nb, &nullb_list, list) {
2014 if (strcmp(nb->disk_name, name) == 0) {
2015 nullb = nb;
2016 break;
2017 }
2018 }
2019 mutex_unlock(&lock);
2020
2021 return nullb;
2022}
2023
b3a0a73e
DLM
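/*
 * Allocate a device with the default (module parameter) configuration and
 * bring it up, taking the global lock around null_add_dev(); the
 * configuration is freed again if device creation fails.
 */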
2024static int null_create_dev(void)
2025{
2026 struct nullb_device *dev;
2027 int ret;
2028
2029 dev = null_alloc_dev();
2030 if (!dev)
2031 return -ENOMEM;
2032
a2db328b 2033 mutex_lock(&lock);
b3a0a73e 2034 ret = null_add_dev(dev);
a2db328b 2035 mutex_unlock(&lock);
b3a0a73e
DLM
2036 if (ret) {
2037 null_free_dev(dev);
2038 return ret;
2039 }
2040
2041 return 0;
2042}
2043
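/*
 * Counterpart of null_create_dev(): remove the block device, then release
 * its backing storage and its configuration.
 */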
2044static void null_destroy_dev(struct nullb *nullb)
2045{
2046 struct nullb_device *dev = nullb->dev;
2047
2048 null_del_dev(nullb);
8cfb9819 2049 null_free_device_storage(dev, false);
b3a0a73e
DLM
2050 null_free_dev(dev);
2051}
2052
f2298c04
JA
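/*
 * Module init: sanity-check the module parameters, register the configfs
 * subsystem and the block major, then create nr_devices default devices.
 * Further devices are normally managed at runtime through configfs; a
 * minimal sketch (paths as documented in Documentation/block/null_blk.rst)
 * would be:
 *
 *   mkdir /sys/kernel/config/nullb/mydisk
 *   echo 1 > /sys/kernel/config/nullb/mydisk/memory_backed
 *   echo 1024 > /sys/kernel/config/nullb/mydisk/size        # MB
 *   echo 1 > /sys/kernel/config/nullb/mydisk/power          # creates /dev/mydisk
 */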
2053static int __init null_init(void)
2054{
af096e22 2055 int ret = 0;
f2298c04 2056 unsigned int i;
af096e22 2057 struct nullb *nullb;
f2298c04 2058
2984c868 2059 if (g_bs > PAGE_SIZE) {
9c7eddf1
AA
2060 pr_warn("invalid block size\n");
2061		pr_warn("defaulting block size to %lu\n", PAGE_SIZE);
2984c868 2062 g_bs = PAGE_SIZE;
9967d8ac 2063 }
f2298c04 2064
7ff684a6 2065 if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
9c7eddf1 2066 pr_err("invalid home_node value\n");
7ff684a6
JP
2067 g_home_node = NUMA_NO_NODE;
2068 }
2069
bb4c19e0
AM
2070 if (!null_setup_fault())
2071 return -EINVAL;
2072
e50b1e32 2073 if (g_queue_mode == NULL_Q_RQ) {
525323d2 2074 pr_err("legacy IO path is no longer available\n");
e50b1e32
JA
2075 return -EINVAL;
2076 }
525323d2 2077
8b631f9c 2078 if (g_use_per_node_hctx) {
2984c868 2079 if (g_submit_queues != nr_online_nodes) {
9c7eddf1 2080 pr_warn("submit_queues param is set to %u.\n",
525323d2 2081 nr_online_nodes);
2984c868 2082 g_submit_queues = nr_online_nodes;
fc1bc354 2083 }
525323d2 2084 } else if (g_submit_queues > nr_cpu_ids) {
2984c868 2085 g_submit_queues = nr_cpu_ids;
525323d2 2086 } else if (g_submit_queues <= 0) {
2984c868 2087 g_submit_queues = 1;
525323d2 2088 }
f2298c04 2089
3bf2bd20
SL
2090 config_group_init(&nullb_subsys.su_group);
2091 mutex_init(&nullb_subsys.su_mutex);
2092
2093 ret = configfs_register_subsystem(&nullb_subsys);
2094 if (ret)
14509b74 2095 return ret;
3bf2bd20 2096
f2298c04
JA
2097 mutex_init(&lock);
2098
f2298c04 2099 null_major = register_blkdev(0, "nullb");
db2d153d
MG
2100 if (null_major < 0) {
2101 ret = null_major;
3bf2bd20 2102 goto err_conf;
db2d153d 2103 }
f2298c04
JA
2104
2105 for (i = 0; i < nr_devices; i++) {
b3a0a73e
DLM
2106 ret = null_create_dev();
2107 if (ret)
af096e22 2108 goto err_dev;
f2298c04
JA
2109 }
2110
9c7eddf1 2111 pr_info("module loaded\n");
f2298c04 2112 return 0;
af096e22
MH
2113
2114err_dev:
2115 while (!list_empty(&nullb_list)) {
2116 nullb = list_entry(nullb_list.next, struct nullb, list);
b3a0a73e 2117 null_destroy_dev(nullb);
af096e22 2118 }
af096e22 2119 unregister_blkdev(null_major, "nullb");
3bf2bd20
SL
2120err_conf:
2121 configfs_unregister_subsystem(&nullb_subsys);
af096e22 2122 return ret;
f2298c04
JA
2123}
2124
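/*
 * Module exit: unregister configfs and the block major, destroy every
 * remaining device under the global lock, and free the shared tag set if
 * it was ever initialised.
 */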
2125static void __exit null_exit(void)
2126{
2127 struct nullb *nullb;
2128
3bf2bd20
SL
2129 configfs_unregister_subsystem(&nullb_subsys);
2130
f2298c04
JA
2131 unregister_blkdev(null_major, "nullb");
2132
2133 mutex_lock(&lock);
2134 while (!list_empty(&nullb_list)) {
2135 nullb = list_entry(nullb_list.next, struct nullb, list);
b3a0a73e 2136 null_destroy_dev(nullb);
f2298c04
JA
2137 }
2138 mutex_unlock(&lock);
6bb9535b 2139
14509b74 2140 if (tag_set.ops)
82f402fe 2141 blk_mq_free_tag_set(&tag_set);
07d1b998
ZY
2142
2143 mutex_destroy(&lock);
f2298c04
JA
2144}
2145
2146module_init(null_init);
2147module_exit(null_exit);
2148
231b3db1 2149MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
9e6727f8 2150MODULE_DESCRIPTION("Multi-queue-aware block test driver");
f2298c04 2151MODULE_LICENSE("GPL");