From: Greg Kroah-Hartman Date: Wed, 4 Nov 2020 12:23:31 +0000 (+0100) Subject: 5.9-stable patches X-Git-Tag: v4.14.204~4 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=14b7a4e39c504cc297e10c95035aedd3e3510e7e;p=thirdparty%2Fkernel%2Fstable-queue.git 5.9-stable patches added patches: null_blk-fix-locking-in-zoned-mode.patch null_blk-fix-zone-reset-all-tracing.patch --- diff --git a/queue-5.9/null_blk-fix-locking-in-zoned-mode.patch b/queue-5.9/null_blk-fix-locking-in-zoned-mode.patch new file mode 100644 index 00000000000..ca754e34e7e --- /dev/null +++ b/queue-5.9/null_blk-fix-locking-in-zoned-mode.patch @@ -0,0 +1,324 @@ +From aa1c09cb65e2ed17cb8e652bc7ec84e0af1229eb Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 29 Oct 2020 20:05:00 +0900 +Subject: null_blk: Fix locking in zoned mode + +From: Damien Le Moal + +commit aa1c09cb65e2ed17cb8e652bc7ec84e0af1229eb upstream. + +When the zoned mode is enabled in null_blk, serializing read, write +and zone management operations for each zone is necessary to protect +device level information for managing zone resources (zone open and +closed counters) as well as each zone condition and write pointer +position. Commit 35bc10b2eafb ("null_blk: synchronization fix for +zoned device") introduced a spinlock to implement this serialization. +However, when memory backing is also enabled, GFP_NOIO memory +allocations are executed under the spinlock, resulting in might_sleep() +warnings. Furthermore, the zone_lock spinlock is locked/unlocked using +spin_lock_irq/spin_unlock_irq, similarly to the memory backing code with +the nullb->lock spinlock. This nested use of irq locks wrecks the irq +enabled/disabled state. + +Fix all this by introducing a bitmap for per-zone lock, with locking +implemented using wait_on_bit_lock_io() and clear_and_wake_up_bit(). 
+This locking mechanism allows keeping a zone locked while executing +null_process_cmd(), serializing all operations to the zone while +allowing to sleep during memory backing allocation with GFP_NOIO. +Device level zone resource management information is protected using +a spinlock which is not held while executing null_process_cmd(). + +Fixes: 35bc10b2eafb ("null_blk: synchronization fix for zoned device") +Signed-off-by: Damien Le Moal +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + + +--- + drivers/block/null_blk.h | 2 + drivers/block/null_blk_zoned.c | 133 +++++++++++++++++++++++++++++------------ + 2 files changed, 98 insertions(+), 37 deletions(-) + +--- a/drivers/block/null_blk.h ++++ b/drivers/block/null_blk.h +@@ -44,7 +44,7 @@ struct nullb_device { + unsigned int nr_zones; + struct blk_zone *zones; + sector_t zone_size_sects; +- spinlock_t zone_lock; ++ unsigned long *zone_locks; + + unsigned long size; /* device size in MB */ + unsigned long completion_nsec; /* time in ns to complete a request */ +--- a/drivers/block/null_blk_zoned.c ++++ b/drivers/block/null_blk_zoned.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + #include <linux/vmalloc.h> ++#include <linux/bitmap.h> + #include "null_blk.h" + + #define CREATE_TRACE_POINTS +@@ -45,7 +46,12 @@ int null_init_zoned_dev(struct nullb_dev + if (!dev->zones) + return -ENOMEM; + +- spin_lock_init(&dev->zone_lock); ++ dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); ++ if (!dev->zone_locks) { ++ kvfree(dev->zones); ++ return -ENOMEM; ++ } ++ + if (dev->zone_nr_conv >= dev->nr_zones) { + dev->zone_nr_conv = dev->nr_zones - 1; + pr_info("changed the number of conventional zones to %u", +@@ -106,15 +112,26 @@ int null_register_zoned_dev(struct nullb + + void null_free_zoned_dev(struct nullb_device *dev) + { ++ bitmap_free(dev->zone_locks); + kvfree(dev->zones); + } + ++static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) ++{ ++ wait_on_bit_lock_io(dev->zone_locks, zno, 
TASK_UNINTERRUPTIBLE); ++} ++ ++static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) ++{ ++ clear_and_wake_up_bit(zno, dev->zone_locks); ++} ++ + int null_report_zones(struct gendisk *disk, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data) + { + struct nullb *nullb = disk->private_data; + struct nullb_device *dev = nullb->dev; +- unsigned int first_zone, i; ++ unsigned int first_zone, i, zno; + struct blk_zone zone; + int error; + +@@ -125,17 +142,17 @@ int null_report_zones(struct gendisk *di + nr_zones = min(nr_zones, dev->nr_zones - first_zone); + trace_nullb_report_zones(nullb, nr_zones); + +- for (i = 0; i < nr_zones; i++) { ++ zno = first_zone; ++ for (i = 0; i < nr_zones; i++, zno++) { + /* + * Stacked DM target drivers will remap the zone information by + * modifying the zone information passed to the report callback. + * So use a local copy to avoid corruption of the device zone + * array. + */ +- spin_lock_irq(&dev->zone_lock); +- memcpy(&zone, &dev->zones[first_zone + i], +- sizeof(struct blk_zone)); +- spin_unlock_irq(&dev->zone_lock); ++ null_lock_zone(dev, zno); ++ memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); ++ null_unlock_zone(dev, zno); + + error = cb(&zone, i, data); + if (error) +@@ -145,6 +162,10 @@ int null_report_zones(struct gendisk *di + return nr_zones; + } + ++/* ++ * This is called in the case of memory backing from null_process_cmd() ++ * with the target zone already locked. 
++ */ + size_t null_zone_valid_read_len(struct nullb *nullb, + sector_t sector, unsigned int len) + { +@@ -176,10 +197,13 @@ static blk_status_t null_zone_write(stru + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + ++ null_lock_zone(dev, zno); ++ + switch (zone->cond) { + case BLK_ZONE_COND_FULL: + /* Cannot write to a full zone */ +- return BLK_STS_IOERR; ++ ret = BLK_STS_IOERR; ++ break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: +@@ -197,68 +221,96 @@ static blk_status_t null_zone_write(stru + else + cmd->rq->__sector = sector; + } else if (sector != zone->wp) { +- return BLK_STS_IOERR; ++ ret = BLK_STS_IOERR; ++ break; + } + +- if (zone->wp + nr_sectors > zone->start + zone->capacity) +- return BLK_STS_IOERR; ++ if (zone->wp + nr_sectors > zone->start + zone->capacity) { ++ ret = BLK_STS_IOERR; ++ break; ++ } + + if (zone->cond != BLK_ZONE_COND_EXP_OPEN) + zone->cond = BLK_ZONE_COND_IMP_OPEN; + + ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + if (ret != BLK_STS_OK) +- return ret; ++ break; + + zone->wp += nr_sectors; + if (zone->wp == zone->start + zone->capacity) + zone->cond = BLK_ZONE_COND_FULL; +- return BLK_STS_OK; ++ ret = BLK_STS_OK; ++ break; + default: + /* Invalid zone condition */ +- return BLK_STS_IOERR; ++ ret = BLK_STS_IOERR; + } ++ ++ null_unlock_zone(dev, zno); ++ ++ return ret; + } + + static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, + sector_t sector) + { + struct nullb_device *dev = cmd->nq->dev; +- unsigned int zone_no = null_zone_no(dev, sector); +- struct blk_zone *zone = &dev->zones[zone_no]; ++ unsigned int zone_no; ++ struct blk_zone *zone; ++ blk_status_t ret = BLK_STS_OK; + size_t i; + +- switch (op) { +- case REQ_OP_ZONE_RESET_ALL: ++ if (op == REQ_OP_ZONE_RESET_ALL) { + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { ++ null_lock_zone(dev, i); + zone = &dev->zones[i]; + 
if (zone->cond != BLK_ZONE_COND_EMPTY) { + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; + trace_nullb_zone_op(cmd, i, zone->cond); + } ++ null_unlock_zone(dev, i); + } + return BLK_STS_OK; ++ } ++ ++ zone_no = null_zone_no(dev, sector); ++ zone = &dev->zones[zone_no]; ++ ++ null_lock_zone(dev, zone_no); ++ ++ switch (op) { + case REQ_OP_ZONE_RESET: +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { ++ ret = BLK_STS_IOERR; ++ break; ++ } + + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; + break; + case REQ_OP_ZONE_OPEN: +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; +- if (zone->cond == BLK_ZONE_COND_FULL) +- return BLK_STS_IOERR; ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { ++ ret = BLK_STS_IOERR; ++ break; ++ } ++ if (zone->cond == BLK_ZONE_COND_FULL) { ++ ret = BLK_STS_IOERR; ++ break; ++ } + + zone->cond = BLK_ZONE_COND_EXP_OPEN; + break; + case REQ_OP_ZONE_CLOSE: +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; +- if (zone->cond == BLK_ZONE_COND_FULL) +- return BLK_STS_IOERR; ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { ++ ret = BLK_STS_IOERR; ++ break; ++ } ++ if (zone->cond == BLK_ZONE_COND_FULL) { ++ ret = BLK_STS_IOERR; ++ break; ++ } + + if (zone->wp == zone->start) + zone->cond = BLK_ZONE_COND_EMPTY; +@@ -266,27 +318,35 @@ static blk_status_t null_zone_mgmt(struc + zone->cond = BLK_ZONE_COND_CLOSED; + break; + case REQ_OP_ZONE_FINISH: +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { ++ ret = BLK_STS_IOERR; ++ break; ++ } + + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = zone->start + zone->len; ++ ret = BLK_STS_OK; + break; + default: +- return BLK_STS_NOTSUPP; ++ ret = BLK_STS_NOTSUPP; ++ break; + } + +- trace_nullb_zone_op(cmd, zone_no, zone->cond); +- return BLK_STS_OK; ++ if (ret == BLK_STS_OK) ++ 
trace_nullb_zone_op(cmd, zone_no, zone->cond); ++ ++ null_unlock_zone(dev, zone_no); ++ ++ return ret; + } + + blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, + sector_t sector, sector_t nr_sectors) + { +- blk_status_t sts; + struct nullb_device *dev = cmd->nq->dev; ++ unsigned int zno = null_zone_no(dev, sector); ++ blk_status_t sts; + +- spin_lock_irq(&dev->zone_lock); + switch (op) { + case REQ_OP_WRITE: + sts = null_zone_write(cmd, sector, nr_sectors, false); +@@ -302,9 +362,10 @@ blk_status_t null_process_zoned_cmd(stru + sts = null_zone_mgmt(cmd, op, sector); + break; + default: ++ null_lock_zone(dev, zno); + sts = null_process_cmd(cmd, op, sector, nr_sectors); ++ null_unlock_zone(dev, zno); + } +- spin_unlock_irq(&dev->zone_lock); + + return sts; + } diff --git a/queue-5.9/null_blk-fix-zone-reset-all-tracing.patch b/queue-5.9/null_blk-fix-zone-reset-all-tracing.patch new file mode 100644 index 00000000000..968cc5eae25 --- /dev/null +++ b/queue-5.9/null_blk-fix-zone-reset-all-tracing.patch @@ -0,0 +1,53 @@ +From f9c9104288da543cd64f186f9e2fba389f415630 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 29 Oct 2020 20:04:59 +0900 +Subject: null_blk: Fix zone reset all tracing + +From: Damien Le Moal + +commit f9c9104288da543cd64f186f9e2fba389f415630 upstream. + +In the case of the REQ_OP_ZONE_RESET_ALL operation, the command sector is +ignored and the operation is applied to all sequential zones. For these +commands, tracing the effect of the command using the command sector to +determine the target zone is thus incorrect. + +Fix null_zone_mgmt() zone condition tracing in the case of +REQ_OP_ZONE_RESET_ALL to apply tracing to all sequential zones that are +not already empty. 
+ +Fixes: 766c3297d7e1 ("null_blk: add trace in null_blk_zoned.c") +Signed-off-by: Damien Le Moal +Cc: stable@vger.kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + + +--- + drivers/block/null_blk_zoned.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/drivers/block/null_blk_zoned.c ++++ b/drivers/block/null_blk_zoned.c +@@ -230,13 +230,15 @@ static blk_status_t null_zone_mgmt(struc + + switch (op) { + case REQ_OP_ZONE_RESET_ALL: +- for (i = 0; i < dev->nr_zones; i++) { +- if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL) +- continue; +- zone[i].cond = BLK_ZONE_COND_EMPTY; +- zone[i].wp = zone[i].start; ++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { ++ zone = &dev->zones[i]; ++ if (zone->cond != BLK_ZONE_COND_EMPTY) { ++ zone->cond = BLK_ZONE_COND_EMPTY; ++ zone->wp = zone->start; ++ trace_nullb_zone_op(cmd, i, zone->cond); ++ } + } +- break; ++ return BLK_STS_OK; + case REQ_OP_ZONE_RESET: + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; diff --git a/queue-5.9/series b/queue-5.9/series index 2857957d5dc..87c3b3a4a27 100644 --- a/queue-5.9/series +++ b/queue-5.9/series @@ -387,3 +387,5 @@ vhost_vdpa-return-efault-if-copy_from_user-fails.patch vdpa-mlx5-fix-error-return-in-map_direct_mr.patch time-prevent-undefined-behaviour-in-timespec64_to_ns.patch time-sched_clock-mark-sched_clock_read_begin-retry-as-notrace.patch +null_blk-fix-zone-reset-all-tracing.patch +null_blk-fix-locking-in-zoned-mode.patch