5.9-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 4 Nov 2020 12:23:31 +0000 (13:23 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 4 Nov 2020 12:23:31 +0000 (13:23 +0100)
added patches:
null_blk-fix-locking-in-zoned-mode.patch
null_blk-fix-zone-reset-all-tracing.patch

queue-5.9/null_blk-fix-locking-in-zoned-mode.patch [new file with mode: 0644]
queue-5.9/null_blk-fix-zone-reset-all-tracing.patch [new file with mode: 0644]
queue-5.9/series

diff --git a/queue-5.9/null_blk-fix-locking-in-zoned-mode.patch b/queue-5.9/null_blk-fix-locking-in-zoned-mode.patch
new file mode 100644
index 0000000..ca754e3
--- /dev/null
+++ b/queue-5.9/null_blk-fix-locking-in-zoned-mode.patch
@@ -0,0 +1,324 @@
+From aa1c09cb65e2ed17cb8e652bc7ec84e0af1229eb Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@wdc.com>
+Date: Thu, 29 Oct 2020 20:05:00 +0900
+Subject: null_blk: Fix locking in zoned mode
+
+From: Damien Le Moal <damien.lemoal@wdc.com>
+
+commit aa1c09cb65e2ed17cb8e652bc7ec84e0af1229eb upstream.
+
+When the zoned mode is enabled in null_blk, serializing read, write
+and zone management operations for each zone is necessary to protect
+device level information for managing zone resources (zone open and
+closed counters) as well as each zone condition and write pointer
+position. Commit 35bc10b2eafb ("null_blk: synchronization fix for
+zoned device") introduced a spinlock to implement this serialization.
+However, when memory backing is also enabled, GFP_NOIO memory
+allocations are executed under the spinlock, resulting in might_sleep()
+warnings. Furthermore, the zone_lock spinlock is locked/unlocked using
+spin_lock_irq/spin_unlock_irq, similarly to the memory backing code with
+the nullb->lock spinlock. This nested use of irq locks wrecks the irq
+enabled/disabled state.
+
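+To illustrate (a sketch of the problematic pattern, not code from this
+driver; the locks a and b stand in for dev->zone_lock and nullb->lock):
+
+	spin_lock_irq(&a);		/* interrupts disabled */
+	buf = kmalloc(len, GFP_NOIO);	/* may sleep: might_sleep() splat */
+	spin_lock_irq(&b);		/* nested irq lock */
+	spin_unlock_irq(&b);		/* interrupts re-enabled here... */
+	spin_unlock_irq(&a);		/* ...while a is still held */
+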
+Fix all this by introducing a bitmap of per-zone locks, with locking
+implemented using wait_on_bit_lock_io() and clear_and_wake_up_bit().
+This locking mechanism allows keeping a zone locked while executing
+null_process_cmd(), serializing all operations to the zone while
+allowing sleeping during memory backing allocations with GFP_NOIO.
+Device-level zone resource management information is protected using
+a spinlock which is not held while executing null_process_cmd().
+
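+Schematically, each zone lock is a single bit in the dev->zone_locks
+bitmap, used as a sleeping lock (a sketch of the mechanism, not the
+patch itself):
+
+	/* lock zone zno: wait for the bit to clear, then set it */
+	wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
+	/* sleeping is allowed here, e.g. for a GFP_NOIO allocation */
+	clear_and_wake_up_bit(zno, dev->zone_locks);	/* unlock zone zno */
+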
+Fixes: 35bc10b2eafb ("null_blk: synchronization fix for zoned device")
+Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ drivers/block/null_blk.h       |    2 +-
+ drivers/block/null_blk_zoned.c |  133 +++++++++++++++++++++++++++++------------
+ 2 files changed, 98 insertions(+), 37 deletions(-)
+
+--- a/drivers/block/null_blk.h
++++ b/drivers/block/null_blk.h
+@@ -44,7 +44,7 @@ struct nullb_device {
+       unsigned int nr_zones;
+       struct blk_zone *zones;
+       sector_t zone_size_sects;
+-      spinlock_t zone_lock;
++      unsigned long *zone_locks;
+       unsigned long size; /* device size in MB */
+       unsigned long completion_nsec; /* time in ns to complete a request */
+--- a/drivers/block/null_blk_zoned.c
++++ b/drivers/block/null_blk_zoned.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include <linux/vmalloc.h>
++#include <linux/bitmap.h>
+ #include "null_blk.h"
+
+ #define CREATE_TRACE_POINTS
+@@ -45,7 +46,12 @@ int null_init_zoned_dev(struct nullb_dev
+       if (!dev->zones)
+               return -ENOMEM;
+-      spin_lock_init(&dev->zone_lock);
++      dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
++      if (!dev->zone_locks) {
++              kvfree(dev->zones);
++              return -ENOMEM;
++      }
++
+       if (dev->zone_nr_conv >= dev->nr_zones) {
+               dev->zone_nr_conv = dev->nr_zones - 1;
+               pr_info("changed the number of conventional zones to %u",
+@@ -106,15 +112,26 @@ int null_register_zoned_dev(struct nullb
+ void null_free_zoned_dev(struct nullb_device *dev)
+ {
++      bitmap_free(dev->zone_locks);
+       kvfree(dev->zones);
+ }
+
++static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
++{
++      wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
++}
++
++static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
++{
++      clear_and_wake_up_bit(zno, dev->zone_locks);
++}
++
+ int null_report_zones(struct gendisk *disk, sector_t sector,
+               unsigned int nr_zones, report_zones_cb cb, void *data)
+ {
+       struct nullb *nullb = disk->private_data;
+       struct nullb_device *dev = nullb->dev;
+-      unsigned int first_zone, i;
++      unsigned int first_zone, i, zno;
+       struct blk_zone zone;
+       int error;
+@@ -125,17 +142,17 @@ int null_report_zones(struct gendisk *di
+       nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+       trace_nullb_report_zones(nullb, nr_zones);
+-      for (i = 0; i < nr_zones; i++) {
++      zno = first_zone;
++      for (i = 0; i < nr_zones; i++, zno++) {
+               /*
+                * Stacked DM target drivers will remap the zone information by
+                * modifying the zone information passed to the report callback.
+                * So use a local copy to avoid corruption of the device zone
+                * array.
+                */
+-              spin_lock_irq(&dev->zone_lock);
+-              memcpy(&zone, &dev->zones[first_zone + i],
+-                     sizeof(struct blk_zone));
+-              spin_unlock_irq(&dev->zone_lock);
++              null_lock_zone(dev, zno);
++              memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone));
++              null_unlock_zone(dev, zno);
+               error = cb(&zone, i, data);
+               if (error)
+@@ -145,6 +162,10 @@ int null_report_zones(struct gendisk *di
+       return nr_zones;
+ }
+
++/*
++ * This is called in the case of memory backing from null_process_cmd()
++ * with the target zone already locked.
++ */
+ size_t null_zone_valid_read_len(struct nullb *nullb,
+                               sector_t sector, unsigned int len)
+ {
+@@ -176,10 +197,13 @@ static blk_status_t null_zone_write(stru
+       if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+               return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
++      null_lock_zone(dev, zno);
++
+       switch (zone->cond) {
+       case BLK_ZONE_COND_FULL:
+               /* Cannot write to a full zone */
+-              return BLK_STS_IOERR;
++              ret = BLK_STS_IOERR;
++              break;
+       case BLK_ZONE_COND_EMPTY:
+       case BLK_ZONE_COND_IMP_OPEN:
+       case BLK_ZONE_COND_EXP_OPEN:
+@@ -197,68 +221,96 @@ static blk_status_t null_zone_write(stru
+                       else
+                               cmd->rq->__sector = sector;
+               } else if (sector != zone->wp) {
+-                      return BLK_STS_IOERR;
++                      ret = BLK_STS_IOERR;
++                      break;
+               }
+-              if (zone->wp + nr_sectors > zone->start + zone->capacity)
+-                      return BLK_STS_IOERR;
++              if (zone->wp + nr_sectors > zone->start + zone->capacity) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
+               if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
+                       zone->cond = BLK_ZONE_COND_IMP_OPEN;
+               ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
+               if (ret != BLK_STS_OK)
+-                      return ret;
++                      break;
+               zone->wp += nr_sectors;
+               if (zone->wp == zone->start + zone->capacity)
+                       zone->cond = BLK_ZONE_COND_FULL;
+-              return BLK_STS_OK;
++              ret = BLK_STS_OK;
++              break;
+       default:
+               /* Invalid zone condition */
+-              return BLK_STS_IOERR;
++              ret = BLK_STS_IOERR;
+       }
++
++      null_unlock_zone(dev, zno);
++
++      return ret;
+ }
+
+ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+                                  sector_t sector)
+ {
+       struct nullb_device *dev = cmd->nq->dev;
+-      unsigned int zone_no = null_zone_no(dev, sector);
+-      struct blk_zone *zone = &dev->zones[zone_no];
++      unsigned int zone_no;
++      struct blk_zone *zone;
++      blk_status_t ret = BLK_STS_OK;
+       size_t i;
+-      switch (op) {
+-      case REQ_OP_ZONE_RESET_ALL:
++      if (op == REQ_OP_ZONE_RESET_ALL) {
+               for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
++                      null_lock_zone(dev, i);
+                       zone = &dev->zones[i];
+                       if (zone->cond != BLK_ZONE_COND_EMPTY) {
+                               zone->cond = BLK_ZONE_COND_EMPTY;
+                               zone->wp = zone->start;
+                               trace_nullb_zone_op(cmd, i, zone->cond);
+                       }
++                      null_unlock_zone(dev, i);
+               }
+               return BLK_STS_OK;
++      }
++
++      zone_no = null_zone_no(dev, sector);
++      zone = &dev->zones[zone_no];
++
++      null_lock_zone(dev, zone_no);
++
++      switch (op) {
+       case REQ_OP_ZONE_RESET:
+-              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+-                      return BLK_STS_IOERR;
++              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
+               zone->cond = BLK_ZONE_COND_EMPTY;
+               zone->wp = zone->start;
+               break;
+       case REQ_OP_ZONE_OPEN:
+-              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+-                      return BLK_STS_IOERR;
+-              if (zone->cond == BLK_ZONE_COND_FULL)
+-                      return BLK_STS_IOERR;
++              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
++              if (zone->cond == BLK_ZONE_COND_FULL) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
+               zone->cond = BLK_ZONE_COND_EXP_OPEN;
+               break;
+       case REQ_OP_ZONE_CLOSE:
+-              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+-                      return BLK_STS_IOERR;
+-              if (zone->cond == BLK_ZONE_COND_FULL)
+-                      return BLK_STS_IOERR;
++              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
++              if (zone->cond == BLK_ZONE_COND_FULL) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
+               if (zone->wp == zone->start)
+                       zone->cond = BLK_ZONE_COND_EMPTY;
+@@ -266,27 +318,35 @@ static blk_status_t null_zone_mgmt(struc
+                       zone->cond = BLK_ZONE_COND_CLOSED;
+               break;
+       case REQ_OP_ZONE_FINISH:
+-              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+-                      return BLK_STS_IOERR;
++              if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
++                      ret = BLK_STS_IOERR;
++                      break;
++              }
+               zone->cond = BLK_ZONE_COND_FULL;
+               zone->wp = zone->start + zone->len;
++              ret = BLK_STS_OK;
+               break;
+       default:
+-              return BLK_STS_NOTSUPP;
++              ret = BLK_STS_NOTSUPP;
++              break;
+       }
+-      trace_nullb_zone_op(cmd, zone_no, zone->cond);
+-      return BLK_STS_OK;
++      if (ret == BLK_STS_OK)
++              trace_nullb_zone_op(cmd, zone_no, zone->cond);
++
++      null_unlock_zone(dev, zone_no);
++
++      return ret;
+ }
+
+ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
+                                   sector_t sector, sector_t nr_sectors)
+ {
+-      blk_status_t sts;
+       struct nullb_device *dev = cmd->nq->dev;
++      unsigned int zno = null_zone_no(dev, sector);
++      blk_status_t sts;
+-      spin_lock_irq(&dev->zone_lock);
+       switch (op) {
+       case REQ_OP_WRITE:
+               sts = null_zone_write(cmd, sector, nr_sectors, false);
+@@ -302,9 +362,10 @@ blk_status_t null_process_zoned_cmd(stru
+               sts = null_zone_mgmt(cmd, op, sector);
+               break;
+       default:
++              null_lock_zone(dev, zno);
+               sts = null_process_cmd(cmd, op, sector, nr_sectors);
++              null_unlock_zone(dev, zno);
+       }
+-      spin_unlock_irq(&dev->zone_lock);
+       return sts;
+ }
diff --git a/queue-5.9/null_blk-fix-zone-reset-all-tracing.patch b/queue-5.9/null_blk-fix-zone-reset-all-tracing.patch
new file mode 100644
index 0000000..968cc5e
--- /dev/null
+++ b/queue-5.9/null_blk-fix-zone-reset-all-tracing.patch
@@ -0,0 +1,53 @@
+From f9c9104288da543cd64f186f9e2fba389f415630 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@wdc.com>
+Date: Thu, 29 Oct 2020 20:04:59 +0900
+Subject: null_blk: Fix zone reset all tracing
+
+From: Damien Le Moal <damien.lemoal@wdc.com>
+
+commit f9c9104288da543cd64f186f9e2fba389f415630 upstream.
+
+In the case of the REQ_OP_ZONE_RESET_ALL operation, the command sector is
+ignored and the operation is applied to all sequential zones. For these
+commands, tracing the effect of the command using the command sector to
+determine the target zone is thus incorrect.
+
+Fix null_zone_mgmt() zone condition tracing in the case of
+REQ_OP_ZONE_RESET_ALL to apply tracing to all sequential zones that are
+not already empty.
+
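+Concretely (illustration only), the single trace call after the switch,
+
+	zone_no = null_zone_no(dev, sector);
+	...
+	trace_nullb_zone_op(cmd, zone_no, zone->cond);
+
+records at most one arbitrary zone for a zone reset all command, since
+its sector carries no meaning, instead of one event per zone actually
+reset.
+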
+Fixes: 766c3297d7e1 ("null_blk: add trace in null_blk_zoned.c")
+Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ drivers/block/null_blk_zoned.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/block/null_blk_zoned.c
++++ b/drivers/block/null_blk_zoned.c
+@@ -230,13 +230,15 @@ static blk_status_t null_zone_mgmt(struc
+       switch (op) {
+       case REQ_OP_ZONE_RESET_ALL:
+-              for (i = 0; i < dev->nr_zones; i++) {
+-                      if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
+-                              continue;
+-                      zone[i].cond = BLK_ZONE_COND_EMPTY;
+-                      zone[i].wp = zone[i].start;
++              for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
++                      zone = &dev->zones[i];
++                      if (zone->cond != BLK_ZONE_COND_EMPTY) {
++                              zone->cond = BLK_ZONE_COND_EMPTY;
++                              zone->wp = zone->start;
++                              trace_nullb_zone_op(cmd, i, zone->cond);
++                      }
+               }
+-              break;
++              return BLK_STS_OK;
+       case REQ_OP_ZONE_RESET:
+               if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+                       return BLK_STS_IOERR;
diff --git a/queue-5.9/series b/queue-5.9/series
index 2857957d5dc96220cf7b5ead80548218d7645867..87c3b3a4a27b7f29d05b563c92aa4ebcd1db3c6f 100644
--- a/queue-5.9/series
+++ b/queue-5.9/series
@@ -387,3 +387,5 @@ vhost_vdpa-return-efault-if-copy_from_user-fails.patch
 vdpa-mlx5-fix-error-return-in-map_direct_mr.patch
 time-prevent-undefined-behaviour-in-timespec64_to_ns.patch
 time-sched_clock-mark-sched_clock_read_begin-retry-as-notrace.patch
+null_blk-fix-zone-reset-all-tracing.patch
+null_blk-fix-locking-in-zoned-mode.patch