git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.16-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 29 Apr 2018 11:49:27 +0000 (13:49 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 29 Apr 2018 11:49:27 +0000 (13:49 +0200)
added patches:
bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch
blk-mq-start-request-gstate-with-gen-1.patch
block-do-not-use-interruptible-wait-anywhere.patch
kobject-don-t-use-warn-for-registration-failures.patch
mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch
mtd-rawnand-tango-fix-struct-clk-memory-leak.patch
scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch
scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch
vfio-ccw-process-ssch-with-interrupts-disabled.patch

queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch [new file with mode: 0644]
queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch [new file with mode: 0644]
queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch [new file with mode: 0644]
queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch [new file with mode: 0644]
queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch [new file with mode: 0644]
queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch [new file with mode: 0644]
queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch [new file with mode: 0644]
queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch [new file with mode: 0644]
queue-4.16/series
queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch [new file with mode: 0644]

diff --git a/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch b/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch
new file mode 100644 (file)
index 0000000..f78c2a8
--- /dev/null
@@ -0,0 +1,48 @@
+From 72961c4e6082be79825265d9193272b8a1634dec Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 17 Apr 2018 17:08:52 -0600
+Subject: bfq-iosched: ensure to clear bic/bfqq pointers when preparing request
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 72961c4e6082be79825265d9193272b8a1634dec upstream.
+
+Even if we don't have an IO context attached to a request, we still
+need to clear the priv[0..1] pointers, as they could be pointing
+to previously used bic/bfqq structures. If we don't do so, we'll
+either corrupt memory on dispatching a request, or cause an
+imbalance in counters.
+
+Inspired by a fix from Kees.
+
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Reported-by: Kees Cook <keescook@chromium.org>
+Cc: stable@vger.kernel.org
+Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/bfq-iosched.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4911,8 +4911,16 @@ static void bfq_prepare_request(struct r
+       bool new_queue = false;
+       bool bfqq_already_existing = false, split = false;
+-      if (!rq->elv.icq)
++      /*
++       * Even if we don't have an icq attached, we should still clear
++       * the scheduler pointers, as they might point to previously
++       * allocated bic/bfqq structs.
++       */
++      if (!rq->elv.icq) {
++              rq->elv.priv[0] = rq->elv.priv[1] = NULL;
+               return;
++      }
++
+       bic = icq_to_bic(rq->elv.icq);
+       spin_lock_irq(&bfqd->lock);
diff --git a/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch b/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch
new file mode 100644 (file)
index 0000000..e4fb483
--- /dev/null
@@ -0,0 +1,64 @@
+From f4560231ec42092c6662acccabb28c6cac9f5dfb Mon Sep 17 00:00:00 2001
+From: Jianchao Wang <jianchao.w.wang@oracle.com>
+Date: Tue, 17 Apr 2018 11:46:20 +0800
+Subject: blk-mq: start request gstate with gen 1
+
+From: Jianchao Wang <jianchao.w.wang@oracle.com>
+
+commit f4560231ec42092c6662acccabb28c6cac9f5dfb upstream.
+
+rq->gstate and rq->aborted_gstate both are zero before rqs are
+allocated. If we have a small timeout, when the timer fires,
+there could be rqs that are never allocated, and also there could
+be rq that has been allocated but not initialized and started. At
+the moment, the rq->gstate and rq->aborted_gstate both are 0, thus
+the blk_mq_terminate_expired will identify the rq is timed out and
+invoke .timeout early.
+
+For scsi, this will cause scsi_times_out to be invoked before the
+scsi_cmnd is not initialized, scsi_cmnd->device is still NULL at
+the moment, then we will get crash.
+
+Cc: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: Martin Steigerwald <Martin@Lichtvoll.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-core.c |    4 ++++
+ block/blk-mq.c   |    7 +++++++
+ 2 files changed, 11 insertions(+)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -129,6 +129,10 @@ void blk_rq_init(struct request_queue *q
+       rq->part = NULL;
+       seqcount_init(&rq->gstate_seq);
+       u64_stats_init(&rq->aborted_gstate_sync);
++      /*
++       * See comment of blk_mq_init_request
++       */
++      WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
+ }
+ EXPORT_SYMBOL(blk_rq_init);
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -2076,6 +2076,13 @@ static int blk_mq_init_request(struct bl
+       seqcount_init(&rq->gstate_seq);
+       u64_stats_init(&rq->aborted_gstate_sync);
++      /*
++       * start gstate with gen 1 instead of 0, otherwise it will be equal
++       * to aborted_gstate, and be identified timed out by
++       * blk_mq_terminate_expired.
++       */
++      WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
++
+       return 0;
+ }
diff --git a/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch b/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch
new file mode 100644 (file)
index 0000000..77eec75
--- /dev/null
@@ -0,0 +1,74 @@
+From 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 Mon Sep 17 00:00:00 2001
+From: Alan Jenkins <alan.christopher.jenkins@gmail.com>
+Date: Thu, 12 Apr 2018 19:11:58 +0100
+Subject: block: do not use interruptible wait anywhere
+
+From: Alan Jenkins <alan.christopher.jenkins@gmail.com>
+
+commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream.
+
+When blk_queue_enter() waits for a queue to unfreeze, or unset the
+PREEMPT_ONLY flag, do not allow it to be interrupted by a signal.
+
+The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec
+("block, scsi: Make SCSI quiesce and resume work reliably").  Note the SCSI
+device is resumed asynchronously, i.e. after un-freezing userspace tasks.
+
+So that commit exposed the bug as a regression in v4.15.  A mysterious
+SIGBUS (or -EIO) sometimes happened during the time the device was being
+resumed.  Most frequently, there was no kernel log message, and we saw Xorg
+or Xwayland killed by SIGBUS.[1]
+
+[1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979
+
+Without this fix, I get an IO error in this test:
+
+# dd if=/dev/sda of=/dev/null iflag=direct & \
+  while killall -SIGUSR1 dd; do sleep 0.1; done & \
+  echo mem > /sys/power/state ; \
+  sleep 5; killall dd  # stop after 5 seconds
+
+The interruptible wait was added to blk_queue_enter in
+commit 3ef28e83ab15 ("block: generic request_queue reference counting").
+Before then, the interruptible wait was only in blk-mq, but I don't think
+it could ever have been correct.
+
+Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Alan Jenkins <alan.christopher.jenkins@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-core.c |   11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -829,7 +829,6 @@ int blk_queue_enter(struct request_queue
+       while (true) {
+               bool success = false;
+-              int ret;
+               rcu_read_lock();
+               if (percpu_ref_tryget_live(&q->q_usage_counter)) {
+@@ -861,14 +860,12 @@ int blk_queue_enter(struct request_queue
+                */
+               smp_rmb();
+-              ret = wait_event_interruptible(q->mq_freeze_wq,
+-                              (atomic_read(&q->mq_freeze_depth) == 0 &&
+-                               (preempt || !blk_queue_preempt_only(q))) ||
+-                              blk_queue_dying(q));
++              wait_event(q->mq_freeze_wq,
++                         (atomic_read(&q->mq_freeze_depth) == 0 &&
++                          (preempt || !blk_queue_preempt_only(q))) ||
++                         blk_queue_dying(q));
+               if (blk_queue_dying(q))
+                       return -ENODEV;
+-              if (ret)
+-                      return ret;
+       }
+ }
diff --git a/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch b/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch
new file mode 100644 (file)
index 0000000..50023d1
--- /dev/null
@@ -0,0 +1,47 @@
+From 3e14c6abbfb5c94506edda9d8e2c145d79375798 Mon Sep 17 00:00:00 2001
+From: Dmitry Vyukov <dvyukov@google.com>
+Date: Wed, 11 Apr 2018 17:22:43 +0200
+Subject: kobject: don't use WARN for registration failures
+
+From: Dmitry Vyukov <dvyukov@google.com>
+
+commit 3e14c6abbfb5c94506edda9d8e2c145d79375798 upstream.
+
+This WARNING proved to be noisy. The function still returns an error
+and callers should handle it. That's how most of kernel code works.
+Downgrade the WARNING to pr_err() and leave WARNINGs for kernel bugs.
+
+Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
+Reported-by: syzbot+209c0f67f99fec8eb14b@syzkaller.appspotmail.com
+Reported-by: syzbot+7fb6d9525a4528104e05@syzkaller.appspotmail.com
+Reported-by: syzbot+2e63711063e2d8f9ea27@syzkaller.appspotmail.com
+Reported-by: syzbot+de73361ee4971b6e6f75@syzkaller.appspotmail.com
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/kobject.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/lib/kobject.c
++++ b/lib/kobject.c
+@@ -232,14 +232,12 @@ static int kobject_add_internal(struct k
+               /* be noisy on error issues */
+               if (error == -EEXIST)
+-                      WARN(1, "%s failed for %s with "
+-                           "-EEXIST, don't try to register things with "
+-                           "the same name in the same directory.\n",
+-                           __func__, kobject_name(kobj));
++                      pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n",
++                             __func__, kobject_name(kobj));
+               else
+-                      WARN(1, "%s failed for %s (error: %d parent: %s)\n",
+-                           __func__, kobject_name(kobj), error,
+-                           parent ? kobject_name(parent) : "'none'");
++                      pr_err("%s failed for %s (error: %d parent: %s)\n",
++                             __func__, kobject_name(kobj), error,
++                             parent ? kobject_name(parent) : "'none'");
+       } else
+               kobj->state_in_sysfs = 1;
diff --git a/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch b/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch
new file mode 100644 (file)
index 0000000..1518844
--- /dev/null
@@ -0,0 +1,123 @@
+From f6997bec6af43396ff530caee79e178d32774a49 Mon Sep 17 00:00:00 2001
+From: Miquel Raynal <miquel.raynal@bootlin.com>
+Date: Wed, 25 Apr 2018 16:16:32 +0200
+Subject: mtd: rawnand: marvell: fix the chip-select DT parsing logic
+
+From: Miquel Raynal <miquel.raynal@bootlin.com>
+
+commit f6997bec6af43396ff530caee79e178d32774a49 upstream.
+
+The block responsible of parsing the DT for the number of chip-select
+lines uses an 'if/else if/else if' block. The content of the second and
+third 'else if' conditions are:
+        1/ the actual condition to enter the sub-block and
+        2/ the operation to do in this sub-block.
+
+        [...]
+        else if (condition1_to_enter && action1() == failed)
+                raise_error();
+        else if (condition2_to_enter && action2() == failed)
+                raise_error();
+        [...]
+
+In case of failure, the sub-block is entered and an error raised.
+Otherwise, in case of success, the code would continue erroneously in
+the next 'else if' statement because it did not failed (and did not
+enter the first 'else if' sub-block).
+
+The first 'else if' refers to legacy bindings while the second 'else if'
+refers to new bindings. The second 'else if', which is entered
+erroneously, checks for the 'reg' property, which, for old bindings,
+does not mean anything because it would not be the number of CS
+available, but the regular register map of almost any DT node. This
+being said, the content of the 'reg' property being the register map
+offset and length, it has '2' values, so the number of CS in this
+situation is assumed to be '2'.
+
+When running nand_scan_ident() with 2 CS, the core will check for an
+array of chips. It will first issue a RESET and then a READ_ID. Of
+course this will trigger two timeouts because there is no chip in front
+of the second CS:
+
+[    1.367460] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000080)
+[    1.474292] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000280)
+
+Indeed, this is harmless and the core will then assume there is only one
+valid CS.
+
+Fix the logic in the whole block by entering each sub-block just on the
+'is legacy' condition, doing the action inside the sub-block. This way,
+when the action succeeds, the whole block is left.
+
+Furthermore, for both the old bindings and the new bindings the same
+logic was applied to retrieve the number of CS lines:
+using of_get_property() to get a size in bytes, converted in the actual
+number of lines by dividing it per sizeof(u32) (4 bytes).
+
+This is fine for the 'reg' property which is a list of the CS IDs but
+not for the 'num-cs' property which is directly the value of the number
+of CS.
+
+Anyway, no existing DT uses another value than 'num-cs = <1>' and no
+other value has ever been supported by the old driver (pxa3xx_nand.c).
+Remove this condition and apply a number of 1 CS anyway, as already
+described in the bindings.
+
+Finally, the 'reg' property of a 'nand' node (with the new bindings)
+gives the IDs of each CS line in use. marvell_nand.c driver first look
+at the number of CS lines that are present in this property.
+
+Better use of_property_count_elems_of_size() than dividing by 4 the size
+of the number of bytes returned by of_get_property().
+
+Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Tested-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/nand/marvell_nand.c |   25 ++++++++-----------------
+ 1 file changed, 8 insertions(+), 17 deletions(-)
+
+--- a/drivers/mtd/nand/marvell_nand.c
++++ b/drivers/mtd/nand/marvell_nand.c
+@@ -2277,29 +2277,20 @@ static int marvell_nand_chip_init(struct
+       /*
+        * The legacy "num-cs" property indicates the number of CS on the only
+        * chip connected to the controller (legacy bindings does not support
+-       * more than one chip). CS are only incremented one by one while the RB
+-       * pin is always the #0.
++       * more than one chip). The CS and RB pins are always the #0.
+        *
+        * When not using legacy bindings, a couple of "reg" and "nand-rb"
+        * properties must be filled. For each chip, expressed as a subnode,
+        * "reg" points to the CS lines and "nand-rb" to the RB line.
+        */
+-      if (pdata) {
++      if (pdata || nfc->caps->legacy_of_bindings) {
+               nsels = 1;
+-      } else if (nfc->caps->legacy_of_bindings &&
+-                 !of_get_property(np, "num-cs", &nsels)) {
+-              dev_err(dev, "missing num-cs property\n");
+-              return -EINVAL;
+-      } else if (!of_get_property(np, "reg", &nsels)) {
+-              dev_err(dev, "missing reg property\n");
+-              return -EINVAL;
+-      }
+-
+-      if (!pdata)
+-              nsels /= sizeof(u32);
+-      if (!nsels) {
+-              dev_err(dev, "invalid reg property size\n");
+-              return -EINVAL;
++      } else {
++              nsels = of_property_count_elems_of_size(np, "reg", sizeof(u32));
++              if (nsels <= 0) {
++                      dev_err(dev, "missing/invalid reg property\n");
++                      return -EINVAL;
++              }
+       }
+       /* Alloc the nand chip structure */
diff --git a/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch b/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch
new file mode 100644 (file)
index 0000000..6c7becb
--- /dev/null
@@ -0,0 +1,34 @@
+From 007b4e8b705a4eff184d567c5a8b496622f9e116 Mon Sep 17 00:00:00 2001
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Date: Thu, 5 Apr 2018 14:57:59 +0200
+Subject: mtd: rawnand: tango: Fix struct clk memory leak
+
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+
+commit 007b4e8b705a4eff184d567c5a8b496622f9e116 upstream.
+
+Use devm_clk_get() to let Linux manage struct clk memory.
+
+Fixes: 6956e2385a16 ("add tango NAND flash controller support")
+Cc: stable@vger.kernel.org
+Reported-by: Xidong Wang <wangxidong_97@163.com>
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/nand/tango_nand.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mtd/nand/tango_nand.c
++++ b/drivers/mtd/nand/tango_nand.c
+@@ -643,7 +643,7 @@ static int tango_nand_probe(struct platf
+       writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE);
+-      clk = clk_get(&pdev->dev, NULL);
++      clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
diff --git a/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch b/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch
new file mode 100644 (file)
index 0000000..b9d0aeb
--- /dev/null
@@ -0,0 +1,36 @@
+From 505aa4b6a8834a2300971c5220c380c3271ebde3 Mon Sep 17 00:00:00 2001
+From: Mahesh Rajashekhara <mahesh.rajashekhara@microsemi.com>
+Date: Tue, 17 Apr 2018 17:03:12 +0530
+Subject: scsi: sd: Defer spinning up drive while SANITIZE is in progress
+
+From: Mahesh Rajashekhara <mahesh.rajashekhara@microsemi.com>
+
+commit 505aa4b6a8834a2300971c5220c380c3271ebde3 upstream.
+
+A drive being sanitized will return NOT READY / ASC 0x4 / ASCQ
+0x1b ("LOGICAL UNIT NOT READY. SANITIZE IN PROGRESS").
+
+Prevent spinning up the drive until this condition clears.
+
+[mkp: tweaked commit message]
+
+Signed-off-by: Mahesh Rajashekhara <mahesh.rajashekhara@microsemi.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sd.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -2121,6 +2121,8 @@ sd_spinup_disk(struct scsi_disk *sdkp)
+                               break;  /* standby */
+                       if (sshdr.asc == 4 && sshdr.ascq == 0xc)
+                               break;  /* unavailable */
++                      if (sshdr.asc == 4 && sshdr.ascq == 0x1b)
++                              break;  /* sanitize in progress */
+                       /*
+                        * Issue command to spin up drive when not ready
+                        */
diff --git a/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch b/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch
new file mode 100644 (file)
index 0000000..0b4defe
--- /dev/null
@@ -0,0 +1,342 @@
+From ccce20fc7968d546fb1e8e147bf5cdc8afc4278a Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Mon, 16 Apr 2018 18:04:41 -0700
+Subject: scsi: sd_zbc: Avoid that resetting a zone fails sporadically
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit ccce20fc7968d546fb1e8e147bf5cdc8afc4278a upstream.
+
+Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is
+called from sd_probe_async() and since sd_revalidate_disk() calls
+sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
+concurrently with blkdev_report_zones() and/or blkdev_reset_zones().  That can
+cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets
+q->nr_zones to zero before restoring it to the actual value, even if no drive
+characteristics have changed.  Avoid that this can happen by making the
+following changes:
+
+- Protect the code that updates zone information with blk_queue_enter()
+  and blk_queue_exit().
+- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
+  these functions do not modify struct scsi_disk before all zone
+  information has been obtained.
+
+Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track
+legacy requests"; kernel v4.15) the request queue freezing mechanism also
+affects legacy request queues.
+
+Fixes: 89d947561077 ("sd: Implement support for ZBC devices")
+Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Damien Le Moal <damien.lemoal@wdc.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: stable@vger.kernel.org # v4.16
+Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sd_zbc.c  |  140 ++++++++++++++++++++++++++++---------------------
+ include/linux/blkdev.h |    5 +
+ 2 files changed, 87 insertions(+), 58 deletions(-)
+
+--- a/drivers/scsi/sd_zbc.c
++++ b/drivers/scsi/sd_zbc.c
+@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct
+  *
+  * Check that all zones of the device are equal. The last zone can however
+  * be smaller. The zone size must also be a power of two number of LBAs.
++ *
++ * Returns the zone size in bytes upon success or an error code upon failure.
+  */
+-static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
++static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
+ {
+       u64 zone_blocks = 0;
+       sector_t block = 0;
+@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct
+       int ret;
+       u8 same;
+-      sdkp->zone_blocks = 0;
+-
+       /* Get a buffer */
+       buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
+       if (!buf)
+@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct
+               /* Parse zone descriptors */
+               while (rec < buf + buf_len) {
+-                      zone_blocks = get_unaligned_be64(&rec[8]);
+-                      if (sdkp->zone_blocks == 0) {
+-                              sdkp->zone_blocks = zone_blocks;
+-                      } else if (zone_blocks != sdkp->zone_blocks &&
+-                                 (block + zone_blocks < sdkp->capacity
+-                                  || zone_blocks > sdkp->zone_blocks)) {
+-                              zone_blocks = 0;
++                      u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
++
++                      if (zone_blocks == 0) {
++                              zone_blocks = this_zone_blocks;
++                      } else if (this_zone_blocks != zone_blocks &&
++                                 (block + this_zone_blocks < sdkp->capacity
++                                  || this_zone_blocks > zone_blocks)) {
++                              this_zone_blocks = 0;
+                               goto out;
+                       }
+-                      block += zone_blocks;
++                      block += this_zone_blocks;
+                       rec += 64;
+               }
+@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct
+       } while (block < sdkp->capacity);
+-      zone_blocks = sdkp->zone_blocks;
+-
+ out:
+       if (!zone_blocks) {
+               if (sdkp->first_scan)
+@@ -488,8 +487,7 @@ out:
+                                 "Zone size too large\n");
+               ret = -ENODEV;
+       } else {
+-              sdkp->zone_blocks = zone_blocks;
+-              sdkp->zone_shift = ilog2(zone_blocks);
++              ret = zone_blocks;
+       }
+ out_free:
+@@ -500,21 +498,21 @@ out_free:
+ /**
+  * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
+- * @sdkp: The disk of the bitmap
++ * @nr_zones: Number of zones to allocate space for.
++ * @numa_node: NUMA node to allocate the memory from.
+  */
+-static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
++static inline unsigned long *
++sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
+ {
+-      struct request_queue *q = sdkp->disk->queue;
+-
+-      return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
+-                          * sizeof(unsigned long),
+-                          GFP_KERNEL, q->node);
++      return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
++                          GFP_KERNEL, numa_node);
+ }
+ /**
+  * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
+  * @sdkp: disk used
+  * @buf: report reply buffer
++ * @zone_shift: logarithm base 2 of the number of blocks in a zone
+  * @seq_zone_bitamp: bitmap of sequential zones to set
+  *
+  * Parse reported zone descriptors in @buf to identify sequential zones and
+@@ -524,7 +522,7 @@ static inline unsigned long *sd_zbc_allo
+  * Return the LBA after the last zone reported.
+  */
+ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
+-                                   unsigned int buflen,
++                                   unsigned int buflen, u32 zone_shift,
+                                    unsigned long *seq_zones_bitmap)
+ {
+       sector_t lba, next_lba = sdkp->capacity;
+@@ -543,7 +541,7 @@ static sector_t sd_zbc_get_seq_zones(str
+               if (type != ZBC_ZONE_TYPE_CONV &&
+                   cond != ZBC_ZONE_COND_READONLY &&
+                   cond != ZBC_ZONE_COND_OFFLINE)
+-                      set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
++                      set_bit(lba >> zone_shift, seq_zones_bitmap);
+               next_lba = lba + get_unaligned_be64(&rec[8]);
+               rec += 64;
+       }
+@@ -552,12 +550,16 @@ static sector_t sd_zbc_get_seq_zones(str
+ }
+ /**
+- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
++ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
+  * @sdkp: target disk
++ * @zone_shift: logarithm base 2 of the number of blocks in a zone
++ * @nr_zones: number of zones to set up a seq zone bitmap for
+  *
+  * Allocate a zone bitmap and initialize it by identifying sequential zones.
+  */
+-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
++static unsigned long *
++sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
++                            u32 nr_zones)
+ {
+       struct request_queue *q = sdkp->disk->queue;
+       unsigned long *seq_zones_bitmap;
+@@ -565,9 +567,9 @@ static int sd_zbc_setup_seq_zones_bitmap
+       unsigned char *buf;
+       int ret = -ENOMEM;
+-      seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
++      seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
+       if (!seq_zones_bitmap)
+-              return -ENOMEM;
++              return ERR_PTR(-ENOMEM);
+       buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
+       if (!buf)
+@@ -578,7 +580,7 @@ static int sd_zbc_setup_seq_zones_bitmap
+               if (ret)
+                       goto out;
+               lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
+-                                         seq_zones_bitmap);
++                                         zone_shift, seq_zones_bitmap);
+       }
+       if (lba != sdkp->capacity) {
+@@ -590,12 +592,9 @@ out:
+       kfree(buf);
+       if (ret) {
+               kfree(seq_zones_bitmap);
+-              return ret;
++              return ERR_PTR(ret);
+       }
+-
+-      q->seq_zones_bitmap = seq_zones_bitmap;
+-
+-      return 0;
++      return seq_zones_bitmap;
+ }
+ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
+@@ -611,44 +610,64 @@ static void sd_zbc_cleanup(struct scsi_d
+       q->nr_zones = 0;
+ }
+-static int sd_zbc_setup(struct scsi_disk *sdkp)
++static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
+ {
+       struct request_queue *q = sdkp->disk->queue;
++      u32 zone_shift = ilog2(zone_blocks);
++      u32 nr_zones;
+       int ret;
+-      /* READ16/WRITE16 is mandatory for ZBC disks */
+-      sdkp->device->use_16_for_rw = 1;
+-      sdkp->device->use_10_for_rw = 0;
+-
+       /* chunk_sectors indicates the zone size */
+-      blk_queue_chunk_sectors(sdkp->disk->queue,
+-                      logical_to_sectors(sdkp->device, sdkp->zone_blocks));
+-      sdkp->nr_zones =
+-              round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
++      blk_queue_chunk_sectors(q,
++                      logical_to_sectors(sdkp->device, zone_blocks));
++      nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
+       /*
+        * Initialize the device request queue information if the number
+        * of zones changed.
+        */
+-      if (sdkp->nr_zones != q->nr_zones) {
+-
+-              sd_zbc_cleanup(sdkp);
+-
+-              q->nr_zones = sdkp->nr_zones;
+-              if (sdkp->nr_zones) {
+-                      q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
+-                      if (!q->seq_zones_wlock) {
++      if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
++              unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
++              size_t zone_bitmap_size;
++
++              if (nr_zones) {
++                      seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
++                                                                 q->node);
++                      if (!seq_zones_wlock) {
+                               ret = -ENOMEM;
+                               goto err;
+                       }
+-                      ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
+-                      if (ret) {
+-                              sd_zbc_cleanup(sdkp);
++                      seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
++                                                      zone_shift, nr_zones);
++                      if (IS_ERR(seq_zones_bitmap)) {
++                              ret = PTR_ERR(seq_zones_bitmap);
++                              kfree(seq_zones_wlock);
+                               goto err;
+                       }
+               }
+-
++              zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
++                      sizeof(unsigned long);
++              blk_mq_freeze_queue(q);
++              if (q->nr_zones != nr_zones) {
++                      /* READ16/WRITE16 is mandatory for ZBC disks */
++                      sdkp->device->use_16_for_rw = 1;
++                      sdkp->device->use_10_for_rw = 0;
++
++                      sdkp->zone_blocks = zone_blocks;
++                      sdkp->zone_shift = zone_shift;
++                      sdkp->nr_zones = nr_zones;
++                      q->nr_zones = nr_zones;
++                      swap(q->seq_zones_wlock, seq_zones_wlock);
++                      swap(q->seq_zones_bitmap, seq_zones_bitmap);
++              } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
++                                zone_bitmap_size) != 0) {
++                      memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
++                             zone_bitmap_size);
++              }
++              blk_mq_unfreeze_queue(q);
++              kfree(seq_zones_wlock);
++              kfree(seq_zones_bitmap);
+       }
+       return 0;
+@@ -660,6 +679,7 @@ err:
+ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
+ {
++      int64_t zone_blocks;
+       int ret;
+       if (!sd_is_zoned(sdkp))
+@@ -696,12 +716,16 @@ int sd_zbc_read_zones(struct scsi_disk *
+        * Check zone size: only devices with a constant zone size (except
+        * an eventual last runt zone) that is a power of 2 are supported.
+        */
+-      ret = sd_zbc_check_zone_size(sdkp);
+-      if (ret)
++      zone_blocks = sd_zbc_check_zone_size(sdkp);
++      ret = -EFBIG;
++      if (zone_blocks != (u32)zone_blocks)
++              goto err;
++      ret = zone_blocks;
++      if (ret < 0)
+               goto err;
+       /* The drive satisfies the kernel restrictions: set it up */
+-      ret = sd_zbc_setup(sdkp);
++      ret = sd_zbc_setup(sdkp, zone_blocks);
+       if (ret)
+               goto err;
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -605,6 +605,11 @@ struct request_queue {
+        * initialized by the low level device driver (e.g. scsi/sd.c).
+        * Stacking drivers (device mappers) may or may not initialize
+        * these fields.
++       *
++       * Reads of this information must be protected with blk_queue_enter() /
++       * blk_queue_exit(). Modifying this information is only allowed while
++       * no requests are being processed. See also blk_mq_freeze_queue() and
++       * blk_mq_unfreeze_queue().
+        */
+       unsigned int            nr_zones;
+       unsigned long           *seq_zones_bitmap;
index 726778039491b2c7b327459f5f517de7416590d6..c94945cdb4c369c84fc526e1b287367c3593597c 100644 (file)
@@ -55,3 +55,12 @@ mtd-spi-nor-cadence-quadspi-fix-page-fault-kernel-panic.patch
 mtd-cfi-cmdset_0001-do-not-allow-read-write-to-suspend-erase-block.patch
 mtd-cfi-cmdset_0001-workaround-micron-erase-suspend-bug.patch
 mtd-cfi-cmdset_0002-do-not-allow-read-write-to-suspend-erase-block.patch
+mtd-rawnand-tango-fix-struct-clk-memory-leak.patch
+mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch
+kobject-don-t-use-warn-for-registration-failures.patch
+scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch
+scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch
+blk-mq-start-request-gstate-with-gen-1.patch
+bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch
+block-do-not-use-interruptible-wait-anywhere.patch
+vfio-ccw-process-ssch-with-interrupts-disabled.patch
diff --git a/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch b/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch
new file mode 100644 (file)
index 0000000..7f3de27
--- /dev/null
@@ -0,0 +1,78 @@
+From 3368e547c52b96586f0edf9657ca12b94d8e61a7 Mon Sep 17 00:00:00 2001
+From: Cornelia Huck <cohuck@redhat.com>
+Date: Fri, 20 Apr 2018 10:24:04 +0200
+Subject: vfio: ccw: process ssch with interrupts disabled
+
+From: Cornelia Huck <cohuck@redhat.com>
+
+commit 3368e547c52b96586f0edf9657ca12b94d8e61a7 upstream.
+
+When we call ssch, an interrupt might already be pending once we
+return from the START SUBCHANNEL instruction. Therefore we need to
+make sure interrupts are disabled while holding the subchannel lock
+until after we're done with our processing.
+
+Cc: stable@vger.kernel.org #v4.12+
+Reviewed-by: Dong Jia Shi <bjsdjshi@linux.ibm.com>
+Acked-by: Halil Pasic <pasic@linux.vnet.ibm.com>
+Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
+Signed-off-by: Cornelia Huck <cohuck@redhat.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/vfio_ccw_fsm.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/drivers/s390/cio/vfio_ccw_fsm.c
++++ b/drivers/s390/cio/vfio_ccw_fsm.c
+@@ -20,12 +20,12 @@ static int fsm_io_helper(struct vfio_ccw
+       int ccode;
+       __u8 lpm;
+       unsigned long flags;
++      int ret;
+       sch = private->sch;
+       spin_lock_irqsave(sch->lock, flags);
+       private->state = VFIO_CCW_STATE_BUSY;
+-      spin_unlock_irqrestore(sch->lock, flags);
+       orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
+@@ -38,10 +38,12 @@ static int fsm_io_helper(struct vfio_ccw
+                * Initialize device status information
+                */
+               sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
+-              return 0;
++              ret = 0;
++              break;
+       case 1:         /* Status pending */
+       case 2:         /* Busy */
+-              return -EBUSY;
++              ret = -EBUSY;
++              break;
+       case 3:         /* Device/path not operational */
+       {
+               lpm = orb->cmd.lpm;
+@@ -51,13 +53,16 @@ static int fsm_io_helper(struct vfio_ccw
+                       sch->lpm = 0;
+               if (cio_update_schib(sch))
+-                      return -ENODEV;
+-
+-              return sch->lpm ? -EACCES : -ENODEV;
++                      ret = -ENODEV;
++              else
++                      ret = sch->lpm ? -EACCES : -ENODEV;
++              break;
+       }
+       default:
+-              return ccode;
++              ret = ccode;
+       }
++      spin_unlock_irqrestore(sch->lock, flags);
++      return ret;
+ }
+ static void fsm_notoper(struct vfio_ccw_private *private,