From: Greg Kroah-Hartman Date: Sun, 29 Apr 2018 11:49:27 +0000 (+0200) Subject: 4.16-stable patches X-Git-Tag: v4.16.7~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ad36eb46615811508d056b178a8530db55a0a4f2;p=thirdparty%2Fkernel%2Fstable-queue.git 4.16-stable patches added patches: bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch blk-mq-start-request-gstate-with-gen-1.patch block-do-not-use-interruptible-wait-anywhere.patch kobject-don-t-use-warn-for-registration-failures.patch mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch mtd-rawnand-tango-fix-struct-clk-memory-leak.patch scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch vfio-ccw-process-ssch-with-interrupts-disabled.patch --- diff --git a/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch b/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch new file mode 100644 index 00000000000..f78c2a86a4e --- /dev/null +++ b/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch @@ -0,0 +1,48 @@ +From 72961c4e6082be79825265d9193272b8a1634dec Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 17 Apr 2018 17:08:52 -0600 +Subject: bfq-iosched: ensure to clear bic/bfqq pointers when preparing request + +From: Jens Axboe + +commit 72961c4e6082be79825265d9193272b8a1634dec upstream. + +Even if we don't have an IO context attached to a request, we still +need to clear the priv[0..1] pointers, as they could be pointing +to previously used bic/bfqq structures. If we don't do so, we'll +either corrupt memory on dispatching a request, or cause an +imbalance in counters. + +Inspired by a fix from Kees. + +Reported-by: Oleksandr Natalenko +Reported-by: Kees Cook +Cc: stable@vger.kernel.org +Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/bfq-iosched.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -4911,8 +4911,16 @@ static void bfq_prepare_request(struct r + bool new_queue = false; + bool bfqq_already_existing = false, split = false; + +- if (!rq->elv.icq) ++ /* ++ * Even if we don't have an icq attached, we should still clear ++ * the scheduler pointers, as they might point to previously ++ * allocated bic/bfqq structs. ++ */ ++ if (!rq->elv.icq) { ++ rq->elv.priv[0] = rq->elv.priv[1] = NULL; + return; ++ } ++ + bic = icq_to_bic(rq->elv.icq); + + spin_lock_irq(&bfqd->lock); diff --git a/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch b/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch new file mode 100644 index 00000000000..e4fb483423a --- /dev/null +++ b/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch @@ -0,0 +1,64 @@ +From f4560231ec42092c6662acccabb28c6cac9f5dfb Mon Sep 17 00:00:00 2001 +From: Jianchao Wang +Date: Tue, 17 Apr 2018 11:46:20 +0800 +Subject: blk-mq: start request gstate with gen 1 + +From: Jianchao Wang + +commit f4560231ec42092c6662acccabb28c6cac9f5dfb upstream. + +rq->gstate and rq->aborted_gstate both are zero before rqs are +allocated. If we have a small timeout, when the timer fires, +there could be rqs that are never allocated, and also there could +be rq that has been allocated but not initialized and started. 
At +the moment, the rq->gstate and rq->aborted_gstate both are 0, thus +the blk_mq_terminate_expired will identify the rq is timed out and +invoke .timeout early. + +For scsi, this will cause scsi_times_out to be invoked before the +scsi_cmnd is not initialized, scsi_cmnd->device is still NULL at +the moment, then we will get crash. + +Cc: Bart Van Assche +Cc: Tejun Heo +Cc: Ming Lei +Cc: Martin Steigerwald +Cc: stable@vger.kernel.org +Signed-off-by: Jianchao Wang +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 4 ++++ + block/blk-mq.c | 7 +++++++ + 2 files changed, 11 insertions(+) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -129,6 +129,10 @@ void blk_rq_init(struct request_queue *q + rq->part = NULL; + seqcount_init(&rq->gstate_seq); + u64_stats_init(&rq->aborted_gstate_sync); ++ /* ++ * See comment of blk_mq_init_request ++ */ ++ WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); + } + EXPORT_SYMBOL(blk_rq_init); + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2076,6 +2076,13 @@ static int blk_mq_init_request(struct bl + + seqcount_init(&rq->gstate_seq); + u64_stats_init(&rq->aborted_gstate_sync); ++ /* ++ * start gstate with gen 1 instead of 0, otherwise it will be equal ++ * to aborted_gstate, and be identified timed out by ++ * blk_mq_terminate_expired. ++ */ ++ WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); ++ + return 0; + } + diff --git a/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch b/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch new file mode 100644 index 00000000000..77eec75144e --- /dev/null +++ b/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch @@ -0,0 +1,74 @@ +From 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 Mon Sep 17 00:00:00 2001 +From: Alan Jenkins +Date: Thu, 12 Apr 2018 19:11:58 +0100 +Subject: block: do not use interruptible wait anywhere + +From: Alan Jenkins + +commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream. + +When blk_queue_enter() waits for a queue to unfreeze, or unset the +PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. + +The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec +("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI +device is resumed asynchronously, i.e. after un-freezing userspace tasks. + +So that commit exposed the bug as a regression in v4.15. A mysterious +SIGBUS (or -EIO) sometimes happened during the time the device was being +resumed. Most frequently, there was no kernel log message, and we saw Xorg +or Xwayland killed by SIGBUS.[1] + +[1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 + +Without this fix, I get an IO error in this test: + +# dd if=/dev/sda of=/dev/null iflag=direct & \ + while killall -SIGUSR1 dd; do sleep 0.1; done & \ + echo mem > /sys/power/state ; \ + sleep 5; killall dd # stop after 5 seconds + +The interruptible wait was added to blk_queue_enter in +commit 3ef28e83ab15 ("block: generic request_queue reference counting"). +Before then, the interruptible wait was only in blk-mq, but I don't think +it could ever have been correct. 
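In sketch form, the fix trades the interruptible wait, whose early return was
surfaced to submitters as an I/O error, for an uninterruptible one (simplified
here; the real condition in blk_queue_enter() also honors the preempt flag, as
the hunk below shows):

	/* before: a pending signal aborts the wait with -ERESTARTSYS... */
	ret = wait_event_interruptible(q->mq_freeze_wq,
			atomic_read(&q->mq_freeze_depth) == 0 ||
			blk_queue_dying(q));
	if (ret)
		return ret;	/* ...which callers saw as -EIO/SIGBUS */

	/* after: only a queue that is really going away ends the wait early */
	wait_event(q->mq_freeze_wq,
		   atomic_read(&q->mq_freeze_depth) == 0 ||
		   blk_queue_dying(q));
	if (blk_queue_dying(q))
		return -ENODEV;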
+ +Reviewed-by: Bart Van Assche +Cc: stable@vger.kernel.org +Signed-off-by: Alan Jenkins +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -829,7 +829,6 @@ int blk_queue_enter(struct request_queue + + while (true) { + bool success = false; +- int ret; + + rcu_read_lock(); + if (percpu_ref_tryget_live(&q->q_usage_counter)) { +@@ -861,14 +860,12 @@ int blk_queue_enter(struct request_queue + */ + smp_rmb(); + +- ret = wait_event_interruptible(q->mq_freeze_wq, +- (atomic_read(&q->mq_freeze_depth) == 0 && +- (preempt || !blk_queue_preempt_only(q))) || +- blk_queue_dying(q)); ++ wait_event(q->mq_freeze_wq, ++ (atomic_read(&q->mq_freeze_depth) == 0 && ++ (preempt || !blk_queue_preempt_only(q))) || ++ blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; +- if (ret) +- return ret; + } + } + diff --git a/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch b/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch new file mode 100644 index 00000000000..50023d18229 --- /dev/null +++ b/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch @@ -0,0 +1,47 @@ +From 3e14c6abbfb5c94506edda9d8e2c145d79375798 Mon Sep 17 00:00:00 2001 +From: Dmitry Vyukov +Date: Wed, 11 Apr 2018 17:22:43 +0200 +Subject: kobject: don't use WARN for registration failures + +From: Dmitry Vyukov + +commit 3e14c6abbfb5c94506edda9d8e2c145d79375798 upstream. + +This WARNING proved to be noisy. The function still returns an error +and callers should handle it. That's how most of kernel code works. +Downgrade the WARNING to pr_err() and leave WARNINGs for kernel bugs. + +Signed-off-by: Dmitry Vyukov +Reported-by: syzbot+209c0f67f99fec8eb14b@syzkaller.appspotmail.com +Reported-by: syzbot+7fb6d9525a4528104e05@syzkaller.appspotmail.com +Reported-by: syzbot+2e63711063e2d8f9ea27@syzkaller.appspotmail.com +Reported-by: syzbot+de73361ee4971b6e6f75@syzkaller.appspotmail.com +Cc: stable +Signed-off-by: Greg Kroah-Hartman + +--- + lib/kobject.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/lib/kobject.c ++++ b/lib/kobject.c +@@ -232,14 +232,12 @@ static int kobject_add_internal(struct k + + /* be noisy on error issues */ + if (error == -EEXIST) +- WARN(1, "%s failed for %s with " +- "-EEXIST, don't try to register things with " +- "the same name in the same directory.\n", +- __func__, kobject_name(kobj)); ++ pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", ++ __func__, kobject_name(kobj)); + else +- WARN(1, "%s failed for %s (error: %d parent: %s)\n", +- __func__, kobject_name(kobj), error, +- parent ? kobject_name(parent) : "'none'"); ++ pr_err("%s failed for %s (error: %d parent: %s)\n", ++ __func__, kobject_name(kobj), error, ++ parent ? 
kobject_name(parent) : "'none'"); + } else + kobj->state_in_sysfs = 1; + diff --git a/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch b/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch new file mode 100644 index 00000000000..1518844a9d4 --- /dev/null +++ b/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch @@ -0,0 +1,123 @@ +From f6997bec6af43396ff530caee79e178d32774a49 Mon Sep 17 00:00:00 2001 +From: Miquel Raynal +Date: Wed, 25 Apr 2018 16:16:32 +0200 +Subject: mtd: rawnand: marvell: fix the chip-select DT parsing logic + +From: Miquel Raynal + +commit f6997bec6af43396ff530caee79e178d32774a49 upstream. + +The block responsible of parsing the DT for the number of chip-select +lines uses an 'if/else if/else if' block. The content of the second and +third 'else if' conditions are: + 1/ the actual condition to enter the sub-block and + 2/ the operation to do in this sub-block. + + [...] + else if (condition1_to_enter && action1() == failed) + raise_error(); + else if (condition2_to_enter && action2() == failed) + raise_error(); + [...] + +In case of failure, the sub-block is entered and an error raised. +Otherwise, in case of success, the code would continue erroneously in +the next 'else if' statement because it did not failed (and did not +enter the first 'else if' sub-block). + +The first 'else if' refers to legacy bindings while the second 'else if' +refers to new bindings. The second 'else if', which is entered +erroneously, checks for the 'reg' property, which, for old bindings, +does not mean anything because it would not be the number of CS +available, but the regular register map of almost any DT node. This +being said, the content of the 'reg' property being the register map +offset and length, it has '2' values, so the number of CS in this +situation is assumed to be '2'. + +When running nand_scan_ident() with 2 CS, the core will check for an +array of chips. It will first issue a RESET and then a READ_ID. Of +course this will trigger two timeouts because there is no chip in front +of the second CS: + +[ 1.367460] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000080) +[ 1.474292] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000280) + +Indeed, this is harmless and the core will then assume there is only one +valid CS. + +Fix the logic in the whole block by entering each sub-block just on the +'is legacy' condition, doing the action inside the sub-block. This way, +when the action succeeds, the whole block is left. + +Furthermore, for both the old bindings and the new bindings the same +logic was applied to retrieve the number of CS lines: +using of_get_property() to get a size in bytes, converted in the actual +number of lines by dividing it per sizeof(u32) (4 bytes). + +This is fine for the 'reg' property which is a list of the CS IDs but +not for the 'num-cs' property which is directly the value of the number +of CS. + +Anyway, no existing DT uses another value than 'num-cs = <1>' and no +other value has ever been supported by the old driver (pxa3xx_nand.c). +Remove this condition and apply a number of 1 CS anyway, as already +described in the bindings. + +Finally, the 'reg' property of a 'nand' node (with the new bindings) +gives the IDs of each CS line in use. marvell_nand.c driver first look +at the number of CS lines that are present in this property. + +Better use of_property_count_elems_of_size() than dividing by 4 the size +of the number of bytes returned by of_get_property(). 
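Condensed, the corrected control flow reads as follows (names as in the hunk
below; "legacy" stands in for nfc->caps->legacy_of_bindings):

	if (pdata || legacy) {
		nsels = 1;	/* legacy bindings: one chip, CS/RB #0 */
	} else {
		/* "reg" holds one u32 per CS line actually in use */
		nsels = of_property_count_elems_of_size(np, "reg",
							sizeof(u32));
		if (nsels <= 0)
			return -EINVAL;
	}

Each branch now both tests the binding style and performs its action, so a
successful legacy probe can no longer fall through into the new-bindings
checks.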
+ +Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller driver") +Cc: stable@vger.kernel.org +Signed-off-by: Miquel Raynal +Tested-by: Chris Packham +Signed-off-by: Boris Brezillon +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/marvell_nand.c | 25 ++++++++----------------- + 1 file changed, 8 insertions(+), 17 deletions(-) + +--- a/drivers/mtd/nand/marvell_nand.c ++++ b/drivers/mtd/nand/marvell_nand.c +@@ -2277,29 +2277,20 @@ static int marvell_nand_chip_init(struct + /* + * The legacy "num-cs" property indicates the number of CS on the only + * chip connected to the controller (legacy bindings does not support +- * more than one chip). CS are only incremented one by one while the RB +- * pin is always the #0. ++ * more than one chip). The CS and RB pins are always the #0. + * + * When not using legacy bindings, a couple of "reg" and "nand-rb" + * properties must be filled. For each chip, expressed as a subnode, + * "reg" points to the CS lines and "nand-rb" to the RB line. + */ +- if (pdata) { ++ if (pdata || nfc->caps->legacy_of_bindings) { + nsels = 1; +- } else if (nfc->caps->legacy_of_bindings && +- !of_get_property(np, "num-cs", &nsels)) { +- dev_err(dev, "missing num-cs property\n"); +- return -EINVAL; +- } else if (!of_get_property(np, "reg", &nsels)) { +- dev_err(dev, "missing reg property\n"); +- return -EINVAL; +- } +- +- if (!pdata) +- nsels /= sizeof(u32); +- if (!nsels) { +- dev_err(dev, "invalid reg property size\n"); +- return -EINVAL; ++ } else { ++ nsels = of_property_count_elems_of_size(np, "reg", sizeof(u32)); ++ if (nsels <= 0) { ++ dev_err(dev, "missing/invalid reg property\n"); ++ return -EINVAL; ++ } + } + + /* Alloc the nand chip structure */ diff --git a/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch b/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch new file mode 100644 index 00000000000..6c7becbc5f8 --- /dev/null +++ b/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch @@ -0,0 +1,34 @@ +From 007b4e8b705a4eff184d567c5a8b496622f9e116 Mon Sep 17 00:00:00 2001 +From: Marc Gonzalez +Date: Thu, 5 Apr 2018 14:57:59 +0200 +Subject: mtd: rawnand: tango: Fix struct clk memory leak + +From: Marc Gonzalez + +commit 007b4e8b705a4eff184d567c5a8b496622f9e116 upstream. + +Use devm_clk_get() to let Linux manage struct clk memory. 
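For reference, the managed pattern looks like this in a probe routine (a
minimal sketch around a hypothetical driver, not the tango code itself):

	static int example_probe(struct platform_device *pdev)
	{
		struct clk *clk;

		/*
		 * devm_clk_get() ties the reference to the device, so it
		 * is dropped automatically on probe failure and on unbind;
		 * the clk_put() the old code was missing is not needed.
		 */
		clk = devm_clk_get(&pdev->dev, NULL);
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		return clk_prepare_enable(clk);
	}

Note that devm only manages the reference itself; a driver that enables the
clock must still clk_disable_unprepare() it on the remove path.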
+ +Fixes: 6956e2385a16 ("add tango NAND flash controller support") +Cc: stable@vger.kernel.org +Reported-by: Xidong Wang +Signed-off-by: Marc Gonzalez +Reviewed-by: Miquel Raynal +Signed-off-by: Boris Brezillon +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/tango_nand.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mtd/nand/tango_nand.c ++++ b/drivers/mtd/nand/tango_nand.c +@@ -643,7 +643,7 @@ static int tango_nand_probe(struct platf + + writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE); + +- clk = clk_get(&pdev->dev, NULL); ++ clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + diff --git a/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch b/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch new file mode 100644 index 00000000000..b9d0aebf652 --- /dev/null +++ b/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch @@ -0,0 +1,36 @@ +From 505aa4b6a8834a2300971c5220c380c3271ebde3 Mon Sep 17 00:00:00 2001 +From: Mahesh Rajashekhara +Date: Tue, 17 Apr 2018 17:03:12 +0530 +Subject: scsi: sd: Defer spinning up drive while SANITIZE is in progress + +From: Mahesh Rajashekhara + +commit 505aa4b6a8834a2300971c5220c380c3271ebde3 upstream. + +A drive being sanitized will return NOT READY / ASC 0x4 / ASCQ +0x1b ("LOGICAL UNIT NOT READY. SANITIZE IN PROGRESS"). + +Prevent spinning up the drive until this condition clears. + +[mkp: tweaked commit message] + +Signed-off-by: Mahesh Rajashekhara +Cc: +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/sd.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -2121,6 +2121,8 @@ sd_spinup_disk(struct scsi_disk *sdkp) + break; /* standby */ + if (sshdr.asc == 4 && sshdr.ascq == 0xc) + break; /* unavailable */ ++ if (sshdr.asc == 4 && sshdr.ascq == 0x1b) ++ break; /* sanitize in progress */ + /* + * Issue command to spin up drive when not ready + */ diff --git a/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch b/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch new file mode 100644 index 00000000000..0b4defe41cd --- /dev/null +++ b/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch @@ -0,0 +1,342 @@ +From ccce20fc7968d546fb1e8e147bf5cdc8afc4278a Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Mon, 16 Apr 2018 18:04:41 -0700 +Subject: scsi: sd_zbc: Avoid that resetting a zone fails sporadically + +From: Bart Van Assche + +commit ccce20fc7968d546fb1e8e147bf5cdc8afc4278a upstream. + +Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is +called from sd_probe_async() and since sd_revalidate_disk() calls +sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called +concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can +cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets +q->nr_zones to zero before restoring it to the actual value, even if no drive +characteristics have changed. Avoid that this can happen by making the +following changes: + +- Protect the code that updates zone information with blk_queue_enter() + and blk_queue_exit(). +- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that + these functions do not modify struct scsi_disk before all zone + information has been obtained. 
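The resulting update pattern, in sketch form (build_new_bitmap() is a
stand-in for sd_zbc_setup_seq_zones_bitmap(); the hunks below carry the full
detail):

	/* Build the new state first: may sleep, touches no queue state. */
	unsigned long *new = build_new_bitmap(sdkp);

	if (IS_ERR(new))
		return PTR_ERR(new);

	blk_mq_freeze_queue(q);		/* no report/reset can run meanwhile */
	q->nr_zones = nr_zones;
	swap(q->seq_zones_bitmap, new);	/* publish the new bitmap */
	blk_mq_unfreeze_queue(q);

	kfree(new);			/* 'new' now holds the old bitmap */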
+ +Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track +legacy requests"; kernel v4.15) the request queue freezing mechanism also +affects legacy request queues. + +Fixes: 89d947561077 ("sd: Implement support for ZBC devices") +Signed-off-by: Bart Van Assche +Cc: Jens Axboe +Cc: Damien Le Moal +Cc: Christoph Hellwig +Cc: Hannes Reinecke +Cc: stable@vger.kernel.org # v4.16 +Reviewed-by: Damien Le Moal +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/sd_zbc.c | 140 ++++++++++++++++++++++++++++--------------------- + include/linux/blkdev.h | 5 + + 2 files changed, 87 insertions(+), 58 deletions(-) + +--- a/drivers/scsi/sd_zbc.c ++++ b/drivers/scsi/sd_zbc.c +@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct + * + * Check that all zones of the device are equal. The last zone can however + * be smaller. The zone size must also be a power of two number of LBAs. ++ * ++ * Returns the zone size in bytes upon success or an error code upon failure. + */ +-static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) ++static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) + { + u64 zone_blocks = 0; + sector_t block = 0; +@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct + int ret; + u8 same; + +- sdkp->zone_blocks = 0; +- + /* Get a buffer */ + buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); + if (!buf) +@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct + + /* Parse zone descriptors */ + while (rec < buf + buf_len) { +- zone_blocks = get_unaligned_be64(&rec[8]); +- if (sdkp->zone_blocks == 0) { +- sdkp->zone_blocks = zone_blocks; +- } else if (zone_blocks != sdkp->zone_blocks && +- (block + zone_blocks < sdkp->capacity +- || zone_blocks > sdkp->zone_blocks)) { +- zone_blocks = 0; ++ u64 this_zone_blocks = get_unaligned_be64(&rec[8]); ++ ++ if (zone_blocks == 0) { ++ zone_blocks = this_zone_blocks; ++ } else if (this_zone_blocks != zone_blocks && ++ (block + this_zone_blocks < sdkp->capacity ++ || this_zone_blocks > zone_blocks)) { ++ this_zone_blocks = 0; + goto out; + } +- block += zone_blocks; ++ block += this_zone_blocks; + rec += 64; + } + +@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct + + } while (block < sdkp->capacity); + +- zone_blocks = sdkp->zone_blocks; +- + out: + if (!zone_blocks) { + if (sdkp->first_scan) +@@ -488,8 +487,7 @@ out: + "Zone size too large\n"); + ret = -ENODEV; + } else { +- sdkp->zone_blocks = zone_blocks; +- sdkp->zone_shift = ilog2(zone_blocks); ++ ret = zone_blocks; + } + + out_free: +@@ -500,21 +498,21 @@ out_free: + + /** + * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). +- * @sdkp: The disk of the bitmap ++ * @nr_zones: Number of zones to allocate space for. ++ * @numa_node: NUMA node to allocate the memory from. 
+ */ +-static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) ++static inline unsigned long * ++sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node) + { +- struct request_queue *q = sdkp->disk->queue; +- +- return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones) +- * sizeof(unsigned long), +- GFP_KERNEL, q->node); ++ return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long), ++ GFP_KERNEL, numa_node); + } + + /** + * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones + * @sdkp: disk used + * @buf: report reply buffer ++ * @zone_shift: logarithm base 2 of the number of blocks in a zone + * @seq_zone_bitamp: bitmap of sequential zones to set + * + * Parse reported zone descriptors in @buf to identify sequential zones and +@@ -524,7 +522,7 @@ static inline unsigned long *sd_zbc_allo + * Return the LBA after the last zone reported. + */ + static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, +- unsigned int buflen, ++ unsigned int buflen, u32 zone_shift, + unsigned long *seq_zones_bitmap) + { + sector_t lba, next_lba = sdkp->capacity; +@@ -543,7 +541,7 @@ static sector_t sd_zbc_get_seq_zones(str + if (type != ZBC_ZONE_TYPE_CONV && + cond != ZBC_ZONE_COND_READONLY && + cond != ZBC_ZONE_COND_OFFLINE) +- set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); ++ set_bit(lba >> zone_shift, seq_zones_bitmap); + next_lba = lba + get_unaligned_be64(&rec[8]); + rec += 64; + } +@@ -552,12 +550,16 @@ static sector_t sd_zbc_get_seq_zones(str + } + + /** +- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. ++ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. + * @sdkp: target disk ++ * @zone_shift: logarithm base 2 of the number of blocks in a zone ++ * @nr_zones: number of zones to set up a seq zone bitmap for + * + * Allocate a zone bitmap and initialize it by identifying sequential zones. 
+ */ +-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) ++static unsigned long * ++sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, ++ u32 nr_zones) + { + struct request_queue *q = sdkp->disk->queue; + unsigned long *seq_zones_bitmap; +@@ -565,9 +567,9 @@ static int sd_zbc_setup_seq_zones_bitmap + unsigned char *buf; + int ret = -ENOMEM; + +- seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); ++ seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node); + if (!seq_zones_bitmap) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); + if (!buf) +@@ -578,7 +580,7 @@ static int sd_zbc_setup_seq_zones_bitmap + if (ret) + goto out; + lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, +- seq_zones_bitmap); ++ zone_shift, seq_zones_bitmap); + } + + if (lba != sdkp->capacity) { +@@ -590,12 +592,9 @@ out: + kfree(buf); + if (ret) { + kfree(seq_zones_bitmap); +- return ret; ++ return ERR_PTR(ret); + } +- +- q->seq_zones_bitmap = seq_zones_bitmap; +- +- return 0; ++ return seq_zones_bitmap; + } + + static void sd_zbc_cleanup(struct scsi_disk *sdkp) +@@ -611,44 +610,64 @@ static void sd_zbc_cleanup(struct scsi_d + q->nr_zones = 0; + } + +-static int sd_zbc_setup(struct scsi_disk *sdkp) ++static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks) + { + struct request_queue *q = sdkp->disk->queue; ++ u32 zone_shift = ilog2(zone_blocks); ++ u32 nr_zones; + int ret; + +- /* READ16/WRITE16 is mandatory for ZBC disks */ +- sdkp->device->use_16_for_rw = 1; +- sdkp->device->use_10_for_rw = 0; +- + /* chunk_sectors indicates the zone size */ +- blk_queue_chunk_sectors(sdkp->disk->queue, +- logical_to_sectors(sdkp->device, sdkp->zone_blocks)); +- sdkp->nr_zones = +- round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift; ++ blk_queue_chunk_sectors(q, ++ logical_to_sectors(sdkp->device, zone_blocks)); ++ nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift; + + /* + * Initialize the device request queue information if the number + * of zones changed. 
+ */ +- if (sdkp->nr_zones != q->nr_zones) { +- +- sd_zbc_cleanup(sdkp); +- +- q->nr_zones = sdkp->nr_zones; +- if (sdkp->nr_zones) { +- q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); +- if (!q->seq_zones_wlock) { ++ if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) { ++ unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; ++ size_t zone_bitmap_size; ++ ++ if (nr_zones) { ++ seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, ++ q->node); ++ if (!seq_zones_wlock) { + ret = -ENOMEM; + goto err; + } + +- ret = sd_zbc_setup_seq_zones_bitmap(sdkp); +- if (ret) { +- sd_zbc_cleanup(sdkp); ++ seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, ++ zone_shift, nr_zones); ++ if (IS_ERR(seq_zones_bitmap)) { ++ ret = PTR_ERR(seq_zones_bitmap); ++ kfree(seq_zones_wlock); + goto err; + } + } +- ++ zone_bitmap_size = BITS_TO_LONGS(nr_zones) * ++ sizeof(unsigned long); ++ blk_mq_freeze_queue(q); ++ if (q->nr_zones != nr_zones) { ++ /* READ16/WRITE16 is mandatory for ZBC disks */ ++ sdkp->device->use_16_for_rw = 1; ++ sdkp->device->use_10_for_rw = 0; ++ ++ sdkp->zone_blocks = zone_blocks; ++ sdkp->zone_shift = zone_shift; ++ sdkp->nr_zones = nr_zones; ++ q->nr_zones = nr_zones; ++ swap(q->seq_zones_wlock, seq_zones_wlock); ++ swap(q->seq_zones_bitmap, seq_zones_bitmap); ++ } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, ++ zone_bitmap_size) != 0) { ++ memcpy(q->seq_zones_bitmap, seq_zones_bitmap, ++ zone_bitmap_size); ++ } ++ blk_mq_unfreeze_queue(q); ++ kfree(seq_zones_wlock); ++ kfree(seq_zones_bitmap); + } + + return 0; +@@ -660,6 +679,7 @@ err: + + int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) + { ++ int64_t zone_blocks; + int ret; + + if (!sd_is_zoned(sdkp)) +@@ -696,12 +716,16 @@ int sd_zbc_read_zones(struct scsi_disk * + * Check zone size: only devices with a constant zone size (except + * an eventual last runt zone) that is a power of 2 are supported. + */ +- ret = sd_zbc_check_zone_size(sdkp); +- if (ret) ++ zone_blocks = sd_zbc_check_zone_size(sdkp); ++ ret = -EFBIG; ++ if (zone_blocks != (u32)zone_blocks) ++ goto err; ++ ret = zone_blocks; ++ if (ret < 0) + goto err; + + /* The drive satisfies the kernel restrictions: set it up */ +- ret = sd_zbc_setup(sdkp); ++ ret = sd_zbc_setup(sdkp, zone_blocks); + if (ret) + goto err; + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -605,6 +605,11 @@ struct request_queue { + * initialized by the low level device driver (e.g. scsi/sd.c). + * Stacking drivers (device mappers) may or may not initialize + * these fields. ++ * ++ * Reads of this information must be protected with blk_queue_enter() / ++ * blk_queue_exit(). Modifying this information is only allowed while ++ * no requests are being processed. See also blk_mq_freeze_queue() and ++ * blk_mq_unfreeze_queue(). 
+ */ + unsigned int nr_zones; + unsigned long *seq_zones_bitmap; diff --git a/queue-4.16/series b/queue-4.16/series index 72677803949..c94945cdb4c 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -55,3 +55,12 @@ mtd-spi-nor-cadence-quadspi-fix-page-fault-kernel-panic.patch mtd-cfi-cmdset_0001-do-not-allow-read-write-to-suspend-erase-block.patch mtd-cfi-cmdset_0001-workaround-micron-erase-suspend-bug.patch mtd-cfi-cmdset_0002-do-not-allow-read-write-to-suspend-erase-block.patch +mtd-rawnand-tango-fix-struct-clk-memory-leak.patch +mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch +kobject-don-t-use-warn-for-registration-failures.patch +scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch +scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch +blk-mq-start-request-gstate-with-gen-1.patch +bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch +block-do-not-use-interruptible-wait-anywhere.patch +vfio-ccw-process-ssch-with-interrupts-disabled.patch diff --git a/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch b/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch new file mode 100644 index 00000000000..7f3de2799ca --- /dev/null +++ b/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch @@ -0,0 +1,78 @@ +From 3368e547c52b96586f0edf9657ca12b94d8e61a7 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Fri, 20 Apr 2018 10:24:04 +0200 +Subject: vfio: ccw: process ssch with interrupts disabled + +From: Cornelia Huck + +commit 3368e547c52b96586f0edf9657ca12b94d8e61a7 upstream. + +When we call ssch, an interrupt might already be pending once we +return from the START SUBCHANNEL instruction. Therefore we need to +make sure interrupts are disabled while holding the subchannel lock +until after we're done with our processing. + +Cc: stable@vger.kernel.org #v4.12+ +Reviewed-by: Dong Jia Shi +Acked-by: Halil Pasic +Acked-by: Pierre Morel +Signed-off-by: Cornelia Huck +Signed-off-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/cio/vfio_ccw_fsm.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +--- a/drivers/s390/cio/vfio_ccw_fsm.c ++++ b/drivers/s390/cio/vfio_ccw_fsm.c +@@ -20,12 +20,12 @@ static int fsm_io_helper(struct vfio_ccw + int ccode; + __u8 lpm; + unsigned long flags; ++ int ret; + + sch = private->sch; + + spin_lock_irqsave(sch->lock, flags); + private->state = VFIO_CCW_STATE_BUSY; +- spin_unlock_irqrestore(sch->lock, flags); + + orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm); + +@@ -38,10 +38,12 @@ static int fsm_io_helper(struct vfio_ccw + * Initialize device status information + */ + sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND; +- return 0; ++ ret = 0; ++ break; + case 1: /* Status pending */ + case 2: /* Busy */ +- return -EBUSY; ++ ret = -EBUSY; ++ break; + case 3: /* Device/path not operational */ + { + lpm = orb->cmd.lpm; +@@ -51,13 +53,16 @@ static int fsm_io_helper(struct vfio_ccw + sch->lpm = 0; + + if (cio_update_schib(sch)) +- return -ENODEV; +- +- return sch->lpm ? -EACCES : -ENODEV; ++ ret = -ENODEV; ++ else ++ ret = sch->lpm ? -EACCES : -ENODEV; ++ break; + } + default: +- return ccode; ++ ret = ccode; + } ++ spin_unlock_irqrestore(sch->lock, flags); ++ return ret; + } + + static void fsm_notoper(struct vfio_ccw_private *private,
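In outline, the reworked fsm_io_helper() now holds the subchannel lock, with
interrupts disabled, across the whole ssch processing and funnels every
outcome through a single unlock point (condensed from the hunk above):

	spin_lock_irqsave(sch->lock, flags);
	private->state = VFIO_CCW_STATE_BUSY;

	ccode = ssch(sch->schid, orb);	/* an IRQ may be pending on return */

	switch (ccode) {
	case 0:				/* function initiated */
		sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
		ret = 0;
		break;
	/* cases 1/2/3 and default now set ret instead of returning */
	}

	spin_unlock_irqrestore(sch->lock, flags);	/* single exit */
	return ret;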