From: Greg Kroah-Hartman Date: Sun, 29 Apr 2018 11:49:27 +0000 (+0200) Subject: 4.16-stable patches X-Git-Tag: v4.16.7~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ad36eb46615811508d056b178a8530db55a0a4f2;p=thirdparty%2Fkernel%2Fstable-queue.git 4.16-stable patches added patches: bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch blk-mq-start-request-gstate-with-gen-1.patch block-do-not-use-interruptible-wait-anywhere.patch kobject-don-t-use-warn-for-registration-failures.patch mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch mtd-rawnand-tango-fix-struct-clk-memory-leak.patch scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch vfio-ccw-process-ssch-with-interrupts-disabled.patch --- diff --git a/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch b/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch new file mode 100644 index 00000000000..f78c2a86a4e --- /dev/null +++ b/queue-4.16/bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch @@ -0,0 +1,48 @@ +From 72961c4e6082be79825265d9193272b8a1634dec Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 17 Apr 2018 17:08:52 -0600 +Subject: bfq-iosched: ensure to clear bic/bfqq pointers when preparing request + +From: Jens Axboe + +commit 72961c4e6082be79825265d9193272b8a1634dec upstream. + +Even if we don't have an IO context attached to a request, we still +need to clear the priv[0..1] pointers, as they could be pointing +to previously used bic/bfqq structures. If we don't do so, we'll +either corrupt memory on dispatching a request, or cause an +imbalance in counters. + +Inspired by a fix from Kees. + +Reported-by: Oleksandr Natalenko +Reported-by: Kees Cook +Cc: stable@vger.kernel.org +Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/bfq-iosched.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -4911,8 +4911,16 @@ static void bfq_prepare_request(struct r + bool new_queue = false; + bool bfqq_already_existing = false, split = false; + +- if (!rq->elv.icq) ++ /* ++ * Even if we don't have an icq attached, we should still clear ++ * the scheduler pointers, as they might point to previously ++ * allocated bic/bfqq structs. ++ */ ++ if (!rq->elv.icq) { ++ rq->elv.priv[0] = rq->elv.priv[1] = NULL; + return; ++ } ++ + bic = icq_to_bic(rq->elv.icq); + + spin_lock_irq(&bfqd->lock); diff --git a/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch b/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch new file mode 100644 index 00000000000..e4fb483423a --- /dev/null +++ b/queue-4.16/blk-mq-start-request-gstate-with-gen-1.patch @@ -0,0 +1,64 @@ +From f4560231ec42092c6662acccabb28c6cac9f5dfb Mon Sep 17 00:00:00 2001 +From: Jianchao Wang +Date: Tue, 17 Apr 2018 11:46:20 +0800 +Subject: blk-mq: start request gstate with gen 1 + +From: Jianchao Wang + +commit f4560231ec42092c6662acccabb28c6cac9f5dfb upstream. + +rq->gstate and rq->aborted_gstate both are zero before rqs are +allocated. If we have a small timeout, when the timer fires, +there could be rqs that are never allocated, and also there could +be rq that has been allocated but not initialized and started. 
At +the moment, the rq->gstate and rq->aborted_gstate both are 0, thus +the blk_mq_terminate_expired will identify the rq is timed out and +invoke .timeout early. + +For scsi, this will cause scsi_times_out to be invoked before the +scsi_cmnd is not initialized, scsi_cmnd->device is still NULL at +the moment, then we will get crash. + +Cc: Bart Van Assche +Cc: Tejun Heo +Cc: Ming Lei +Cc: Martin Steigerwald +Cc: stable@vger.kernel.org +Signed-off-by: Jianchao Wang +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 4 ++++ + block/blk-mq.c | 7 +++++++ + 2 files changed, 11 insertions(+) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -129,6 +129,10 @@ void blk_rq_init(struct request_queue *q + rq->part = NULL; + seqcount_init(&rq->gstate_seq); + u64_stats_init(&rq->aborted_gstate_sync); ++ /* ++ * See comment of blk_mq_init_request ++ */ ++ WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); + } + EXPORT_SYMBOL(blk_rq_init); + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2076,6 +2076,13 @@ static int blk_mq_init_request(struct bl + + seqcount_init(&rq->gstate_seq); + u64_stats_init(&rq->aborted_gstate_sync); ++ /* ++ * start gstate with gen 1 instead of 0, otherwise it will be equal ++ * to aborted_gstate, and be identified timed out by ++ * blk_mq_terminate_expired. ++ */ ++ WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); ++ + return 0; + } + diff --git a/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch b/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch new file mode 100644 index 00000000000..77eec75144e --- /dev/null +++ b/queue-4.16/block-do-not-use-interruptible-wait-anywhere.patch @@ -0,0 +1,74 @@ +From 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 Mon Sep 17 00:00:00 2001 +From: Alan Jenkins +Date: Thu, 12 Apr 2018 19:11:58 +0100 +Subject: block: do not use interruptible wait anywhere + +From: Alan Jenkins + +commit 1dc3039bc87ae7d19a990c3ee71cfd8a9068f428 upstream. + +When blk_queue_enter() waits for a queue to unfreeze, or unset the +PREEMPT_ONLY flag, do not allow it to be interrupted by a signal. + +The PREEMPT_ONLY flag was introduced later in commit 3a0a529971ec +("block, scsi: Make SCSI quiesce and resume work reliably"). Note the SCSI +device is resumed asynchronously, i.e. after un-freezing userspace tasks. + +So that commit exposed the bug as a regression in v4.15. A mysterious +SIGBUS (or -EIO) sometimes happened during the time the device was being +resumed. Most frequently, there was no kernel log message, and we saw Xorg +or Xwayland killed by SIGBUS.[1] + +[1] E.g. https://bugzilla.redhat.com/show_bug.cgi?id=1553979 + +Without this fix, I get an IO error in this test: + +# dd if=/dev/sda of=/dev/null iflag=direct & \ + while killall -SIGUSR1 dd; do sleep 0.1; done & \ + echo mem > /sys/power/state ; \ + sleep 5; killall dd # stop after 5 seconds + +The interruptible wait was added to blk_queue_enter in +commit 3ef28e83ab15 ("block: generic request_queue reference counting"). +Before then, the interruptible wait was only in blk-mq, but I don't think +it could ever have been correct. 
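In sketch form, the fix trades the interruptible wait, whose early return was
surfaced to submitters as an I/O error, for an uninterruptible one (simplified
here; the real condition in blk_queue_enter() also honors the preempt flag, as
the hunk below shows):

	/* before: a pending signal aborts the wait with -ERESTARTSYS... */
	ret = wait_event_interruptible(q->mq_freeze_wq,
			atomic_read(&q->mq_freeze_depth) == 0 ||
			blk_queue_dying(q));
	if (ret)
		return ret;	/* ...which callers saw as -EIO/SIGBUS */

	/* after: only a queue that is really going away ends the wait early */
	wait_event(q->mq_freeze_wq,
		   atomic_read(&q->mq_freeze_depth) == 0 ||
		   blk_queue_dying(q));
	if (blk_queue_dying(q))
		return -ENODEV;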
+ +Reviewed-by: Bart Van Assche +Cc: stable@vger.kernel.org +Signed-off-by: Alan Jenkins +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -829,7 +829,6 @@ int blk_queue_enter(struct request_queue + + while (true) { + bool success = false; +- int ret; + + rcu_read_lock(); + if (percpu_ref_tryget_live(&q->q_usage_counter)) { +@@ -861,14 +860,12 @@ int blk_queue_enter(struct request_queue + */ + smp_rmb(); + +- ret = wait_event_interruptible(q->mq_freeze_wq, +- (atomic_read(&q->mq_freeze_depth) == 0 && +- (preempt || !blk_queue_preempt_only(q))) || +- blk_queue_dying(q)); ++ wait_event(q->mq_freeze_wq, ++ (atomic_read(&q->mq_freeze_depth) == 0 && ++ (preempt || !blk_queue_preempt_only(q))) || ++ blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; +- if (ret) +- return ret; + } + } + diff --git a/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch b/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch new file mode 100644 index 00000000000..50023d18229 --- /dev/null +++ b/queue-4.16/kobject-don-t-use-warn-for-registration-failures.patch @@ -0,0 +1,47 @@ +From 3e14c6abbfb5c94506edda9d8e2c145d79375798 Mon Sep 17 00:00:00 2001 +From: Dmitry Vyukov +Date: Wed, 11 Apr 2018 17:22:43 +0200 +Subject: kobject: don't use WARN for registration failures + +From: Dmitry Vyukov + +commit 3e14c6abbfb5c94506edda9d8e2c145d79375798 upstream. + +This WARNING proved to be noisy. The function still returns an error +and callers should handle it. That's how most of kernel code works. +Downgrade the WARNING to pr_err() and leave WARNINGs for kernel bugs. + +Signed-off-by: Dmitry Vyukov +Reported-by: syzbot+209c0f67f99fec8eb14b@syzkaller.appspotmail.com +Reported-by: syzbot+7fb6d9525a4528104e05@syzkaller.appspotmail.com +Reported-by: syzbot+2e63711063e2d8f9ea27@syzkaller.appspotmail.com +Reported-by: syzbot+de73361ee4971b6e6f75@syzkaller.appspotmail.com +Cc: stable +Signed-off-by: Greg Kroah-Hartman + +--- + lib/kobject.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/lib/kobject.c ++++ b/lib/kobject.c +@@ -232,14 +232,12 @@ static int kobject_add_internal(struct k + + /* be noisy on error issues */ + if (error == -EEXIST) +- WARN(1, "%s failed for %s with " +- "-EEXIST, don't try to register things with " +- "the same name in the same directory.\n", +- __func__, kobject_name(kobj)); ++ pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", ++ __func__, kobject_name(kobj)); + else +- WARN(1, "%s failed for %s (error: %d parent: %s)\n", +- __func__, kobject_name(kobj), error, +- parent ? kobject_name(parent) : "'none'"); ++ pr_err("%s failed for %s (error: %d parent: %s)\n", ++ __func__, kobject_name(kobj), error, ++ parent ? 
kobject_name(parent) : "'none'"); + } else + kobj->state_in_sysfs = 1; + diff --git a/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch b/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch new file mode 100644 index 00000000000..1518844a9d4 --- /dev/null +++ b/queue-4.16/mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch @@ -0,0 +1,123 @@ +From f6997bec6af43396ff530caee79e178d32774a49 Mon Sep 17 00:00:00 2001 +From: Miquel Raynal +Date: Wed, 25 Apr 2018 16:16:32 +0200 +Subject: mtd: rawnand: marvell: fix the chip-select DT parsing logic + +From: Miquel Raynal + +commit f6997bec6af43396ff530caee79e178d32774a49 upstream. + +The block responsible of parsing the DT for the number of chip-select +lines uses an 'if/else if/else if' block. The content of the second and +third 'else if' conditions are: + 1/ the actual condition to enter the sub-block and + 2/ the operation to do in this sub-block. + + [...] + else if (condition1_to_enter && action1() == failed) + raise_error(); + else if (condition2_to_enter && action2() == failed) + raise_error(); + [...] + +In case of failure, the sub-block is entered and an error raised. +Otherwise, in case of success, the code would continue erroneously in +the next 'else if' statement because it did not failed (and did not +enter the first 'else if' sub-block). + +The first 'else if' refers to legacy bindings while the second 'else if' +refers to new bindings. The second 'else if', which is entered +erroneously, checks for the 'reg' property, which, for old bindings, +does not mean anything because it would not be the number of CS +available, but the regular register map of almost any DT node. This +being said, the content of the 'reg' property being the register map +offset and length, it has '2' values, so the number of CS in this +situation is assumed to be '2'. + +When running nand_scan_ident() with 2 CS, the core will check for an +array of chips. It will first issue a RESET and then a READ_ID. Of +course this will trigger two timeouts because there is no chip in front +of the second CS: + +[ 1.367460] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000080) +[ 1.474292] marvell-nfc f2720000.nand: Timeout on CMDD (NDSR: 0x00000280) + +Indeed, this is harmless and the core will then assume there is only one +valid CS. + +Fix the logic in the whole block by entering each sub-block just on the +'is legacy' condition, doing the action inside the sub-block. This way, +when the action succeeds, the whole block is left. + +Furthermore, for both the old bindings and the new bindings the same +logic was applied to retrieve the number of CS lines: +using of_get_property() to get a size in bytes, converted in the actual +number of lines by dividing it per sizeof(u32) (4 bytes). + +This is fine for the 'reg' property which is a list of the CS IDs but +not for the 'num-cs' property which is directly the value of the number +of CS. + +Anyway, no existing DT uses another value than 'num-cs = <1>' and no +other value has ever been supported by the old driver (pxa3xx_nand.c). +Remove this condition and apply a number of 1 CS anyway, as already +described in the bindings. + +Finally, the 'reg' property of a 'nand' node (with the new bindings) +gives the IDs of each CS line in use. marvell_nand.c driver first look +at the number of CS lines that are present in this property. + +Better use of_property_count_elems_of_size() than dividing by 4 the size +of the number of bytes returned by of_get_property(). 
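Condensed, the corrected control flow reads as follows (names as in the hunk
below; "legacy" stands in for nfc->caps->legacy_of_bindings):

	if (pdata || legacy) {
		nsels = 1;	/* legacy bindings: one chip, CS/RB #0 */
	} else {
		/* "reg" holds one u32 per CS line actually in use */
		nsels = of_property_count_elems_of_size(np, "reg",
							sizeof(u32));
		if (nsels <= 0)
			return -EINVAL;
	}

Each branch now both tests the binding style and performs its action, so a
successful legacy probe can no longer fall through into the new-bindings
checks.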
+ +Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller driver") +Cc: stable@vger.kernel.org +Signed-off-by: Miquel Raynal +Tested-by: Chris Packham +Signed-off-by: Boris Brezillon +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/marvell_nand.c | 25 ++++++++----------------- + 1 file changed, 8 insertions(+), 17 deletions(-) + +--- a/drivers/mtd/nand/marvell_nand.c ++++ b/drivers/mtd/nand/marvell_nand.c +@@ -2277,29 +2277,20 @@ static int marvell_nand_chip_init(struct + /* + * The legacy "num-cs" property indicates the number of CS on the only + * chip connected to the controller (legacy bindings does not support +- * more than one chip). CS are only incremented one by one while the RB +- * pin is always the #0. ++ * more than one chip). The CS and RB pins are always the #0. + * + * When not using legacy bindings, a couple of "reg" and "nand-rb" + * properties must be filled. For each chip, expressed as a subnode, + * "reg" points to the CS lines and "nand-rb" to the RB line. + */ +- if (pdata) { ++ if (pdata || nfc->caps->legacy_of_bindings) { + nsels = 1; +- } else if (nfc->caps->legacy_of_bindings && +- !of_get_property(np, "num-cs", &nsels)) { +- dev_err(dev, "missing num-cs property\n"); +- return -EINVAL; +- } else if (!of_get_property(np, "reg", &nsels)) { +- dev_err(dev, "missing reg property\n"); +- return -EINVAL; +- } +- +- if (!pdata) +- nsels /= sizeof(u32); +- if (!nsels) { +- dev_err(dev, "invalid reg property size\n"); +- return -EINVAL; ++ } else { ++ nsels = of_property_count_elems_of_size(np, "reg", sizeof(u32)); ++ if (nsels <= 0) { ++ dev_err(dev, "missing/invalid reg property\n"); ++ return -EINVAL; ++ } + } + + /* Alloc the nand chip structure */ diff --git a/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch b/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch new file mode 100644 index 00000000000..6c7becbc5f8 --- /dev/null +++ b/queue-4.16/mtd-rawnand-tango-fix-struct-clk-memory-leak.patch @@ -0,0 +1,34 @@ +From 007b4e8b705a4eff184d567c5a8b496622f9e116 Mon Sep 17 00:00:00 2001 +From: Marc Gonzalez +Date: Thu, 5 Apr 2018 14:57:59 +0200 +Subject: mtd: rawnand: tango: Fix struct clk memory leak + +From: Marc Gonzalez + +commit 007b4e8b705a4eff184d567c5a8b496622f9e116 upstream. + +Use devm_clk_get() to let Linux manage struct clk memory. 
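For reference, the managed pattern looks like this in a probe routine (a
minimal sketch around a hypothetical driver, not the tango code itself):

	static int example_probe(struct platform_device *pdev)
	{
		struct clk *clk;

		/*
		 * devm_clk_get() ties the reference to the device, so it
		 * is dropped automatically on probe failure and on unbind;
		 * the clk_put() the old code was missing is not needed.
		 */
		clk = devm_clk_get(&pdev->dev, NULL);
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		return clk_prepare_enable(clk);
	}

Note that devm only manages the reference itself; a driver that enables the
clock must still clk_disable_unprepare() it on the remove path.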
+ +Fixes: 6956e2385a16 ("add tango NAND flash controller support") +Cc: stable@vger.kernel.org +Reported-by: Xidong Wang +Signed-off-by: Marc Gonzalez +Reviewed-by: Miquel Raynal +Signed-off-by: Boris Brezillon +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/tango_nand.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mtd/nand/tango_nand.c ++++ b/drivers/mtd/nand/tango_nand.c +@@ -643,7 +643,7 @@ static int tango_nand_probe(struct platf + + writel_relaxed(MODE_RAW, nfc->pbus_base + PBUS_PAD_MODE); + +- clk = clk_get(&pdev->dev, NULL); ++ clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + diff --git a/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch b/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch new file mode 100644 index 00000000000..b9d0aebf652 --- /dev/null +++ b/queue-4.16/scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch @@ -0,0 +1,36 @@ +From 505aa4b6a8834a2300971c5220c380c3271ebde3 Mon Sep 17 00:00:00 2001 +From: Mahesh Rajashekhara +Date: Tue, 17 Apr 2018 17:03:12 +0530 +Subject: scsi: sd: Defer spinning up drive while SANITIZE is in progress + +From: Mahesh Rajashekhara + +commit 505aa4b6a8834a2300971c5220c380c3271ebde3 upstream. + +A drive being sanitized will return NOT READY / ASC 0x4 / ASCQ +0x1b ("LOGICAL UNIT NOT READY. SANITIZE IN PROGRESS"). + +Prevent spinning up the drive until this condition clears. + +[mkp: tweaked commit message] + +Signed-off-by: Mahesh Rajashekhara +Cc: +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/sd.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -2121,6 +2121,8 @@ sd_spinup_disk(struct scsi_disk *sdkp) + break; /* standby */ + if (sshdr.asc == 4 && sshdr.ascq == 0xc) + break; /* unavailable */ ++ if (sshdr.asc == 4 && sshdr.ascq == 0x1b) ++ break; /* sanitize in progress */ + /* + * Issue command to spin up drive when not ready + */ diff --git a/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch b/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch new file mode 100644 index 00000000000..0b4defe41cd --- /dev/null +++ b/queue-4.16/scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch @@ -0,0 +1,342 @@ +From ccce20fc7968d546fb1e8e147bf5cdc8afc4278a Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Mon, 16 Apr 2018 18:04:41 -0700 +Subject: scsi: sd_zbc: Avoid that resetting a zone fails sporadically + +From: Bart Van Assche + +commit ccce20fc7968d546fb1e8e147bf5cdc8afc4278a upstream. + +Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is +called from sd_probe_async() and since sd_revalidate_disk() calls +sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called +concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can +cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets +q->nr_zones to zero before restoring it to the actual value, even if no drive +characteristics have changed. Avoid that this can happen by making the +following changes: + +- Protect the code that updates zone information with blk_queue_enter() + and blk_queue_exit(). +- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that + these functions do not modify struct scsi_disk before all zone + information has been obtained. 
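The resulting update pattern, in sketch form (build_new_bitmap() is a
stand-in for sd_zbc_setup_seq_zones_bitmap(); the hunks below carry the full
detail):

	/* Build the new state first: may sleep, touches no queue state. */
	unsigned long *new = build_new_bitmap(sdkp);

	if (IS_ERR(new))
		return PTR_ERR(new);

	blk_mq_freeze_queue(q);		/* no report/reset can run meanwhile */
	q->nr_zones = nr_zones;
	swap(q->seq_zones_bitmap, new);	/* publish the new bitmap */
	blk_mq_unfreeze_queue(q);

	kfree(new);			/* 'new' now holds the old bitmap */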
+ +Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track +legacy requests"; kernel v4.15) the request queue freezing mechanism also +affects legacy request queues. + +Fixes: 89d947561077 ("sd: Implement support for ZBC devices") +Signed-off-by: Bart Van Assche +Cc: Jens Axboe +Cc: Damien Le Moal +Cc: Christoph Hellwig +Cc: Hannes Reinecke +Cc: stable@vger.kernel.org # v4.16 +Reviewed-by: Damien Le Moal +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/sd_zbc.c | 140 ++++++++++++++++++++++++++++--------------------- + include/linux/blkdev.h | 5 + + 2 files changed, 87 insertions(+), 58 deletions(-) + +--- a/drivers/scsi/sd_zbc.c ++++ b/drivers/scsi/sd_zbc.c +@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct + * + * Check that all zones of the device are equal. The last zone can however + * be smaller. The zone size must also be a power of two number of LBAs. ++ * ++ * Returns the zone size in bytes upon success or an error code upon failure. + */ +-static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) ++static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) + { + u64 zone_blocks = 0; + sector_t block = 0; +@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct + int ret; + u8 same; + +- sdkp->zone_blocks = 0; +- + /* Get a buffer */ + buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); + if (!buf) +@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct + + /* Parse zone descriptors */ + while (rec < buf + buf_len) { +- zone_blocks = get_unaligned_be64(&rec[8]); +- if (sdkp->zone_blocks == 0) { +- sdkp->zone_blocks = zone_blocks; +- } else if (zone_blocks != sdkp->zone_blocks && +- (block + zone_blocks < sdkp->capacity +- || zone_blocks > sdkp->zone_blocks)) { +- zone_blocks = 0; ++ u64 this_zone_blocks = get_unaligned_be64(&rec[8]); ++ ++ if (zone_blocks == 0) { ++ zone_blocks = this_zone_blocks; ++ } else if (this_zone_blocks != zone_blocks && ++ (block + this_zone_blocks < sdkp->capacity ++ || this_zone_blocks > zone_blocks)) { ++ this_zone_blocks = 0; + goto out; + } +- block += zone_blocks; ++ block += this_zone_blocks; + rec += 64; + } + +@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct + + } while (block < sdkp->capacity); + +- zone_blocks = sdkp->zone_blocks; +- + out: + if (!zone_blocks) { + if (sdkp->first_scan) +@@ -488,8 +487,7 @@ out: + "Zone size too large\n"); + ret = -ENODEV; + } else { +- sdkp->zone_blocks = zone_blocks; +- sdkp->zone_shift = ilog2(zone_blocks); ++ ret = zone_blocks; + } + + out_free: +@@ -500,21 +498,21 @@ out_free: + + /** + * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). +- * @sdkp: The disk of the bitmap ++ * @nr_zones: Number of zones to allocate space for. ++ * @numa_node: NUMA node to allocate the memory from. 
+ */ +-static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) ++static inline unsigned long * ++sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node) + { +- struct request_queue *q = sdkp->disk->queue; +- +- return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones) +- * sizeof(unsigned long), +- GFP_KERNEL, q->node); ++ return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long), ++ GFP_KERNEL, numa_node); + } + + /** + * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones + * @sdkp: disk used + * @buf: report reply buffer ++ * @zone_shift: logarithm base 2 of the number of blocks in a zone + * @seq_zone_bitamp: bitmap of sequential zones to set + * + * Parse reported zone descriptors in @buf to identify sequential zones and +@@ -524,7 +522,7 @@ static inline unsigned long *sd_zbc_allo + * Return the LBA after the last zone reported. + */ + static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, +- unsigned int buflen, ++ unsigned int buflen, u32 zone_shift, + unsigned long *seq_zones_bitmap) + { + sector_t lba, next_lba = sdkp->capacity; +@@ -543,7 +541,7 @@ static sector_t sd_zbc_get_seq_zones(str + if (type != ZBC_ZONE_TYPE_CONV && + cond != ZBC_ZONE_COND_READONLY && + cond != ZBC_ZONE_COND_OFFLINE) +- set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); ++ set_bit(lba >> zone_shift, seq_zones_bitmap); + next_lba = lba + get_unaligned_be64(&rec[8]); + rec += 64; + } +@@ -552,12 +550,16 @@ static sector_t sd_zbc_get_seq_zones(str + } + + /** +- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. ++ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. + * @sdkp: target disk ++ * @zone_shift: logarithm base 2 of the number of blocks in a zone ++ * @nr_zones: number of zones to set up a seq zone bitmap for + * + * Allocate a zone bitmap and initialize it by identifying sequential zones. 
+ */ +-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) ++static unsigned long * ++sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, ++ u32 nr_zones) + { + struct request_queue *q = sdkp->disk->queue; + unsigned long *seq_zones_bitmap; +@@ -565,9 +567,9 @@ static int sd_zbc_setup_seq_zones_bitmap + unsigned char *buf; + int ret = -ENOMEM; + +- seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); ++ seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node); + if (!seq_zones_bitmap) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); + if (!buf) +@@ -578,7 +580,7 @@ static int sd_zbc_setup_seq_zones_bitmap + if (ret) + goto out; + lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, +- seq_zones_bitmap); ++ zone_shift, seq_zones_bitmap); + } + + if (lba != sdkp->capacity) { +@@ -590,12 +592,9 @@ out: + kfree(buf); + if (ret) { + kfree(seq_zones_bitmap); +- return ret; ++ return ERR_PTR(ret); + } +- +- q->seq_zones_bitmap = seq_zones_bitmap; +- +- return 0; ++ return seq_zones_bitmap; + } + + static void sd_zbc_cleanup(struct scsi_disk *sdkp) +@@ -611,44 +610,64 @@ static void sd_zbc_cleanup(struct scsi_d + q->nr_zones = 0; + } + +-static int sd_zbc_setup(struct scsi_disk *sdkp) ++static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks) + { + struct request_queue *q = sdkp->disk->queue; ++ u32 zone_shift = ilog2(zone_blocks); ++ u32 nr_zones; + int ret; + +- /* READ16/WRITE16 is mandatory for ZBC disks */ +- sdkp->device->use_16_for_rw = 1; +- sdkp->device->use_10_for_rw = 0; +- + /* chunk_sectors indicates the zone size */ +- blk_queue_chunk_sectors(sdkp->disk->queue, +- logical_to_sectors(sdkp->device, sdkp->zone_blocks)); +- sdkp->nr_zones = +- round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift; ++ blk_queue_chunk_sectors(q, ++ logical_to_sectors(sdkp->device, zone_blocks)); ++ nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift; + + /* + * Initialize the device request queue information if the number + * of zones changed. 
+ */ +- if (sdkp->nr_zones != q->nr_zones) { +- +- sd_zbc_cleanup(sdkp); +- +- q->nr_zones = sdkp->nr_zones; +- if (sdkp->nr_zones) { +- q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); +- if (!q->seq_zones_wlock) { ++ if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) { ++ unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; ++ size_t zone_bitmap_size; ++ ++ if (nr_zones) { ++ seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, ++ q->node); ++ if (!seq_zones_wlock) { + ret = -ENOMEM; + goto err; + } + +- ret = sd_zbc_setup_seq_zones_bitmap(sdkp); +- if (ret) { +- sd_zbc_cleanup(sdkp); ++ seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, ++ zone_shift, nr_zones); ++ if (IS_ERR(seq_zones_bitmap)) { ++ ret = PTR_ERR(seq_zones_bitmap); ++ kfree(seq_zones_wlock); + goto err; + } + } +- ++ zone_bitmap_size = BITS_TO_LONGS(nr_zones) * ++ sizeof(unsigned long); ++ blk_mq_freeze_queue(q); ++ if (q->nr_zones != nr_zones) { ++ /* READ16/WRITE16 is mandatory for ZBC disks */ ++ sdkp->device->use_16_for_rw = 1; ++ sdkp->device->use_10_for_rw = 0; ++ ++ sdkp->zone_blocks = zone_blocks; ++ sdkp->zone_shift = zone_shift; ++ sdkp->nr_zones = nr_zones; ++ q->nr_zones = nr_zones; ++ swap(q->seq_zones_wlock, seq_zones_wlock); ++ swap(q->seq_zones_bitmap, seq_zones_bitmap); ++ } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, ++ zone_bitmap_size) != 0) { ++ memcpy(q->seq_zones_bitmap, seq_zones_bitmap, ++ zone_bitmap_size); ++ } ++ blk_mq_unfreeze_queue(q); ++ kfree(seq_zones_wlock); ++ kfree(seq_zones_bitmap); + } + + return 0; +@@ -660,6 +679,7 @@ err: + + int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) + { ++ int64_t zone_blocks; + int ret; + + if (!sd_is_zoned(sdkp)) +@@ -696,12 +716,16 @@ int sd_zbc_read_zones(struct scsi_disk * + * Check zone size: only devices with a constant zone size (except + * an eventual last runt zone) that is a power of 2 are supported. + */ +- ret = sd_zbc_check_zone_size(sdkp); +- if (ret) ++ zone_blocks = sd_zbc_check_zone_size(sdkp); ++ ret = -EFBIG; ++ if (zone_blocks != (u32)zone_blocks) ++ goto err; ++ ret = zone_blocks; ++ if (ret < 0) + goto err; + + /* The drive satisfies the kernel restrictions: set it up */ +- ret = sd_zbc_setup(sdkp); ++ ret = sd_zbc_setup(sdkp, zone_blocks); + if (ret) + goto err; + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -605,6 +605,11 @@ struct request_queue { + * initialized by the low level device driver (e.g. scsi/sd.c). + * Stacking drivers (device mappers) may or may not initialize + * these fields. ++ * ++ * Reads of this information must be protected with blk_queue_enter() / ++ * blk_queue_exit(). Modifying this information is only allowed while ++ * no requests are being processed. See also blk_mq_freeze_queue() and ++ * blk_mq_unfreeze_queue(). 
+ */ + unsigned int nr_zones; + unsigned long *seq_zones_bitmap; diff --git a/queue-4.16/series b/queue-4.16/series index 72677803949..c94945cdb4c 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -55,3 +55,12 @@ mtd-spi-nor-cadence-quadspi-fix-page-fault-kernel-panic.patch mtd-cfi-cmdset_0001-do-not-allow-read-write-to-suspend-erase-block.patch mtd-cfi-cmdset_0001-workaround-micron-erase-suspend-bug.patch mtd-cfi-cmdset_0002-do-not-allow-read-write-to-suspend-erase-block.patch +mtd-rawnand-tango-fix-struct-clk-memory-leak.patch +mtd-rawnand-marvell-fix-the-chip-select-dt-parsing-logic.patch +kobject-don-t-use-warn-for-registration-failures.patch +scsi-sd_zbc-avoid-that-resetting-a-zone-fails-sporadically.patch +scsi-sd-defer-spinning-up-drive-while-sanitize-is-in-progress.patch +blk-mq-start-request-gstate-with-gen-1.patch +bfq-iosched-ensure-to-clear-bic-bfqq-pointers-when-preparing-request.patch +block-do-not-use-interruptible-wait-anywhere.patch +vfio-ccw-process-ssch-with-interrupts-disabled.patch diff --git a/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch b/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch new file mode 100644 index 00000000000..7f3de2799ca --- /dev/null +++ b/queue-4.16/vfio-ccw-process-ssch-with-interrupts-disabled.patch @@ -0,0 +1,78 @@ +From 3368e547c52b96586f0edf9657ca12b94d8e61a7 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Fri, 20 Apr 2018 10:24:04 +0200 +Subject: vfio: ccw: process ssch with interrupts disabled + +From: Cornelia Huck + +commit 3368e547c52b96586f0edf9657ca12b94d8e61a7 upstream. + +When we call ssch, an interrupt might already be pending once we +return from the START SUBCHANNEL instruction. Therefore we need to +make sure interrupts are disabled while holding the subchannel lock +until after we're done with our processing. + +Cc: stable@vger.kernel.org #v4.12+ +Reviewed-by: Dong Jia Shi +Acked-by: Halil Pasic +Acked-by: Pierre Morel +Signed-off-by: Cornelia Huck +Signed-off-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/cio/vfio_ccw_fsm.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +--- a/drivers/s390/cio/vfio_ccw_fsm.c ++++ b/drivers/s390/cio/vfio_ccw_fsm.c +@@ -20,12 +20,12 @@ static int fsm_io_helper(struct vfio_ccw + int ccode; + __u8 lpm; + unsigned long flags; ++ int ret; + + sch = private->sch; + + spin_lock_irqsave(sch->lock, flags); + private->state = VFIO_CCW_STATE_BUSY; +- spin_unlock_irqrestore(sch->lock, flags); + + orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm); + +@@ -38,10 +38,12 @@ static int fsm_io_helper(struct vfio_ccw + * Initialize device status information + */ + sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND; +- return 0; ++ ret = 0; ++ break; + case 1: /* Status pending */ + case 2: /* Busy */ +- return -EBUSY; ++ ret = -EBUSY; ++ break; + case 3: /* Device/path not operational */ + { + lpm = orb->cmd.lpm; +@@ -51,13 +53,16 @@ static int fsm_io_helper(struct vfio_ccw + sch->lpm = 0; + + if (cio_update_schib(sch)) +- return -ENODEV; +- +- return sch->lpm ? -EACCES : -ENODEV; ++ ret = -ENODEV; ++ else ++ ret = sch->lpm ? -EACCES : -ENODEV; ++ break; + } + default: +- return ccode; ++ ret = ccode; + } ++ spin_unlock_irqrestore(sch->lock, flags); ++ return ret; + } + + static void fsm_notoper(struct vfio_ccw_private *private,
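In outline, the reworked fsm_io_helper() now holds the subchannel lock, with
interrupts disabled, across the whole ssch processing and funnels every
outcome through a single unlock point (condensed from the hunk above):

	spin_lock_irqsave(sch->lock, flags);
	private->state = VFIO_CCW_STATE_BUSY;

	ccode = ssch(sch->schid, orb);	/* an IRQ may be pending on return */

	switch (ccode) {
	case 0:				/* function initiated */
		sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
		ret = 0;
		break;
	/* cases 1/2/3 and default now set ret instead of returning */
	}

	spin_unlock_irqrestore(sch->lock, flags);	/* single exit */
	return ret;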