From a90fd3a227478ceab84aceb5031e0bd55cf75b77 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Mar 2022 08:25:16 +0200 Subject: [PATCH] 5.16-stable patches added patches: block-ensure-plug-merging-checks-the-correct-queue-at-least-once.patch block-flush-plug-based-on-hardware-and-software-queue-order.patch tpm-fix-reference-counting-for-struct-tpm_chip.patch --- ...ecks-the-correct-queue-at-least-once.patch | 62 ++++ ...on-hardware-and-software-queue-order.patch | 107 +++++++ queue-5.16/series | 3 + ...ference-counting-for-struct-tpm_chip.patch | 273 ++++++++++++++++++ 4 files changed, 445 insertions(+) create mode 100644 queue-5.16/block-ensure-plug-merging-checks-the-correct-queue-at-least-once.patch create mode 100644 queue-5.16/block-flush-plug-based-on-hardware-and-software-queue-order.patch create mode 100644 queue-5.16/tpm-fix-reference-counting-for-struct-tpm_chip.patch diff --git a/queue-5.16/block-ensure-plug-merging-checks-the-correct-queue-at-least-once.patch b/queue-5.16/block-ensure-plug-merging-checks-the-correct-queue-at-least-once.patch new file mode 100644 index 00000000000..b0d10bb834e --- /dev/null +++ b/queue-5.16/block-ensure-plug-merging-checks-the-correct-queue-at-least-once.patch @@ -0,0 +1,62 @@ +From 5b2050718d095cd3242d1f42aaaea3a2fec8e6f0 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 11 Mar 2022 10:21:43 -0700 +Subject: block: ensure plug merging checks the correct queue at least once + +From: Jens Axboe + +commit 5b2050718d095cd3242d1f42aaaea3a2fec8e6f0 upstream. + +Song reports that a RAID rebuild workload runs much slower recently, +and it is seeing a lot less merging than it did previously. The reason +is that a previous commit reduced the amount of work we do for plug +merging. RAID rebuild interleaves requests between disks, so a last-entry +check in plug merging always misses a merge opportunity since we always +find a different disk than what we are looking for. 
+ +Modify the logic such that it's still a one-hit cache, but ensure that +we check enough to find the right target before giving up. + +Fixes: d38a9c04c0d5 ("block: only check previous entry for plug merge attempt") +Reported-and-tested-by: Song Liu +Reviewed-by: Song Liu +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-merge.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -1093,18 +1093,21 @@ bool blk_attempt_plug_merge(struct reque + if (!plug || rq_list_empty(plug->mq_list)) + return false; + +- /* check the previously added entry for a quick merge attempt */ +- rq = rq_list_peek(&plug->mq_list); +- if (rq->q == q) { ++ rq_list_for_each(&plug->mq_list, rq) { ++ if (rq->q == q) { ++ *same_queue_rq = true; ++ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == ++ BIO_MERGE_OK) ++ return true; ++ break; ++ } ++ + /* +- * Only blk-mq multiple hardware queues case checks the rq in +- * the same queue, there should be only one such rq in a queue ++ * Only keep iterating plug list for merges if we have multiple ++ * queues + */ +- *same_queue_rq = true; +- +- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == +- BIO_MERGE_OK) +- return true; ++ if (!plug->multiple_queues) ++ break; + } + return false; + } diff --git a/queue-5.16/block-flush-plug-based-on-hardware-and-software-queue-order.patch b/queue-5.16/block-flush-plug-based-on-hardware-and-software-queue-order.patch new file mode 100644 index 00000000000..d7d9b10e69b --- /dev/null +++ b/queue-5.16/block-flush-plug-based-on-hardware-and-software-queue-order.patch @@ -0,0 +1,107 @@ +From 26fed4ac4eab09c27fbae1859696cc38f0536407 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 11 Mar 2022 10:24:17 -0700 +Subject: block: flush plug based on hardware and software queue order + +From: Jens Axboe + +commit 26fed4ac4eab09c27fbae1859696cc38f0536407 upstream. 
+ +We used to sort the plug list if we had multiple queues before dispatching +requests to the IO scheduler. This usually isn't needed, but for certain +workloads that interleave requests to disks, it's less efficient to +process the plug list one-by-one if everything is interleaved. + +Don't sort the list, but skip through it and flush out entries that have +the same target at the same time. + +Fixes: df87eb0fce8f ("block: get rid of plug list sorting") +Reported-and-tested-by: Song Liu +Reviewed-by: Song Liu +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-mq.c | 60 +++++++++++++++++++++++++-------------------------------- + 1 file changed, 27 insertions(+), 33 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2244,13 +2244,35 @@ static void blk_mq_plug_issue_direct(str + blk_mq_commit_rqs(hctx, &queued, from_schedule); + } + +-void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) ++static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) + { +- struct blk_mq_hw_ctx *this_hctx; +- struct blk_mq_ctx *this_ctx; +- unsigned int depth; ++ struct blk_mq_hw_ctx *this_hctx = NULL; ++ struct blk_mq_ctx *this_ctx = NULL; ++ struct request *requeue_list = NULL; ++ unsigned int depth = 0; + LIST_HEAD(list); + ++ do { ++ struct request *rq = rq_list_pop(&plug->mq_list); ++ ++ if (!this_hctx) { ++ this_hctx = rq->mq_hctx; ++ this_ctx = rq->mq_ctx; ++ } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { ++ rq_list_add(&requeue_list, rq); ++ continue; ++ } ++ list_add_tail(&rq->queuelist, &list); ++ depth++; ++ } while (!rq_list_empty(plug->mq_list)); ++ ++ plug->mq_list = requeue_list; ++ trace_block_unplug(this_hctx->queue, depth, !from_sched); ++ blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched); ++} ++ ++void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) ++{ + if (rq_list_empty(plug->mq_list)) + return; + plug->rq_count = 0; +@@ -2261,37 
+2283,9 @@ void blk_mq_flush_plug_list(struct blk_p + return; + } + +- this_hctx = NULL; +- this_ctx = NULL; +- depth = 0; + do { +- struct request *rq; +- +- rq = rq_list_pop(&plug->mq_list); +- +- if (!this_hctx) { +- this_hctx = rq->mq_hctx; +- this_ctx = rq->mq_ctx; +- } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { +- trace_block_unplug(this_hctx->queue, depth, +- !from_schedule); +- blk_mq_sched_insert_requests(this_hctx, this_ctx, +- &list, from_schedule); +- depth = 0; +- this_hctx = rq->mq_hctx; +- this_ctx = rq->mq_ctx; +- +- } +- +- list_add(&rq->queuelist, &list); +- depth++; ++ blk_mq_dispatch_plug_list(plug, from_schedule); + } while (!rq_list_empty(plug->mq_list)); +- +- if (!list_empty(&list)) { +- trace_block_unplug(this_hctx->queue, depth, !from_schedule); +- blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, +- from_schedule); +- } + } + + static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, diff --git a/queue-5.16/series b/queue-5.16/series index bb3eaade095..6b2814d039a 100644 --- a/queue-5.16/series +++ b/queue-5.16/series @@ -26,3 +26,6 @@ locking-lockdep-avoid-potential-access-of-invalid-memory-in-lock_class.patch drm-amdgpu-move-px-checking-into-amdgpu_device_ip_early_init.patch drm-amdgpu-only-check-for-_pr3-on-dgpus.patch iommu-iova-improve-32-bit-free-space-estimate.patch +tpm-fix-reference-counting-for-struct-tpm_chip.patch +block-ensure-plug-merging-checks-the-correct-queue-at-least-once.patch +block-flush-plug-based-on-hardware-and-software-queue-order.patch diff --git a/queue-5.16/tpm-fix-reference-counting-for-struct-tpm_chip.patch b/queue-5.16/tpm-fix-reference-counting-for-struct-tpm_chip.patch new file mode 100644 index 00000000000..c7babc22ca4 --- /dev/null +++ b/queue-5.16/tpm-fix-reference-counting-for-struct-tpm_chip.patch @@ -0,0 +1,273 @@ +From 7e0438f83dc769465ee663bb5dcf8cc154940712 Mon Sep 17 00:00:00 2001 +From: Lino Sanfilippo +Date: Wed, 2 Mar 2022 10:43:53 +0100 +Subject: tpm: 
fix reference counting for struct tpm_chip + +From: Lino Sanfilippo + +commit 7e0438f83dc769465ee663bb5dcf8cc154940712 upstream. + +The following sequence of operations results in a refcount warning: + +1. Open device /dev/tpmrm. +2. Remove module tpm_tis_spi. +3. Write a TPM command to the file descriptor opened at step 1. + +------------[ cut here ]------------ +WARNING: CPU: 3 PID: 1161 at lib/refcount.c:25 kobject_get+0xa0/0xa4 +refcount_t: addition on 0; use-after-free. +Modules linked in: tpm_tis_spi tpm_tis_core tpm mdio_bcm_unimac brcmfmac +sha256_generic libsha256 sha256_arm hci_uart btbcm bluetooth cfg80211 vc4 +brcmutil ecdh_generic ecc snd_soc_core crc32_arm_ce libaes +raspberrypi_hwmon ac97_bus snd_pcm_dmaengine bcm2711_thermal snd_pcm +snd_timer genet snd phy_generic soundcore [last unloaded: spi_bcm2835] +CPU: 3 PID: 1161 Comm: hold_open Not tainted 5.10.0ls-main-dirty #2 +Hardware name: BCM2711 +[] (unwind_backtrace) from [] (show_stack+0x10/0x14) +[] (show_stack) from [] (dump_stack+0xc4/0xd8) +[] (dump_stack) from [] (__warn+0x104/0x108) +[] (__warn) from [] (warn_slowpath_fmt+0x74/0xb8) +[] (warn_slowpath_fmt) from [] (kobject_get+0xa0/0xa4) +[] (kobject_get) from [] (tpm_try_get_ops+0x14/0x54 [tpm]) +[] (tpm_try_get_ops [tpm]) from [] (tpm_common_write+0x38/0x60 [tpm]) +[] (tpm_common_write [tpm]) from [] (vfs_write+0xc4/0x3c0) +[] (vfs_write) from [] (ksys_write+0x58/0xcc) +[] (ksys_write) from [] (ret_fast_syscall+0x0/0x4c) +Exception stack(0xc226bfa8 to 0xc226bff0) +bfa0: 00000000 000105b4 00000003 beafe664 00000014 00000000 +bfc0: 00000000 000105b4 000103f8 00000004 00000000 00000000 b6f9c000 beafe684 +bfe0: 0000006c beafe648 0001056c b6eb6944 +---[ end trace d4b8409def9b8b1f ]--- + +The reason for this warning is the attempt to get the chip->dev reference +in tpm_common_write() although the reference counter is already zero. 
+ +Since commit 8979b02aaf1d ("tpm: Fix reference count to main device") the +extra reference used to prevent a premature zero counter is never taken, +because the required TPM_CHIP_FLAG_TPM2 flag is never set. + +Fix this by moving the TPM 2 character device handling from +tpm_chip_alloc() to tpm_add_char_device() which is called at a later point +in time when the flag has been set in case of TPM2. + +Commit fdc915f7f719 ("tpm: expose spaces via a device link /dev/tpmrm") +already introduced function tpm_devs_release() to release the extra +reference but did not implement the required put on chip->devs that results +in the call of this function. + +Fix this by putting chip->devs in tpm_chip_unregister(). + +Finally move the new implementation for the TPM 2 handling into a new +function to avoid multiple checks for the TPM_CHIP_FLAG_TPM2 flag in the +good case and error cases. + +Cc: stable@vger.kernel.org +Fixes: fdc915f7f719 ("tpm: expose spaces via a device link /dev/tpmrm") +Fixes: 8979b02aaf1d ("tpm: Fix reference count to main device") +Co-developed-by: Jason Gunthorpe +Signed-off-by: Jason Gunthorpe +Signed-off-by: Lino Sanfilippo +Tested-by: Stefan Berger +Reviewed-by: Jason Gunthorpe +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm-chip.c | 46 +++++------------------------ + drivers/char/tpm/tpm.h | 2 + + drivers/char/tpm/tpm2-space.c | 65 ++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 75 insertions(+), 38 deletions(-) + +--- a/drivers/char/tpm/tpm-chip.c ++++ b/drivers/char/tpm/tpm-chip.c +@@ -274,14 +274,6 @@ static void tpm_dev_release(struct devic + kfree(chip); + } + +-static void tpm_devs_release(struct device *dev) +-{ +- struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); +- +- /* release the master device reference */ +- put_device(&chip->dev); +-} +- + /** + * tpm_class_shutdown() - prepare the TPM device for loss of power. 
+ * @dev: device to which the chip is associated. +@@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct d + chip->dev_num = rc; + + device_initialize(&chip->dev); +- device_initialize(&chip->devs); + + chip->dev.class = tpm_class; + chip->dev.class->shutdown_pre = tpm_class_shutdown; +@@ -352,39 +343,20 @@ struct tpm_chip *tpm_chip_alloc(struct d + chip->dev.parent = pdev; + chip->dev.groups = chip->groups; + +- chip->devs.parent = pdev; +- chip->devs.class = tpmrm_class; +- chip->devs.release = tpm_devs_release; +- /* get extra reference on main device to hold on +- * behalf of devs. This holds the chip structure +- * while cdevs is in use. The corresponding put +- * is in the tpm_devs_release (TPM2 only) +- */ +- if (chip->flags & TPM_CHIP_FLAG_TPM2) +- get_device(&chip->dev); +- + if (chip->dev_num == 0) + chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR); + else + chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); + +- chip->devs.devt = +- MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); +- + rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num); + if (rc) + goto out; +- rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); +- if (rc) +- goto out; + + if (!pdev) + chip->flags |= TPM_CHIP_FLAG_VIRTUAL; + + cdev_init(&chip->cdev, &tpm_fops); +- cdev_init(&chip->cdevs, &tpmrm_fops); + chip->cdev.owner = THIS_MODULE; +- chip->cdevs.owner = THIS_MODULE; + + rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE); + if (rc) { +@@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct d + return chip; + + out: +- put_device(&chip->devs); + put_device(&chip->dev); + return ERR_PTR(rc); + } +@@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tp + } + + if (chip->flags & TPM_CHIP_FLAG_TPM2) { +- rc = cdev_device_add(&chip->cdevs, &chip->devs); +- if (rc) { +- dev_err(&chip->devs, +- "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", +- dev_name(&chip->devs), MAJOR(chip->devs.devt), +- MINOR(chip->devs.devt), rc); +- return 
rc; +- } ++ rc = tpm_devs_add(chip); ++ if (rc) ++ goto err_del_cdev; + } + + /* Make the chip available. */ +@@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tp + idr_replace(&dev_nums_idr, chip, chip->dev_num); + mutex_unlock(&idr_lock); + ++ return 0; ++ ++err_del_cdev: ++ cdev_device_del(&chip->cdev, &chip->dev); + return rc; + } + +@@ -649,7 +619,7 @@ void tpm_chip_unregister(struct tpm_chip + hwrng_unregister(&chip->hwrng); + tpm_bios_log_teardown(chip); + if (chip->flags & TPM_CHIP_FLAG_TPM2) +- cdev_device_del(&chip->cdevs, &chip->devs); ++ tpm_devs_remove(chip); + tpm_del_char_device(chip); + } + EXPORT_SYMBOL_GPL(tpm_chip_unregister); +--- a/drivers/char/tpm/tpm.h ++++ b/drivers/char/tpm/tpm.h +@@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip * + size_t cmdsiz); + int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf, + size_t *bufsiz); ++int tpm_devs_add(struct tpm_chip *chip); ++void tpm_devs_remove(struct tpm_chip *chip); + + void tpm_bios_log_setup(struct tpm_chip *chip); + void tpm_bios_log_teardown(struct tpm_chip *chip); +--- a/drivers/char/tpm/tpm2-space.c ++++ b/drivers/char/tpm/tpm2-space.c +@@ -574,3 +574,68 @@ out: + dev_err(&chip->dev, "%s: error %d\n", __func__, rc); + return rc; + } ++ ++/* ++ * Put the reference to the main device. ++ */ ++static void tpm_devs_release(struct device *dev) ++{ ++ struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); ++ ++ /* release the master device reference */ ++ put_device(&chip->dev); ++} ++ ++/* ++ * Remove the device file for exposed TPM spaces and release the device ++ * reference. This may also release the reference to the master device. ++ */ ++void tpm_devs_remove(struct tpm_chip *chip) ++{ ++ cdev_device_del(&chip->cdevs, &chip->devs); ++ put_device(&chip->devs); ++} ++ ++/* ++ * Add a device file to expose TPM spaces. Also take a reference to the ++ * main device. 
++ */ ++int tpm_devs_add(struct tpm_chip *chip) ++{ ++ int rc; ++ ++ device_initialize(&chip->devs); ++ chip->devs.parent = chip->dev.parent; ++ chip->devs.class = tpmrm_class; ++ ++ /* ++ * Get extra reference on main device to hold on behalf of devs. ++ * This holds the chip structure while cdevs is in use. The ++ * corresponding put is in the tpm_devs_release. ++ */ ++ get_device(&chip->dev); ++ chip->devs.release = tpm_devs_release; ++ chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); ++ cdev_init(&chip->cdevs, &tpmrm_fops); ++ chip->cdevs.owner = THIS_MODULE; ++ ++ rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); ++ if (rc) ++ goto err_put_devs; ++ ++ rc = cdev_device_add(&chip->cdevs, &chip->devs); ++ if (rc) { ++ dev_err(&chip->devs, ++ "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", ++ dev_name(&chip->devs), MAJOR(chip->devs.devt), ++ MINOR(chip->devs.devt), rc); ++ goto err_put_devs; ++ } ++ ++ return 0; ++ ++err_put_devs: ++ put_device(&chip->devs); ++ ++ return rc; ++} -- 2.47.3