From: Greg Kroah-Hartman Date: Mon, 26 Aug 2019 15:47:06 +0000 (+0200) Subject: 5.2-stable patches X-Git-Tag: v4.14.141~30 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7cdcb78d1614525921b5524fbfc47838e42ad29e;p=thirdparty%2Fkernel%2Fstable-queue.git 5.2-stable patches added patches: dm-zoned-improve-error-handling-in-i-o-map-code.patch dm-zoned-improve-error-handling-in-reclaim.patch dm-zoned-properly-handle-backing-device-failure.patch genirq-properly-pair-kobject_del-with-kobject_add.patch mm-z3fold.c-fix-race-between-migration-and-destruction.patch --- diff --git a/queue-5.2/dm-zoned-improve-error-handling-in-i-o-map-code.patch b/queue-5.2/dm-zoned-improve-error-handling-in-i-o-map-code.patch new file mode 100644 index 00000000000..bbddd1ed3c0 --- /dev/null +++ b/queue-5.2/dm-zoned-improve-error-handling-in-i-o-map-code.patch @@ -0,0 +1,109 @@ +From d7428c50118e739e672656c28d2b26b09375d4e0 Mon Sep 17 00:00:00 2001 +From: Dmitry Fomichev +Date: Sat, 10 Aug 2019 14:43:10 -0700 +Subject: dm zoned: improve error handling in i/o map code + +From: Dmitry Fomichev + +commit d7428c50118e739e672656c28d2b26b09375d4e0 upstream. + +Some errors are ignored in the I/O path during queueing chunks +for processing by chunk works. Since at least these errors are +transient in nature, it should be possible to retry the failed +incoming commands. + +The fix - + +Errors that can happen while queueing chunks are carried upwards +to the main mapping function and it now returns DM_MAPIO_REQUEUE +for any incoming requests that can not be properly queued. + +Error logging/debug messages are added where needed. 
+ +Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Fomichev +Reviewed-by: Damien Le Moal +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-zoned-target.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/drivers/md/dm-zoned-target.c ++++ b/drivers/md/dm-zoned-target.c +@@ -513,22 +513,24 @@ static void dmz_flush_work(struct work_s + * Get a chunk work and start it to process a new BIO. + * If the BIO chunk has no work yet, create one. + */ +-static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) ++static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) + { + unsigned int chunk = dmz_bio_chunk(dmz->dev, bio); + struct dm_chunk_work *cw; ++ int ret = 0; + + mutex_lock(&dmz->chunk_lock); + + /* Get the BIO chunk work. If one is not active yet, create one */ + cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk); + if (!cw) { +- int ret; + + /* Create a new chunk work */ + cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); +- if (!cw) ++ if (unlikely(!cw)) { ++ ret = -ENOMEM; + goto out; ++ } + + INIT_WORK(&cw->work, dmz_chunk_work); + refcount_set(&cw->refcount, 0); +@@ -539,7 +541,6 @@ static void dmz_queue_chunk_work(struct + ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw); + if (unlikely(ret)) { + kfree(cw); +- cw = NULL; + goto out; + } + } +@@ -547,10 +548,12 @@ static void dmz_queue_chunk_work(struct + bio_list_add(&cw->bio_list, bio); + dmz_get_chunk_work(cw); + ++ dmz_reclaim_bio_acc(dmz->reclaim); + if (queue_work(dmz->chunk_wq, &cw->work)) + dmz_get_chunk_work(cw); + out: + mutex_unlock(&dmz->chunk_lock); ++ return ret; + } + + /* +@@ -564,6 +567,7 @@ static int dmz_map(struct dm_target *ti, + sector_t sector = bio->bi_iter.bi_sector; + unsigned int nr_sectors = bio_sectors(bio); + sector_t chunk_sector; ++ int ret; + + dmz_dev_debug(dev, "BIO op %d sector %llu + %u 
=> chunk %llu, block %llu, %u blocks", + bio_op(bio), (unsigned long long)sector, nr_sectors, +@@ -601,8 +605,14 @@ static int dmz_map(struct dm_target *ti, + dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector); + + /* Now ready to handle this BIO */ +- dmz_reclaim_bio_acc(dmz->reclaim); +- dmz_queue_chunk_work(dmz, bio); ++ ret = dmz_queue_chunk_work(dmz, bio); ++ if (ret) { ++ dmz_dev_debug(dmz->dev, ++ "BIO op %d, can't process chunk %llu, err %i\n", ++ bio_op(bio), (u64)dmz_bio_chunk(dmz->dev, bio), ++ ret); ++ return DM_MAPIO_REQUEUE; ++ } + + return DM_MAPIO_SUBMITTED; + } diff --git a/queue-5.2/dm-zoned-improve-error-handling-in-reclaim.patch b/queue-5.2/dm-zoned-improve-error-handling-in-reclaim.patch new file mode 100644 index 00000000000..db5820cb433 --- /dev/null +++ b/queue-5.2/dm-zoned-improve-error-handling-in-reclaim.patch @@ -0,0 +1,153 @@ +From b234c6d7a703661b5045c5bf569b7c99d2edbf88 Mon Sep 17 00:00:00 2001 +From: Dmitry Fomichev +Date: Sat, 10 Aug 2019 14:43:09 -0700 +Subject: dm zoned: improve error handling in reclaim + +From: Dmitry Fomichev + +commit b234c6d7a703661b5045c5bf569b7c99d2edbf88 upstream. + +There are several places in reclaim code where errors are not +propagated to the main function, dmz_reclaim(). This function +is responsible for unlocking zones that might be still locked +at the end of any failed reclaim iterations. As the result, +some device zones may be left permanently locked for reclaim, +degrading target's capability to reclaim zones. + +This patch fixes these issues as follows - + +Make sure that dmz_reclaim_buf(), dmz_reclaim_seq_data() and +dmz_reclaim_rnd_data() return error codes to the caller. + +dmz_reclaim() function is renamed to dmz_do_reclaim() to avoid +clashing with "struct dmz_reclaim" and is modified to return the +error to the caller. + +dmz_get_zone_for_reclaim() now returns an error instead of NULL +pointer and reclaim code checks for that error. 
+ +Error logging/debug messages are added where necessary. + +Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Fomichev +Reviewed-by: Damien Le Moal +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-zoned-metadata.c | 4 ++-- + drivers/md/dm-zoned-reclaim.c | 28 +++++++++++++++++++--------- + 2 files changed, 21 insertions(+), 11 deletions(-) + +--- a/drivers/md/dm-zoned-metadata.c ++++ b/drivers/md/dm-zoned-metadata.c +@@ -1534,7 +1534,7 @@ static struct dm_zone *dmz_get_rnd_zone_ + struct dm_zone *zone; + + if (list_empty(&zmd->map_rnd_list)) +- return NULL; ++ return ERR_PTR(-EBUSY); + + list_for_each_entry(zone, &zmd->map_rnd_list, link) { + if (dmz_is_buf(zone)) +@@ -1545,7 +1545,7 @@ static struct dm_zone *dmz_get_rnd_zone_ + return dzone; + } + +- return NULL; ++ return ERR_PTR(-EBUSY); + } + + /* +--- a/drivers/md/dm-zoned-reclaim.c ++++ b/drivers/md/dm-zoned-reclaim.c +@@ -215,7 +215,7 @@ static int dmz_reclaim_buf(struct dmz_re + + dmz_unlock_flush(zmd); + +- return 0; ++ return ret; + } + + /* +@@ -259,7 +259,7 @@ static int dmz_reclaim_seq_data(struct d + + dmz_unlock_flush(zmd); + +- return 0; ++ return ret; + } + + /* +@@ -312,7 +312,7 @@ static int dmz_reclaim_rnd_data(struct d + + dmz_unlock_flush(zmd); + +- return 0; ++ return ret; + } + + /* +@@ -334,7 +334,7 @@ static void dmz_reclaim_empty(struct dmz + /* + * Find a candidate zone for reclaim and process it. 
+ */ +-static void dmz_reclaim(struct dmz_reclaim *zrc) ++static int dmz_do_reclaim(struct dmz_reclaim *zrc) + { + struct dmz_metadata *zmd = zrc->metadata; + struct dm_zone *dzone; +@@ -344,8 +344,8 @@ static void dmz_reclaim(struct dmz_recla + + /* Get a data zone */ + dzone = dmz_get_zone_for_reclaim(zmd); +- if (!dzone) +- return; ++ if (IS_ERR(dzone)) ++ return PTR_ERR(dzone); + + start = jiffies; + +@@ -391,13 +391,20 @@ static void dmz_reclaim(struct dmz_recla + out: + if (ret) { + dmz_unlock_zone_reclaim(dzone); +- return; ++ return ret; + } + +- (void) dmz_flush_metadata(zrc->metadata); ++ ret = dmz_flush_metadata(zrc->metadata); ++ if (ret) { ++ dmz_dev_debug(zrc->dev, ++ "Metadata flush for zone %u failed, err %d\n", ++ dmz_id(zmd, rzone), ret); ++ return ret; ++ } + + dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms", + dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start)); ++ return 0; + } + + /* +@@ -442,6 +449,7 @@ static void dmz_reclaim_work(struct work + struct dmz_metadata *zmd = zrc->metadata; + unsigned int nr_rnd, nr_unmap_rnd; + unsigned int p_unmap_rnd; ++ int ret; + + if (!dmz_should_reclaim(zrc)) { + mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); +@@ -471,7 +479,9 @@ static void dmz_reclaim_work(struct work + (dmz_target_idle(zrc) ? 
"Idle" : "Busy"), + p_unmap_rnd, nr_unmap_rnd, nr_rnd); + +- dmz_reclaim(zrc); ++ ret = dmz_do_reclaim(zrc); ++ if (ret) ++ dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); + + dmz_schedule_reclaim(zrc); + } diff --git a/queue-5.2/dm-zoned-properly-handle-backing-device-failure.patch b/queue-5.2/dm-zoned-properly-handle-backing-device-failure.patch new file mode 100644 index 00000000000..067bc8aea64 --- /dev/null +++ b/queue-5.2/dm-zoned-properly-handle-backing-device-failure.patch @@ -0,0 +1,360 @@ +From 75d66ffb48efb30f2dd42f041ba8b39c5b2bd115 Mon Sep 17 00:00:00 2001 +From: Dmitry Fomichev +Date: Sat, 10 Aug 2019 14:43:11 -0700 +Subject: dm zoned: properly handle backing device failure + +From: Dmitry Fomichev + +commit 75d66ffb48efb30f2dd42f041ba8b39c5b2bd115 upstream. + +dm-zoned is observed to lock up or livelock in case of hardware +failure or some misconfiguration of the backing zoned device. + +This patch adds a new dm-zoned target function that checks the status of +the backing device. If the request queue of the backing device is found +to be in dying state or the SCSI backing device enters offline state, +the health check code sets a dm-zoned target flag prompting all further +incoming I/O to be rejected. In order to detect backing device failures +timely, this new function is called in the request mapping path, at the +beginning of every reclaim run and before performing any metadata I/O. + +The proper way out of this situation is to do + +dmsetup remove + +and recreate the target when the problem with the backing device +is resolved. 
+ +Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Fomichev +Reviewed-by: Damien Le Moal +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-zoned-metadata.c | 51 ++++++++++++++++++++++++++++++++--------- + drivers/md/dm-zoned-reclaim.c | 18 ++++++++++++-- + drivers/md/dm-zoned-target.c | 45 ++++++++++++++++++++++++++++++++++-- + drivers/md/dm-zoned.h | 10 ++++++++ + 4 files changed, 110 insertions(+), 14 deletions(-) + +--- a/drivers/md/dm-zoned-metadata.c ++++ b/drivers/md/dm-zoned-metadata.c +@@ -401,15 +401,18 @@ static struct dmz_mblock *dmz_get_mblock + sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; + struct bio *bio; + ++ if (dmz_bdev_is_dying(zmd->dev)) ++ return ERR_PTR(-EIO); ++ + /* Get a new block and a BIO to read it */ + mblk = dmz_alloc_mblock(zmd, mblk_no); + if (!mblk) +- return NULL; ++ return ERR_PTR(-ENOMEM); + + bio = bio_alloc(GFP_NOIO, 1); + if (!bio) { + dmz_free_mblock(zmd, mblk); +- return NULL; ++ return ERR_PTR(-ENOMEM); + } + + spin_lock(&zmd->mblk_lock); +@@ -540,8 +543,8 @@ static struct dmz_mblock *dmz_get_mblock + if (!mblk) { + /* Cache miss: read the block from disk */ + mblk = dmz_get_mblock_slow(zmd, mblk_no); +- if (!mblk) +- return ERR_PTR(-ENOMEM); ++ if (IS_ERR(mblk)) ++ return mblk; + } + + /* Wait for on-going read I/O and check for error */ +@@ -569,16 +572,19 @@ static void dmz_dirty_mblock(struct dmz_ + /* + * Issue a metadata block write BIO. 
+ */ +-static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, +- unsigned int set) ++static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, ++ unsigned int set) + { + sector_t block = zmd->sb[set].block + mblk->no; + struct bio *bio; + ++ if (dmz_bdev_is_dying(zmd->dev)) ++ return -EIO; ++ + bio = bio_alloc(GFP_NOIO, 1); + if (!bio) { + set_bit(DMZ_META_ERROR, &mblk->state); +- return; ++ return -ENOMEM; + } + + set_bit(DMZ_META_WRITING, &mblk->state); +@@ -590,6 +596,8 @@ static void dmz_write_mblock(struct dmz_ + bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); + bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0); + submit_bio(bio); ++ ++ return 0; + } + + /* +@@ -601,6 +609,9 @@ static int dmz_rdwr_block(struct dmz_met + struct bio *bio; + int ret; + ++ if (dmz_bdev_is_dying(zmd->dev)) ++ return -EIO; ++ + bio = bio_alloc(GFP_NOIO, 1); + if (!bio) + return -ENOMEM; +@@ -658,22 +669,29 @@ static int dmz_write_dirty_mblocks(struc + { + struct dmz_mblock *mblk; + struct blk_plug plug; +- int ret = 0; ++ int ret = 0, nr_mblks_submitted = 0; + + /* Issue writes */ + blk_start_plug(&plug); +- list_for_each_entry(mblk, write_list, link) +- dmz_write_mblock(zmd, mblk, set); ++ list_for_each_entry(mblk, write_list, link) { ++ ret = dmz_write_mblock(zmd, mblk, set); ++ if (ret) ++ break; ++ nr_mblks_submitted++; ++ } + blk_finish_plug(&plug); + + /* Wait for completion */ + list_for_each_entry(mblk, write_list, link) { ++ if (!nr_mblks_submitted) ++ break; + wait_on_bit_io(&mblk->state, DMZ_META_WRITING, + TASK_UNINTERRUPTIBLE); + if (test_bit(DMZ_META_ERROR, &mblk->state)) { + clear_bit(DMZ_META_ERROR, &mblk->state); + ret = -EIO; + } ++ nr_mblks_submitted--; + } + + /* Flush drive cache (this will also sync data) */ +@@ -735,6 +753,11 @@ int dmz_flush_metadata(struct dmz_metada + */ + dmz_lock_flush(zmd); + ++ if (dmz_bdev_is_dying(zmd->dev)) { ++ ret = -EIO; ++ goto out; ++ } ++ + /* Get dirty blocks */ + 
spin_lock(&zmd->mblk_lock); + list_splice_init(&zmd->mblk_dirty_list, &write_list); +@@ -1623,6 +1646,10 @@ again: + /* Alloate a random zone */ + dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); + if (!dzone) { ++ if (dmz_bdev_is_dying(zmd->dev)) { ++ dzone = ERR_PTR(-EIO); ++ goto out; ++ } + dmz_wait_for_free_zones(zmd); + goto again; + } +@@ -1720,6 +1747,10 @@ again: + /* Alloate a random zone */ + bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); + if (!bzone) { ++ if (dmz_bdev_is_dying(zmd->dev)) { ++ bzone = ERR_PTR(-EIO); ++ goto out; ++ } + dmz_wait_for_free_zones(zmd); + goto again; + } +--- a/drivers/md/dm-zoned-reclaim.c ++++ b/drivers/md/dm-zoned-reclaim.c +@@ -37,7 +37,7 @@ enum { + /* + * Number of seconds of target BIO inactivity to consider the target idle. + */ +-#define DMZ_IDLE_PERIOD (10UL * HZ) ++#define DMZ_IDLE_PERIOD (10UL * HZ) + + /* + * Percentage of unmapped (free) random zones below which reclaim starts +@@ -134,6 +134,9 @@ static int dmz_reclaim_copy(struct dmz_r + set_bit(DM_KCOPYD_WRITE_SEQ, &flags); + + while (block < end_block) { ++ if (dev->flags & DMZ_BDEV_DYING) ++ return -EIO; ++ + /* Get a valid region from the source zone */ + ret = dmz_first_valid_block(zmd, src_zone, &block); + if (ret <= 0) +@@ -451,6 +454,9 @@ static void dmz_reclaim_work(struct work + unsigned int p_unmap_rnd; + int ret; + ++ if (dmz_bdev_is_dying(zrc->dev)) ++ return; ++ + if (!dmz_should_reclaim(zrc)) { + mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); + return; +@@ -480,8 +486,16 @@ static void dmz_reclaim_work(struct work + p_unmap_rnd, nr_unmap_rnd, nr_rnd); + + ret = dmz_do_reclaim(zrc); +- if (ret) ++ if (ret) { + dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); ++ if (ret == -EIO) ++ /* ++ * LLD might be performing some error handling sequence ++ * at the underlying device. To not interfere, do not ++ * attempt to schedule the next reclaim run immediately. 
++ */ ++ return; ++ } + + dmz_schedule_reclaim(zrc); + } +--- a/drivers/md/dm-zoned-target.c ++++ b/drivers/md/dm-zoned-target.c +@@ -133,6 +133,8 @@ static int dmz_submit_bio(struct dmz_tar + + refcount_inc(&bioctx->ref); + generic_make_request(clone); ++ if (clone->bi_status == BLK_STS_IOERR) ++ return -EIO; + + if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) + zone->wp_block += nr_blocks; +@@ -277,8 +279,8 @@ static int dmz_handle_buffered_write(str + + /* Get the buffer zone. One will be allocated if needed */ + bzone = dmz_get_chunk_buffer(zmd, zone); +- if (!bzone) +- return -ENOSPC; ++ if (IS_ERR(bzone)) ++ return PTR_ERR(bzone); + + if (dmz_is_readonly(bzone)) + return -EROFS; +@@ -389,6 +391,11 @@ static void dmz_handle_bio(struct dmz_ta + + dmz_lock_metadata(zmd); + ++ if (dmz->dev->flags & DMZ_BDEV_DYING) { ++ ret = -EIO; ++ goto out; ++ } ++ + /* + * Get the data zone mapping the chunk. There may be no + * mapping for read and discard. If a mapping is obtained, +@@ -493,6 +500,8 @@ static void dmz_flush_work(struct work_s + + /* Flush dirty metadata blocks */ + ret = dmz_flush_metadata(dmz->metadata); ++ if (ret) ++ dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret); + + /* Process queued flush requests */ + while (1) { +@@ -557,6 +566,32 @@ out: + } + + /* ++ * Check the backing device availability. If it's on the way out, ++ * start failing I/O. Reclaim and metadata components also call this ++ * function to cleanly abort operation in the event of such failure. 
++ */ ++bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) ++{ ++ struct gendisk *disk; ++ ++ if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { ++ disk = dmz_dev->bdev->bd_disk; ++ if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { ++ dmz_dev_warn(dmz_dev, "Backing device queue dying"); ++ dmz_dev->flags |= DMZ_BDEV_DYING; ++ } else if (disk->fops->check_events) { ++ if (disk->fops->check_events(disk, 0) & ++ DISK_EVENT_MEDIA_CHANGE) { ++ dmz_dev_warn(dmz_dev, "Backing device offline"); ++ dmz_dev->flags |= DMZ_BDEV_DYING; ++ } ++ } ++ } ++ ++ return dmz_dev->flags & DMZ_BDEV_DYING; ++} ++ ++/* + * Process a new BIO. + */ + static int dmz_map(struct dm_target *ti, struct bio *bio) +@@ -569,6 +604,9 @@ static int dmz_map(struct dm_target *ti, + sector_t chunk_sector; + int ret; + ++ if (dmz_bdev_is_dying(dmz->dev)) ++ return DM_MAPIO_KILL; ++ + dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks", + bio_op(bio), (unsigned long long)sector, nr_sectors, + (unsigned long long)dmz_bio_chunk(dmz->dev, bio), +@@ -865,6 +903,9 @@ static int dmz_prepare_ioctl(struct dm_t + { + struct dmz_target *dmz = ti->private; + ++ if (dmz_bdev_is_dying(dmz->dev)) ++ return -ENODEV; ++ + *bdev = dmz->dev->bdev; + + return 0; +--- a/drivers/md/dm-zoned.h ++++ b/drivers/md/dm-zoned.h +@@ -56,6 +56,8 @@ struct dmz_dev { + + unsigned int nr_zones; + ++ unsigned int flags; ++ + sector_t zone_nr_sectors; + unsigned int zone_nr_sectors_shift; + +@@ -67,6 +69,9 @@ struct dmz_dev { + (dev)->zone_nr_sectors_shift) + #define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1)) + ++/* Device flags. */ ++#define DMZ_BDEV_DYING (1 << 0) ++ + /* + * Zone descriptor. 
+ */ +@@ -245,4 +250,9 @@ void dmz_resume_reclaim(struct dmz_recla + void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc); + void dmz_schedule_reclaim(struct dmz_reclaim *zrc); + ++/* ++ * Functions defined in dm-zoned-target.c ++ */ ++bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); ++ + #endif /* DM_ZONED_H */ diff --git a/queue-5.2/genirq-properly-pair-kobject_del-with-kobject_add.patch b/queue-5.2/genirq-properly-pair-kobject_del-with-kobject_add.patch new file mode 100644 index 00000000000..e6547feaae1 --- /dev/null +++ b/queue-5.2/genirq-properly-pair-kobject_del-with-kobject_add.patch @@ -0,0 +1,73 @@ +From d0ff14fdc987303aeeb7de6f1bd72c3749ae2a9b Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Thu, 1 Aug 2019 23:53:53 +0000 +Subject: genirq: Properly pair kobject_del() with kobject_add() + +From: Michael Kelley + +commit d0ff14fdc987303aeeb7de6f1bd72c3749ae2a9b upstream. + +If alloc_descs() fails before irq_sysfs_init() has run, free_desc() in the +cleanup path will call kobject_del() even though the kobject has not been +added with kobject_add(). + +Fix this by making the call to kobject_del() conditional on whether +irq_sysfs_init() has run. + +This problem surfaced because commit aa30f47cf666 ("kobject: Add support +for default attribute groups to kobj_type") makes kobject_del() stricter +about pairing with kobject_add(). If the pairing is incorrect, a WARNING +and backtrace occur in sysfs_remove_group() because there is no parent. 
+ +[ tglx: Add a comment to the code and make it work with CONFIG_SYSFS=n ] + +Fixes: ecb3f394c5db ("genirq: Expose interrupt information through sysfs") +Signed-off-by: Michael Kelley +Signed-off-by: Thomas Gleixner +Acked-by: Greg Kroah-Hartman +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1564703564-4116-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/irq/irqdesc.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/kernel/irq/irqdesc.c ++++ b/kernel/irq/irqdesc.c +@@ -295,6 +295,18 @@ static void irq_sysfs_add(int irq, struc + } + } + ++static void irq_sysfs_del(struct irq_desc *desc) ++{ ++ /* ++ * If irq_sysfs_init() has not yet been invoked (early boot), then ++ * irq_kobj_base is NULL and the descriptor was never added. ++ * kobject_del() complains about a object with no parent, so make ++ * it conditional. ++ */ ++ if (irq_kobj_base) ++ kobject_del(&desc->kobj); ++} ++ + static int __init irq_sysfs_init(void) + { + struct irq_desc *desc; +@@ -325,6 +337,7 @@ static struct kobj_type irq_kobj_type = + }; + + static void irq_sysfs_add(int irq, struct irq_desc *desc) {} ++static void irq_sysfs_del(struct irq_desc *desc) {} + + #endif /* CONFIG_SYSFS */ + +@@ -438,7 +451,7 @@ static void free_desc(unsigned int irq) + * The sysfs entry must be serialized against a concurrent + * irq_sysfs_init() as well. 
+ */ +- kobject_del(&desc->kobj); ++ irq_sysfs_del(desc); + delete_irq_desc(irq); + + /* diff --git a/queue-5.2/mm-z3fold.c-fix-race-between-migration-and-destruction.patch b/queue-5.2/mm-z3fold.c-fix-race-between-migration-and-destruction.patch new file mode 100644 index 00000000000..ba87ab8438b --- /dev/null +++ b/queue-5.2/mm-z3fold.c-fix-race-between-migration-and-destruction.patch @@ -0,0 +1,198 @@ +From d776aaa9895eb6eb770908e899cb7f5bd5025b3c Mon Sep 17 00:00:00 2001 +From: Henry Burns +Date: Sat, 24 Aug 2019 17:54:37 -0700 +Subject: mm/z3fold.c: fix race between migration and destruction + +From: Henry Burns + +commit d776aaa9895eb6eb770908e899cb7f5bd5025b3c upstream. + +In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq). +However, we have no guarantee that migration isn't happening in the +background at that time. + +Migration directly calls queue_work_on(pool->compact_wq), if destruction +wins that race we are using a destroyed workqueue. + +Link: http://lkml.kernel.org/r/20190809213828.202833-1-henryburns@google.com +Signed-off-by: Henry Burns +Cc: Vitaly Wool +Cc: Shakeel Butt +Cc: Jonathan Adams +Cc: Henry Burns +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/z3fold.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +--- a/mm/z3fold.c ++++ b/mm/z3fold.c +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -144,6 +145,8 @@ struct z3fold_header { + * @release_wq: workqueue for safe page release + * @work: work_struct for safe page release + * @inode: inode for z3fold pseudo filesystem ++ * @destroying: bool to stop migration once we start destruction ++ * @isolated: int to count the number of pages currently in isolation + * + * This structure is allocated at pool creation time and maintains metadata + * pertaining to a particular z3fold pool. 
+@@ -162,8 +165,11 @@ struct z3fold_pool { + const struct zpool_ops *zpool_ops; + struct workqueue_struct *compact_wq; + struct workqueue_struct *release_wq; ++ struct wait_queue_head isolate_wait; + struct work_struct work; + struct inode *inode; ++ bool destroying; ++ int isolated; + }; + + /* +@@ -771,6 +777,7 @@ static struct z3fold_pool *z3fold_create + goto out_c; + spin_lock_init(&pool->lock); + spin_lock_init(&pool->stale_lock); ++ init_waitqueue_head(&pool->isolate_wait); + pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); + if (!pool->unbuddied) + goto out_pool; +@@ -810,6 +817,15 @@ out: + return NULL; + } + ++static bool pool_isolated_are_drained(struct z3fold_pool *pool) ++{ ++ bool ret; ++ ++ spin_lock(&pool->lock); ++ ret = pool->isolated == 0; ++ spin_unlock(&pool->lock); ++ return ret; ++} + /** + * z3fold_destroy_pool() - destroys an existing z3fold pool + * @pool: the z3fold pool to be destroyed +@@ -819,6 +835,22 @@ out: + static void z3fold_destroy_pool(struct z3fold_pool *pool) + { + kmem_cache_destroy(pool->c_handle); ++ /* ++ * We set pool->destroying under lock to ensure that ++ * z3fold_page_isolate() sees any changes to destroying. This way we ++ * avoid the need for any memory barriers. ++ */ ++ ++ spin_lock(&pool->lock); ++ pool->destroying = true; ++ spin_unlock(&pool->lock); ++ ++ /* ++ * Sleep until the isolated page count drops to zero: migration can ++ * queue work on either workqueue, so no page may still be isolated ++ * when the workqueues are destroyed below. ++ */ ++ wait_event(pool->isolate_wait, pool_isolated_are_drained(pool)); + + /* + * We need to destroy pool->compact_wq before pool->release_wq, +@@ -1309,6 +1341,28 @@ static u64 z3fold_get_pool_size(struct z + return atomic64_read(&pool->pages_nr); + } + ++/* ++ * z3fold_dec_isolated() expects to be called while pool->lock is held. 
++ */ ++static void z3fold_dec_isolated(struct z3fold_pool *pool) ++{ ++ assert_spin_locked(&pool->lock); ++ VM_BUG_ON(pool->isolated <= 0); ++ pool->isolated--; ++ ++ /* ++ * If we have no more isolated pages, we have to see if ++ * z3fold_destroy_pool() is waiting for a signal. ++ */ ++ if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait)) ++ wake_up_all(&pool->isolate_wait); ++} ++ ++static void z3fold_inc_isolated(struct z3fold_pool *pool) ++{ ++ pool->isolated++; ++} ++ + static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) + { + struct z3fold_header *zhdr; +@@ -1335,6 +1389,33 @@ static bool z3fold_page_isolate(struct p + spin_lock(&pool->lock); + if (!list_empty(&page->lru)) + list_del(&page->lru); ++ /* ++ * We need to check for destruction while holding pool->lock, as ++ * otherwise destruction could see 0 isolated pages, and ++ * proceed. ++ */ ++ if (unlikely(pool->destroying)) { ++ spin_unlock(&pool->lock); ++ /* ++ * If this page isn't stale, somebody else holds a ++ * reference to it. Let't drop our refcount so that they ++ * can call the release logic. ++ */ ++ if (unlikely(kref_put(&zhdr->refcount, ++ release_z3fold_page_locked))) { ++ /* ++ * If we get here we have kref problems, so we ++ * should freak out. 
++ */ ++ WARN(1, "Z3fold is experiencing kref problems\n"); ++ return false; ++ } ++ z3fold_page_unlock(zhdr); ++ return false; ++ } ++ ++ ++ z3fold_inc_isolated(pool); + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); + return true; +@@ -1408,6 +1489,10 @@ static int z3fold_page_migrate(struct ad + + queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); + ++ spin_lock(&pool->lock); ++ z3fold_dec_isolated(pool); ++ spin_unlock(&pool->lock); ++ + page_mapcount_reset(page); + unlock_page(page); + put_page(page); +@@ -1428,10 +1513,14 @@ static void z3fold_page_putback(struct p + INIT_LIST_HEAD(&page->lru); + if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { + atomic64_dec(&pool->pages_nr); ++ spin_lock(&pool->lock); ++ z3fold_dec_isolated(pool); ++ spin_unlock(&pool->lock); + return; + } + spin_lock(&pool->lock); + list_add(&page->lru, &pool->lru); ++ z3fold_dec_isolated(pool); + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); + } diff --git a/queue-5.2/series b/queue-5.2/series index de6040997a2..d7f5edbc535 100644 --- a/queue-5.2/series +++ b/queue-5.2/series @@ -139,3 +139,8 @@ dm-integrity-fix-a-crash-due-to-bug_on-in-__journal_read_write.patch dm-raid-add-missing-cleanup-in-raid_ctr.patch dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch +dm-zoned-improve-error-handling-in-reclaim.patch +dm-zoned-improve-error-handling-in-i-o-map-code.patch +dm-zoned-properly-handle-backing-device-failure.patch +genirq-properly-pair-kobject_del-with-kobject_add.patch +mm-z3fold.c-fix-race-between-migration-and-destruction.patch