From: Greg Kroah-Hartman Date: Thu, 19 Dec 2019 11:57:24 +0000 (+0100) Subject: drop dm-thin patches from all but 5.4 X-Git-Tag: v4.4.207~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=62bb5f6be4a50e7fde757a18a961cf5b596a331e;p=thirdparty%2Fkernel%2Fstable-queue.git drop dm-thin patches from all but 5.4 --- diff --git a/queue-4.14/dm-thin-flush-data-device-before-committing-metadata.patch b/queue-4.14/dm-thin-flush-data-device-before-committing-metadata.patch deleted file mode 100644 index 19c5cfb0b91..00000000000 --- a/queue-4.14/dm-thin-flush-data-device-before-committing-metadata.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 694cfe7f31db36912725e63a38a5179c8628a496 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:42 +0200 -Subject: dm thin: Flush data device before committing metadata - -From: Nikos Tsironis - -commit 694cfe7f31db36912725e63a38a5179c8628a496 upstream. - -The thin provisioning target maintains per thin device mappings that map -virtual blocks to data blocks in the data device. - -When we write to a shared block, in case of internal snapshots, or -provision a new block, in case of external snapshots, we copy the shared -block to a new data block (COW), update the mapping for the relevant -virtual block and then issue the write to the new data block. - -Suppose the data device has a volatile write-back cache and the -following sequence of events occur: - -1. We write to a shared block -2. A new data block is allocated -3. We copy the shared block to the new data block using kcopyd (COW) -4. We insert the new mapping for the virtual block in the btree for that - thin device. -5. The commit timeout expires and we commit the metadata, that now - includes the new mapping from step (4). -6. The system crashes and the data device's cache has not been flushed, - meaning that the COWed data are lost. - -The next time we read that virtual block of the thin device we read it -from the data block allocated in step (2), since the metadata have been -successfully committed. The data are lost due to the crash, so we read -garbage instead of the old, shared data. - -This has the following implications: - -1. In case of writes to shared blocks, with size smaller than the pool's - block size (which means we first copy the whole block and then issue - the smaller write), we corrupt data that the user never touched. - -2. In case of writes to shared blocks, with size equal to the device's - logical block size, we fail to provide atomic sector writes. When the - system recovers the user will read garbage from that sector instead - of the old data or the new data. - -3. Even for writes to shared blocks, with size equal to the pool's block - size (overwrites), after the system recovers, the written sectors - will contain garbage instead of a random mix of sectors containing - either old data or new data, thus we fail again to provide atomic - sectors writes. - -4. Even when the user flushes the thin device, because we first commit - the metadata and then pass down the flush, the same risk for - corruption exists (if the system crashes after the metadata have been - committed but before the flush is passed down to the data device.) - -The only case which is unaffected is that of writes with size equal to -the pool's block size and with the FUA flag set. But, because FUA writes -trigger metadata commits, this case can trigger the corruption -indirectly. - -Moreover, apart from internal and external snapshots, the same issue -exists for newly provisioned blocks, when block zeroing is enabled. -After the system recovers the provisioned blocks might contain garbage -instead of zeroes. - -To solve this and avoid the potential data corruption we flush the -pool's data device **before** committing its metadata. - -This ensures that the data blocks of any newly inserted mappings are -properly written to non-volatile storage and won't be lost in case of a -crash. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - ---- a/drivers/md/dm-thin.c -+++ b/drivers/md/dm-thin.c -@@ -327,6 +327,7 @@ struct pool_c { - dm_block_t low_water_blocks; - struct pool_features requested_pf; /* Features requested during table load */ - struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ -+ struct bio flush_bio; - }; - - /* -@@ -2403,8 +2404,16 @@ static void process_deferred_bios(struct - while ((bio = bio_list_pop(&bio_completions))) - bio_endio(bio); - -- while ((bio = bio_list_pop(&bios))) -- generic_make_request(bio); -+ while ((bio = bio_list_pop(&bios))) { -+ /* -+ * The data device was flushed as part of metadata commit, -+ * so complete redundant flushes immediately. -+ */ -+ if (bio->bi_opf & REQ_PREFLUSH) -+ bio_endio(bio); -+ else -+ generic_make_request(bio); -+ } - } - - static void do_worker(struct work_struct *ws) -@@ -3136,6 +3145,7 @@ static void pool_dtr(struct dm_target *t - __pool_dec(pt->pool); - dm_put_device(ti, pt->metadata_dev); - dm_put_device(ti, pt->data_dev); -+ bio_uninit(&pt->flush_bio); - kfree(pt); - - mutex_unlock(&dm_thin_pool_table.mutex); -@@ -3201,6 +3211,29 @@ static void metadata_low_callback(void * - dm_table_event(pool->ti->table); - } - -+/* -+ * We need to flush the data device **before** committing the metadata. -+ * -+ * This ensures that the data blocks of any newly inserted mappings are -+ * properly written to non-volatile storage and won't be lost in case of a -+ * crash. -+ * -+ * Failure to do so can result in data corruption in the case of internal or -+ * external snapshots and in the case of newly provisioned blocks, when block -+ * zeroing is enabled. -+ */ -+static int metadata_pre_commit_callback(void *context) -+{ -+ struct pool_c *pt = context; -+ struct bio *flush_bio = &pt->flush_bio; -+ -+ bio_reset(flush_bio); -+ bio_set_dev(flush_bio, pt->data_dev->bdev); -+ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; -+ -+ return submit_bio_wait(flush_bio); -+} -+ - static sector_t get_dev_size(struct block_device *bdev) - { - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; -@@ -3369,6 +3402,7 @@ static int pool_ctr(struct dm_target *ti - pt->data_dev = data_dev; - pt->low_water_blocks = low_water_blocks; - pt->adjusted_pf = pt->requested_pf = pf; -+ bio_init(&pt->flush_bio, NULL, 0); - ti->num_flush_bios = 1; - - /* -@@ -3395,6 +3429,10 @@ static int pool_ctr(struct dm_target *ti - if (r) - goto out_flags_changed; - -+ dm_pool_register_pre_commit_callback(pt->pool->pmd, -+ metadata_pre_commit_callback, -+ pt); -+ - pt->callbacks.congested_fn = pool_is_congested; - dm_table_add_target_callbacks(ti->table, &pt->callbacks); - diff --git a/queue-4.14/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch b/queue-4.14/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch deleted file mode 100644 index d63b633a70e..00000000000 --- a/queue-4.14/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch +++ /dev/null @@ -1,101 +0,0 @@ -From ecda7c0280e6b3398459dc589b9a41c1adb45529 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:41 +0200 -Subject: dm thin metadata: Add support for a pre-commit callback - -From: Nikos Tsironis - -commit ecda7c0280e6b3398459dc589b9a41c1adb45529 upstream. - -Add support for one pre-commit callback which is run right before the -metadata are committed. - -This allows the thin provisioning target to run a callback before the -metadata are committed and is required by the next commit. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin-metadata.c | 29 +++++++++++++++++++++++++++++ - drivers/md/dm-thin-metadata.h | 7 +++++++ - 2 files changed, 36 insertions(+) - ---- a/drivers/md/dm-thin-metadata.c -+++ b/drivers/md/dm-thin-metadata.c -@@ -189,6 +189,15 @@ struct dm_pool_metadata { - sector_t data_block_size; - - /* -+ * Pre-commit callback. -+ * -+ * This allows the thin provisioning target to run a callback before -+ * the metadata are committed. -+ */ -+ dm_pool_pre_commit_fn pre_commit_fn; -+ void *pre_commit_context; -+ -+ /* - * We reserve a section of the metadata for commit overhead. - * All reported space does *not* include this. - */ -@@ -791,6 +800,14 @@ static int __commit_transaction(struct d - */ - BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); - -+ if (pmd->pre_commit_fn) { -+ r = pmd->pre_commit_fn(pmd->pre_commit_context); -+ if (r < 0) { -+ DMERR("pre-commit callback failed"); -+ return r; -+ } -+ } -+ - r = __write_changed_details(pmd); - if (r < 0) - return r; -@@ -864,6 +881,8 @@ struct dm_pool_metadata *dm_pool_metadat - pmd->fail_io = false; - pmd->bdev = bdev; - pmd->data_block_size = data_block_size; -+ pmd->pre_commit_fn = NULL; -+ pmd->pre_commit_context = NULL; - - r = __create_persistent_data_objects(pmd, format_device); - if (r) { -@@ -2008,6 +2027,16 @@ int dm_pool_register_metadata_threshold( - return r; - } - -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context) -+{ -+ pmd_write_lock_in_core(pmd); -+ pmd->pre_commit_fn = fn; -+ pmd->pre_commit_context = context; -+ pmd_write_unlock(pmd); -+} -+ - int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) - { - int r; ---- a/drivers/md/dm-thin-metadata.h -+++ b/drivers/md/dm-thin-metadata.h -@@ -230,6 +230,13 @@ bool dm_pool_metadata_needs_check(struct - */ - void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd); - -+/* Pre-commit callback */ -+typedef int (*dm_pool_pre_commit_fn)(void *context); -+ -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context); -+ - /*----------------------------------------------------------------*/ - - #endif diff --git a/queue-4.14/series b/queue-4.14/series index 254f0e34b85..ec553913b1b 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -30,7 +30,5 @@ vfio-pci-call-irq_bypass_unregister_producer-before-freeing-irq.patch dma-buf-fix-memory-leak-in-sync_file_merge.patch drm-meson-venc-cvbs-fix-cvbs-mode-matching.patch dm-btree-increase-rebalance-threshold-in-__rebalance2.patch -dm-thin-metadata-add-support-for-a-pre-commit-callback.patch -dm-thin-flush-data-device-before-committing-metadata.patch scsi-iscsi-fix-a-potential-deadlock-in-the-timeout-handler.patch drm-radeon-fix-r1xx-r2xx-register-checker-for-pot-textures.patch diff --git a/queue-4.19/dm-thin-flush-data-device-before-committing-metadata.patch b/queue-4.19/dm-thin-flush-data-device-before-committing-metadata.patch deleted file mode 100644 index ad10792d60c..00000000000 --- a/queue-4.19/dm-thin-flush-data-device-before-committing-metadata.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 694cfe7f31db36912725e63a38a5179c8628a496 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:42 +0200 -Subject: dm thin: Flush data device before committing metadata - -From: Nikos Tsironis - -commit 694cfe7f31db36912725e63a38a5179c8628a496 upstream. - -The thin provisioning target maintains per thin device mappings that map -virtual blocks to data blocks in the data device. - -When we write to a shared block, in case of internal snapshots, or -provision a new block, in case of external snapshots, we copy the shared -block to a new data block (COW), update the mapping for the relevant -virtual block and then issue the write to the new data block. - -Suppose the data device has a volatile write-back cache and the -following sequence of events occur: - -1. We write to a shared block -2. A new data block is allocated -3. We copy the shared block to the new data block using kcopyd (COW) -4. We insert the new mapping for the virtual block in the btree for that - thin device. -5. The commit timeout expires and we commit the metadata, that now - includes the new mapping from step (4). -6. The system crashes and the data device's cache has not been flushed, - meaning that the COWed data are lost. - -The next time we read that virtual block of the thin device we read it -from the data block allocated in step (2), since the metadata have been -successfully committed. The data are lost due to the crash, so we read -garbage instead of the old, shared data. - -This has the following implications: - -1. In case of writes to shared blocks, with size smaller than the pool's - block size (which means we first copy the whole block and then issue - the smaller write), we corrupt data that the user never touched. - -2. In case of writes to shared blocks, with size equal to the device's - logical block size, we fail to provide atomic sector writes. When the - system recovers the user will read garbage from that sector instead - of the old data or the new data. - -3. Even for writes to shared blocks, with size equal to the pool's block - size (overwrites), after the system recovers, the written sectors - will contain garbage instead of a random mix of sectors containing - either old data or new data, thus we fail again to provide atomic - sectors writes. - -4. Even when the user flushes the thin device, because we first commit - the metadata and then pass down the flush, the same risk for - corruption exists (if the system crashes after the metadata have been - committed but before the flush is passed down to the data device.) - -The only case which is unaffected is that of writes with size equal to -the pool's block size and with the FUA flag set. But, because FUA writes -trigger metadata commits, this case can trigger the corruption -indirectly. - -Moreover, apart from internal and external snapshots, the same issue -exists for newly provisioned blocks, when block zeroing is enabled. -After the system recovers the provisioned blocks might contain garbage -instead of zeroes. - -To solve this and avoid the potential data corruption we flush the -pool's data device **before** committing its metadata. - -This ensures that the data blocks of any newly inserted mappings are -properly written to non-volatile storage and won't be lost in case of a -crash. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - ---- a/drivers/md/dm-thin.c -+++ b/drivers/md/dm-thin.c -@@ -328,6 +328,7 @@ struct pool_c { - dm_block_t low_water_blocks; - struct pool_features requested_pf; /* Features requested during table load */ - struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ -+ struct bio flush_bio; - }; - - /* -@@ -2392,8 +2393,16 @@ static void process_deferred_bios(struct - while ((bio = bio_list_pop(&bio_completions))) - bio_endio(bio); - -- while ((bio = bio_list_pop(&bios))) -- generic_make_request(bio); -+ while ((bio = bio_list_pop(&bios))) { -+ /* -+ * The data device was flushed as part of metadata commit, -+ * so complete redundant flushes immediately. -+ */ -+ if (bio->bi_opf & REQ_PREFLUSH) -+ bio_endio(bio); -+ else -+ generic_make_request(bio); -+ } - } - - static void do_worker(struct work_struct *ws) -@@ -3127,6 +3136,7 @@ static void pool_dtr(struct dm_target *t - __pool_dec(pt->pool); - dm_put_device(ti, pt->metadata_dev); - dm_put_device(ti, pt->data_dev); -+ bio_uninit(&pt->flush_bio); - kfree(pt); - - mutex_unlock(&dm_thin_pool_table.mutex); -@@ -3192,6 +3202,29 @@ static void metadata_low_callback(void * - dm_table_event(pool->ti->table); - } - -+/* -+ * We need to flush the data device **before** committing the metadata. -+ * -+ * This ensures that the data blocks of any newly inserted mappings are -+ * properly written to non-volatile storage and won't be lost in case of a -+ * crash. -+ * -+ * Failure to do so can result in data corruption in the case of internal or -+ * external snapshots and in the case of newly provisioned blocks, when block -+ * zeroing is enabled. -+ */ -+static int metadata_pre_commit_callback(void *context) -+{ -+ struct pool_c *pt = context; -+ struct bio *flush_bio = &pt->flush_bio; -+ -+ bio_reset(flush_bio); -+ bio_set_dev(flush_bio, pt->data_dev->bdev); -+ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; -+ -+ return submit_bio_wait(flush_bio); -+} -+ - static sector_t get_dev_size(struct block_device *bdev) - { - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; -@@ -3360,6 +3393,7 @@ static int pool_ctr(struct dm_target *ti - pt->data_dev = data_dev; - pt->low_water_blocks = low_water_blocks; - pt->adjusted_pf = pt->requested_pf = pf; -+ bio_init(&pt->flush_bio, NULL, 0); - ti->num_flush_bios = 1; - - /* -@@ -3386,6 +3420,10 @@ static int pool_ctr(struct dm_target *ti - if (r) - goto out_flags_changed; - -+ dm_pool_register_pre_commit_callback(pt->pool->pmd, -+ metadata_pre_commit_callback, -+ pt); -+ - pt->callbacks.congested_fn = pool_is_congested; - dm_table_add_target_callbacks(ti->table, &pt->callbacks); - diff --git a/queue-4.19/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch b/queue-4.19/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch deleted file mode 100644 index 9a1da5846c3..00000000000 --- a/queue-4.19/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch +++ /dev/null @@ -1,101 +0,0 @@ -From ecda7c0280e6b3398459dc589b9a41c1adb45529 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:41 +0200 -Subject: dm thin metadata: Add support for a pre-commit callback - -From: Nikos Tsironis - -commit ecda7c0280e6b3398459dc589b9a41c1adb45529 upstream. - -Add support for one pre-commit callback which is run right before the -metadata are committed. - -This allows the thin provisioning target to run a callback before the -metadata are committed and is required by the next commit. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin-metadata.c | 29 +++++++++++++++++++++++++++++ - drivers/md/dm-thin-metadata.h | 7 +++++++ - 2 files changed, 36 insertions(+) - ---- a/drivers/md/dm-thin-metadata.c -+++ b/drivers/md/dm-thin-metadata.c -@@ -189,6 +189,15 @@ struct dm_pool_metadata { - sector_t data_block_size; - - /* -+ * Pre-commit callback. -+ * -+ * This allows the thin provisioning target to run a callback before -+ * the metadata are committed. -+ */ -+ dm_pool_pre_commit_fn pre_commit_fn; -+ void *pre_commit_context; -+ -+ /* - * We reserve a section of the metadata for commit overhead. - * All reported space does *not* include this. - */ -@@ -790,6 +799,14 @@ static int __commit_transaction(struct d - */ - BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); - -+ if (pmd->pre_commit_fn) { -+ r = pmd->pre_commit_fn(pmd->pre_commit_context); -+ if (r < 0) { -+ DMERR("pre-commit callback failed"); -+ return r; -+ } -+ } -+ - r = __write_changed_details(pmd); - if (r < 0) - return r; -@@ -855,6 +872,8 @@ struct dm_pool_metadata *dm_pool_metadat - pmd->fail_io = false; - pmd->bdev = bdev; - pmd->data_block_size = data_block_size; -+ pmd->pre_commit_fn = NULL; -+ pmd->pre_commit_context = NULL; - - r = __create_persistent_data_objects(pmd, format_device); - if (r) { -@@ -1999,6 +2018,16 @@ int dm_pool_register_metadata_threshold( - return r; - } - -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context) -+{ -+ pmd_write_lock_in_core(pmd); -+ pmd->pre_commit_fn = fn; -+ pmd->pre_commit_context = context; -+ pmd_write_unlock(pmd); -+} -+ - int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) - { - int r = -EINVAL; ---- a/drivers/md/dm-thin-metadata.h -+++ b/drivers/md/dm-thin-metadata.h -@@ -230,6 +230,13 @@ bool dm_pool_metadata_needs_check(struct - */ - void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd); - -+/* Pre-commit callback */ -+typedef int (*dm_pool_pre_commit_fn)(void *context); -+ -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context); -+ - /*----------------------------------------------------------------*/ - - #endif diff --git a/queue-4.19/series b/queue-4.19/series index 1a3e9e60063..2eb2acfb86e 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -41,8 +41,6 @@ dma-buf-fix-memory-leak-in-sync_file_merge.patch drm-meson-venc-cvbs-fix-cvbs-mode-matching.patch dm-mpath-remove-harmful-bio-based-optimization.patch dm-btree-increase-rebalance-threshold-in-__rebalance2.patch -dm-thin-metadata-add-support-for-a-pre-commit-callback.patch -dm-thin-flush-data-device-before-committing-metadata.patch scsi-iscsi-fix-a-potential-deadlock-in-the-timeout-handler.patch scsi-qla2xxx-change-discovery-state-before-plogi.patch drm-radeon-fix-r1xx-r2xx-register-checker-for-pot-textures.patch diff --git a/queue-4.4/dm-thin-flush-data-device-before-committing-metadata.patch b/queue-4.4/dm-thin-flush-data-device-before-committing-metadata.patch deleted file mode 100644 index c4775d10d44..00000000000 --- a/queue-4.4/dm-thin-flush-data-device-before-committing-metadata.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 694cfe7f31db36912725e63a38a5179c8628a496 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:42 +0200 -Subject: dm thin: Flush data device before committing metadata - -From: Nikos Tsironis - -commit 694cfe7f31db36912725e63a38a5179c8628a496 upstream. - -The thin provisioning target maintains per thin device mappings that map -virtual blocks to data blocks in the data device. - -When we write to a shared block, in case of internal snapshots, or -provision a new block, in case of external snapshots, we copy the shared -block to a new data block (COW), update the mapping for the relevant -virtual block and then issue the write to the new data block. - -Suppose the data device has a volatile write-back cache and the -following sequence of events occur: - -1. We write to a shared block -2. A new data block is allocated -3. We copy the shared block to the new data block using kcopyd (COW) -4. We insert the new mapping for the virtual block in the btree for that - thin device. -5. The commit timeout expires and we commit the metadata, that now - includes the new mapping from step (4). -6. The system crashes and the data device's cache has not been flushed, - meaning that the COWed data are lost. - -The next time we read that virtual block of the thin device we read it -from the data block allocated in step (2), since the metadata have been -successfully committed. The data are lost due to the crash, so we read -garbage instead of the old, shared data. - -This has the following implications: - -1. In case of writes to shared blocks, with size smaller than the pool's - block size (which means we first copy the whole block and then issue - the smaller write), we corrupt data that the user never touched. - -2. In case of writes to shared blocks, with size equal to the device's - logical block size, we fail to provide atomic sector writes. When the - system recovers the user will read garbage from that sector instead - of the old data or the new data. - -3. Even for writes to shared blocks, with size equal to the pool's block - size (overwrites), after the system recovers, the written sectors - will contain garbage instead of a random mix of sectors containing - either old data or new data, thus we fail again to provide atomic - sectors writes. - -4. Even when the user flushes the thin device, because we first commit - the metadata and then pass down the flush, the same risk for - corruption exists (if the system crashes after the metadata have been - committed but before the flush is passed down to the data device.) - -The only case which is unaffected is that of writes with size equal to -the pool's block size and with the FUA flag set. But, because FUA writes -trigger metadata commits, this case can trigger the corruption -indirectly. - -Moreover, apart from internal and external snapshots, the same issue -exists for newly provisioned blocks, when block zeroing is enabled. -After the system recovers the provisioned blocks might contain garbage -instead of zeroes. - -To solve this and avoid the potential data corruption we flush the -pool's data device **before** committing its metadata. - -This ensures that the data blocks of any newly inserted mappings are -properly written to non-volatile storage and won't be lost in case of a -crash. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - ---- a/drivers/md/dm-thin.c -+++ b/drivers/md/dm-thin.c -@@ -295,6 +295,7 @@ struct pool_c { - dm_block_t low_water_blocks; - struct pool_features requested_pf; /* Features requested during table load */ - struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ -+ struct bio flush_bio; - }; - - /* -@@ -2303,8 +2304,16 @@ static void process_deferred_bios(struct - while ((bio = bio_list_pop(&bio_completions))) - bio_endio(bio); - -- while ((bio = bio_list_pop(&bios))) -- generic_make_request(bio); -+ while ((bio = bio_list_pop(&bios))) { -+ /* -+ * The data device was flushed as part of metadata commit, -+ * so complete redundant flushes immediately. -+ */ -+ if (bio->bi_opf & REQ_PREFLUSH) -+ bio_endio(bio); -+ else -+ generic_make_request(bio); -+ } - } - - static void do_worker(struct work_struct *ws) -@@ -3054,6 +3063,7 @@ static void pool_dtr(struct dm_target *t - __pool_dec(pt->pool); - dm_put_device(ti, pt->metadata_dev); - dm_put_device(ti, pt->data_dev); -+ bio_uninit(&pt->flush_bio); - kfree(pt); - - mutex_unlock(&dm_thin_pool_table.mutex); -@@ -3119,6 +3129,29 @@ static void metadata_low_callback(void * - dm_table_event(pool->ti->table); - } - -+/* -+ * We need to flush the data device **before** committing the metadata. -+ * -+ * This ensures that the data blocks of any newly inserted mappings are -+ * properly written to non-volatile storage and won't be lost in case of a -+ * crash. -+ * -+ * Failure to do so can result in data corruption in the case of internal or -+ * external snapshots and in the case of newly provisioned blocks, when block -+ * zeroing is enabled. -+ */ -+static int metadata_pre_commit_callback(void *context) -+{ -+ struct pool_c *pt = context; -+ struct bio *flush_bio = &pt->flush_bio; -+ -+ bio_reset(flush_bio); -+ bio_set_dev(flush_bio, pt->data_dev->bdev); -+ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; -+ -+ return submit_bio_wait(flush_bio); -+} -+ - static sector_t get_dev_size(struct block_device *bdev) - { - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; -@@ -3287,6 +3320,7 @@ static int pool_ctr(struct dm_target *ti - pt->data_dev = data_dev; - pt->low_water_blocks = low_water_blocks; - pt->adjusted_pf = pt->requested_pf = pf; -+ bio_init(&pt->flush_bio, NULL, 0); - ti->num_flush_bios = 1; - - /* -@@ -3314,6 +3348,10 @@ static int pool_ctr(struct dm_target *ti - if (r) - goto out_flags_changed; - -+ dm_pool_register_pre_commit_callback(pt->pool->pmd, -+ metadata_pre_commit_callback, -+ pt); -+ - pt->callbacks.congested_fn = pool_is_congested; - dm_table_add_target_callbacks(ti->table, &pt->callbacks); - diff --git a/queue-4.4/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch b/queue-4.4/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch deleted file mode 100644 index 54708e78cd8..00000000000 --- a/queue-4.4/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch +++ /dev/null @@ -1,101 +0,0 @@ -From ecda7c0280e6b3398459dc589b9a41c1adb45529 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:41 +0200 -Subject: dm thin metadata: Add support for a pre-commit callback - -From: Nikos Tsironis - -commit ecda7c0280e6b3398459dc589b9a41c1adb45529 upstream. - -Add support for one pre-commit callback which is run right before the -metadata are committed. - -This allows the thin provisioning target to run a callback before the -metadata are committed and is required by the next commit. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin-metadata.c | 29 +++++++++++++++++++++++++++++ - drivers/md/dm-thin-metadata.h | 7 +++++++ - 2 files changed, 36 insertions(+) - ---- a/drivers/md/dm-thin-metadata.c -+++ b/drivers/md/dm-thin-metadata.c -@@ -190,6 +190,15 @@ struct dm_pool_metadata { - sector_t data_block_size; - - /* -+ * Pre-commit callback. -+ * -+ * This allows the thin provisioning target to run a callback before -+ * the metadata are committed. -+ */ -+ dm_pool_pre_commit_fn pre_commit_fn; -+ void *pre_commit_context; -+ -+ /* - * We reserve a section of the metadata for commit overhead. - * All reported space does *not* include this. - */ -@@ -793,6 +802,14 @@ static int __commit_transaction(struct d - */ - BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); - -+ if (pmd->pre_commit_fn) { -+ r = pmd->pre_commit_fn(pmd->pre_commit_context); -+ if (r < 0) { -+ DMERR("pre-commit callback failed"); -+ return r; -+ } -+ } -+ - r = __write_changed_details(pmd); - if (r < 0) - return r; -@@ -866,6 +883,8 @@ struct dm_pool_metadata *dm_pool_metadat - pmd->fail_io = false; - pmd->bdev = bdev; - pmd->data_block_size = data_block_size; -+ pmd->pre_commit_fn = NULL; -+ pmd->pre_commit_context = NULL; - - r = __create_persistent_data_objects(pmd, format_device); - if (r) { -@@ -1942,6 +1961,16 @@ int dm_pool_register_metadata_threshold( - return r; - } - -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context) -+{ -+ pmd_write_lock_in_core(pmd); -+ pmd->pre_commit_fn = fn; -+ pmd->pre_commit_context = context; -+ pmd_write_unlock(pmd); -+} -+ - int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) - { - int r; ---- a/drivers/md/dm-thin-metadata.h -+++ b/drivers/md/dm-thin-metadata.h -@@ -227,6 +227,13 @@ bool dm_pool_metadata_needs_check(struct - */ - void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd); - -+/* Pre-commit callback */ -+typedef int (*dm_pool_pre_commit_fn)(void *context); -+ -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context); -+ - /*----------------------------------------------------------------*/ - - #endif diff --git a/queue-4.4/series b/queue-4.4/series index 0aec291b3ec..6ad816bebee 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -156,6 +156,4 @@ arm-dts-s3c64xx-fix-init-order-of-clock-providers.patch arm-tegra-fix-flow_ctlr_halt-register-clobbering-by-tegra_resume.patch vfio-pci-call-irq_bypass_unregister_producer-before-freeing-irq.patch dm-btree-increase-rebalance-threshold-in-__rebalance2.patch -dm-thin-metadata-add-support-for-a-pre-commit-callback.patch -dm-thin-flush-data-device-before-committing-metadata.patch drm-radeon-fix-r1xx-r2xx-register-checker-for-pot-textures.patch diff --git a/queue-4.9/dm-thin-flush-data-device-before-committing-metadata.patch b/queue-4.9/dm-thin-flush-data-device-before-committing-metadata.patch deleted file mode 100644 index e8521def518..00000000000 --- a/queue-4.9/dm-thin-flush-data-device-before-committing-metadata.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 694cfe7f31db36912725e63a38a5179c8628a496 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:42 +0200 -Subject: dm thin: Flush data device before committing metadata - -From: Nikos Tsironis - -commit 694cfe7f31db36912725e63a38a5179c8628a496 upstream. - -The thin provisioning target maintains per thin device mappings that map -virtual blocks to data blocks in the data device. - -When we write to a shared block, in case of internal snapshots, or -provision a new block, in case of external snapshots, we copy the shared -block to a new data block (COW), update the mapping for the relevant -virtual block and then issue the write to the new data block. - -Suppose the data device has a volatile write-back cache and the -following sequence of events occur: - -1. We write to a shared block -2. A new data block is allocated -3. We copy the shared block to the new data block using kcopyd (COW) -4. We insert the new mapping for the virtual block in the btree for that - thin device. -5. The commit timeout expires and we commit the metadata, that now - includes the new mapping from step (4). -6. The system crashes and the data device's cache has not been flushed, - meaning that the COWed data are lost. - -The next time we read that virtual block of the thin device we read it -from the data block allocated in step (2), since the metadata have been -successfully committed. The data are lost due to the crash, so we read -garbage instead of the old, shared data. - -This has the following implications: - -1. In case of writes to shared blocks, with size smaller than the pool's - block size (which means we first copy the whole block and then issue - the smaller write), we corrupt data that the user never touched. - -2. In case of writes to shared blocks, with size equal to the device's - logical block size, we fail to provide atomic sector writes. When the - system recovers the user will read garbage from that sector instead - of the old data or the new data. - -3. Even for writes to shared blocks, with size equal to the pool's block - size (overwrites), after the system recovers, the written sectors - will contain garbage instead of a random mix of sectors containing - either old data or new data, thus we fail again to provide atomic - sectors writes. - -4. Even when the user flushes the thin device, because we first commit - the metadata and then pass down the flush, the same risk for - corruption exists (if the system crashes after the metadata have been - committed but before the flush is passed down to the data device.) - -The only case which is unaffected is that of writes with size equal to -the pool's block size and with the FUA flag set. But, because FUA writes -trigger metadata commits, this case can trigger the corruption -indirectly. - -Moreover, apart from internal and external snapshots, the same issue -exists for newly provisioned blocks, when block zeroing is enabled. -After the system recovers the provisioned blocks might contain garbage -instead of zeroes. - -To solve this and avoid the potential data corruption we flush the -pool's data device **before** committing its metadata. - -This ensures that the data blocks of any newly inserted mappings are -properly written to non-volatile storage and won't be lost in case of a -crash. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - ---- a/drivers/md/dm-thin.c -+++ b/drivers/md/dm-thin.c -@@ -298,6 +298,7 @@ struct pool_c { - dm_block_t low_water_blocks; - struct pool_features requested_pf; /* Features requested during table load */ - struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ -+ struct bio flush_bio; - }; - - /* -@@ -2378,8 +2379,16 @@ static void process_deferred_bios(struct - while ((bio = bio_list_pop(&bio_completions))) - bio_endio(bio); - -- while ((bio = bio_list_pop(&bios))) -- generic_make_request(bio); -+ while ((bio = bio_list_pop(&bios))) { -+ /* -+ * The data device was flushed as part of metadata commit, -+ * so complete redundant flushes immediately. -+ */ -+ if (bio->bi_opf & REQ_PREFLUSH) -+ bio_endio(bio); -+ else -+ generic_make_request(bio); -+ } - } - - static void do_worker(struct work_struct *ws) -@@ -3139,6 +3148,7 @@ static void pool_dtr(struct dm_target *t - __pool_dec(pt->pool); - dm_put_device(ti, pt->metadata_dev); - dm_put_device(ti, pt->data_dev); -+ bio_uninit(&pt->flush_bio); - kfree(pt); - - mutex_unlock(&dm_thin_pool_table.mutex); -@@ -3204,6 +3214,29 @@ static void metadata_low_callback(void * - dm_table_event(pool->ti->table); - } - -+/* -+ * We need to flush the data device **before** committing the metadata. -+ * -+ * This ensures that the data blocks of any newly inserted mappings are -+ * properly written to non-volatile storage and won't be lost in case of a -+ * crash. -+ * -+ * Failure to do so can result in data corruption in the case of internal or -+ * external snapshots and in the case of newly provisioned blocks, when block -+ * zeroing is enabled. -+ */ -+static int metadata_pre_commit_callback(void *context) -+{ -+ struct pool_c *pt = context; -+ struct bio *flush_bio = &pt->flush_bio; -+ -+ bio_reset(flush_bio); -+ bio_set_dev(flush_bio, pt->data_dev->bdev); -+ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; -+ -+ return submit_bio_wait(flush_bio); -+} -+ - static sector_t get_dev_size(struct block_device *bdev) - { - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; -@@ -3372,6 +3405,7 @@ static int pool_ctr(struct dm_target *ti - pt->data_dev = data_dev; - pt->low_water_blocks = low_water_blocks; - pt->adjusted_pf = pt->requested_pf = pf; -+ bio_init(&pt->flush_bio, NULL, 0); - ti->num_flush_bios = 1; - - /* -@@ -3399,6 +3433,10 @@ static int pool_ctr(struct dm_target *ti - if (r) - goto out_flags_changed; - -+ dm_pool_register_pre_commit_callback(pt->pool->pmd, -+ metadata_pre_commit_callback, -+ pt); -+ - pt->callbacks.congested_fn = pool_is_congested; - dm_table_add_target_callbacks(ti->table, &pt->callbacks); - diff --git a/queue-4.9/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch b/queue-4.9/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch deleted file mode 100644 index 418555aa618..00000000000 --- a/queue-4.9/dm-thin-metadata-add-support-for-a-pre-commit-callback.patch +++ /dev/null @@ -1,101 +0,0 @@ -From ecda7c0280e6b3398459dc589b9a41c1adb45529 Mon Sep 17 00:00:00 2001 -From: Nikos Tsironis -Date: Wed, 4 Dec 2019 16:07:41 +0200 -Subject: dm thin metadata: Add support for a pre-commit callback - -From: Nikos Tsironis - -commit ecda7c0280e6b3398459dc589b9a41c1adb45529 upstream. - -Add support for one pre-commit callback which is run right before the -metadata are committed. - -This allows the thin provisioning target to run a callback before the -metadata are committed and is required by the next commit. - -Cc: stable@vger.kernel.org -Signed-off-by: Nikos Tsironis -Acked-by: Joe Thornber -Signed-off-by: Mike Snitzer -Signed-off-by: Greg Kroah-Hartman - ---- - drivers/md/dm-thin-metadata.c | 29 +++++++++++++++++++++++++++++ - drivers/md/dm-thin-metadata.h | 7 +++++++ - 2 files changed, 36 insertions(+) - ---- a/drivers/md/dm-thin-metadata.c -+++ b/drivers/md/dm-thin-metadata.c -@@ -190,6 +190,15 @@ struct dm_pool_metadata { - sector_t data_block_size; - - /* -+ * Pre-commit callback. -+ * -+ * This allows the thin provisioning target to run a callback before -+ * the metadata are committed. -+ */ -+ dm_pool_pre_commit_fn pre_commit_fn; -+ void *pre_commit_context; -+ -+ /* - * We reserve a section of the metadata for commit overhead. - * All reported space does *not* include this. - */ -@@ -793,6 +802,14 @@ static int __commit_transaction(struct d - */ - BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); - -+ if (pmd->pre_commit_fn) { -+ r = pmd->pre_commit_fn(pmd->pre_commit_context); -+ if (r < 0) { -+ DMERR("pre-commit callback failed"); -+ return r; -+ } -+ } -+ - r = __write_changed_details(pmd); - if (r < 0) - return r; -@@ -866,6 +883,8 @@ struct dm_pool_metadata *dm_pool_metadat - pmd->fail_io = false; - pmd->bdev = bdev; - pmd->data_block_size = data_block_size; -+ pmd->pre_commit_fn = NULL; -+ pmd->pre_commit_context = NULL; - - r = __create_persistent_data_objects(pmd, format_device); - if (r) { -@@ -2010,6 +2029,16 @@ int dm_pool_register_metadata_threshold( - return r; - } - -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context) -+{ -+ pmd_write_lock_in_core(pmd); -+ pmd->pre_commit_fn = fn; -+ pmd->pre_commit_context = context; -+ pmd_write_unlock(pmd); -+} -+ - int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) - { - int r; ---- a/drivers/md/dm-thin-metadata.h -+++ b/drivers/md/dm-thin-metadata.h -@@ -230,6 +230,13 @@ bool dm_pool_metadata_needs_check(struct - */ - void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd); - -+/* Pre-commit callback */ -+typedef int (*dm_pool_pre_commit_fn)(void *context); -+ -+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, -+ dm_pool_pre_commit_fn fn, -+ void *context); -+ - /*----------------------------------------------------------------*/ - - #endif diff --git a/queue-4.9/series b/queue-4.9/series index 3077b03194b..cd383c67e1f 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -192,7 +192,5 @@ arm-tegra-fix-flow_ctlr_halt-register-clobbering-by-tegra_resume.patch vfio-pci-call-irq_bypass_unregister_producer-before-freeing-irq.patch dma-buf-fix-memory-leak-in-sync_file_merge.patch dm-btree-increase-rebalance-threshold-in-__rebalance2.patch -dm-thin-metadata-add-support-for-a-pre-commit-callback.patch -dm-thin-flush-data-device-before-committing-metadata.patch scsi-iscsi-fix-a-potential-deadlock-in-the-timeout-handler.patch drm-radeon-fix-r1xx-r2xx-register-checker-for-pot-textures.patch