From: Greg Kroah-Hartman Date: Sun, 9 Feb 2014 00:10:42 +0000 (-0800) Subject: 3.12-stable patches X-Git-Tag: v3.4.80~59 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=73d6888bed2e102a656f1c5d6a4f6f49b532619c;p=thirdparty%2Fkernel%2Fstable-queue.git 3.12-stable patches added patches: dm-space-map-common-make-sure-new-space-is-used-during-extend.patch dm-space-map-metadata-fix-bug-in-resizing-of-thin-metadata.patch dm-space-map-metadata-fix-extending-the-space-map.patch dm-thin-fix-discard-support-to-a-previously-shared-block.patch dm-thin-fix-set_pool_mode-exposed-pool-operation-races.patch dm-thin-initialize-dm_thin_new_mapping-returned-by-get_next_mapping.patch dm-wait-until-embedded-kobject-is-released-before-destroying-a-device.patch --- diff --git a/queue-3.12/dm-space-map-common-make-sure-new-space-is-used-during-extend.patch b/queue-3.12/dm-space-map-common-make-sure-new-space-is-used-during-extend.patch new file mode 100644 index 00000000000..b6691f03941 --- /dev/null +++ b/queue-3.12/dm-space-map-common-make-sure-new-space-is-used-during-extend.patch @@ -0,0 +1,55 @@ +From 12c91a5c2d2a8e8cc40a9552313e1e7b0a2d9ee3 Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Tue, 7 Jan 2014 15:47:59 +0000 +Subject: dm space map common: make sure new space is used during extend + +From: Joe Thornber + +commit 12c91a5c2d2a8e8cc40a9552313e1e7b0a2d9ee3 upstream. + +When extending a low level space map we should update nr_blocks at +the start so the new space is used for the index entries. 
+ +Otherwise extend can fail, e.g.: sm_metadata_extend call sequence +that fails: + -> sm_ll_extend + -> dm_tm_new_block -> dm_sm_new_block -> sm_bootstrap_new_block + => returns -ENOSPC because smm->begin == smm->ll.nr_blocks + +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/persistent-data/dm-space-map-common.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/md/persistent-data/dm-space-map-common.c ++++ b/drivers/md/persistent-data/dm-space-map-common.c +@@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_ + return -EINVAL; + } + ++ /* ++ * We need to set this before the dm_tm_new_block() call below. ++ */ ++ ll->nr_blocks = nr_blocks; + for (i = old_blocks; i < blocks; i++) { + struct dm_block *b; + struct disk_index_entry idx; +@@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_ + r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); + if (r < 0) + return r; ++ + idx.blocknr = cpu_to_le64(dm_block_location(b)); + + r = dm_tm_unlock(ll->tm, b); +@@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_ + return r; + } + +- ll->nr_blocks = nr_blocks; + return 0; + } + diff --git a/queue-3.12/dm-space-map-metadata-fix-bug-in-resizing-of-thin-metadata.patch b/queue-3.12/dm-space-map-metadata-fix-bug-in-resizing-of-thin-metadata.patch new file mode 100644 index 00000000000..ccb2d065412 --- /dev/null +++ b/queue-3.12/dm-space-map-metadata-fix-bug-in-resizing-of-thin-metadata.patch @@ -0,0 +1,76 @@ +From fca028438fb903852beaf7c3fe1cd326651af57d Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Tue, 21 Jan 2014 11:07:32 +0000 +Subject: dm space map metadata: fix bug in resizing of thin metadata + +From: Joe Thornber + +commit fca028438fb903852beaf7c3fe1cd326651af57d upstream. + +This bug was introduced in commit 7e664b3dec431e ("dm space map metadata: +fix extending the space map"). 
+ +When extending a dm-thin metadata volume we: + +- Switch the space map into a simple bootstrap mode, which allocates + all space linearly from the newly added space. +- Add new bitmap entries for the new space +- Increment the reference counts for those newly allocated bitmap + entries +- Commit changes to disk +- Switch back out of bootstrap mode. + +But the disk commit may allocate space itself; if so, this fact will be +lost when switching out of bootstrap mode. + +The bug exhibited itself as an error when the bitmap_root, with an +erroneous ref count of 0, was subsequently decremented as part of a +later disk commit. This would cause the disk commit to fail, and thinp +to enter read_only mode. The metadata was not damaged (thin_check +passed). + +The fix is to put the increments + commit into a loop, running until +the commit has not allocated extra space. In practice this loop only +runs twice. + +With this fix the following device mapper testsuite test passes: + dmtest run --suite thin-provisioning -n thin_remove_works_after_resize + +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/persistent-data/dm-space-map-metadata.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/drivers/md/persistent-data/dm-space-map-metadata.c ++++ b/drivers/md/persistent-data/dm-space-map-metadata.c +@@ -617,13 +617,23 @@ static int sm_metadata_extend(struct dm_ + if (r) + goto out; + +- for (i = old_len; !r && i < smm->begin; i++) { +- r = sm_ll_inc(&smm->ll, i, &ev); ++ /* ++ * We repeatedly increment then commit until the commit doesn't ++ * allocate any new blocks. 
++ */ ++ do { ++ for (i = old_len; !r && i < smm->begin; i++) { ++ r = sm_ll_inc(&smm->ll, i, &ev); ++ if (r) ++ goto out; ++ } ++ old_len = smm->begin; ++ ++ r = sm_ll_commit(&smm->ll); + if (r) + goto out; +- } + +- r = sm_metadata_commit(sm); ++ } while (old_len != smm->begin); + + out: + /* diff --git a/queue-3.12/dm-space-map-metadata-fix-extending-the-space-map.patch b/queue-3.12/dm-space-map-metadata-fix-extending-the-space-map.patch new file mode 100644 index 00000000000..3c10aa5568a --- /dev/null +++ b/queue-3.12/dm-space-map-metadata-fix-extending-the-space-map.patch @@ -0,0 +1,65 @@ +From 7e664b3dec431eebf0c5df5ff704d6197634cf35 Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Tue, 7 Jan 2014 15:49:02 +0000 +Subject: dm space map metadata: fix extending the space map + +From: Joe Thornber + +commit 7e664b3dec431eebf0c5df5ff704d6197634cf35 upstream. + +When extending a metadata space map we should do the first commit whilst +still in bootstrap mode -- a mode where all blocks get allocated in the +new area. + +That way the commit overhead is allocated from the newly added space. +Otherwise we risk running out of space. + +With this fix, and the previous commit "dm space map common: make sure +new space is used during extend", the following device mapper testsuite +test passes: + dmtest run --suite thin-provisioning -n /resize_metadata_no_io/ + +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/persistent-data/dm-space-map-metadata.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/drivers/md/persistent-data/dm-space-map-metadata.c ++++ b/drivers/md/persistent-data/dm-space-map-metadata.c +@@ -608,20 +608,28 @@ static int sm_metadata_extend(struct dm_ + * Flick into a mode where all blocks get allocated in the new area. 
+ */ + smm->begin = old_len; +- memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); ++ memcpy(sm, &bootstrap_ops, sizeof(*sm)); + + /* + * Extend. + */ + r = sm_ll_extend(&smm->ll, extra_blocks); ++ if (r) ++ goto out; + ++ for (i = old_len; !r && i < smm->begin; i++) { ++ r = sm_ll_inc(&smm->ll, i, &ev); ++ if (r) ++ goto out; ++ } ++ ++ r = sm_metadata_commit(sm); ++ ++out: + /* + * Switch back to normal behaviour. + */ +- memcpy(&smm->sm, &ops, sizeof(smm->sm)); +- for (i = old_len; !r && i < smm->begin; i++) +- r = sm_ll_inc(&smm->ll, i, &ev); +- ++ memcpy(sm, &ops, sizeof(*sm)); + return r; + } + diff --git a/queue-3.12/dm-thin-fix-discard-support-to-a-previously-shared-block.patch b/queue-3.12/dm-thin-fix-discard-support-to-a-previously-shared-block.patch new file mode 100644 index 00000000000..b6cf7885813 --- /dev/null +++ b/queue-3.12/dm-thin-fix-discard-support-to-a-previously-shared-block.patch @@ -0,0 +1,128 @@ +From 19fa1a6756ed9e92daa9537c03b47d6b55cc2316 Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Tue, 17 Dec 2013 12:09:40 -0500 +Subject: dm thin: fix discard support to a previously shared block + +From: Joe Thornber + +commit 19fa1a6756ed9e92daa9537c03b47d6b55cc2316 upstream. + +If a snapshot is created and later deleted the origin dm_thin_device's +snapshotted_time will have been updated to reflect the snapshot's +creation time. The 'shared' flag in the dm_thin_lookup_result struct +returned from dm_thin_find_block() is an approximation based on +snapshotted_time -- this is done to avoid O(n), or worse, time +complexity. In this case, the shared flag would be true. + +But because the 'shared' flag reflects an approximation a block can be +incorrectly assumed to be shared (e.g. false positive for 'shared' +because the snapshot no longer exists). This could result in discards +issued to a thin device not being passed down to the pool's underlying +data device. 
+ +To fix this we double check that a thin block is really still in-use +after a mapping is removed using dm_pool_block_is_used(). If the +reference count for a block is now zero the discard is allowed to be +passed down. + +Also add a 'definitely_not_shared' member to the dm_thin_new_mapping +structure -- reflects that the 'shared' flag in the response from +dm_thin_find_block() can only be held as definitive if false is +returned. + +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1043527 + +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-thin-metadata.c | 20 ++++++++++++++++++++ + drivers/md/dm-thin-metadata.h | 2 ++ + drivers/md/dm-thin.c | 14 ++++++++++++-- + 3 files changed, 34 insertions(+), 2 deletions(-) + +--- a/drivers/md/dm-thin-metadata.c ++++ b/drivers/md/dm-thin-metadata.c +@@ -1349,6 +1349,12 @@ dm_thin_id dm_thin_dev_id(struct dm_thin + return td->id; + } + ++/* ++ * Check whether @time (of block creation) is older than @td's last snapshot. ++ * If so then the associated block is shared with the last snapshot device. ++ * Any block on a device created *after* the device last got snapshotted is ++ * necessarily not shared. 
++ */ + static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) + { + return td->snapshotted_time > time; +@@ -1457,6 +1463,20 @@ int dm_thin_remove_block(struct dm_thin_ + + return r; + } ++ ++int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) ++{ ++ int r; ++ uint32_t ref_count; ++ ++ down_read(&pmd->root_lock); ++ r = dm_sm_get_count(pmd->data_sm, b, &ref_count); ++ if (!r) ++ *result = (ref_count != 0); ++ up_read(&pmd->root_lock); ++ ++ return r; ++} + + bool dm_thin_changed_this_transaction(struct dm_thin_device *td) + { +--- a/drivers/md/dm-thin-metadata.h ++++ b/drivers/md/dm-thin-metadata.h +@@ -181,6 +181,8 @@ int dm_pool_get_data_block_size(struct d + + int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); + ++int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); ++ + /* + * Returns -ENOSPC if the new size is too small and already allocated + * blocks would be lost. +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -512,6 +512,7 @@ struct dm_thin_new_mapping { + unsigned quiesced:1; + unsigned prepared:1; + unsigned pass_discard:1; ++ unsigned definitely_not_shared:1; + + struct thin_c *tc; + dm_block_t virt_block; +@@ -683,7 +684,15 @@ static void process_prepared_discard_pas + cell_defer_no_holder(tc, m->cell2); + + if (m->pass_discard) +- remap_and_issue(tc, m->bio, m->data_block); ++ if (m->definitely_not_shared) ++ remap_and_issue(tc, m->bio, m->data_block); ++ else { ++ bool used = false; ++ if (dm_pool_block_is_used(tc->pool->pmd, m->data_block, &used) || used) ++ bio_endio(m->bio, 0); ++ else ++ remap_and_issue(tc, m->bio, m->data_block); ++ } + else + bio_endio(m->bio, 0); + +@@ -1040,7 +1049,8 @@ static void process_discard(struct thin_ + */ + m = get_next_mapping(pool); + m->tc = tc; +- m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown; ++ m->pass_discard = pool->pf.discard_passdown; ++ 
m->definitely_not_shared = !lookup_result.shared; + m->virt_block = block; + m->data_block = lookup_result.block; + m->cell = cell; diff --git a/queue-3.12/dm-thin-fix-set_pool_mode-exposed-pool-operation-races.patch b/queue-3.12/dm-thin-fix-set_pool_mode-exposed-pool-operation-races.patch new file mode 100644 index 00000000000..148694b1eb9 --- /dev/null +++ b/queue-3.12/dm-thin-fix-set_pool_mode-exposed-pool-operation-races.patch @@ -0,0 +1,154 @@ +From 8b64e881eb40ac8b9bfcbce068a97eef819044ee Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Fri, 20 Dec 2013 14:27:28 -0500 +Subject: dm thin: fix set_pool_mode exposed pool operation races + +From: Mike Snitzer + +commit 8b64e881eb40ac8b9bfcbce068a97eef819044ee upstream. + +The pool mode must not be switched until after the corresponding pool +process_* methods have been established. Otherwise, because +set_pool_mode() isn't interlocked with the IO path for performance +reasons, the IO path can end up executing process_* operations that +don't match the mode. This patch eliminates problems like the following +(as seen on really fast PCIe SSD storage when transitioning the pool's +mode from PM_READ_ONLY to PM_WRITE): + +kernel: device-mapper: thin: 253:2: reached low water mark for data device: sending event. +kernel: device-mapper: thin: 253:2: no free data space available. +kernel: device-mapper: thin: 253:2: switching pool to read-only mode +kernel: device-mapper: thin: 253:2: switching pool to write mode +kernel: ------------[ cut here ]------------ +kernel: WARNING: CPU: 11 PID: 7564 at drivers/md/dm-thin.c:995 handle_unserviceable_bio+0x146/0x160 [dm_thin_pool]() +... 
+kernel: Workqueue: dm-thin do_worker [dm_thin_pool] +kernel: 00000000000003e3 ffff880308831cc8 ffffffff8152ebcb 00000000000003e3 +kernel: 0000000000000000 ffff880308831d08 ffffffff8104c46c ffff88032502a800 +kernel: ffff880036409000 ffff88030ec7ce00 0000000000000001 00000000ffffffc3 +kernel: Call Trace: +kernel: [] dump_stack+0x49/0x5e +kernel: [] warn_slowpath_common+0x8c/0xc0 +kernel: [] warn_slowpath_null+0x1a/0x20 +kernel: [] handle_unserviceable_bio+0x146/0x160 [dm_thin_pool] +kernel: [] process_bio_read_only+0x136/0x180 [dm_thin_pool] +kernel: [] process_deferred_bios+0xc5/0x230 [dm_thin_pool] +kernel: [] do_worker+0x51/0x60 [dm_thin_pool] +kernel: [] process_one_work+0x183/0x490 +kernel: [] worker_thread+0x120/0x3a0 +kernel: [] ? manage_workers+0x160/0x160 +kernel: [] kthread+0xce/0xf0 +kernel: [] ? kthread_freezable_should_stop+0x70/0x70 +kernel: [] ret_from_fork+0x7c/0xb0 +kernel: [] ? kthread_freezable_should_stop+0x70/0x70 +kernel: ---[ end trace 3f00528e08ffa55c ]--- +kernel: device-mapper: thin: pool mode is PM_WRITE not PM_READ_ONLY like expected!? + +dm-thin.c:995 was the WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY); +at the top of handle_unserviceable_bio(). And as the additional +debugging I had conveys: the pool mode was _not_ PM_READ_ONLY like +expected, it was already PM_WRITE, yet pool->process_bio was still set +to process_bio_read_only(). + +Also, while fixing this up, reduce logging of redundant pool mode +transitions by checking new_mode is different from old_mode. 
+ +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-thin.c | 40 +++++++++++++++++++++++++++------------- + 1 file changed, 27 insertions(+), 13 deletions(-) + +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -1395,16 +1395,16 @@ static enum pool_mode get_pool_mode(stru + return pool->pf.mode; + } + +-static void set_pool_mode(struct pool *pool, enum pool_mode mode) ++static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) + { + int r; ++ enum pool_mode old_mode = pool->pf.mode; + +- pool->pf.mode = mode; +- +- switch (mode) { ++ switch (new_mode) { + case PM_FAIL: +- DMERR("%s: switching pool to failure mode", +- dm_device_name(pool->pool_md)); ++ if (old_mode != new_mode) ++ DMERR("%s: switching pool to failure mode", ++ dm_device_name(pool->pool_md)); + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_fail; + pool->process_discard = process_bio_fail; +@@ -1413,13 +1413,15 @@ static void set_pool_mode(struct pool *p + break; + + case PM_READ_ONLY: +- DMERR("%s: switching pool to read-only mode", +- dm_device_name(pool->pool_md)); ++ if (old_mode != new_mode) ++ DMERR("%s: switching pool to read-only mode", ++ dm_device_name(pool->pool_md)); + r = dm_pool_abort_metadata(pool->pmd); + if (r) { + DMERR("%s: aborting transaction failed", + dm_device_name(pool->pool_md)); +- set_pool_mode(pool, PM_FAIL); ++ new_mode = PM_FAIL; ++ set_pool_mode(pool, new_mode); + } else { + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_read_only; +@@ -1430,6 +1432,9 @@ static void set_pool_mode(struct pool *p + break; + + case PM_WRITE: ++ if (old_mode != new_mode) ++ DMINFO("%s: switching pool to write mode", ++ dm_device_name(pool->pool_md)); + dm_pool_metadata_read_write(pool->pmd); + pool->process_bio = process_bio; + pool->process_discard = process_discard; +@@ -1437,6 +1442,8 @@ static void set_pool_mode(struct pool *p + pool->process_prepared_discard = 
process_prepared_discard; + break; + } ++ ++ pool->pf.mode = new_mode; + } + + /*----------------------------------------------------------------*/ +@@ -1653,6 +1660,17 @@ static int bind_control_target(struct po + enum pool_mode new_mode = pt->adjusted_pf.mode; + + /* ++ * Don't change the pool's mode until set_pool_mode() below. ++ * Otherwise the pool's process_* function pointers may ++ * not match the desired pool mode. ++ */ ++ pt->adjusted_pf.mode = old_mode; ++ ++ pool->ti = ti; ++ pool->pf = pt->adjusted_pf; ++ pool->low_water_blocks = pt->low_water_blocks; ++ ++ /* + * If we were in PM_FAIL mode, rollback of metadata failed. We're + * not going to recover without a thin_repair. So we never let the + * pool move out of the old mode. On the other hand a PM_READ_ONLY +@@ -1662,10 +1680,6 @@ static int bind_control_target(struct po + if (old_mode == PM_FAIL) + new_mode = old_mode; + +- pool->ti = ti; +- pool->low_water_blocks = pt->low_water_blocks; +- pool->pf = pt->adjusted_pf; +- + set_pool_mode(pool, new_mode); + + return 0; diff --git a/queue-3.12/dm-thin-initialize-dm_thin_new_mapping-returned-by-get_next_mapping.patch b/queue-3.12/dm-thin-initialize-dm_thin_new_mapping-returned-by-get_next_mapping.patch new file mode 100644 index 00000000000..cd8d3172837 --- /dev/null +++ b/queue-3.12/dm-thin-initialize-dm_thin_new_mapping-returned-by-get_next_mapping.patch @@ -0,0 +1,82 @@ +From 16961b042db8cc5cf75d782b4255193ad56e1d4f Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Tue, 17 Dec 2013 13:19:11 -0500 +Subject: dm thin: initialize dm_thin_new_mapping returned by get_next_mapping + +From: Mike Snitzer + +commit 16961b042db8cc5cf75d782b4255193ad56e1d4f upstream. + +As additional members are added to the dm_thin_new_mapping structure +care should be taken to make sure they get initialized before use. 
+ +Signed-off-by: Mike Snitzer +Acked-by: Joe Thornber +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-thin.c | 17 ++++++----------- + 1 file changed, 6 insertions(+), 11 deletions(-) + +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -760,13 +760,17 @@ static int ensure_next_mapping(struct po + + static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) + { +- struct dm_thin_new_mapping *r = pool->next_mapping; ++ struct dm_thin_new_mapping *m = pool->next_mapping; + + BUG_ON(!pool->next_mapping); + ++ memset(m, 0, sizeof(struct dm_thin_new_mapping)); ++ INIT_LIST_HEAD(&m->list); ++ m->bio = NULL; ++ + pool->next_mapping = NULL; + +- return r; ++ return m; + } + + static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, +@@ -778,15 +782,10 @@ static void schedule_copy(struct thin_c + struct pool *pool = tc->pool; + struct dm_thin_new_mapping *m = get_next_mapping(pool); + +- INIT_LIST_HEAD(&m->list); +- m->quiesced = 0; +- m->prepared = 0; + m->tc = tc; + m->virt_block = virt_block; + m->data_block = data_dest; + m->cell = cell; +- m->err = 0; +- m->bio = NULL; + + if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) + m->quiesced = 1; +@@ -849,15 +848,12 @@ static void schedule_zero(struct thin_c + struct pool *pool = tc->pool; + struct dm_thin_new_mapping *m = get_next_mapping(pool); + +- INIT_LIST_HEAD(&m->list); + m->quiesced = 1; + m->prepared = 0; + m->tc = tc; + m->virt_block = virt_block; + m->data_block = data_block; + m->cell = cell; +- m->err = 0; +- m->bio = NULL; + + /* + * If the whole block of data is being overwritten or we are not +@@ -1055,7 +1051,6 @@ static void process_discard(struct thin_ + m->data_block = lookup_result.block; + m->cell = cell; + m->cell2 = cell2; +- m->err = 0; + m->bio = bio; + + if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) { diff --git a/queue-3.12/dm-wait-until-embedded-kobject-is-released-before-destroying-a-device.patch 
b/queue-3.12/dm-wait-until-embedded-kobject-is-released-before-destroying-a-device.patch new file mode 100644 index 00000000000..fee25e7cb81 --- /dev/null +++ b/queue-3.12/dm-wait-until-embedded-kobject-is-released-before-destroying-a-device.patch @@ -0,0 +1,125 @@ +From be35f486108227e10fe5d96fd42fb2b344c59983 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Mon, 6 Jan 2014 23:01:22 -0500 +Subject: dm: wait until embedded kobject is released before destroying a device + +From: Mikulas Patocka + +commit be35f486108227e10fe5d96fd42fb2b344c59983 upstream. + +There may be other parts of the kernel holding a reference on the dm +kobject. We must wait until all references are dropped before +deallocating the mapped_device structure. + +The dm_kobject_release method signals that all references are dropped +via completion. But dm_kobject_release doesn't free the kobject (which +is embedded in the mapped_device structure). + +This is the sequence of operations: +* when destroying a DM device, call kobject_put from dm_sysfs_exit +* wait until all users stop using the kobject, when it happens the + release method is called +* the release method signals the completion and should return without + delay +* the dm device removal code that waits on the completion continues +* the dm device removal code drops the dm_mod reference the device had +* the dm device removal code frees the mapped_device structure that + contains the kobject + +Using kobject this way should avoid the module unload race that was +mentioned at the beginning of this thread: +https://lkml.org/lkml/2014/1/4/83 + +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-sysfs.c | 10 +++++++++- + drivers/md/dm.c | 11 +++++++++++ + drivers/md/dm.h | 2 ++ + 3 files changed, 22 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-sysfs.c ++++ b/drivers/md/dm-sysfs.c +@@ -79,6 +79,11 @@ static const struct sysfs_ops dm_sysfs_o + .show = dm_attr_show, 
+ }; + ++static void dm_kobject_release(struct kobject *kobj) ++{ ++ complete(dm_get_completion_from_kobject(kobj)); ++} ++ + /* + * dm kobject is embedded in mapped_device structure + * no need to define release function here +@@ -86,6 +91,7 @@ static const struct sysfs_ops dm_sysfs_o + static struct kobj_type dm_ktype = { + .sysfs_ops = &dm_sysfs_ops, + .default_attrs = dm_attrs, ++ .release = dm_kobject_release, + }; + + /* +@@ -104,5 +110,7 @@ int dm_sysfs_init(struct mapped_device * + */ + void dm_sysfs_exit(struct mapped_device *md) + { +- kobject_put(dm_kobject(md)); ++ struct kobject *kobj = dm_kobject(md); ++ kobject_put(kobj); ++ wait_for_completion(dm_get_completion_from_kobject(kobj)); + } +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -197,6 +197,9 @@ struct mapped_device { + /* sysfs handle */ + struct kobject kobj; + ++ /* wait until the kobject is released */ ++ struct completion kobj_completion; ++ + /* zero-length flush that will be cloned and submitted to targets */ + struct bio flush_bio; + +@@ -2005,6 +2008,7 @@ static struct mapped_device *alloc_dev(i + init_waitqueue_head(&md->wait); + INIT_WORK(&md->work, dm_wq_work); + init_waitqueue_head(&md->eventq); ++ init_completion(&md->kobj_completion); + + md->disk->major = _major; + md->disk->first_minor = minor; +@@ -2889,6 +2893,13 @@ struct mapped_device *dm_get_from_kobjec + return md; + } + ++struct completion *dm_get_completion_from_kobject(struct kobject *kobj) ++{ ++ struct mapped_device *md = container_of(kobj, struct mapped_device, kobj); ++ ++ return &md->kobj_completion; ++} ++ + int dm_suspended_md(struct mapped_device *md) + { + return test_bit(DMF_SUSPENDED, &md->flags); +--- a/drivers/md/dm.h ++++ b/drivers/md/dm.h +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #include "dm-stats.h" + +@@ -142,6 +143,7 @@ int dm_sysfs_init(struct mapped_device * + void dm_sysfs_exit(struct mapped_device *md); + struct kobject *dm_kobject(struct mapped_device *md); + struct 
mapped_device *dm_get_from_kobject(struct kobject *kobj); ++struct completion *dm_get_completion_from_kobject(struct kobject *kobj); + + /* + * Targets for linear and striped mappings diff --git a/queue-3.12/series b/queue-3.12/series index 91c686966b8..893a8dc18f2 100644 --- a/queue-3.12/series +++ b/queue-3.12/series @@ -47,3 +47,10 @@ pnfs-proper-delay-for-nfs4err_recallconflict-in-layout_get_done.patch nfsv4-fix-a-slot-leak-in-nfs40_sequence_done.patch sunrpc-fix-infinite-loop-in-rpc-state-machine.patch sunrpc-don-t-wait-for-write-before-allowing-reads-from-use-gss-proxy-file.patch +dm-thin-fix-discard-support-to-a-previously-shared-block.patch +dm-thin-initialize-dm_thin_new_mapping-returned-by-get_next_mapping.patch +dm-thin-fix-set_pool_mode-exposed-pool-operation-races.patch +dm-wait-until-embedded-kobject-is-released-before-destroying-a-device.patch +dm-space-map-common-make-sure-new-space-is-used-during-extend.patch +dm-space-map-metadata-fix-extending-the-space-map.patch +dm-space-map-metadata-fix-bug-in-resizing-of-thin-metadata.patch