From bd7f919aada8067691a3fe2954ddd93c49a25d66 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 7 Jul 2014 12:59:31 -0700 Subject: [PATCH] 3.10-stable patches added patches: dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch rbd-handle-parent_overlap-on-writes-correctly.patch rbd-use-reference-counts-for-image-requests.patch --- ...y-to-reflect-the-thin-pool-blocksize.patch | 42 +++++++ ...e-parent_overlap-on-writes-correctly.patch | 70 ++++++++++++ ...-reference-counts-for-image-requests.patch | 103 ++++++++++++++++++ queue-3.10/series | 3 + 4 files changed, 218 insertions(+) create mode 100644 queue-3.10/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch create mode 100644 queue-3.10/rbd-handle-parent_overlap-on-writes-correctly.patch create mode 100644 queue-3.10/rbd-use-reference-counts-for-image-requests.patch diff --git a/queue-3.10/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch b/queue-3.10/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch new file mode 100644 index 00000000000..f934e6c7078 --- /dev/null +++ b/queue-3.10/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch @@ -0,0 +1,42 @@ +From 09869de57ed2728ae3c619803932a86cb0e2c4f8 Mon Sep 17 00:00:00 2001 +From: Lukas Czerner +Date: Wed, 11 Jun 2014 12:28:43 -0400 +Subject: dm thin: update discard_granularity to reflect the thin-pool blocksize + +From: Lukas Czerner + +commit 09869de57ed2728ae3c619803932a86cb0e2c4f8 upstream. + +DM thinp already checks whether the discard_granularity of the data +device is a factor of the thin-pool block size. But when using the +dm-thin-pool's discard passdown support, DM thinp was not selecting the +max of the underlying data device's discard_granularity and the +thin-pool's block size. + +Update set_discard_limits() to set discard_granularity to the max of +these values. 
This enables blkdev_issue_discard() to properly align the +discards that are sent to the DM thin device on a full block boundary. +As such each discard will now cover an entire DM thin-pool block and the +block will be reclaimed. + +Reported-by: Zdenek Kabelac +Signed-off-by: Lukas Czerner +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-thin.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -2647,7 +2647,8 @@ static void set_discard_limits(struct po + */ + if (pt->adjusted_pf.discard_passdown) { + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; +- limits->discard_granularity = data_limits->discard_granularity; ++ limits->discard_granularity = max(data_limits->discard_granularity, ++ pool->sectors_per_block << SECTOR_SHIFT); + } else + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; + } diff --git a/queue-3.10/rbd-handle-parent_overlap-on-writes-correctly.patch b/queue-3.10/rbd-handle-parent_overlap-on-writes-correctly.patch new file mode 100644 index 00000000000..5e984393443 --- /dev/null +++ b/queue-3.10/rbd-handle-parent_overlap-on-writes-correctly.patch @@ -0,0 +1,70 @@ +From 9638556a276125553549fdfe349c464481ec2f39 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Tue, 10 Jun 2014 13:53:29 +0400 +Subject: rbd: handle parent_overlap on writes correctly + +From: Ilya Dryomov + +commit 9638556a276125553549fdfe349c464481ec2f39 upstream. + +The following check in rbd_img_obj_request_submit() + + rbd_dev->parent_overlap <= obj_request->img_offset + +allows the fall through to the non-layered write case even if both +parent_overlap and obj_request->img_offset belong to the same RADOS +object. This leads to data corruption, because the area to the left of +parent_overlap ends up unconditionally zero-filled instead of being +populated with parent data. 
Suppose we want to write 1M to offset 6M +of image bar, which is a clone of foo@snap; object_size is 4M, +parent_overlap is 5M: + + rbd_data..0000000000000001 + ---------------------|----------------------|------------ + | should be copyup'ed | should be zeroed out | write ... + ---------------------|----------------------|------------ + 4M 5M 6M + parent_overlap obj_request->img_offset + +4..5M should be copyup'ed from foo, yet it is zero-filled, just like +5..6M is. + +Given that the only striping mode kernel client currently supports is +chunking (i.e. stripe_unit == object_size, stripe_count == 1), round +parent_overlap up to the next object boundary for the purposes of the +overlap check. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Josh Durgin +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/rbd.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -1385,6 +1385,14 @@ static bool obj_request_exists_test(stru + return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0; + } + ++static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) ++{ ++ struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; ++ ++ return obj_request->img_offset < ++ round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header)); ++} ++ + static void rbd_obj_request_get(struct rbd_obj_request *obj_request) + { + dout("%s: obj %p (was %d)\n", __func__, obj_request, +@@ -2682,7 +2690,7 @@ static int rbd_img_obj_request_submit(st + */ + if (!img_request_write_test(img_request) || + !img_request_layered_test(img_request) || +- rbd_dev->parent_overlap <= obj_request->img_offset || ++ !obj_request_overlaps_parent(obj_request) || + ((known = obj_request_known_test(obj_request)) && + obj_request_exists_test(obj_request))) { + diff --git a/queue-3.10/rbd-use-reference-counts-for-image-requests.patch b/queue-3.10/rbd-use-reference-counts-for-image-requests.patch new file mode 100644 index 
00000000000..8875b206000 --- /dev/null +++ b/queue-3.10/rbd-use-reference-counts-for-image-requests.patch @@ -0,0 +1,103 @@ +From 0f2d5be792b0466b06797f637cfbb0f64dbb408c Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Sat, 26 Apr 2014 14:21:44 +0400 +Subject: rbd: use reference counts for image requests + +From: Alex Elder + +commit 0f2d5be792b0466b06797f637cfbb0f64dbb408c upstream. + +Each image request contains a reference count, but to date it has +not actually been used. (I think this was just an oversight.) A +recent report involving rbd failing an assertion shed light on why +and where we need to use these reference counts. + +Every OSD request associated with an object request uses +rbd_osd_req_callback() as its callback function. That function will +call a helper function (dependent on the type of OSD request) that +will set the object request's "done" flag if +appropriate. If that "done" flag is set, the object request is +passed to rbd_obj_request_complete(). + +In rbd_obj_request_complete(), requests are processed in sequential +order. So if an object request completes before one of its +predecessors in the image request, the completion is deferred. +Otherwise, if it's a completing object's "turn" to be completed, it +is passed to rbd_img_obj_end_request(), which records the result of +the operation, accumulates transferred bytes, and so on. Next, the +successor to this request is checked and if it is marked "done", +(deferred) completion processing is performed on that request, and +so on. If the last object request in an image request is completed, +rbd_img_request_complete() is called, which (typically) destroys +the image request. + +There is a race here, however. The instant an object request is +marked "done" it can be provided (by a thread handling completion of +one of its predecessor operations) to rbd_img_obj_end_request(), +which (for the last request) can then lead to the image request +getting torn down. 
And this can happen *before* that object has +itself entered rbd_img_obj_end_request(). As a result, once it +*does* enter that function, the image request (and even the object +request itself) may have been freed and become invalid. + +All that's necessary to avoid this is to properly count references +to the image requests. We tear down an image request's object +requests all at once--only when the entire image request has +completed. So there's no need for an image request to count +references for its object requests. However, we don't want an +image request to go away until the last of its object requests +has passed through rbd_img_obj_callback(). In other words, +we don't want rbd_img_request_complete() to necessarily +result in the image request being destroyed, because it may +get called before we've finished processing on all of its +object requests. + +So the fix is to add a reference to an image request for +each of its object requests. The reference can be viewed +as representing an object request that has not yet finished +its call to rbd_img_obj_callback(). That is emphasized by +getting the reference right after assigning that as the image +object's callback function. The corresponding release of that +reference is done at the end of rbd_img_obj_callback(), which +every image object request passes through exactly once. 
+ +Signed-off-by: Alex Elder +Reviewed-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/rbd.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -1401,6 +1401,13 @@ static void rbd_obj_request_put(struct r + kref_put(&obj_request->kref, rbd_obj_request_destroy); + } + ++static void rbd_img_request_get(struct rbd_img_request *img_request) ++{ ++ dout("%s: img %p (was %d)\n", __func__, img_request, ++ atomic_read(&img_request->kref.refcount)); ++ kref_get(&img_request->kref); ++} ++ + static bool img_request_child_test(struct rbd_img_request *img_request); + static void rbd_parent_request_destroy(struct kref *kref); + static void rbd_img_request_destroy(struct kref *kref); +@@ -2154,6 +2161,7 @@ static void rbd_img_obj_callback(struct + img_request->next_completion = which; + out: + spin_unlock_irq(&img_request->completion_lock); ++ rbd_img_request_put(img_request); + + if (!more) + rbd_img_request_complete(img_request); +@@ -2250,6 +2258,7 @@ static int rbd_img_request_fill(struct r + goto out_partial; + obj_request->osd_req = osd_req; + obj_request->callback = rbd_img_obj_callback; ++ rbd_img_request_get(img_request); + + osd_req_op_extent_init(osd_req, 0, opcode, offset, length, + 0, 0); diff --git a/queue-3.10/series b/queue-3.10/series index 70b2431aa61..c3a1a0e1ae3 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -21,3 +21,6 @@ drm-vmwgfx-fix-incorrect-write-to-read-only-register-v2.patch bluetooth-fix-ssp-acceptor-just-works-confirmation-without-mitm.patch bluetooth-fix-check-for-connection-encryption.patch bluetooth-fix-locking-of-hdev-when-calling-into-smp-code.patch +dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch +rbd-use-reference-counts-for-image-requests.patch +rbd-handle-parent_overlap-on-writes-correctly.patch -- 2.47.3