From: Greg Kroah-Hartman
Date: Mon, 7 Jul 2014 19:59:39 +0000 (-0700)
Subject: 3.15-stable patches
X-Git-Tag: v3.4.98~39
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=88d1f1a293fdf43bdabb89202c1358fa70305364;p=thirdparty%2Fkernel%2Fstable-queue.git

3.15-stable patches

added patches:
      dm-era-check-for-a-non-null-metadata-object-before-closing-it.patch
      dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch
      rbd-handle-parent_overlap-on-writes-correctly.patch
      rbd-use-reference-counts-for-image-requests.patch
---

diff --git a/queue-3.15/dm-era-check-for-a-non-null-metadata-object-before-closing-it.patch b/queue-3.15/dm-era-check-for-a-non-null-metadata-object-before-closing-it.patch
new file mode 100644
index 00000000000..8d32717d983
--- /dev/null
+++ b/queue-3.15/dm-era-check-for-a-non-null-metadata-object-before-closing-it.patch
@@ -0,0 +1,33 @@
+From 989f26f5ad308f40a95f280bf9cd75e558d4f18d Mon Sep 17 00:00:00 2001
+From: Joe Thornber
+Date: Tue, 11 Mar 2014 16:46:25 +0000
+Subject: dm era: check for a non-NULL metadata object before closing it
+
+From: Joe Thornber
+
+commit 989f26f5ad308f40a95f280bf9cd75e558d4f18d upstream.
+
+era_ctr() may call era_destroy() before era->md is initialized, so
+era_destroy() must only close the metadata object if it is not NULL.
+
+Signed-off-by: Joe Thornber
+Signed-off-by: Naohiro Aota
+Signed-off-by: Mike Snitzer
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/md/dm-era-target.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -1391,7 +1391,8 @@ static int era_is_congested(struct dm_ta
+ 
+ static void era_destroy(struct era *era)
+ {
+-	metadata_close(era->md);
++	if (era->md)
++		metadata_close(era->md);
+ 
+ 	if (era->wq)
+ 		destroy_workqueue(era->wq);
diff --git a/queue-3.15/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch b/queue-3.15/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch
new file mode 100644
index 00000000000..47a27b4a896
--- /dev/null
+++ b/queue-3.15/dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch
@@ -0,0 +1,42 @@
+From 09869de57ed2728ae3c619803932a86cb0e2c4f8 Mon Sep 17 00:00:00 2001
+From: Lukas Czerner
+Date: Wed, 11 Jun 2014 12:28:43 -0400
+Subject: dm thin: update discard_granularity to reflect the thin-pool blocksize
+
+From: Lukas Czerner
+
+commit 09869de57ed2728ae3c619803932a86cb0e2c4f8 upstream.
+
+DM thinp already checks whether the discard_granularity of the data
+device is a factor of the thin-pool block size. But when using the
+dm-thin-pool's discard passdown support, DM thinp was not selecting the
+max of the underlying data device's discard_granularity and the
+thin-pool's block size.
+
+Update set_discard_limits() to set discard_granularity to the max of
+these values. This enables blkdev_issue_discard() to properly align the
+discards that are sent to the DM thin device on a full block boundary.
+As such each discard will now cover an entire DM thin-pool block and the
+block will be reclaimed.
+
+Reported-by: Zdenek Kabelac
+Signed-off-by: Lukas Czerner
+Signed-off-by: Mike Snitzer
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/md/dm-thin.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -3068,7 +3068,8 @@ static void set_discard_limits(struct po
+ 	 */
+ 	if (pt->adjusted_pf.discard_passdown) {
+ 		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
+-		limits->discard_granularity = data_limits->discard_granularity;
++		limits->discard_granularity = max(data_limits->discard_granularity,
++						  pool->sectors_per_block << SECTOR_SHIFT);
+ 	} else
+ 		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
+ }
diff --git a/queue-3.15/rbd-handle-parent_overlap-on-writes-correctly.patch b/queue-3.15/rbd-handle-parent_overlap-on-writes-correctly.patch
new file mode 100644
index 00000000000..a095b92a6ab
--- /dev/null
+++ b/queue-3.15/rbd-handle-parent_overlap-on-writes-correctly.patch
@@ -0,0 +1,70 @@
+From 9638556a276125553549fdfe349c464481ec2f39 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov
+Date: Tue, 10 Jun 2014 13:53:29 +0400
+Subject: rbd: handle parent_overlap on writes correctly
+
+From: Ilya Dryomov
+
+commit 9638556a276125553549fdfe349c464481ec2f39 upstream.
+
+The following check in rbd_img_obj_request_submit()
+
+    rbd_dev->parent_overlap <= obj_request->img_offset
+
+allows the fall through to the non-layered write case even if both
+parent_overlap and obj_request->img_offset belong to the same RADOS
+object. This leads to data corruption, because the area to the left of
+parent_overlap ends up unconditionally zero-filled instead of being
+populated with parent data. Suppose we want to write 1M to offset 6M
+of image bar, which is a clone of foo@snap; object_size is 4M,
+parent_overlap is 5M:
+
+    rbd_data.<id>.0000000000000001
+     ---------------------|----------------------|------------
+    | should be copyup'ed | should be zeroed out | write ...
+     ---------------------|----------------------|------------
+    4M                    5M                     6M
+            parent_overlap            obj_request->img_offset
+
+4..5M should be copyup'ed from foo, yet it is zero-filled, just like
+5..6M is.
+
+Given that the only striping mode the kernel client currently supports
+is chunking (i.e. stripe_unit == object_size, stripe_count == 1), round
+parent_overlap up to the next object boundary for the purposes of the
+overlap check.
+
+Signed-off-by: Ilya Dryomov
+Reviewed-by: Josh Durgin
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/block/rbd.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -1366,6 +1366,14 @@ static bool obj_request_exists_test(stru
+ 	return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
+ }
+ 
++static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request)
++{
++	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
++
++	return obj_request->img_offset <
++	    round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header));
++}
++
+ static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
+ {
+ 	dout("%s: obj %p (was %d)\n", __func__, obj_request,
+@@ -2683,7 +2691,7 @@ static int rbd_img_obj_request_submit(st
+ 	 */
+ 	if (!img_request_write_test(img_request) ||
+ 	    !img_request_layered_test(img_request) ||
+-	    rbd_dev->parent_overlap <= obj_request->img_offset ||
++	    !obj_request_overlaps_parent(obj_request) ||
+ 	    ((known = obj_request_known_test(obj_request)) &&
+ 	     obj_request_exists_test(obj_request))) {
+ 
diff --git a/queue-3.15/rbd-use-reference-counts-for-image-requests.patch b/queue-3.15/rbd-use-reference-counts-for-image-requests.patch
new file mode 100644
index 00000000000..a8a449cb3be
--- /dev/null
+++ b/queue-3.15/rbd-use-reference-counts-for-image-requests.patch
@@ -0,0 +1,103 @@
+From 0f2d5be792b0466b06797f637cfbb0f64dbb408c Mon Sep 17 00:00:00 2001
+From: Alex Elder
+Date: Sat, 26 Apr 2014 14:21:44 +0400
+Subject: rbd: use reference counts for image requests
+
+From: Alex Elder
+
+commit 0f2d5be792b0466b06797f637cfbb0f64dbb408c upstream.
+
+Each image request contains a reference count, but to date it has
+not actually been used. (I think this was just an oversight.) A
+recent report involving rbd failing an assertion shed light on why
+and where we need to use these reference counts.
+
+Every OSD request associated with an object request uses
+rbd_osd_req_callback() as its callback function. That function will
+call a helper function (dependent on the type of OSD request) that
+will set the object request's "done" flag if appropriate. If that
+"done" flag is set, the object request is passed to
+rbd_obj_request_complete().
+
+In rbd_obj_request_complete(), requests are processed in sequential
+order. So if an object request completes before one of its
+predecessors in the image request, the completion is deferred.
+Otherwise, if it's a completing object's "turn" to be completed, it
+is passed to rbd_img_obj_end_request(), which records the result of
+the operation, accumulates transferred bytes, and so on. Next, the
+successor to this request is checked and if it is marked "done",
+(deferred) completion processing is performed on that request, and
+so on. If the last object request in an image request is completed,
+rbd_img_request_complete() is called, which (typically) destroys
+the image request.
+
+There is a race here, however. The instant an object request is
+marked "done" it can be provided (by a thread handling completion of
+one of its predecessor operations) to rbd_img_obj_end_request(),
+which (for the last request) can then lead to the image request
+getting torn down. And this can happen *before* that object has
+itself entered rbd_img_obj_end_request(). As a result, once it
+*does* enter that function, the image request (and even the object
+request itself) may have been freed and become invalid.
+
+All that's necessary to avoid this is to properly count references
+to the image requests. We tear down an image request's object
+requests all at once--only when the entire image request has
+completed. So there's no need for an image request to count
+references for its object requests. However, we don't want an
+image request to go away until the last of its object requests
+has passed through rbd_img_obj_callback(). In other words,
+we don't want rbd_img_request_complete() to necessarily
+result in the image request being destroyed, because it may
+get called before we've finished processing on all of its
+object requests.
+
+So the fix is to add a reference to an image request for each of
+its object requests. The reference can be viewed as representing
+an object request that has not yet finished its call to
+rbd_img_obj_callback(). That is emphasized by taking the reference
+right after assigning rbd_img_obj_callback() as the object
+request's callback function. The corresponding release of that
+reference is done at the end of rbd_img_obj_callback(), which
+every image object request passes through exactly once.
+
+Signed-off-by: Alex Elder
+Reviewed-by: Ilya Dryomov
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/block/rbd.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -1382,6 +1382,13 @@ static void rbd_obj_request_put(struct r
+ 	kref_put(&obj_request->kref, rbd_obj_request_destroy);
+ }
+ 
++static void rbd_img_request_get(struct rbd_img_request *img_request)
++{
++	dout("%s: img %p (was %d)\n", __func__, img_request,
++	     atomic_read(&img_request->kref.refcount));
++	kref_get(&img_request->kref);
++}
++
+ static bool img_request_child_test(struct rbd_img_request *img_request);
+ static void rbd_parent_request_destroy(struct kref *kref);
+ static void rbd_img_request_destroy(struct kref *kref);
+@@ -2142,6 +2149,7 @@ static void rbd_img_obj_callback(struct
+ 	img_request->next_completion = which;
+ out:
+ 	spin_unlock_irq(&img_request->completion_lock);
++	rbd_img_request_put(img_request);
+ 
+ 	if (!more)
+ 		rbd_img_request_complete(img_request);
+@@ -2242,6 +2250,7 @@ static int rbd_img_request_fill(struct r
+ 		goto out_unwind;
+ 	obj_request->osd_req = osd_req;
+ 	obj_request->callback = rbd_img_obj_callback;
++	rbd_img_request_get(img_request);
+ 
+ 	if (write_request) {
+ 		osd_req_op_alloc_hint_init(osd_req, which,
diff --git a/queue-3.15/series b/queue-3.15/series
index bf957e4d103..480b606ff69 100644
--- a/queue-3.15/series
+++ b/queue-3.15/series
@@ -60,3 +60,7 @@ bluetooth-fix-setting-correct-authentication-information-for-smp-stk.patch
 bluetooth-fix-deadlock-in-l2cap_conn_del.patch
 bluetooth-fix-locking-of-hdev-when-calling-into-smp-code.patch
 bluetooth-allow-change-security-level-on-att_cid-in-slave-role.patch
+dm-era-check-for-a-non-null-metadata-object-before-closing-it.patch
+dm-thin-update-discard_granularity-to-reflect-the-thin-pool-blocksize.patch
+rbd-use-reference-counts-for-image-requests.patch
+rbd-handle-parent_overlap-on-writes-correctly.patch
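The rbd reference-count fix above is an instance of a general pattern: every
outstanding sub-request pins its parent request, so completing the last
sibling cannot free the parent while another sibling is still inside its
completion callback. Below is a minimal userspace C sketch of that pattern;
all names are hypothetical analogues invented for illustration (the kernel
code uses kref_get()/kref_put() on img_request->kref, not these helpers).

/*
 * Illustrative sketch only: a userspace analogue of the kref pattern
 * the rbd patch applies. Not kernel code; all names are hypothetical.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct img_request {
	atomic_int refcount;	/* creator's ref + one per object request */
};

static struct img_request *img_request_create(void)
{
	struct img_request *img = malloc(sizeof(*img));

	atomic_init(&img->refcount, 1);	/* the creator's reference */
	return img;
}

static void img_request_get(struct img_request *img)
{
	atomic_fetch_add(&img->refcount, 1);
}

static void img_request_put(struct img_request *img)
{
	/* atomic_fetch_sub() returns the old value: 1 means we were last. */
	if (atomic_fetch_sub(&img->refcount, 1) == 1) {
		printf("last reference dropped, destroying %p\n", (void *)img);
		free(img);
	}
}

int main(void)
{
	struct img_request *img = img_request_create();

	/* Like the fix: take a ref as each object request gets its callback. */
	img_request_get(img);	/* object request 0 */
	img_request_get(img);	/* object request 1 */

	/*
	 * Drop it only at the end of each object's completion callback,
	 * so the parent request cannot vanish mid-callback.
	 */
	img_request_put(img);	/* object request 0 finished its callback */
	img_request_put(img);	/* object request 1 finished its callback */

	img_request_put(img);	/* creator's reference; frees the request */
	return 0;
}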