From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 12 Feb 2016 21:01:40 +0000 (-0800)
Subject: 4.4-stable patches
X-Git-Tag: v4.4.2~42
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b0b1104516d084df3f07baae0d32cd21df437563;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
	ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
	ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
---

diff --git a/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch b/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
new file mode 100644
index 00000000000..85b55f42930
--- /dev/null
+++ b/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
@@ -0,0 +1,38 @@
+From c95a51807b730e4681e2ecbdfd669ca52601959e Mon Sep 17 00:00:00 2001
+From: xuejiufei <xuejiufei@huawei.com>
+Date: Fri, 5 Feb 2016 15:36:47 -0800
+Subject: ocfs2/dlm: clear refmap bit of recovery lock while doing local recovery cleanup
+
+From: xuejiufei <xuejiufei@huawei.com>
+
+commit c95a51807b730e4681e2ecbdfd669ca52601959e upstream.
+
+When the recovery master goes down, dlm_do_local_recovery_cleanup() only
+removes the $RECOVERY lock owned by the dead node, but does not clear the
+refmap bit.  This makes the umount thread fall into an endless loop
+migrating $RECOVERY to the dead node.
+
+Signed-off-by: xuejiufei <xuejiufei@huawei.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -2360,6 +2360,8 @@ static void dlm_do_local_recovery_cleanu
+ 						break;
+ 					}
+ 				}
++				dlm_lockres_clear_refmap_bit(dlm, res,
++						dead_node);
+ 				spin_unlock(&res->spinlock);
+ 				continue;
+ 			}
diff --git a/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch b/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
new file mode 100644
index 00000000000..2260a64b411
--- /dev/null
+++ b/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
@@ -0,0 +1,97 @@
+From bef5502de074b6f6fa647b94b73155d675694420 Mon Sep 17 00:00:00 2001
+From: xuejiufei <xuejiufei@huawei.com>
+Date: Thu, 14 Jan 2016 15:17:38 -0800
+Subject: ocfs2/dlm: ignore cleaning the migration mle that is inuse
+
+From: xuejiufei <xuejiufei@huawei.com>
+
+commit bef5502de074b6f6fa647b94b73155d675694420 upstream.
+
+We have found that the migration source will trigger a BUG because the
+refcount of the mle is already zero before the put, when the target goes
+down during migration.  The situation is as follows:
+
+dlm_migrate_lockres
+  dlm_add_migration_mle
+  dlm_mark_lockres_migrating
+  dlm_get_mle_inuse
+  <<<<<< Now the refcount of the mle is 2.
+  dlm_send_one_lockres and wait for the target to become the
+  new master.
+  <<<<<< o2hb detect the target down and clean the migration
+  mle. Now the refcount is 1.
+
+dlm_migrate_lockres is woken and puts the mle twice when it finds that the
+target has gone down, which triggers the BUG with the following message:
+
+  "ERROR: bad mle: ".
+
+Signed-off-by: Jiufei Xue <xuejiufei@huawei.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmmaster.c |   26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmmaster.c
++++ b/fs/ocfs2/dlm/dlmmaster.c
+@@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dl
+ 	spin_lock(&dlm->master_lock);
+ 	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
+ 				    namelen, target, dlm->node_num);
++	/* get an extra reference on the mle.
++	 * otherwise the assert_master from the new
++	 * master will destroy this.
++	 */
++	dlm_get_mle_inuse(mle);
+ 	spin_unlock(&dlm->master_lock);
+ 	spin_unlock(&dlm->spinlock);
+ 
+@@ -2554,6 +2559,7 @@ fail:
+ 		if (mle_added) {
+ 			dlm_mle_detach_hb_events(dlm, mle);
+ 			dlm_put_mle(mle);
++			dlm_put_mle_inuse(mle);
+ 		} else if (mle) {
+ 			kmem_cache_free(dlm_mle_cache, mle);
+ 			mle = NULL;
+@@ -2571,17 +2577,6 @@ fail:
+ 	 * ensure that all assert_master work is flushed. */
+ 	flush_workqueue(dlm->dlm_worker);
+ 
+-	/* get an extra reference on the mle.
+-	 * otherwise the assert_master from the new
+-	 * master will destroy this.
+-	 * also, make sure that all callers of dlm_get_mle
+-	 * take both dlm->spinlock and dlm->master_lock */
+-	spin_lock(&dlm->spinlock);
+-	spin_lock(&dlm->master_lock);
+-	dlm_get_mle_inuse(mle);
+-	spin_unlock(&dlm->master_lock);
+-	spin_unlock(&dlm->spinlock);
+-
+ 	/* notify new node and send all lock state */
+ 	/* call send_one_lockres with migration flag.
+ 	 * this serves as notice to the target node that a
+@@ -3312,6 +3307,15 @@ top:
+ 			    mle->new_master != dead_node)
+ 				continue;
+ 
++			if (mle->new_master == dead_node && mle->inuse) {
++				mlog(ML_NOTICE, "%s: target %u died during "
++						"migration from %u, the MLE is "
++						"still keep used, ignore it!\n",
++						dlm->name, dead_node,
++						mle->master);
++				continue;
++			}
++
+ 			/* If we have reached this point, this mle needs to be
+ 			 * removed from the list and freed. */
+ 			dlm_clean_migration_mle(dlm, mle);
diff --git a/queue-4.4/series b/queue-4.4/series
index f14aa8cd063..9748f645551 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -6,3 +6,5 @@ hid-usbhid-fix-recursive-deadlock.patch
 base-platform-fix-platform-drivers-with-no-probe-callback.patch
 block-fix-bio-splitting-on-max-sectors.patch
 alsa-hda-implement-loopback-control-switch-for-realtek-and-other-codecs.patch
+ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
+ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch