4.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 12 Feb 2016 21:01:40 +0000 (13:01 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 12 Feb 2016 21:01:40 +0000 (13:01 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 12 Feb 2016 21:01:40 +0000 (13:01 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 12 Feb 2016 21:01:40 +0000 (13:01 -0800)
diff --git a/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch b/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch

new file mode 100644 (file)

index 0000000..85b55f4
--- /dev/null
+++ b/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
@@ -0,0 +1,38 @@
+From c95a51807b730e4681e2ecbdfd669ca52601959e Mon Sep 17 00:00:00 2001
+From: xuejiufei <xuejiufei@huawei.com>
+Date: Fri, 5 Feb 2016 15:36:47 -0800
+Subject: ocfs2/dlm: clear refmap bit of recovery lock while doing local recovery cleanup
+
+From: xuejiufei <xuejiufei@huawei.com>
+
+commit c95a51807b730e4681e2ecbdfd669ca52601959e upstream.
+
+When the recovery master goes down, dlm_do_local_recovery_cleanup() only
+removes the $RECOVERY lock owned by the dead node, but does not clear the
+refmap bit.  This makes the umount thread fall into an endless loop
+migrating $RECOVERY to the dead node.
+
+Signed-off-by: xuejiufei <xuejiufei@huawei.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -2360,6 +2360,8 @@ static void dlm_do_local_recovery_cleanu
+                                               break;
+                                       }
+                               }
++                              dlm_lockres_clear_refmap_bit(dlm, res,
++                                              dead_node);
+                               spin_unlock(&res->spinlock);
+                               continue;
+                       }
diff --git a/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch b/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch

new file mode 100644 (file)

index 0000000..2260a64
--- /dev/null
+++ b/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
@@ -0,0 +1,97 @@
+From bef5502de074b6f6fa647b94b73155d675694420 Mon Sep 17 00:00:00 2001
+From: xuejiufei <xuejiufei@huawei.com>
+Date: Thu, 14 Jan 2016 15:17:38 -0800
+Subject: ocfs2/dlm: ignore cleaning the migration mle that is inuse
+
+From: xuejiufei <xuejiufei@huawei.com>
+
+commit bef5502de074b6f6fa647b94b73155d675694420 upstream.
+
+We have found that migration source will trigger a BUG that the refcount
+of mle is already zero before put when the target is down during
+migration.  The situation is as follows:
+
+dlm_migrate_lockres
+  dlm_add_migration_mle
+  dlm_mark_lockres_migrating
+  dlm_get_mle_inuse
+  <<<<<< Now the refcount of the mle is 2.
+  dlm_send_one_lockres and wait for the target to become the
+  new master.
+  <<<<<< o2hb detects that the target is down and cleans the migration
+  mle. Now the refcount is 1.
+
+dlm_migrate_lockres is woken and, on finding that the target went down,
+puts the mle twice, which triggers the BUG with the following message:
+
+  "ERROR: bad mle: ".
+
+Signed-off-by: Jiufei Xue <xuejiufei@huawei.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmmaster.c |   26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmmaster.c
++++ b/fs/ocfs2/dlm/dlmmaster.c
+@@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dl
+       spin_lock(&dlm->master_lock);
+       ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
+                                   namelen, target, dlm->node_num);
++      /* get an extra reference on the mle.
++       * otherwise the assert_master from the new
++       * master will destroy this.
++       */
++      dlm_get_mle_inuse(mle);
+       spin_unlock(&dlm->master_lock);
+       spin_unlock(&dlm->spinlock);
+ 
+@@ -2554,6 +2559,7 @@ fail:
+               if (mle_added) {
+                       dlm_mle_detach_hb_events(dlm, mle);
+                       dlm_put_mle(mle);
++                      dlm_put_mle_inuse(mle);
+               } else if (mle) {
+                       kmem_cache_free(dlm_mle_cache, mle);
+                       mle = NULL;
+@@ -2571,17 +2577,6 @@ fail:
+        * ensure that all assert_master work is flushed. */
+       flush_workqueue(dlm->dlm_worker);
+ 
+-      /* get an extra reference on the mle.
+-       * otherwise the assert_master from the new
+-       * master will destroy this.
+-       * also, make sure that all callers of dlm_get_mle
+-       * take both dlm->spinlock and dlm->master_lock */
+-      spin_lock(&dlm->spinlock);
+-      spin_lock(&dlm->master_lock);
+-      dlm_get_mle_inuse(mle);
+-      spin_unlock(&dlm->master_lock);
+-      spin_unlock(&dlm->spinlock);
+-
+       /* notify new node and send all lock state */
+       /* call send_one_lockres with migration flag.
+        * this serves as notice to the target node that a
+@@ -3312,6 +3307,15 @@ top:
+                           mle->new_master != dead_node)
+                               continue;
+ 
++                      if (mle->new_master == dead_node && mle->inuse) {
++                              mlog(ML_NOTICE, "%s: target %u died during "
++                                              "migration from %u, the MLE is "
++                                              "still keep used, ignore it!\n",
++                                              dlm->name, dead_node,
++                                              mle->master);
++                              continue;
++                      }
++
+                       /* If we have reached this point, this mle needs to be
+                        * removed from the list and freed. */
+                       dlm_clean_migration_mle(dlm, mle);
diff --git a/queue-4.4/series b/queue-4.4/series

index f14aa8cd0630864c71fb08da81ea57d49635fca9..9748f6455515b3ce742f9cc0f6e46036676f4e64 100644 (file)
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -6,3 +6,5 @@ hid-usbhid-fix-recursive-deadlock.patch
  base-platform-fix-platform-drivers-with-no-probe-callback.patch
  block-fix-bio-splitting-on-max-sectors.patch
  alsa-hda-implement-loopback-control-switch-for-realtek-and-other-codecs.patch
+ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
+ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 12 Feb 2016 21:01:40 +0000 (13:01 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 12 Feb 2016 21:01:40 +0000 (13:01 -0800)
queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/series		patch \| blob \| blame \| history