From: Greg Kroah-Hartman Date: Fri, 12 Feb 2016 21:01:40 +0000 (-0800) Subject: 4.4-stable patches X-Git-Tag: v4.4.2~42 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b0b1104516d084df3f07baae0d32cd21df437563;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch --- diff --git a/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch b/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch new file mode 100644 index 00000000000..85b55f42930 --- /dev/null +++ b/queue-4.4/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch @@ -0,0 +1,38 @@ +From c95a51807b730e4681e2ecbdfd669ca52601959e Mon Sep 17 00:00:00 2001 +From: xuejiufei +Date: Fri, 5 Feb 2016 15:36:47 -0800 +Subject: ocfs2/dlm: clear refmap bit of recovery lock while doing local recovery cleanup + +From: xuejiufei + +commit c95a51807b730e4681e2ecbdfd669ca52601959e upstream. + +When the recovery master goes down, dlm_do_local_recovery_cleanup() only removes +the $RECOVERY lock owned by the dead node, but does not clear the refmap bit. +This makes the umount thread fall into an endless loop migrating $RECOVERY +to the dead node. 
+ +Signed-off-by: xuejiufei +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/dlm/dlmrecovery.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ocfs2/dlm/dlmrecovery.c ++++ b/fs/ocfs2/dlm/dlmrecovery.c +@@ -2360,6 +2360,8 @@ static void dlm_do_local_recovery_cleanu + break; + } + } ++ dlm_lockres_clear_refmap_bit(dlm, res, ++ dead_node); + spin_unlock(&res->spinlock); + continue; + } diff --git a/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch b/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch new file mode 100644 index 00000000000..2260a64b411 --- /dev/null +++ b/queue-4.4/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch @@ -0,0 +1,97 @@ +From bef5502de074b6f6fa647b94b73155d675694420 Mon Sep 17 00:00:00 2001 +From: xuejiufei +Date: Thu, 14 Jan 2016 15:17:38 -0800 +Subject: ocfs2/dlm: ignore cleaning the migration mle that is inuse + +From: xuejiufei + +commit bef5502de074b6f6fa647b94b73155d675694420 upstream. + +We have found that the migration source will trigger a BUG because the refcount +of the mle is already zero before the put when the target goes down during +migration. The situation is as follows: + +dlm_migrate_lockres + dlm_add_migration_mle + dlm_mark_lockres_migrating + dlm_get_mle_inuse + <<<<<< Now the refcount of the mle is 2. + dlm_send_one_lockres and wait for the target to become the + new master. + <<<<<< o2hb detects the target down and cleans the migration + mle. Now the refcount is 1. + +dlm_migrate_lockres is woken, and puts the mle twice when it finds the target +has gone down, which triggers the BUG with the following message: + + "ERROR: bad mle: ". 
+ +Signed-off-by: Jiufei Xue +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/dlm/dlmmaster.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +--- a/fs/ocfs2/dlm/dlmmaster.c ++++ b/fs/ocfs2/dlm/dlmmaster.c +@@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dl + spin_lock(&dlm->master_lock); + ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, + namelen, target, dlm->node_num); ++ /* get an extra reference on the mle. ++ * otherwise the assert_master from the new ++ * master will destroy this. ++ */ ++ dlm_get_mle_inuse(mle); + spin_unlock(&dlm->master_lock); + spin_unlock(&dlm->spinlock); + +@@ -2554,6 +2559,7 @@ fail: + if (mle_added) { + dlm_mle_detach_hb_events(dlm, mle); + dlm_put_mle(mle); ++ dlm_put_mle_inuse(mle); + } else if (mle) { + kmem_cache_free(dlm_mle_cache, mle); + mle = NULL; +@@ -2571,17 +2577,6 @@ fail: + * ensure that all assert_master work is flushed. */ + flush_workqueue(dlm->dlm_worker); + +- /* get an extra reference on the mle. +- * otherwise the assert_master from the new +- * master will destroy this. +- * also, make sure that all callers of dlm_get_mle +- * take both dlm->spinlock and dlm->master_lock */ +- spin_lock(&dlm->spinlock); +- spin_lock(&dlm->master_lock); +- dlm_get_mle_inuse(mle); +- spin_unlock(&dlm->master_lock); +- spin_unlock(&dlm->spinlock); +- + /* notify new node and send all lock state */ + /* call send_one_lockres with migration flag. 
+ * this serves as notice to the target node that a +@@ -3312,6 +3307,15 @@ top: + mle->new_master != dead_node) + continue; + ++ if (mle->new_master == dead_node && mle->inuse) { ++ mlog(ML_NOTICE, "%s: target %u died during " ++ "migration from %u, the MLE is " ++ "still keep used, ignore it!\n", ++ dlm->name, dead_node, ++ mle->master); ++ continue; ++ } ++ + /* If we have reached this point, this mle needs to be + * removed from the list and freed. */ + dlm_clean_migration_mle(dlm, mle); diff --git a/queue-4.4/series b/queue-4.4/series index f14aa8cd063..9748f645551 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -6,3 +6,5 @@ hid-usbhid-fix-recursive-deadlock.patch base-platform-fix-platform-drivers-with-no-probe-callback.patch block-fix-bio-splitting-on-max-sectors.patch alsa-hda-implement-loopback-control-switch-for-realtek-and-other-codecs.patch +ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch +ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch