From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 22 Nov 2017 08:40:51 +0000 (+0100)
Subject: 4.9-stable patches
X-Git-Tag: v3.18.84~12
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f3174976c4d8178fe092046da561a5505a8de821;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	dmaengine-dmatest-warn-user-when-dma-test-times-out.patch
	ipmi-fix-unsigned-long-underflow.patch
	mm-page_alloc.c-broken-deferred-calculation.patch
	ocfs2-fix-cluster-hang-after-a-node-dies.patch
	ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
---

diff --git a/queue-4.9/dmaengine-dmatest-warn-user-when-dma-test-times-out.patch b/queue-4.9/dmaengine-dmatest-warn-user-when-dma-test-times-out.patch
new file mode 100644
index 00000000000..e12c24aec6f
--- /dev/null
+++ b/queue-4.9/dmaengine-dmatest-warn-user-when-dma-test-times-out.patch
@@ -0,0 +1,37 @@
+From a9df21e34b422f79d9a9fa5c3eff8c2a53491be6 Mon Sep 17 00:00:00 2001
+From: Adam Wallis <awallis@codeaurora.org>
+Date: Thu, 2 Nov 2017 08:53:30 -0400
+Subject: dmaengine: dmatest: warn user when dma test times out
+
+From: Adam Wallis <awallis@codeaurora.org>
+
+commit a9df21e34b422f79d9a9fa5c3eff8c2a53491be6 upstream.
+
+Commit adfa543e7314 ("dmatest: don't use set_freezable_with_signal()")
+introduced a bug (that is in fact documented by the patch commit text)
+that leaves behind a dangling pointer. Since the done_wait structure is
+allocated on the stack, future invocations to the DMATEST can produce
+undesirable results (e.g., corrupted spinlocks). Ideally, this would be
+cleaned up in the thread handler, but at the very least, the kernel
+is left in a very precarious scenario that can lead to some long debug
+sessions when the crash comes later.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=197605
+Signed-off-by: Adam Wallis <awallis@codeaurora.org>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/dmatest.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/dma/dmatest.c
++++ b/drivers/dma/dmatest.c
+@@ -666,6 +666,7 @@ static int dmatest_func(void *data)
+ 			 * free it this time?" dancing.  For now, just
+ 			 * leave it dangling.
+ 			 */
++			WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
+ 			dmaengine_unmap_put(um);
+ 			result("test timed out", total_tests, src_off, dst_off,
+ 			       len, 0);
diff --git a/queue-4.9/ipmi-fix-unsigned-long-underflow.patch b/queue-4.9/ipmi-fix-unsigned-long-underflow.patch
new file mode 100644
index 00000000000..5fd74bae84d
--- /dev/null
+++ b/queue-4.9/ipmi-fix-unsigned-long-underflow.patch
@@ -0,0 +1,63 @@
+From 392a17b10ec4320d3c0e96e2a23ebaad1123b989 Mon Sep 17 00:00:00 2001
+From: Corey Minyard <cminyard@mvista.com>
+Date: Sat, 29 Jul 2017 21:14:55 -0500
+Subject: ipmi: fix unsigned long underflow
+
+From: Corey Minyard <cminyard@mvista.com>
+
+commit 392a17b10ec4320d3c0e96e2a23ebaad1123b989 upstream.
+
+When I set the timeout to a specific value such as 500ms, the timeout
+event will not happen in time due to the overflow in function
+check_msg_timeout:
+...
+	ent->timeout -= timeout_period;
+	if (ent->timeout > 0)
+		return;
+...
+
+The type of timeout_period is long, but ent->timeout is unsigned long.
+This patch makes the type consistent.
+
+Reported-by: Weilong Chen <chenweilong@huawei.com>
+Signed-off-by: Corey Minyard <cminyard@mvista.com>
+Tested-by: Weilong Chen <chenweilong@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/char/ipmi/ipmi_msghandler.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/ipmi/ipmi_msghandler.c
++++ b/drivers/char/ipmi/ipmi_msghandler.c
+@@ -4029,7 +4029,8 @@ smi_from_recv_msg(ipmi_smi_t intf, struc
+ }
+ 
+ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent,
+-			      struct list_head *timeouts, long timeout_period,
++			      struct list_head *timeouts,
++			      unsigned long timeout_period,
+ 			      int slot, unsigned long *flags,
+ 			      unsigned int *waiting_msgs)
+ {
+@@ -4042,8 +4043,8 @@ static void check_msg_timeout(ipmi_smi_t
+ 	if (!ent->inuse)
+ 		return;
+ 
+-	ent->timeout -= timeout_period;
+-	if (ent->timeout > 0) {
++	if (timeout_period < ent->timeout) {
++		ent->timeout -= timeout_period;
+ 		(*waiting_msgs)++;
+ 		return;
+ 	}
+@@ -4109,7 +4110,8 @@ static void check_msg_timeout(ipmi_smi_t
+ 	}
+ }
+ 
+-static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, long timeout_period)
++static unsigned int ipmi_timeout_handler(ipmi_smi_t intf,
++					 unsigned long timeout_period)
+ {
+ 	struct list_head     timeouts;
+ 	struct ipmi_recv_msg *msg, *msg2;
diff --git a/queue-4.9/mm-page_alloc.c-broken-deferred-calculation.patch b/queue-4.9/mm-page_alloc.c-broken-deferred-calculation.patch
new file mode 100644
index 00000000000..0adb4579343
--- /dev/null
+++ b/queue-4.9/mm-page_alloc.c-broken-deferred-calculation.patch
@@ -0,0 +1,106 @@
+From d135e5750205a21a212a19dbb05aeb339e2cbea7 Mon Sep 17 00:00:00 2001
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+Date: Wed, 15 Nov 2017 17:38:41 -0800
+Subject: mm/page_alloc.c: broken deferred calculation
+
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+
+commit d135e5750205a21a212a19dbb05aeb339e2cbea7 upstream.
+
+In reset_deferred_meminit() we determine number of pages that must not
+be deferred.  We initialize pages for at least 2G of memory, but also
+pages for reserved memory in this node.
+
+The reserved memory is determined in this function:
+memblock_reserved_memory_within(), which operates over physical
+addresses, and returns size in bytes.  However, reset_deferred_meminit()
+assumes that this function operates with pfns, and returns page
+count.
+
+The result is that in the best case machine boots slower than expected
+due to initializing more pages than needed in single thread, and in the
+worst case panics because fewer than needed pages are initialized early.
+
+Link: http://lkml.kernel.org/r/20171021011707.15191-1-pasha.tatashin@oracle.com
+Fixes: 864b9a393dcb ("mm: consider memblock reservations for deferred memory initialization sizing")
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mmzone.h |    3 ++-
+ mm/page_alloc.c        |   27 ++++++++++++++++++---------
+ 2 files changed, 20 insertions(+), 10 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -672,7 +672,8 @@ typedef struct pglist_data {
+ 	 * is the first PFN that needs to be initialised.
+ 	 */
+ 	unsigned long first_deferred_pfn;
+-	unsigned long static_init_size;
++	/* Number of non-deferred pages */
++	unsigned long static_init_pgcnt;
+ #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+ 
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -284,28 +284,37 @@ EXPORT_SYMBOL(nr_online_nodes);
+ int page_group_by_mobility_disabled __read_mostly;
+ 
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
++
++/*
+ * Determine how many pages need to be initialized during early boot
++ * (non-deferred initialization).
++ * The value of first_deferred_pfn will be set later, once non-deferred pages
++ * are initialized, but for now set it ULONG_MAX.
++ */
+ static inline void reset_deferred_meminit(pg_data_t *pgdat)
+ {
+-	unsigned long max_initialise;
+-	unsigned long reserved_lowmem;
++	phys_addr_t start_addr, end_addr;
++	unsigned long max_pgcnt;
++	unsigned long reserved;
+ 
+ 	/*
+ 	 * Initialise at least 2G of a node but also take into account that
+ 	 * two large system hashes that can take up 1GB for 0.25TB/node.
+ 	 */
+-	max_initialise = max(2UL << (30 - PAGE_SHIFT),
+-		(pgdat->node_spanned_pages >> 8));
++	max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
++			(pgdat->node_spanned_pages >> 8));
+ 
+ 	/*
+ 	 * Compensate the all the memblock reservations (e.g. crash kernel)
+ 	 * from the initial estimation to make sure we will initialize enough
+ 	 * memory to boot.
+ 	 */
+-	reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+-			pgdat->node_start_pfn + max_initialise);
+-	max_initialise += reserved_lowmem;
++	start_addr = PFN_PHYS(pgdat->node_start_pfn);
++	end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
++	reserved = memblock_reserved_memory_within(start_addr, end_addr);
++	max_pgcnt += PHYS_PFN(reserved);
+ 
+-	pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
++	pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
+ 	pgdat->first_deferred_pfn = ULONG_MAX;
+ }
+ 
+@@ -332,7 +341,7 @@ static inline bool update_defer_init(pg_
+ 	if (zone_end < pgdat_end_pfn(pgdat))
+ 		return true;
+ 	(*nr_initialised)++;
+-	if ((*nr_initialised > pgdat->static_init_size) &&
++	if ((*nr_initialised > pgdat->static_init_pgcnt) &&
+ 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+ 		pgdat->first_deferred_pfn = pfn;
+ 		return false;
diff --git a/queue-4.9/ocfs2-fix-cluster-hang-after-a-node-dies.patch b/queue-4.9/ocfs2-fix-cluster-hang-after-a-node-dies.patch
new file mode 100644
index 00000000000..fcffd173edd
--- /dev/null
+++ b/queue-4.9/ocfs2-fix-cluster-hang-after-a-node-dies.patch
@@ -0,0 +1,54 @@
+From 1c01967116a678fed8e2c68a6ab82abc8effeddc Mon Sep 17 00:00:00 2001
+From: Changwei Ge <ge.changwei@h3c.com>
+Date: Wed, 15 Nov 2017 17:31:33 -0800
+Subject: ocfs2: fix cluster hang after a node dies
+
+From: Changwei Ge <ge.changwei@h3c.com>
+
+commit 1c01967116a678fed8e2c68a6ab82abc8effeddc upstream.
+
+When a node dies, other live nodes have to choose a new master for an
+existing lock resource mastered by the dead node.
+
+As for ocfs2/dlm implementation, this is done by function -
+dlm_move_lockres_to_recovery_list which marks those lock resources as
+DLM_LOCK_RES_RECOVERING and manages them via a list from which DLM
+changes lock resource's master later.
+
+So without invoking dlm_move_lockres_to_recovery_list, no master will be
+chosen after dlm recovery accomplishment since no lock resource can be
+found through ::resource list.
+
+What's worse is that if DLM_LOCK_RES_RECOVERING is not marked for lock
+resources mastered by a dead node, it will break up synchronization among
+nodes.
+
+So invoke dlm_move_lockres_to_recovery_list again.
+
+Fixes: ee8f7fcbe638 ("ocfs2/dlm: continue to purge recovery lockres when recovery master goes down")
+Link: http://lkml.kernel.org/r/63ADC13FD55D6546B7DECE290D39E373CED6E0F9@H3CMLB14-EX.srv.huawei-3com.com
+Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
+Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+Cc: Mark Fasheh <mfasheh@versity.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Joseph Qi <jiangqi903@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -2419,6 +2419,7 @@ static void dlm_do_local_recovery_cleanu
+ 					dlm_lockres_put(res);
+ 					continue;
+ 				}
++				dlm_move_lockres_to_recovery_list(dlm, res);
+ 			} else if (res->owner == dlm->node_num) {
+ 				dlm_free_dead_locks(dlm, res, dead_node);
+ 				__dlm_lockres_calc_usage(dlm, res);
diff --git a/queue-4.9/ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch b/queue-4.9/ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
new file mode 100644
index 00000000000..586146b54e4
--- /dev/null
+++ b/queue-4.9/ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
@@ -0,0 +1,82 @@
+From 28f5a8a7c033cbf3e32277f4cc9c6afd74f05300 Mon Sep 17 00:00:00 2001
+From: alex chen <alex.chen@huawei.com>
+Date: Wed, 15 Nov 2017 17:31:40 -0800
+Subject: ocfs2: should wait dio before inode lock in ocfs2_setattr()
+
+From: alex chen <alex.chen@huawei.com>
+
+commit 28f5a8a7c033cbf3e32277f4cc9c6afd74f05300 upstream.
+
+We should wait for dio requests to finish before taking the inode lock in
+ocfs2_setattr(), otherwise the following deadlock will happen:
+
+process 1                  process 2                    process 3
+truncate file 'A'          end_io of writing file 'A'   receiving the bast messages
+ocfs2_setattr
+ ocfs2_inode_lock_tracker
+  ocfs2_inode_lock_full
+ inode_dio_wait
+  __inode_dio_wait
+  -->waiting for all dio
+  requests finish
+                                                        dlm_proxy_ast_handler
+                                                         dlm_do_local_bast
+                                                          ocfs2_blocking_ast
+                                                           ocfs2_generic_handle_bast
+                                                            set OCFS2_LOCK_BLOCKED flag
+                        dio_end_io
+                         dio_bio_end_aio
+                          dio_complete
+                           ocfs2_dio_end_io
+                            ocfs2_dio_end_io_write
+                             ocfs2_inode_lock
+                              __ocfs2_cluster_lock
+                               ocfs2_wait_for_mask
+                               -->waiting for OCFS2_LOCK_BLOCKED
+                               flag to be cleared, that is waiting
+                               for 'process 1' unlocking the inode lock
+                           inode_dio_end
+                           -->here dec the i_dio_count, but will never
+                           be called, so a deadlock happened.
+
+Link: http://lkml.kernel.org/r/59F81636.70508@huawei.com
+Signed-off-by: Alex Chen <alex.chen@huawei.com>
+Reviewed-by: Jun Piao <piaojun@huawei.com>
+Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
+Acked-by: Changwei Ge <ge.changwei@h3c.com>
+Cc: Mark Fasheh <mfasheh@versity.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/file.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1166,6 +1166,13 @@ int ocfs2_setattr(struct dentry *dentry,
+ 	}
+ 	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
+ 	if (size_change) {
++		/*
++		 * Here we should wait dio to finish before inode lock
++		 * to avoid a deadlock between ocfs2_setattr() and
++		 * ocfs2_dio_end_io_write()
++		 */
++		inode_dio_wait(inode);
++
+ 		status = ocfs2_rw_lock(inode, 1);
+ 		if (status < 0) {
+ 			mlog_errno(status);
+@@ -1186,8 +1193,6 @@ int ocfs2_setattr(struct dentry *dentry,
+ 		if (status)
+ 			goto bail_unlock;
+ 
+-		inode_dio_wait(inode);
+-
+ 		if (i_size_read(inode) >= attr->ia_size) {
+ 			if (ocfs2_should_order_data(inode)) {
+ 				status = ocfs2_begin_ordered_truncate(inode,
diff --git a/queue-4.9/series b/queue-4.9/series
index ad9421168c0..4d7b6af3693 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -17,3 +17,8 @@ crypto-dh-fix-double-free-of-ctx-p.patch
 ima-do-not-update-security.ima-if-appraisal-status-is-not-integrity_pass.patch
 serial-omap-fix-efr-write-on-rts-deassertion.patch
 serial-8250_fintek-fix-finding-base_port-with-activated-superio.patch
+dmaengine-dmatest-warn-user-when-dma-test-times-out.patch
+ocfs2-fix-cluster-hang-after-a-node-dies.patch
+ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
+ipmi-fix-unsigned-long-underflow.patch
+mm-page_alloc.c-broken-deferred-calculation.patch