]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.13-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Nov 2017 08:39:23 +0000 (09:39 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Nov 2017 08:39:23 +0000 (09:39 +0100)
added patches:
ipmi-fix-unsigned-long-underflow.patch
mm-page_alloc.c-broken-deferred-calculation.patch
mm-page_ext.c-check-if-page_ext-is-not-prepared.patch
mm-pagewalk.c-report-holes-in-hugetlb-ranges.patch
mm-swap-fix-false-error-message-in-__swp_swapcount.patch
ocfs2-fix-cluster-hang-after-a-node-dies.patch
ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
rcu-fix-up-pending-cbs-check-in-rcu_prepare_for_idle.patch

queue-4.13/ipmi-fix-unsigned-long-underflow.patch [new file with mode: 0644]
queue-4.13/mm-page_alloc.c-broken-deferred-calculation.patch [new file with mode: 0644]
queue-4.13/mm-page_ext.c-check-if-page_ext-is-not-prepared.patch [new file with mode: 0644]
queue-4.13/mm-pagewalk.c-report-holes-in-hugetlb-ranges.patch [new file with mode: 0644]
queue-4.13/mm-swap-fix-false-error-message-in-__swp_swapcount.patch [new file with mode: 0644]
queue-4.13/ocfs2-fix-cluster-hang-after-a-node-dies.patch [new file with mode: 0644]
queue-4.13/ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch [new file with mode: 0644]
queue-4.13/rcu-fix-up-pending-cbs-check-in-rcu_prepare_for_idle.patch [new file with mode: 0644]
queue-4.13/series

diff --git a/queue-4.13/ipmi-fix-unsigned-long-underflow.patch b/queue-4.13/ipmi-fix-unsigned-long-underflow.patch
new file mode 100644 (file)
index 0000000..278b745
--- /dev/null
@@ -0,0 +1,63 @@
+From 392a17b10ec4320d3c0e96e2a23ebaad1123b989 Mon Sep 17 00:00:00 2001
+From: Corey Minyard <cminyard@mvista.com>
+Date: Sat, 29 Jul 2017 21:14:55 -0500
+Subject: ipmi: fix unsigned long underflow
+
+From: Corey Minyard <cminyard@mvista.com>
+
+commit 392a17b10ec4320d3c0e96e2a23ebaad1123b989 upstream.
+
+When I set the timeout to a specific value such as 500ms, the timeout
+event will not happen in time due to the overflow in function
+check_msg_timeout:
+...
+       ent->timeout -= timeout_period;
+       if (ent->timeout > 0)
+               return;
+...
+
+The type of timeout_period is long, but ent->timeout is unsigned long.
+This patch makes the type consistent.
+
+Reported-by: Weilong Chen <chenweilong@huawei.com>
+Signed-off-by: Corey Minyard <cminyard@mvista.com>
+Tested-by: Weilong Chen <chenweilong@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/char/ipmi/ipmi_msghandler.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/ipmi/ipmi_msghandler.c
++++ b/drivers/char/ipmi/ipmi_msghandler.c
+@@ -4030,7 +4030,8 @@ smi_from_recv_msg(ipmi_smi_t intf, struc
+ }
+ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent,
+-                            struct list_head *timeouts, long timeout_period,
++                            struct list_head *timeouts,
++                            unsigned long timeout_period,
+                             int slot, unsigned long *flags,
+                             unsigned int *waiting_msgs)
+ {
+@@ -4043,8 +4044,8 @@ static void check_msg_timeout(ipmi_smi_t
+       if (!ent->inuse)
+               return;
+-      ent->timeout -= timeout_period;
+-      if (ent->timeout > 0) {
++      if (timeout_period < ent->timeout) {
++              ent->timeout -= timeout_period;
+               (*waiting_msgs)++;
+               return;
+       }
+@@ -4110,7 +4111,8 @@ static void check_msg_timeout(ipmi_smi_t
+       }
+ }
+-static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, long timeout_period)
++static unsigned int ipmi_timeout_handler(ipmi_smi_t intf,
++                                       unsigned long timeout_period)
+ {
+       struct list_head     timeouts;
+       struct ipmi_recv_msg *msg, *msg2;
diff --git a/queue-4.13/mm-page_alloc.c-broken-deferred-calculation.patch b/queue-4.13/mm-page_alloc.c-broken-deferred-calculation.patch
new file mode 100644 (file)
index 0000000..9661393
--- /dev/null
@@ -0,0 +1,106 @@
+From d135e5750205a21a212a19dbb05aeb339e2cbea7 Mon Sep 17 00:00:00 2001
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+Date: Wed, 15 Nov 2017 17:38:41 -0800
+Subject: mm/page_alloc.c: broken deferred calculation
+
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+
+commit d135e5750205a21a212a19dbb05aeb339e2cbea7 upstream.
+
+In reset_deferred_meminit() we determine number of pages that must not
+be deferred.  We initialize pages for at least 2G of memory, but also
+pages for reserved memory in this node.
+
+The reserved memory is determined in this function:
+memblock_reserved_memory_within(), which operates over physical
+addresses, and returns size in bytes.  However, reset_deferred_meminit()
+assumes that this function operates with pfns, and returns page
+count.
+
+The result is that in the best case machine boots slower than expected
+due to initializing more pages than needed in single thread, and in the
+worst case panics because fewer than needed pages are initialized early.
+
+Link: http://lkml.kernel.org/r/20171021011707.15191-1-pasha.tatashin@oracle.com
+Fixes: 864b9a393dcb ("mm: consider memblock reservations for deferred memory initialization sizing")
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mmzone.h |    3 ++-
+ mm/page_alloc.c        |   27 ++++++++++++++++++---------
+ 2 files changed, 20 insertions(+), 10 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -691,7 +691,8 @@ typedef struct pglist_data {
+        * is the first PFN that needs to be initialised.
+        */
+       unsigned long first_deferred_pfn;
+-      unsigned long static_init_size;
++      /* Number of non-deferred pages */
++      unsigned long static_init_pgcnt;
+ #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -289,28 +289,37 @@ EXPORT_SYMBOL(nr_online_nodes);
+ int page_group_by_mobility_disabled __read_mostly;
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
++
++/*
++ * Determine how many pages need to be initialized during early boot
++ * (non-deferred initialization).
++ * The value of first_deferred_pfn will be set later, once non-deferred pages
++ * are initialized, but for now set it ULONG_MAX.
++ */
+ static inline void reset_deferred_meminit(pg_data_t *pgdat)
+ {
+-      unsigned long max_initialise;
+-      unsigned long reserved_lowmem;
++      phys_addr_t start_addr, end_addr;
++      unsigned long max_pgcnt;
++      unsigned long reserved;
+       /*
+        * Initialise at least 2G of a node but also take into account that
+        * two large system hashes that can take up 1GB for 0.25TB/node.
+        */
+-      max_initialise = max(2UL << (30 - PAGE_SHIFT),
+-              (pgdat->node_spanned_pages >> 8));
++      max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
++                      (pgdat->node_spanned_pages >> 8));
+       /*
+        * Compensate the all the memblock reservations (e.g. crash kernel)
+        * from the initial estimation to make sure we will initialize enough
+        * memory to boot.
+        */
+-      reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+-                      pgdat->node_start_pfn + max_initialise);
+-      max_initialise += reserved_lowmem;
++      start_addr = PFN_PHYS(pgdat->node_start_pfn);
++      end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
++      reserved = memblock_reserved_memory_within(start_addr, end_addr);
++      max_pgcnt += PHYS_PFN(reserved);
+-      pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
++      pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
+       pgdat->first_deferred_pfn = ULONG_MAX;
+ }
+@@ -337,7 +346,7 @@ static inline bool update_defer_init(pg_
+       if (zone_end < pgdat_end_pfn(pgdat))
+               return true;
+       (*nr_initialised)++;
+-      if ((*nr_initialised > pgdat->static_init_size) &&
++      if ((*nr_initialised > pgdat->static_init_pgcnt) &&
+           (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+               pgdat->first_deferred_pfn = pfn;
+               return false;
diff --git a/queue-4.13/mm-page_ext.c-check-if-page_ext-is-not-prepared.patch b/queue-4.13/mm-page_ext.c-check-if-page_ext-is-not-prepared.patch
new file mode 100644 (file)
index 0000000..e7af749
--- /dev/null
@@ -0,0 +1,95 @@
+From e492080e640c2d1235ddf3441cae634cfffef7e1 Mon Sep 17 00:00:00 2001
+From: Jaewon Kim <jaewon31.kim@samsung.com>
+Date: Wed, 15 Nov 2017 17:39:07 -0800
+Subject: mm/page_ext.c: check if page_ext is not prepared
+
+From: Jaewon Kim <jaewon31.kim@samsung.com>
+
+commit e492080e640c2d1235ddf3441cae634cfffef7e1 upstream.
+
+online_page_ext() and page_ext_init() allocate page_ext for each
+section, but they do not allocate if the first PFN is !pfn_present(pfn)
+or !pfn_valid(pfn).  Then section->page_ext remains as NULL.
+lookup_page_ext checks NULL only if CONFIG_DEBUG_VM is enabled.  For a
+valid PFN, __set_page_owner will try to get page_ext through
+lookup_page_ext.  Without CONFIG_DEBUG_VM lookup_page_ext will misuse
+NULL pointer as value 0.  This incurs invalid address access.
+
+This is the panic example when PFN 0x100000 is not valid but PFN
+0x13FC00 is being used for page_ext.  section->page_ext is NULL,
+get_entry returned invalid page_ext address as 0x1DFA000 for a PFN
+0x13FC00.
+
+To avoid this panic, CONFIG_DEBUG_VM should be removed so that page_ext
+will be checked at all times.
+
+  Unable to handle kernel paging request at virtual address 01dfa014
+  ------------[ cut here ]------------
+  Kernel BUG at ffffff80082371e0 [verbose debug info unavailable]
+  Internal error: Oops: 96000045 [#1] PREEMPT SMP
+  Modules linked in:
+  PC is at __set_page_owner+0x48/0x78
+  LR is at __set_page_owner+0x44/0x78
+    __set_page_owner+0x48/0x78
+    get_page_from_freelist+0x880/0x8e8
+    __alloc_pages_nodemask+0x14c/0xc48
+    __do_page_cache_readahead+0xdc/0x264
+    filemap_fault+0x2ac/0x550
+    ext4_filemap_fault+0x3c/0x58
+    __do_fault+0x80/0x120
+    handle_mm_fault+0x704/0xbb0
+    do_page_fault+0x2e8/0x394
+    do_mem_abort+0x88/0x124
+
+Pre-4.7 kernels also need commit f86e4271978b ("mm: check the return
+value of lookup_page_ext for all call sites").
+
+Link: http://lkml.kernel.org/r/20171107094131.14621-1-jaewon31.kim@samsung.com
+Fixes: eefa864b701d ("mm/page_ext: resurrect struct page extending code for debugging")
+Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Joonsoo Kim <js1304@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_ext.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/mm/page_ext.c
++++ b/mm/page_ext.c
+@@ -124,7 +124,6 @@ struct page_ext *lookup_page_ext(struct
+       struct page_ext *base;
+       base = NODE_DATA(page_to_nid(page))->node_page_ext;
+-#if defined(CONFIG_DEBUG_VM)
+       /*
+        * The sanity checks the page allocator does upon freeing a
+        * page can reach here before the page_ext arrays are
+@@ -133,7 +132,6 @@ struct page_ext *lookup_page_ext(struct
+        */
+       if (unlikely(!base))
+               return NULL;
+-#endif
+       index = pfn - round_down(node_start_pfn(page_to_nid(page)),
+                                       MAX_ORDER_NR_PAGES);
+       return get_entry(base, index);
+@@ -198,7 +196,6 @@ struct page_ext *lookup_page_ext(struct
+ {
+       unsigned long pfn = page_to_pfn(page);
+       struct mem_section *section = __pfn_to_section(pfn);
+-#if defined(CONFIG_DEBUG_VM)
+       /*
+        * The sanity checks the page allocator does upon freeing a
+        * page can reach here before the page_ext arrays are
+@@ -207,7 +204,6 @@ struct page_ext *lookup_page_ext(struct
+        */
+       if (!section->page_ext)
+               return NULL;
+-#endif
+       return get_entry(section->page_ext, pfn);
+ }
diff --git a/queue-4.13/mm-pagewalk.c-report-holes-in-hugetlb-ranges.patch b/queue-4.13/mm-pagewalk.c-report-holes-in-hugetlb-ranges.patch
new file mode 100644 (file)
index 0000000..e0a9147
--- /dev/null
@@ -0,0 +1,51 @@
+From 373c4557d2aa362702c4c2d41288fb1e54990b7c Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Tue, 14 Nov 2017 01:03:44 +0100
+Subject: mm/pagewalk.c: report holes in hugetlb ranges
+
+From: Jann Horn <jannh@google.com>
+
+commit 373c4557d2aa362702c4c2d41288fb1e54990b7c upstream.
+
+This matters at least for the mincore syscall, which will otherwise copy
+uninitialized memory from the page allocator to userspace.  It is
+probably also a correctness error for /proc/$pid/pagemap, but I haven't
+tested that.
+
+Removing the `walk->hugetlb_entry` condition in walk_hugetlb_range() has
+no effect because the caller already checks for that.
+
+This only reports holes in hugetlb ranges to callers who have specified
+a hugetlb_entry callback.
+
+This issue was found using an AFL-based fuzzer.
+
+v2:
+ - don't crash on ->pte_hole==NULL (Andrew Morton)
+ - add Cc stable (Andrew Morton)
+
+Fixes: 1e25a271c8ac ("mincore: apply page table walker on do_mincore()")
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/pagewalk.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -187,8 +187,12 @@ static int walk_hugetlb_range(unsigned l
+       do {
+               next = hugetlb_entry_end(h, addr, end);
+               pte = huge_pte_offset(walk->mm, addr & hmask, sz);
+-              if (pte && walk->hugetlb_entry)
++
++              if (pte)
+                       err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
++              else if (walk->pte_hole)
++                      err = walk->pte_hole(addr, next, walk);
++
+               if (err)
+                       break;
+       } while (addr = next, addr != end);
diff --git a/queue-4.13/mm-swap-fix-false-error-message-in-__swp_swapcount.patch b/queue-4.13/mm-swap-fix-false-error-message-in-__swp_swapcount.patch
new file mode 100644 (file)
index 0000000..2b7de56
--- /dev/null
@@ -0,0 +1,62 @@
+From e9a6effa500526e2a19d5ad042cb758b55b1ef93 Mon Sep 17 00:00:00 2001
+From: Huang Ying <huang.ying.caritas@gmail.com>
+Date: Wed, 15 Nov 2017 17:33:15 -0800
+Subject: mm, swap: fix false error message in __swp_swapcount()
+
+From: Huang Ying <huang.ying.caritas@gmail.com>
+
+commit e9a6effa500526e2a19d5ad042cb758b55b1ef93 upstream.
+
+When a page fault occurs for a swap entry, the physical swap readahead
+(not the VMA base swap readahead) may readahead several swap entries
+after the fault swap entry.  The readahead algorithm calculates some of
+the swap entries to readahead via increasing the offset of the fault
+swap entry without checking whether they are beyond the end of the swap
+device and it relies on the __swp_swapcount() and swapcache_prepare() to
+check it.  Although __swp_swapcount() checks for the swap entry passed
+in, it will complain with the error message as follow for the expected
+invalid swap entry.  This may make the end users confused.
+
+  swap_info_get: Bad swap offset entry 0200f8a7
+
+To fix the false error message, the swap entry checking is added in
+swapin_readahead() to avoid to pass the out-of-bound swap entries and
+the swap entry reserved for the swap header to __swp_swapcount() and
+swapcache_prepare().
+
+Link: http://lkml.kernel.org/r/20171102054225.22897-1-ying.huang@intel.com
+Fixes: e8c26ab60598 ("mm/swap: skip readahead for unreferenced swap slots")
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reported-by: Christian Kujau <lists@nerdbynature.de>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Suggested-by: Minchan Kim <minchan@kernel.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/swap_state.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/swap_state.c
++++ b/mm/swap_state.c
+@@ -506,6 +506,7 @@ struct page *swapin_readahead(swp_entry_
+       unsigned long offset = entry_offset;
+       unsigned long start_offset, end_offset;
+       unsigned long mask;
++      struct swap_info_struct *si = swp_swap_info(entry);
+       struct blk_plug plug;
+       bool do_poll = true;
+@@ -519,6 +520,8 @@ struct page *swapin_readahead(swp_entry_
+       end_offset = offset | mask;
+       if (!start_offset)      /* First page is swap header. */
+               start_offset++;
++      if (end_offset >= si->max)
++              end_offset = si->max - 1;
+       blk_start_plug(&plug);
+       for (offset = start_offset; offset <= end_offset ; offset++) {
diff --git a/queue-4.13/ocfs2-fix-cluster-hang-after-a-node-dies.patch b/queue-4.13/ocfs2-fix-cluster-hang-after-a-node-dies.patch
new file mode 100644 (file)
index 0000000..fcffd17
--- /dev/null
@@ -0,0 +1,54 @@
+From 1c01967116a678fed8e2c68a6ab82abc8effeddc Mon Sep 17 00:00:00 2001
+From: Changwei Ge <ge.changwei@h3c.com>
+Date: Wed, 15 Nov 2017 17:31:33 -0800
+Subject: ocfs2: fix cluster hang after a node dies
+
+From: Changwei Ge <ge.changwei@h3c.com>
+
+commit 1c01967116a678fed8e2c68a6ab82abc8effeddc upstream.
+
+When a node dies, other live nodes have to choose a new master for an
+existed lock resource mastered by the dead node.
+
+As for ocfs2/dlm implementation, this is done by function -
+dlm_move_lockres_to_recovery_list which marks those lock rsources as
+DLM_LOCK_RES_RECOVERING and manages them via a list from which DLM
+changes lock resource's master later.
+
+So without invoking dlm_move_lockres_to_recovery_list, no master will be
+chosen after dlm recovery accomplishment since no lock resource can be
+found through ::resource list.
+
+What's worse is that if DLM_LOCK_RES_RECOVERING is not marked for lock
+resources mastered by a dead node, it will break up synchronization among
+nodes.
+
+So invoke dlm_move_lockres_to_recovery_list again.
+
+Fixes: 'commit ee8f7fcbe638 ("ocfs2/dlm: continue to purge recovery lockres when recovery master goes down")'
+Link: http://lkml.kernel.org/r/63ADC13FD55D6546B7DECE290D39E373CED6E0F9@H3CMLB14-EX.srv.huawei-3com.com
+Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
+Reported-by: Vitaly Mayatskih <v.mayatskih@gmail.com>
+Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
+Cc: Mark Fasheh <mfasheh@versity.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Joseph Qi <jiangqi903@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -2419,6 +2419,7 @@ static void dlm_do_local_recovery_cleanu
+                                       dlm_lockres_put(res);
+                                       continue;
+                               }
++                              dlm_move_lockres_to_recovery_list(dlm, res);
+                       } else if (res->owner == dlm->node_num) {
+                               dlm_free_dead_locks(dlm, res, dead_node);
+                               __dlm_lockres_calc_usage(dlm, res);
diff --git a/queue-4.13/ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch b/queue-4.13/ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
new file mode 100644 (file)
index 0000000..45d908e
--- /dev/null
@@ -0,0 +1,82 @@
+From 28f5a8a7c033cbf3e32277f4cc9c6afd74f05300 Mon Sep 17 00:00:00 2001
+From: alex chen <alex.chen@huawei.com>
+Date: Wed, 15 Nov 2017 17:31:40 -0800
+Subject: ocfs2: should wait dio before inode lock in ocfs2_setattr()
+
+From: alex chen <alex.chen@huawei.com>
+
+commit 28f5a8a7c033cbf3e32277f4cc9c6afd74f05300 upstream.
+
+we should wait dio requests to finish before inode lock in
+ocfs2_setattr(), otherwise the following deadlock will happen:
+
+process 1                  process 2                    process 3
+truncate file 'A'          end_io of writing file 'A'   receiving the bast messages
+ocfs2_setattr
+ ocfs2_inode_lock_tracker
+  ocfs2_inode_lock_full
+ inode_dio_wait
+  __inode_dio_wait
+  -->waiting for all dio
+  requests finish
+                                                        dlm_proxy_ast_handler
+                                                         dlm_do_local_bast
+                                                          ocfs2_blocking_ast
+                                                           ocfs2_generic_handle_bast
+                                                            set OCFS2_LOCK_BLOCKED flag
+                        dio_end_io
+                         dio_bio_end_aio
+                          dio_complete
+                           ocfs2_dio_end_io
+                            ocfs2_dio_end_io_write
+                             ocfs2_inode_lock
+                              __ocfs2_cluster_lock
+                               ocfs2_wait_for_mask
+                               -->waiting for OCFS2_LOCK_BLOCKED
+                               flag to be cleared, that is waiting
+                               for 'process 1' unlocking the inode lock
+                           inode_dio_end
+                           -->here dec the i_dio_count, but will never
+                           be called, so a deadlock happened.
+
+Link: http://lkml.kernel.org/r/59F81636.70508@huawei.com
+Signed-off-by: Alex Chen <alex.chen@huawei.com>
+Reviewed-by: Jun Piao <piaojun@huawei.com>
+Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
+Acked-by: Changwei Ge <ge.changwei@h3c.com>
+Cc: Mark Fasheh <mfasheh@versity.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/file.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1168,6 +1168,13 @@ int ocfs2_setattr(struct dentry *dentry,
+       }
+       size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
+       if (size_change) {
++              /*
++               * Here we should wait dio to finish before inode lock
++               * to avoid a deadlock between ocfs2_setattr() and
++               * ocfs2_dio_end_io_write()
++               */
++              inode_dio_wait(inode);
++
+               status = ocfs2_rw_lock(inode, 1);
+               if (status < 0) {
+                       mlog_errno(status);
+@@ -1207,8 +1214,6 @@ int ocfs2_setattr(struct dentry *dentry,
+               if (status)
+                       goto bail_unlock;
+-              inode_dio_wait(inode);
+-
+               if (i_size_read(inode) >= attr->ia_size) {
+                       if (ocfs2_should_order_data(inode)) {
+                               status = ocfs2_begin_ordered_truncate(inode,
diff --git a/queue-4.13/rcu-fix-up-pending-cbs-check-in-rcu_prepare_for_idle.patch b/queue-4.13/rcu-fix-up-pending-cbs-check-in-rcu_prepare_for_idle.patch
new file mode 100644 (file)
index 0000000..03be5ad
--- /dev/null
@@ -0,0 +1,34 @@
+From 135bd1a230bb69a68c9808a7d25467318900b80a Mon Sep 17 00:00:00 2001
+From: Neeraj Upadhyay <neeraju@codeaurora.org>
+Date: Mon, 7 Aug 2017 11:20:10 +0530
+Subject: rcu: Fix up pending cbs check in rcu_prepare_for_idle
+
+From: Neeraj Upadhyay <neeraju@codeaurora.org>
+
+commit 135bd1a230bb69a68c9808a7d25467318900b80a upstream.
+
+The pending-callbacks check in rcu_prepare_for_idle() is backwards.
+It should accelerate if there are pending callbacks, but the check
+rather uselessly accelerates only if there are no callbacks.  This commit
+therefore inverts this check.
+
+Fixes: 15fecf89e46a ("srcu: Abstract multi-tail callback list handling")
+Signed-off-by: Neeraj Upadhyay <neeraju@codeaurora.org>
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/rcu/tree_plugin.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -1493,7 +1493,7 @@ static void rcu_prepare_for_idle(void)
+       rdtp->last_accelerate = jiffies;
+       for_each_rcu_flavor(rsp) {
+               rdp = this_cpu_ptr(rsp->rda);
+-              if (rcu_segcblist_pend_cbs(&rdp->cblist))
++              if (!rcu_segcblist_pend_cbs(&rdp->cblist))
+                       continue;
+               rnp = rdp->mynode;
+               raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
index 42b61f8e1fdad9d8048ca1e966ff08601dc9104c..6d84bf5888ffffe2d94771b7b4089641057a277b 100644 (file)
@@ -24,3 +24,11 @@ ima-do-not-update-security.ima-if-appraisal-status-is-not-integrity_pass.patch
 serial-omap-fix-efr-write-on-rts-deassertion.patch
 serial-8250_fintek-fix-finding-base_port-with-activated-superio.patch
 tpm-dev-common-reject-too-short-writes.patch
+rcu-fix-up-pending-cbs-check-in-rcu_prepare_for_idle.patch
+mm-pagewalk.c-report-holes-in-hugetlb-ranges.patch
+ocfs2-fix-cluster-hang-after-a-node-dies.patch
+ocfs2-should-wait-dio-before-inode-lock-in-ocfs2_setattr.patch
+ipmi-fix-unsigned-long-underflow.patch
+mm-swap-fix-false-error-message-in-__swp_swapcount.patch
+mm-page_alloc.c-broken-deferred-calculation.patch
+mm-page_ext.c-check-if-page_ext-is-not-prepared.patch