4.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 23 Sep 2015 06:24:19 +0000 (23:24 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 23 Sep 2015 06:24:19 +0000 (23:24 -0700)
added patches:
mm-check-if-section-present-during-memory-block-registering.patch
mm-make-page-pfmemalloc-check-more-robust.patch
x86-mm-initialize-pmd_idx-in-page_table_range_init_count.patch

queue-4.1/mm-check-if-section-present-during-memory-block-registering.patch [new file with mode: 0644]
queue-4.1/mm-make-page-pfmemalloc-check-more-robust.patch [new file with mode: 0644]
queue-4.1/series
queue-4.1/x86-mm-initialize-pmd_idx-in-page_table_range_init_count.patch [new file with mode: 0644]

diff --git a/queue-4.1/mm-check-if-section-present-during-memory-block-registering.patch b/queue-4.1/mm-check-if-section-present-during-memory-block-registering.patch
new file mode 100644
index 0000000..1554b8c
--- /dev/null
@@ -0,0 +1,86 @@
+From 04697858d89e4bf2650364f8d6956e2554e8ef88 Mon Sep 17 00:00:00 2001
+From: Yinghai Lu <yinghai@kernel.org>
+Date: Fri, 4 Sep 2015 15:42:39 -0700
+Subject: mm: check if section present during memory block registering
+
+From: Yinghai Lu <yinghai@kernel.org>
+
+commit 04697858d89e4bf2650364f8d6956e2554e8ef88 upstream.
+
+Tony Luck found that on his setup a memory block size of 512M causes a
+crash during boot.
+
+  BUG: unable to handle kernel paging request at ffffea0074000020
+  IP: get_nid_for_pfn+0x17/0x40
+  PGD 128ffcb067 PUD 128ffc9067 PMD 0
+  Oops: 0000 [#1] SMP
+  Modules linked in:
+  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0-rc8 #1
+  ...
+  Call Trace:
+     ? register_mem_sect_under_node+0x66/0xe0
+     register_one_node+0x17b/0x240
+     ? pci_iommu_alloc+0x6e/0x6e
+     topology_init+0x3c/0x95
+     do_one_initcall+0xcd/0x1f0
+
+The system has non-contiguous RAM addresses:
+ BIOS-e820: [mem 0x0000001300000000-0x0000001cffffffff] usable
+ BIOS-e820: [mem 0x0000001d70000000-0x0000001ec7ffefff] usable
+ BIOS-e820: [mem 0x0000001f00000000-0x0000002bffffffff] usable
+ BIOS-e820: [mem 0x0000002c18000000-0x0000002d6fffefff] usable
+ BIOS-e820: [mem 0x0000002e00000000-0x00000039ffffffff] usable
+
+So a memory block can begin with sections that are not present.  For example:
+
+    memory block : [0x2c18000000, 0x2c20000000) 512M
+
+the first three sections are not present.
+
+The current register_mem_sect_under_node() assumes the first section is
+present, but a memory block's section number range [start_section_nr,
+end_section_nr] can include sections that are not present.
+
+On architectures that support vmemmap, the memmap (struct page area) is
+not set up for sections that are not present.
+
+So skip the pfn ranges that belong to absent sections.
+
+[akpm@linux-foundation.org: simplification]
+[rientjes@google.com: more simplification]
+Fixes: bdee237c0343 ("x86: mm: Use 2GB memory block size on large memory x86-64 systems")
+Fixes: 982792c782ef ("x86, mm: probe memory block size for generic x86 64bit")
+Signed-off-by: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: David Rientjes <rientjes@google.com>
+Reported-by: Tony Luck <tony.luck@intel.com>
+Tested-by: Tony Luck <tony.luck@intel.com>
+Cc: Greg KH <greg@kroah.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Tested-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/node.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/base/node.c
++++ b/drivers/base/node.c
+@@ -388,6 +388,16 @@ int register_mem_sect_under_node(struct
+       for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+               int page_nid;
++              /*
++               * memory block could have several absent sections from start.
++               * skip pfn range from absent section
++               */
++              if (!pfn_present(pfn)) {
++                      pfn = round_down(pfn + PAGES_PER_SECTION,
++                                       PAGES_PER_SECTION) - 1;
++                      continue;
++              }
++
+               page_nid = get_nid_for_pfn(pfn);
+               if (page_nid < 0)
+                       continue;
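A note on the round_down() arithmetic above: it jumps pfn to the last
page of the section it currently sits in, so the loop's pfn++ lands
exactly on the next section boundary. A minimal standalone sketch
(plain userspace C with a made-up PAGES_PER_SECTION value, not kernel
code) demonstrates the math:

    #include <stdio.h>

    /* Hypothetical section size; the real value is arch-specific. */
    #define PAGES_PER_SECTION 32768UL

    /* Same shape as the kernel's round_down() for power-of-two sizes. */
    #define round_down(x, y) ((x) & ~((y) - 1))

    int main(void)
    {
            unsigned long pfn = 100000UL;  /* a pfn inside an absent section */

            /* Last pfn of the section containing pfn; the loop's pfn++
             * then resumes at the next section's first pfn. */
            unsigned long last = round_down(pfn + PAGES_PER_SECTION,
                                            PAGES_PER_SECTION) - 1;

            printf("pfn %lu -> skip to %lu, resume at %lu\n",
                   pfn, last, last + 1);
            return 0;
    }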
diff --git a/queue-4.1/mm-make-page-pfmemalloc-check-more-robust.patch b/queue-4.1/mm-make-page-pfmemalloc-check-more-robust.patch
new file mode 100644
index 0000000..2153779
--- /dev/null
@@ -0,0 +1,266 @@
+From 2f064f3485cd29633ad1b3cfb00cc519509a3d72 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Fri, 21 Aug 2015 14:11:51 -0700
+Subject: mm: make page pfmemalloc check more robust
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 2f064f3485cd29633ad1b3cfb00cc519509a3d72 upstream.
+
+Commit c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") added
+checks for page->pfmemalloc to __skb_fill_page_desc():
+
+        if (page->pfmemalloc && !page->mapping)
+                skb->pfmemalloc = true;
+
+It assumes page->mapping == NULL implies that page->pfmemalloc can be
+trusted.  However, __delete_from_page_cache() can set page->mapping to
+NULL and leave page->index alone.  Because the two fields share a union,
+a non-zero page->index is then interpreted as page->pfmemalloc == true.
+
+So the assumption is invalid if the networking code can see such a page.
+And it seems it can: we have encountered this with an NFS-over-loopback
+setup where such a page is attached to a new skb.  No copying takes
+place in this case, so the page confuses __skb_fill_page_desc(), which
+interprets the index as the pfmemalloc flag.  The network stack then
+drops packets allocated from the reserves unless they are queued on
+sockets handling swapping, which is not the case here, and that leads
+to hangs: the NFS client waits for a response from the server that has
+been dropped and thus never arrives.
+
+The struct page is already heavily packed, so rather than finding
+another hole to put the flag in, let's use a trick instead.  We can
+reuse the index field but set it to an impossible value (-1UL).  This
+is a page index, so it should never legitimately hold a value that
+large.  Replace all direct users of page->pfmemalloc with
+page_is_pfmemalloc(), which hides this nastiness from unspoiled eyes.
+
+The information is obviously lost as soon as somebody uses page->index,
+but that was already the case before; the original code expected the
+information to be persisted somewhere else if it is really needed
+(e.g. as SLAB and SLUB do).
+
+[akpm@linux-foundation.org: fix blooper in slub]
+Fixes: c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb")
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Debugged-by: Vlastimil Babka <vbabka@suse.com>
+Debugged-by: Jiri Bohac <jbohac@suse.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: David Miller <davem@davemloft.net>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ drivers/net/ethernet/intel/fm10k/fm10k_main.c     |    2 -
+ drivers/net/ethernet/intel/igb/igb_main.c         |    2 -
+ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |    2 -
+ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |    2 -
+ include/linux/mm.h                                |   28 ++++++++++++++++++++++
+ include/linux/mm_types.h                          |    9 -------
+ include/linux/skbuff.h                            |   14 +++--------
+ mm/page_alloc.c                                   |    7 +++--
+ mm/slab.c                                         |    4 +--
+ mm/slub.c                                         |    2 -
+ net/core/skbuff.c                                 |    2 -
+ 11 files changed, 46 insertions(+), 28 deletions(-)
+
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+@@ -216,7 +216,7 @@ static void fm10k_reuse_rx_page(struct f
+ static inline bool fm10k_page_is_reserved(struct page *page)
+ {
+-      return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc;
++      return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+ }
+ static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -6596,7 +6596,7 @@ static void igb_reuse_rx_page(struct igb
+ static inline bool igb_page_is_reserved(struct page *page)
+ {
+-      return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc;
++      return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+ }
+ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -1829,7 +1829,7 @@ static void ixgbe_reuse_rx_page(struct i
+ static inline bool ixgbe_page_is_reserved(struct page *page)
+ {
+-      return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc;
++      return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+ }
+ /**
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+@@ -765,7 +765,7 @@ static void ixgbevf_reuse_rx_page(struct
+ static inline bool ixgbevf_page_is_reserved(struct page *page)
+ {
+-      return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc;
++      return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+ }
+ /**
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1002,6 +1002,34 @@ static inline int page_mapped(struct pag
+ }
+ /*
++ * Return true only if the page has been allocated with
++ * ALLOC_NO_WATERMARKS and the low watermark was not
++ * met implying that the system is under some pressure.
++ */
++static inline bool page_is_pfmemalloc(struct page *page)
++{
++      /*
++       * Page index cannot be this large so this must be
++       * a pfmemalloc page.
++       */
++      return page->index == -1UL;
++}
++
++/*
++ * Only to be called by the page allocator on a freshly allocated
++ * page.
++ */
++static inline void set_page_pfmemalloc(struct page *page)
++{
++      page->index = -1UL;
++}
++
++static inline void clear_page_pfmemalloc(struct page *page)
++{
++      page->index = 0;
++}
++
++/*
+  * Different kinds of faults, as returned by handle_mm_fault().
+  * Used to decide whether a process gets delivered SIGBUS or
+  * just gets major/minor fault counters bumped up.
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -63,15 +63,6 @@ struct page {
+               union {
+                       pgoff_t index;          /* Our offset within mapping. */
+                       void *freelist;         /* sl[aou]b first free object */
+-                      bool pfmemalloc;        /* If set by the page allocator,
+-                                               * ALLOC_NO_WATERMARKS was set
+-                                               * and the low watermark was not
+-                                               * met implying that the system
+-                                               * is under some pressure. The
+-                                               * caller should try ensure
+-                                               * this page is only used to
+-                                               * free other pages.
+-                                               */
+               };
+               union {
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1590,20 +1590,16 @@ static inline void __skb_fill_page_desc(
+       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+       /*
+-       * Propagate page->pfmemalloc to the skb if we can. The problem is
+-       * that not all callers have unique ownership of the page. If
+-       * pfmemalloc is set, we check the mapping as a mapping implies
+-       * page->index is set (index and pfmemalloc share space).
+-       * If it's a valid mapping, we cannot use page->pfmemalloc but we
+-       * do not lose pfmemalloc information as the pages would not be
+-       * allocated using __GFP_MEMALLOC.
++       * Propagate page pfmemalloc to the skb if we can. The problem is
++       * that not all callers have unique ownership of the page but rely
++       * on page_is_pfmemalloc doing the right thing(tm).
+        */
+       frag->page.p              = page;
+       frag->page_offset         = off;
+       skb_frag_size_set(frag, size);
+       page = compound_head(page);
+-      if (page->pfmemalloc && !page->mapping)
++      if (page_is_pfmemalloc(page))
+               skb->pfmemalloc = true;
+ }
+@@ -2250,7 +2246,7 @@ static inline struct page *dev_alloc_pag
+ static inline void skb_propagate_pfmemalloc(struct page *page,
+                                            struct sk_buff *skb)
+ {
+-      if (page && page->pfmemalloc)
++      if (page_is_pfmemalloc(page))
+               skb->pfmemalloc = true;
+ }
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -983,12 +983,15 @@ static int prep_new_page(struct page *pa
+       set_page_owner(page, order, gfp_flags);
+       /*
+-       * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to
++       * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
+        * allocate the page. The expectation is that the caller is taking
+        * steps that will free more memory. The caller should avoid the page
+        * being used for !PFMEMALLOC purposes.
+        */
+-      page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
++      if (alloc_flags & ALLOC_NO_WATERMARKS)
++              set_page_pfmemalloc(page);
++      else
++              clear_page_pfmemalloc(page);
+       return 0;
+ }
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -1602,7 +1602,7 @@ static struct page *kmem_getpages(struct
+       }
+       /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
+-      if (unlikely(page->pfmemalloc))
++      if (page_is_pfmemalloc(page))
+               pfmemalloc_active = true;
+       nr_pages = (1 << cachep->gfporder);
+@@ -1613,7 +1613,7 @@ static struct page *kmem_getpages(struct
+               add_zone_page_state(page_zone(page),
+                       NR_SLAB_UNRECLAIMABLE, nr_pages);
+       __SetPageSlab(page);
+-      if (page->pfmemalloc)
++      if (page_is_pfmemalloc(page))
+               SetPageSlabPfmemalloc(page);
+       if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1427,7 +1427,7 @@ static struct page *new_slab(struct kmem
+       inc_slabs_node(s, page_to_nid(page), page->objects);
+       page->slab_cache = s;
+       __SetPageSlab(page);
+-      if (page->pfmemalloc)
++      if (page_is_pfmemalloc(page))
+               SetPageSlabPfmemalloc(page);
+       start = page_address(page);
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -340,7 +340,7 @@ struct sk_buff *build_skb(void *data, un
+       if (skb && frag_size) {
+               skb->head_frag = 1;
+-              if (virt_to_head_page(data)->pfmemalloc)
++              if (page_is_pfmemalloc(virt_to_head_page(data)))
+                       skb->pfmemalloc = 1;
+       }
+       return skb;
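The union aliasing described in the changelog is subtle, so here is a
minimal userspace sketch (a simplified stand-in struct, not the real
struct page layout) of how a stale index used to read as a set flag,
and why the -1UL sentinel check cannot misfire the same way:

    #include <stdbool.h>
    #include <stdio.h>

    struct fake_page {                     /* illustrative only */
            void *mapping;
            union {
                    unsigned long index;   /* offset within mapping */
                    bool pfmemalloc;       /* old scheme: aliases index */
            };
    };

    /* New scheme: no flag member at all; -1UL is an impossible index. */
    static bool page_is_pfmemalloc(const struct fake_page *page)
    {
            return page->index == -1UL;
    }

    int main(void)
    {
            /* As after __delete_from_page_cache(): mapping cleared,
             * index left holding a stale non-zero value. */
            struct fake_page page = { .mapping = NULL, .index = 1234 };

            /* Old check misfires: the punned read of index's low byte
             * is non-zero, so the "flag" appears set. */
            printf("old check: %d (false positive)\n",
                   page.pfmemalloc && !page.mapping);

            printf("new check: %d\n", page_is_pfmemalloc(&page));

            page.index = -1UL;             /* set_page_pfmemalloc() */
            printf("after marking: %d\n", page_is_pfmemalloc(&page));
            return 0;
    }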
diff --git a/queue-4.1/series b/queue-4.1/series
index d6eca50889db34b2107301145c2043432b8d688d..d1fa173616c66bf5f1216347e6e843d4b134cbd2 100644
--- a/queue-4.1/series
+++ b/queue-4.1/series
@@ -42,4 +42,7 @@ powerpc-boot-specify-abi-v2-when-building-an-le-boot-wrapper.patch
 powerpc-mm-recompute-hash-value-after-a-failed-update.patch
 cifs-fix-type-confusion-in-copy-offload-ioctl.patch
 add-radeon-suspend-resume-quirk-for-hp-compaq-dc5750.patch
+mm-check-if-section-present-during-memory-block-registering.patch
+x86-mm-initialize-pmd_idx-in-page_table_range_init_count.patch
+mm-make-page-pfmemalloc-check-more-robust.patch
 fs-fuse-fix-ioctl-type-confusion.patch
diff --git a/queue-4.1/x86-mm-initialize-pmd_idx-in-page_table_range_init_count.patch b/queue-4.1/x86-mm-initialize-pmd_idx-in-page_table_range_init_count.patch
new file mode 100644
index 0000000..d2c7797
--- /dev/null
@@ -0,0 +1,38 @@
+From 9962eea9e55f797f05f20ba6448929cab2a9f018 Mon Sep 17 00:00:00 2001
+From: Minfei Huang <mnfhuang@gmail.com>
+Date: Sun, 12 Jul 2015 20:18:42 +0800
+Subject: x86/mm: Initialize pmd_idx in page_table_range_init_count()
+
+From: Minfei Huang <mnfhuang@gmail.com>
+
+commit 9962eea9e55f797f05f20ba6448929cab2a9f018 upstream.
+
+The variable pmd_idx is not initialized for the first iteration of the
+for loop.
+
+Assign it the proper value, the one that indexes the start address.
+
+Fixes: 719272c45b82 'x86, mm: only call early_ioremap_page_table_range_init() once'
+Signed-off-by: Minfei Huang <mnfhuang@gmail.com>
+Cc: tony.luck@intel.com
+Cc: wangnan0@huawei.com
+Cc: david.vrabel@citrix.com
+Reviewed-by: yinghai@kernel.org
+Link: http://lkml.kernel.org/r/1436703522-29552-1-git-send-email-mhuang@redhat.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/init_32.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -137,6 +137,7 @@ page_table_range_init_count(unsigned lon
+       vaddr = start;
+       pgd_idx = pgd_index(vaddr);
++      pmd_idx = pmd_index(vaddr);
+       for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
+               for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
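The bug is easy to reproduce in miniature. A sketch (tiny hypothetical
table bounds, mirroring only the loop shape of
page_table_range_init_count(), not its body) shows that an
uninitialized inner index corrupts the count on the first outer
iteration only:

    #include <stdio.h>

    #define PTRS_PER_PGD 4                 /* made-up tiny bounds */
    #define PTRS_PER_PMD 4

    static int count_entries(int pgd_idx, int pmd_idx)
    {
            int count = 0;

            for (; pgd_idx < PTRS_PER_PGD; pgd_idx++) {
                    for (; pmd_idx < PTRS_PER_PMD; pmd_idx++)
                            count++;
                    pmd_idx = 0;   /* later outer passes start at 0 */
            }
            return count;
    }

    int main(void)
    {
            /* Fixed: both indices derived from the start address. */
            printf("pmd_idx initialized: %d\n", count_entries(1, 3));

            /* Buggy: pmd_idx holds garbage on the first pass; any value
             * >= PTRS_PER_PMD silently skips the first inner loop. */
            printf("pmd_idx garbage:     %d\n", count_entries(1, 42));
            return 0;
    }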