git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 11 Mar 2015 13:49:14 +0000 (14:49 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 11 Mar 2015 13:49:14 +0000 (14:49 +0100)
added patches:
mm-compaction-fix-wrong-order-check-in-compact_finished.patch
mm-hugetlb-add-migration-entry-check-in-__unmap_hugepage_range.patch
mm-hugetlb-add-migration-hwpoisoned-entry-check-in-hugetlb_change_protection.patch
mm-memory.c-actually-remap-enough-memory.patch
mm-mmap.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch
mm-nommu.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch
mm-when-stealing-freepages-also-take-pages-created-by-splitting-buddy-page.patch

queue-3.14/mm-compaction-fix-wrong-order-check-in-compact_finished.patch [new file with mode: 0644]
queue-3.14/mm-hugetlb-add-migration-entry-check-in-__unmap_hugepage_range.patch [new file with mode: 0644]
queue-3.14/mm-hugetlb-add-migration-hwpoisoned-entry-check-in-hugetlb_change_protection.patch [new file with mode: 0644]
queue-3.14/mm-memory.c-actually-remap-enough-memory.patch [new file with mode: 0644]
queue-3.14/mm-mmap.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch [new file with mode: 0644]
queue-3.14/mm-nommu.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch [new file with mode: 0644]
queue-3.14/mm-when-stealing-freepages-also-take-pages-created-by-splitting-buddy-page.patch [new file with mode: 0644]
queue-3.14/series

diff --git a/queue-3.14/mm-compaction-fix-wrong-order-check-in-compact_finished.patch b/queue-3.14/mm-compaction-fix-wrong-order-check-in-compact_finished.patch
new file mode 100644 (file)
index 0000000..42a3ce7
--- /dev/null
@@ -0,0 +1,60 @@
+From 372549c2a3778fd3df445819811c944ad54609ca Mon Sep 17 00:00:00 2001
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Date: Thu, 12 Feb 2015 14:59:50 -0800
+Subject: mm/compaction: fix wrong order check in compact_finished()
+
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+
+commit 372549c2a3778fd3df445819811c944ad54609ca upstream.
+
+What we want to check here is whether there is a high-order freepage in the
+buddy list of another migratetype, in order to steal it without fragmentation.
+But the current code just checks cc->order, which is the allocation request
+order.  So this is wrong.
+
+Without this fix, non-movable synchronous compaction below pageblock order
+would not stop until compaction is complete, because the migratetype of
+most pageblocks is movable and the high-order freepages made by compaction
+usually end up on the movable-type buddy list.
+
+There is a report related to this bug; see the link below.
+
+  http://www.spinics.net/lists/linux-mm/msg81666.html
+
+Although the affected system still shows load spikes coming from compaction,
+this change makes that system completely stable and responsive according to
+the reporter.
+
+The stress-highalloc test in mmtests with non-movable order-7 allocations
+doesn't show any notable difference in allocation success rate, but it
+does show a higher compaction success rate.
+
+Compaction success rate (Compaction success * 100 / Compaction stalls, %)
+18.47 : 28.94
+
+Fixes: 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available")
+Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -937,7 +937,7 @@ static int compact_finished(struct zone
+                       return COMPACT_PARTIAL;
+               /* Job done if allocation would set block type */
+-              if (cc->order >= pageblock_order && area->nr_free)
++              if (order >= pageblock_order && area->nr_free)
+                       return COMPACT_PARTIAL;
+       }
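
To see why the one-character change matters, here is a paraphrased sketch of the loop this hunk lives in (simplified from the 3.14-era compact_finished(); not an exact copy).  cc->order is the fixed allocation request order, while order walks the free lists, so the "would set block type" exit has to test the list currently being inspected:

/* Paraphrased sketch of the surrounding loop in compact_finished() */
for (order = cc->order; order < MAX_ORDER; order++) {
	struct free_area *area = &zone->free_area[order];

	/* Job done if a page of the right migratetype is already free */
	if (!list_empty(&area->free_list[cc->migratetype]))
		return COMPACT_PARTIAL;

	/*
	 * Job done if allocation would set block type: this must test
	 * 'order' (the free list being inspected), not 'cc->order' (the
	 * request order), otherwise requests below pageblock_order never
	 * take this exit and compaction keeps running needlessly.
	 */
	if (order >= pageblock_order && area->nr_free)
		return COMPACT_PARTIAL;
}
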
diff --git a/queue-3.14/mm-hugetlb-add-migration-entry-check-in-__unmap_hugepage_range.patch b/queue-3.14/mm-hugetlb-add-migration-entry-check-in-__unmap_hugepage_range.patch
new file mode 100644 (file)
index 0000000..71fe40b
--- /dev/null
@@ -0,0 +1,51 @@
+From 9fbc1f635fd0bd28cb32550211bf095753ac637a Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:25:32 -0800
+Subject: mm/hugetlb: add migration entry check in __unmap_hugepage_range
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 9fbc1f635fd0bd28cb32550211bf095753ac637a upstream.
+
+If __unmap_hugepage_range() tries to unmap an address range over which
+hugepage migration is underway, we get the wrong page because pte_page()
+doesn't work for migration entries.  This patch simply clears the pte for
+migration entries, as we already do for hwpoison entries.
+
+Fixes: 290408d4a2 ("hugetlb: hugepage migration core")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: James Hogan <james.hogan@imgtec.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
+Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
+Cc: Steve Capper <steve.capper@linaro.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2488,9 +2488,10 @@ again:
+                       goto unlock;
+               /*
+-               * HWPoisoned hugepage is already unmapped and dropped reference
++               * Migrating hugepage or HWPoisoned hugepage is already
++               * unmapped and its refcount is dropped, so just clear pte here.
+                */
+-              if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
++              if (unlikely(!pte_present(pte))) {
+                       huge_pte_clear(mm, address, ptep);
+                       goto unlock;
+               }
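
The reason pte_page() misbehaves here: a hugepage under migration is represented by a non-present, swap-format pte that encodes a migration entry rather than a page frame, so decoding it as a normal mapping yields a bogus struct page.  A paraphrased sketch of the flow around the hunk above (simplified, not the exact 3.14 source):

pte = huge_ptep_get(ptep);
if (huge_pte_none(pte))
	goto unlock;

/*
 * Migration and hwpoison entries are not present ptes; there is no
 * mapped page behind them, so they must never reach pte_page() below.
 * The page was already unmapped and its refcount dropped, so just
 * clear the pte.
 */
if (unlikely(!pte_present(pte))) {
	huge_pte_clear(mm, address, ptep);
	goto unlock;
}

page = pte_page(pte);	/* only valid for present ptes */
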
diff --git a/queue-3.14/mm-hugetlb-add-migration-hwpoisoned-entry-check-in-hugetlb_change_protection.patch b/queue-3.14/mm-hugetlb-add-migration-hwpoisoned-entry-check-in-hugetlb_change_protection.patch
new file mode 100644 (file)
index 0000000..c90ccdc
--- /dev/null
@@ -0,0 +1,70 @@
+From a8bda28d87c38c6aa93de28ba5d30cc18e865a11 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:25:28 -0800
+Subject: mm/hugetlb: add migration/hwpoisoned entry check in hugetlb_change_protection
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit a8bda28d87c38c6aa93de28ba5d30cc18e865a11 upstream.
+
+There is a race condition between hugepage migration and
+change_protection(): hugetlb_change_protection() doesn't care about
+migration entries and wrongly overwrites them.  That causes unexpected
+results such as a kernel crash.  HWPoison entries can cause the same
+problem.
+
+This patch adds is_hugetlb_entry_(migration|hwpoisoned) checks to this
+function so that it takes the proper action for each entry type.
+
+Fixes: 290408d4a2 ("hugetlb: hugepage migration core")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: James Hogan <james.hogan@imgtec.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
+Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
+Cc: Steve Capper <steve.capper@linaro.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c |   21 ++++++++++++++++++++-
+ 1 file changed, 20 insertions(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3163,7 +3163,26 @@ unsigned long hugetlb_change_protection(
+                       spin_unlock(ptl);
+                       continue;
+               }
+-              if (!huge_pte_none(huge_ptep_get(ptep))) {
++              pte = huge_ptep_get(ptep);
++              if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
++                      spin_unlock(ptl);
++                      continue;
++              }
++              if (unlikely(is_hugetlb_entry_migration(pte))) {
++                      swp_entry_t entry = pte_to_swp_entry(pte);
++
++                      if (is_write_migration_entry(entry)) {
++                              pte_t newpte;
++
++                              make_migration_entry_read(&entry);
++                              newpte = swp_entry_to_pte(entry);
++                              set_huge_pte_at(mm, address, ptep, newpte);
++                              pages++;
++                      }
++                      spin_unlock(ptl);
++                      continue;
++              }
++              if (!huge_pte_none(pte)) {
+                       pte = huge_ptep_get_and_clear(mm, address, ptep);
+                       pte = pte_mkhuge(huge_pte_modify(pte, newprot));
+                       pte = arch_make_huge_pte(pte, vma, NULL, 0);
diff --git a/queue-3.14/mm-memory.c-actually-remap-enough-memory.patch b/queue-3.14/mm-memory.c-actually-remap-enough-memory.patch
new file mode 100644 (file)
index 0000000..2aea424
--- /dev/null
@@ -0,0 +1,36 @@
+From 9cb12d7b4ccaa976f97ce0c5fd0f1b6a83bc2a75 Mon Sep 17 00:00:00 2001
+From: Grazvydas Ignotas <notasas@gmail.com>
+Date: Thu, 12 Feb 2015 15:00:19 -0800
+Subject: mm/memory.c: actually remap enough memory
+
+From: Grazvydas Ignotas <notasas@gmail.com>
+
+commit 9cb12d7b4ccaa976f97ce0c5fd0f1b6a83bc2a75 upstream.
+
+For whatever reason, generic_access_phys() only remaps one page, but
+actually allows access to an arbitrarily sized range.  It's quite easy to
+trigger large reads, e.g. by printing out a large structure with gdb, which
+leads to a crash.  Fix it by remapping the correct size.
+
+Fixes: 28b2ee20c7cb ("access_process_vm device memory infrastructure")
+Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
+Cc: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4024,7 +4024,7 @@ int generic_access_phys(struct vm_area_s
+       if (follow_phys(vma, addr, write, &prot, &phys_addr))
+               return -EINVAL;
+-      maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
++      maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+       if (write)
+               memcpy_toio(maddr + offset, buf, len);
+       else
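
A quick way to check the arithmetic of the fix: the access starts at offset = addr & (PAGE_SIZE - 1) into the first page, so the ioremap must cover offset + len bytes rounded up to a page boundary.  A minimal, runnable userspace sketch with made-up values (not kernel code):

#include <stdio.h>

#define PAGE_SIZE     4096UL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long addr   = 0x10000f00UL;           /* hypothetical target address */
	unsigned long len    = 8192;                   /* e.g. gdb dumping a large struct */
	unsigned long offset = addr & (PAGE_SIZE - 1); /* 0xf00 = 3840 */

	/* Old mapping size: always one page, too small for this access. */
	printf("old mapping: %lu bytes\n", PAGE_SIZE);
	/* Fixed mapping size: covers the whole [offset, offset + len) range. */
	printf("new mapping: %lu bytes\n", PAGE_ALIGN(len + offset));
	return 0;
}
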
diff --git a/queue-3.14/mm-mmap.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch b/queue-3.14/mm-mmap.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch
new file mode 100644 (file)
index 0000000..f71378c
--- /dev/null
@@ -0,0 +1,63 @@
+From 5703b087dc8eaf47bfb399d6cf512d471beff405 Mon Sep 17 00:00:00 2001
+From: Roman Gushchin <klamm@yandex-team.ru>
+Date: Wed, 11 Feb 2015 15:28:39 -0800
+Subject: mm/mmap.c: fix arithmetic overflow in __vm_enough_memory()
+
+From: Roman Gushchin <klamm@yandex-team.ru>
+
+commit 5703b087dc8eaf47bfb399d6cf512d471beff405 upstream.
+
+I noticed that "allowed" can easily overflow by falling below 0,
+because (total_vm / 32) can be larger than "allowed".  The problem
+occurs in OVERCOMMIT_NONE mode.
+
+In this case, a huge allocation can succeed and overcommit the system
+(despite OVERCOMMIT_NONE mode).  All subsequent allocations will then fail
+(system-wide), so the system becomes unusable.
+
+The problem was masked by commit c9b1d0981fcc
+("mm: limit growth of 3% hardcoded other user reserve"),
+but it's easy to reproduce on older kernels:
+1) set the overcommit_memory sysctl to 2
+2) mmap() a large file multiple times (with the VM_SHARED flag)
+3) try to malloc() a large amount of memory
+
+It can also be reproduced on newer kernels, but a misconfigured
+sysctl_user_reserve_kbytes is required.
+
+Fix this issue by switching to signed arithmetic here.
+
+[akpm@linux-foundation.org: use min_t]
+Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Andrew Shewmaker <agshew@gmail.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Reviewed-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmap.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -129,7 +129,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed);
+  */
+ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
+ {
+-      unsigned long free, allowed, reserve;
++      long free, allowed, reserve;
+       vm_acct_memory(pages);
+@@ -193,7 +193,7 @@ int __vm_enough_memory(struct mm_struct
+        */
+       if (mm) {
+               reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+-              allowed -= min(mm->total_vm / 32, reserve);
++              allowed -= min_t(long, mm->total_vm / 32, reserve);
+       }
+       if (percpu_counter_read_positive(&vm_committed_as) < allowed)
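
The overflow described above (in this mm/mmap.c patch and in the identical mm/nommu.c patch that follows) boils down to an unsigned subtraction wrapping around.  A minimal, runnable userspace sketch with hypothetical numbers:

#include <stdio.h>

int main(void)
{
	unsigned long allowed  = 1000;    /* pages permitted by the overcommit policy */
	unsigned long total_vm = 160000;  /* hypothetical mm->total_vm */
	unsigned long reserve  = 8192;    /* sysctl_user_reserve_kbytes expressed in pages */
	unsigned long other    = (total_vm / 32 < reserve) ? total_vm / 32 : reserve; /* 5000 */

	/*
	 * Old code: everything is unsigned, so 1000 - 5000 wraps to a huge
	 * value and the later "committed < allowed" check passes for any
	 * request, overcommitting the system.
	 */
	unsigned long allowed_unsigned = allowed - other;
	printf("unsigned: %lu\n", allowed_unsigned);

	/*
	 * Fixed code: the signed declarations and min_t(long, ...) force
	 * signed arithmetic, the result goes negative, and the request is
	 * correctly refused.
	 */
	long allowed_signed = (long)allowed - (long)other;
	printf("signed:   %ld\n", allowed_signed);
	return 0;
}
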
diff --git a/queue-3.14/mm-nommu.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch b/queue-3.14/mm-nommu.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch
new file mode 100644 (file)
index 0000000..cd073e8
--- /dev/null
@@ -0,0 +1,61 @@
+From 8138a67a5557ffea3a21dfd6f037842d4e748513 Mon Sep 17 00:00:00 2001
+From: Roman Gushchin <klamm@yandex-team.ru>
+Date: Wed, 11 Feb 2015 15:28:42 -0800
+Subject: mm/nommu.c: fix arithmetic overflow in __vm_enough_memory()
+
+From: Roman Gushchin <klamm@yandex-team.ru>
+
+commit 8138a67a5557ffea3a21dfd6f037842d4e748513 upstream.
+
+I noticed that "allowed" can easily overflow by falling below 0, because
+(total_vm / 32) can be larger than "allowed".  The problem occurs in
+OVERCOMMIT_NONE mode.
+
+In this case, a huge allocation can succeed and overcommit the system
+(despite OVERCOMMIT_NONE mode).  All subsequent allocations will then fail
+(system-wide), so the system becomes unusable.
+
+The problem was masked by commit c9b1d0981fcc
+("mm: limit growth of 3% hardcoded other user reserve"),
+but it's easy to reproduce on older kernels:
+1) set the overcommit_memory sysctl to 2
+2) mmap() a large file multiple times (with the VM_SHARED flag)
+3) try to malloc() a large amount of memory
+
+It can also be reproduced on newer kernels, but a misconfigured
+sysctl_user_reserve_kbytes is required.
+
+Fix this issue by switching to signed arithmetic here.
+
+Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Andrew Shewmaker <agshew@gmail.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/nommu.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -1905,7 +1905,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
+  */
+ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
+ {
+-      unsigned long free, allowed, reserve;
++      long free, allowed, reserve;
+       vm_acct_memory(pages);
+@@ -1969,7 +1969,7 @@ int __vm_enough_memory(struct mm_struct
+        */
+       if (mm) {
+               reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+-              allowed -= min(mm->total_vm / 32, reserve);
++              allowed -= min_t(long, mm->total_vm / 32, reserve);
+       }
+       if (percpu_counter_read_positive(&vm_committed_as) < allowed)
diff --git a/queue-3.14/mm-when-stealing-freepages-also-take-pages-created-by-splitting-buddy-page.patch b/queue-3.14/mm-when-stealing-freepages-also-take-pages-created-by-splitting-buddy-page.patch
new file mode 100644 (file)
index 0000000..53cb83b
--- /dev/null
@@ -0,0 +1,362 @@
+From 99592d598eca62bdbbf62b59941c189176dfc614 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Wed, 11 Feb 2015 15:28:15 -0800
+Subject: mm: when stealing freepages, also take pages created by splitting buddy page
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 99592d598eca62bdbbf62b59941c189176dfc614 upstream.
+
+When studying page stealing, I noticed some weird-looking decisions in
+try_to_steal_freepages().  The first I assume is a bug (Patch 1); the
+following two patches were driven by evaluation.
+
+Testing was done with stress-highalloc of mmtests, using the
+mm_page_alloc_extfrag tracepoint and postprocessing to get counts of how
+often page stealing occurs for individual migratetypes, and what
+migratetypes are used for fallbacks.  Arguably, the worst case of page
+stealing is when UNMOVABLE allocation steals from MOVABLE pageblock.
+RECLAIMABLE allocation stealing from MOVABLE allocation is also not ideal,
+so the goal is to minimize these two cases.
+
+The evaluation of v2 wasn't always a clear win and Joonsoo questioned the
+results.  Here I used a different baseline, which includes the RFC compaction
+improvements from [1].  I found that the compaction improvements reduce the
+variability of stress-highalloc, so there's less noise in the data.
+
+First, let's look at stress-highalloc configured to do sync compaction,
+and how these patches reduce page stealing events during the test.  The first
+column is after a fresh reboot, the other two are re-iterations of the test
+without reboot.  That was all accumulated over 5 re-iterations (so the
+benchmark was run 5x3 times with 5 fresh restarts).
+
+Baseline:
+
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                  5-nothp-1       5-nothp-2       5-nothp-3
+Page alloc extfrag event                               10264225     8702233    10244125
+Extfrag fragmenting                                    10263271     8701552    10243473
+Extfrag fragmenting for unmovable                         13595       17616       15960
+Extfrag fragmenting unmovable placed with movable          7989       12193        8447
+Extfrag fragmenting for reclaimable                         658        1840        1817
+Extfrag fragmenting reclaimable placed with movable         558        1677        1679
+Extfrag fragmenting for movable                        10249018     8682096    10225696
+
+With Patch 1:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                  6-nothp-1       6-nothp-2       6-nothp-3
+Page alloc extfrag event                               11834954     9877523     9774860
+Extfrag fragmenting                                    11833993     9876880     9774245
+Extfrag fragmenting for unmovable                          7342       16129       11712
+Extfrag fragmenting unmovable placed with movable          4191       10547        6270
+Extfrag fragmenting for reclaimable                         373        1130         923
+Extfrag fragmenting reclaimable placed with movable         302         906         738
+Extfrag fragmenting for movable                        11826278     9859621     9761610
+
+With Patch 2:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                  7-nothp-1       7-nothp-2       7-nothp-3
+Page alloc extfrag event                                4725990     3668793     3807436
+Extfrag fragmenting                                     4725104     3668252     3806898
+Extfrag fragmenting for unmovable                          6678        7974        7281
+Extfrag fragmenting unmovable placed with movable          2051        3829        4017
+Extfrag fragmenting for reclaimable                         429        1208        1278
+Extfrag fragmenting reclaimable placed with movable         369         976        1034
+Extfrag fragmenting for movable                         4717997     3659070     3798339
+
+With Patch 3:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                  8-nothp-1       8-nothp-2       8-nothp-3
+Page alloc extfrag event                                5016183     4700142     3850633
+Extfrag fragmenting                                     5015325     4699613     3850072
+Extfrag fragmenting for unmovable                          1312        3154        3088
+Extfrag fragmenting unmovable placed with movable          1115        2777        2714
+Extfrag fragmenting for reclaimable                         437        1193        1097
+Extfrag fragmenting reclaimable placed with movable         330         969         879
+Extfrag fragmenting for movable                         5013576     4695266     3845887
+
+In v2 we saw an apparent regression with Patch 1 for unmovable events;
+this is now gone, suggesting it was indeed noise.  Here, each patch
+improves the situation for unmovable events.  Reclaimable is improved by
+Patch 1 and then either stays the same modulo noise, or is perhaps slightly
+worse - a small price for the unmovable improvements, IMHO.  The number of
+movable allocations falling back to other migratetypes is the noisiest, but
+it's nevertheless reduced by half with Patch 2.  These are the least
+critical, as compaction can move them around.
+
+If we look at success rates, the patches don't affect them; that hasn't changed.
+
+Baseline:
+                             3.19-rc4              3.19-rc4              3.19-rc4
+                            5-nothp-1             5-nothp-2             5-nothp-3
+Success 1 Min         49.00 (  0.00%)       42.00 ( 14.29%)       41.00 ( 16.33%)
+Success 1 Mean        51.00 (  0.00%)       45.00 ( 11.76%)       42.60 ( 16.47%)
+Success 1 Max         55.00 (  0.00%)       51.00 (  7.27%)       46.00 ( 16.36%)
+Success 2 Min         53.00 (  0.00%)       47.00 ( 11.32%)       44.00 ( 16.98%)
+Success 2 Mean        59.60 (  0.00%)       50.80 ( 14.77%)       48.20 ( 19.13%)
+Success 2 Max         64.00 (  0.00%)       56.00 ( 12.50%)       52.00 ( 18.75%)
+Success 3 Min         84.00 (  0.00%)       82.00 (  2.38%)       78.00 (  7.14%)
+Success 3 Mean        85.60 (  0.00%)       82.80 (  3.27%)       79.40 (  7.24%)
+Success 3 Max         86.00 (  0.00%)       83.00 (  3.49%)       80.00 (  6.98%)
+
+Patch 1:
+                             3.19-rc4              3.19-rc4              3.19-rc4
+                            6-nothp-1             6-nothp-2             6-nothp-3
+Success 1 Min         49.00 (  0.00%)       44.00 ( 10.20%)       44.00 ( 10.20%)
+Success 1 Mean        51.80 (  0.00%)       46.00 ( 11.20%)       45.80 ( 11.58%)
+Success 1 Max         54.00 (  0.00%)       49.00 (  9.26%)       49.00 (  9.26%)
+Success 2 Min         58.00 (  0.00%)       49.00 ( 15.52%)       48.00 ( 17.24%)
+Success 2 Mean        60.40 (  0.00%)       51.80 ( 14.24%)       50.80 ( 15.89%)
+Success 2 Max         63.00 (  0.00%)       54.00 ( 14.29%)       55.00 ( 12.70%)
+Success 3 Min         84.00 (  0.00%)       81.00 (  3.57%)       79.00 (  5.95%)
+Success 3 Mean        85.00 (  0.00%)       81.60 (  4.00%)       79.80 (  6.12%)
+Success 3 Max         86.00 (  0.00%)       82.00 (  4.65%)       82.00 (  4.65%)
+
+Patch 2:
+
+                             3.19-rc4              3.19-rc4              3.19-rc4
+                            7-nothp-1             7-nothp-2             7-nothp-3
+Success 1 Min         50.00 (  0.00%)       44.00 ( 12.00%)       39.00 ( 22.00%)
+Success 1 Mean        52.80 (  0.00%)       45.60 ( 13.64%)       42.40 ( 19.70%)
+Success 1 Max         55.00 (  0.00%)       46.00 ( 16.36%)       47.00 ( 14.55%)
+Success 2 Min         52.00 (  0.00%)       48.00 (  7.69%)       45.00 ( 13.46%)
+Success 2 Mean        53.40 (  0.00%)       49.80 (  6.74%)       48.80 (  8.61%)
+Success 2 Max         57.00 (  0.00%)       52.00 (  8.77%)       52.00 (  8.77%)
+Success 3 Min         84.00 (  0.00%)       81.00 (  3.57%)       79.00 (  5.95%)
+Success 3 Mean        85.00 (  0.00%)       82.40 (  3.06%)       79.60 (  6.35%)
+Success 3 Max         86.00 (  0.00%)       83.00 (  3.49%)       80.00 (  6.98%)
+
+Patch 3:
+                             3.19-rc4              3.19-rc4              3.19-rc4
+                            8-nothp-1             8-nothp-2             8-nothp-3
+Success 1 Min         46.00 (  0.00%)       44.00 (  4.35%)       42.00 (  8.70%)
+Success 1 Mean        50.20 (  0.00%)       45.60 (  9.16%)       44.00 ( 12.35%)
+Success 1 Max         52.00 (  0.00%)       47.00 (  9.62%)       47.00 (  9.62%)
+Success 2 Min         53.00 (  0.00%)       49.00 (  7.55%)       48.00 (  9.43%)
+Success 2 Mean        55.80 (  0.00%)       50.60 (  9.32%)       49.00 ( 12.19%)
+Success 2 Max         59.00 (  0.00%)       52.00 ( 11.86%)       51.00 ( 13.56%)
+Success 3 Min         84.00 (  0.00%)       80.00 (  4.76%)       79.00 (  5.95%)
+Success 3 Mean        85.40 (  0.00%)       81.60 (  4.45%)       80.40 (  5.85%)
+Success 3 Max         87.00 (  0.00%)       83.00 (  4.60%)       82.00 (  5.75%)
+
+While there's no improvement here, I consider the reduction in fragmentation
+events to be worthwhile on its own.  Patch 2 also seems to reduce scanning
+for free pages and migrations in compaction, suggesting it has somewhat less
+work to do:
+
+Patch 1:
+
+Compaction stalls                 4153        3959        3978
+Compaction success                1523        1441        1446
+Compaction failures               2630        2517        2531
+Page migrate success           4600827     4943120     5104348
+Page migrate failure             19763       16656       17806
+Compaction pages isolated      9597640    10305617    10653541
+Compaction migrate scanned    77828948    86533283    87137064
+Compaction free scanned      517758295   521312840   521462251
+Compaction cost                   5503        5932        6110
+
+Patch 2:
+
+Compaction stalls                 3800        3450        3518
+Compaction success                1421        1316        1317
+Compaction failures               2379        2134        2201
+Page migrate success           4160421     4502708     4752148
+Page migrate failure             19705       14340       14911
+Compaction pages isolated      8731983     9382374     9910043
+Compaction migrate scanned    98362797    96349194    98609686
+Compaction free scanned      496512560   469502017   480442545
+Compaction cost                   5173        5526        5811
+
+As with v2, /proc/pagetypeinfo appears unaffected with respect to numbers
+of unmovable and reclaimable pageblocks.
+
+Configuring the benchmark to allocate like a THP page fault (i.e. no sync
+compaction) gives much noisier results for iterations 2 and 3 after
+reboot.  This is not so surprising given that [1] offers lower improvements
+in this scenario due to fewer restarts after deferred compaction, which
+would change the compaction pivot.
+
+Baseline:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                    5-thp-1         5-thp-2         5-thp-3
+Page alloc extfrag event                                8148965     6227815     6646741
+Extfrag fragmenting                                     8147872     6227130     6646117
+Extfrag fragmenting for unmovable                         10324       12942       15975
+Extfrag fragmenting unmovable placed with movable          5972        8495       10907
+Extfrag fragmenting for reclaimable                         601        1707        2210
+Extfrag fragmenting reclaimable placed with movable         520        1570        2000
+Extfrag fragmenting for movable                         8136947     6212481     6627932
+
+Patch 1:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                    6-thp-1         6-thp-2         6-thp-3
+Page alloc extfrag event                                8345457     7574471     7020419
+Extfrag fragmenting                                     8343546     7573777     7019718
+Extfrag fragmenting for unmovable                         10256       18535       30716
+Extfrag fragmenting unmovable placed with movable          6893       11726       22181
+Extfrag fragmenting for reclaimable                         465        1208        1023
+Extfrag fragmenting reclaimable placed with movable         353         996         843
+Extfrag fragmenting for movable                         8332825     7554034     6987979
+
+Patch 2:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                    7-thp-1         7-thp-2         7-thp-3
+Page alloc extfrag event                                3512847     3020756     2891625
+Extfrag fragmenting                                     3511940     3020185     2891059
+Extfrag fragmenting for unmovable                          9017        6892        6191
+Extfrag fragmenting unmovable placed with movable          1524        3053        2435
+Extfrag fragmenting for reclaimable                         445        1081        1160
+Extfrag fragmenting reclaimable placed with movable         375         918         986
+Extfrag fragmenting for movable                         3502478     3012212     2883708
+
+Patch 3:
+                                                   3.19-rc4        3.19-rc4        3.19-rc4
+                                                    8-thp-1         8-thp-2         8-thp-3
+Page alloc extfrag event                                3181699     3082881     2674164
+Extfrag fragmenting                                     3180812     3082303     2673611
+Extfrag fragmenting for unmovable                          1201        4031        4040
+Extfrag fragmenting unmovable placed with movable           974        3611        3645
+Extfrag fragmenting for reclaimable                         478        1165        1294
+Extfrag fragmenting reclaimable placed with movable         387         985        1030
+Extfrag fragmenting for movable                         3179133     3077107     2668277
+
+The improvements for the first iteration are clear; the rest is much noisier
+and can appear like a regression for Patch 1.  Anyway, Patch 2 rectifies it.
+
+Allocation success rates are again unaffected, so there's no point in
+making this e-mail any longer.
+
+[1] http://marc.info/?l=linux-mm&m=142166196321125&w=2
+
+This patch (of 3):
+
+When __rmqueue_fallback() is called to allocate a page of order X, it will
+find a page of order Y >= X of a fallback migratetype, which is different
+from the desired migratetype.  With the help of try_to_steal_freepages(),
+it may change the migratetype (to the desired one) also of:
+
+1) all currently free pages in the pageblock containing the fallback page
+2) the fallback pageblock itself
+3) buddy pages created by splitting the fallback page (when Y > X)
+
+These decisions take the order Y into account, as well as the desired
+migratetype, with the goal of preventing multiple fallback allocations
+that could e.g.  distribute UNMOVABLE allocations among multiple
+pageblocks.
+
+Originally, the decision for 1) implied the decision for 3).  Commit
+47118af076f6 ("mm: mmzone: MIGRATE_CMA migration type added") changed that
+(probably unintentionally) so that the buddy pages in case 3) are always
+changed to the desired migratetype, except for CMA pageblocks.
+
+Commit fef903efcf0c ("mm/page_allo.c: restructure free-page stealing code
+and fix a bug") did some refactoring and added a comment that the case of
+3) is intended.  Commit 0cbef29a7821 ("mm: __rmqueue_fallback() should
+respect pageblock type") removed the comment and tried to restore the
+original behavior where 1) implies 3), but due to the previous
+refactoring, the result is instead that only 2) implies 3) - and the
+conditions for 2) are less frequently met than conditions for 1).  This
+may increase fragmentation in situations where the code decides to steal
+all free pages from the pageblock (case 1)), but then gives back the buddy
+pages produced by splitting.
+
+This patch restores the original intended logic where 1) implies 3).
+During testing with stress-highalloc from mmtests, this has shown to
+decrease the number of events where UNMOVABLE and RECLAIMABLE allocations
+steal from MOVABLE pageblocks, which can lead to permanent fragmentation.
+In some cases it has increased the number of events when MOVABLE
+allocations steal from UNMOVABLE or RECLAIMABLE pageblocks, but these are
+fixable by sync compaction and thus less harmful.
+
+Note that evaluation has shown that the behavior introduced by
+47118af076f6 for buddy pages in case 3) is actually even better than the
+original logic, so the following patch will introduce it properly once
+again.  For stable backports of this patch it thus makes sense to only fix
+versions containing 0cbef29a7821.
+
+[iamjoonsoo.kim@lge.com: tracepoint fix]
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/trace/events/kmem.h |    7 ++++---
+ mm/page_alloc.c             |   12 +++++-------
+ 2 files changed, 9 insertions(+), 10 deletions(-)
+
+--- a/include/trace/events/kmem.h
++++ b/include/trace/events/kmem.h
+@@ -268,11 +268,11 @@ TRACE_EVENT(mm_page_alloc_extfrag,
+       TP_PROTO(struct page *page,
+               int alloc_order, int fallback_order,
+-              int alloc_migratetype, int fallback_migratetype, int new_migratetype),
++              int alloc_migratetype, int fallback_migratetype),
+       TP_ARGS(page,
+               alloc_order, fallback_order,
+-              alloc_migratetype, fallback_migratetype, new_migratetype),
++              alloc_migratetype, fallback_migratetype),
+       TP_STRUCT__entry(
+               __field(        struct page *,  page                    )
+@@ -289,7 +289,8 @@ TRACE_EVENT(mm_page_alloc_extfrag,
+               __entry->fallback_order         = fallback_order;
+               __entry->alloc_migratetype      = alloc_migratetype;
+               __entry->fallback_migratetype   = fallback_migratetype;
+-              __entry->change_ownership       = (new_migratetype == alloc_migratetype);
++              __entry->change_ownership       = (alloc_migratetype ==
++                                      get_pageblock_migratetype(page));
+       ),
+       TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1081,8 +1081,8 @@ static void change_pageblock_range(struc
+  * nor move CMA pages to different free lists. We don't want unmovable pages
+  * to be allocated from MIGRATE_CMA areas.
+  *
+- * Returns the new migratetype of the pageblock (or the same old migratetype
+- * if it was unchanged).
++ * Returns the allocation migratetype if free pages were stolen, or the
++ * fallback migratetype if it was decided not to steal.
+  */
+ static int try_to_steal_freepages(struct zone *zone, struct page *page,
+                                 int start_type, int fallback_type)
+@@ -1113,12 +1113,10 @@ static int try_to_steal_freepages(struct
+               /* Claim the whole block if over half of it is free */
+               if (pages >= (1 << (pageblock_order-1)) ||
+-                              page_group_by_mobility_disabled) {
+-
++                              page_group_by_mobility_disabled)
+                       set_pageblock_migratetype(page, start_type);
+-                      return start_type;
+-              }
++              return start_type;
+       }
+       return fallback_type;
+@@ -1170,7 +1168,7 @@ __rmqueue_fallback(struct zone *zone, un
+                       set_freepage_migratetype(page, new_type);
+                       trace_mm_page_alloc_extfrag(page, order, current_order,
+-                              start_migratetype, migratetype, new_type);
++                              start_migratetype, migratetype);
+                       return page;
+               }
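
Putting the hunks above together: the restored rule is that once the pageblock's free pages have been stolen (case 1), try_to_steal_freepages() always reports start_type back, so that __rmqueue_fallback() also tags the buddy pages created by splitting the fallback page (case 3) with the desired migratetype.  A paraphrased sketch of the patched function (names and conditions simplified from the 3.14-era source, CMA special-casing omitted; not an exact copy):

static int try_to_steal_freepages(struct zone *zone, struct page *page,
				  int start_type, int fallback_type)
{
	int current_order = page_order(page);

	/* A whole pageblock or more: take ownership outright (case 2). */
	if (current_order >= pageblock_order) {
		change_pageblock_range(page, current_order, start_type);
		return start_type;
	}

	if (current_order >= pageblock_order / 2 ||
	    start_type == MIGRATE_RECLAIMABLE ||
	    page_group_by_mobility_disabled) {
		int pages = move_freepages_block(zone, page, start_type);

		/* Claim the whole block if over half of it is free (case 2). */
		if (pages >= (1 << (pageblock_order - 1)) ||
		    page_group_by_mobility_disabled)
			set_pageblock_migratetype(page, start_type);

		/*
		 * Case 1 happened above, so return start_type regardless;
		 * the caller then sets the split buddy pages' freepage
		 * migratetype to it (case 3).
		 */
		return start_type;
	}

	return fallback_type;
}
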
diff --git a/queue-3.14/series b/queue-3.14/series
index 205b28b2fdd9bc2722bbce522dc13c3d2e0353f8..59194657639e834f83394abef05603a63be19b07 100644 (file)
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -15,3 +15,10 @@ usb-plusb-add-support-for-national-instruments-host-to-host-cable.patch
 udp-only-allow-ufo-for-packets-from-sock_dgram-sockets.patch
 net-ping-return-eafnosupport-when-appropriate.patch
 team-don-t-traverse-port-list-using-rcu-in-team_set_mac_address.patch
+mm-hugetlb-add-migration-hwpoisoned-entry-check-in-hugetlb_change_protection.patch
+mm-hugetlb-add-migration-entry-check-in-__unmap_hugepage_range.patch
+mm-when-stealing-freepages-also-take-pages-created-by-splitting-buddy-page.patch
+mm-mmap.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch
+mm-nommu.c-fix-arithmetic-overflow-in-__vm_enough_memory.patch
+mm-compaction-fix-wrong-order-check-in-compact_finished.patch
+mm-memory.c-actually-remap-enough-memory.patch