--- /dev/null
+From 70feee0e1ef331b22cc51f383d532a0d043fbdcc Mon Sep 17 00:00:00 2001
+From: Yisheng Xie <xieyisheng1@huawei.com>
+Date: Fri, 2 Jun 2017 14:46:43 -0700
+Subject: mlock: fix mlock count can not decrease in race condition
+
+From: Yisheng Xie <xieyisheng1@huawei.com>
+
+commit 70feee0e1ef331b22cc51f383d532a0d043fbdcc upstream.
+
+Kefeng reported that when running the following test, the Mlocked count
+in /proc/meminfo increases permanently:
+
+ [1] testcase
+ linux:~ # cat test_mlockal
+ grep Mlocked /proc/meminfo
+ for j in `seq 0 10`
+ do
+         for i in `seq 4 15`
+         do
+                 ./p_mlockall >> log &
+         done
+         sleep 0.2
+ done
+ # wait some time to let the mlock counter decrease; 5s may not be enough
+ sleep 5
+ grep Mlocked /proc/meminfo
+
+ linux:~ # cat p_mlockall.c
+ #include <sys/mman.h>
+ #include <stdlib.h>
+ #include <stdio.h>
+
+ #define SPACE_LEN 4096
+
+ int main(int argc, char **argv)
+ {
+         int ret;
+         void *adr = malloc(SPACE_LEN);
+         if (!adr)
+                 return -1;
+
+         ret = mlockall(MCL_CURRENT | MCL_FUTURE);
+         printf("mlockall ret = %d\n", ret);
+
+         ret = munlockall();
+         printf("munlockall ret = %d\n", ret);
+
+         free(adr);
+         return 0;
+ }
+
+In __munlock_pagevec() we should decrement NR_MLOCK for each page where
+we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch
+NR_MLOCK zone state updates") has introduced a bug where we don't
+decrement NR_MLOCK for pages where we clear the flag, but fail to
+isolate them from the lru list (e.g. when the pages are on some other
+cpu's percpu pagevec). Since PageMlocked stays cleared, the NR_MLOCK
+accounting gets permanently disrupted by this.
+
+Fix it by counting the number of pages whose PageMlocked flag is cleared.
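+
+For instance, suppose a pagevec holds 14 pages: 3 already have
+PageMlocked clear, 11 get the flag cleared here, and 2 of those 11 fail
+isolation because they sit on another CPU's percpu pagevec.  Before the
+fix the update was
+
+    delta_munlocked = -14 + pagevec_count(&pvec_putback)
+                    = -14 + (3 + 2) = -9
+
+so NR_MLOCK dropped by 9 even though 11 pages were unlocked, leaving the
+counter 2 pages too high forever.  With the fix, delta_munlocked starts
+at -14 and is incremented only for the 3 pages that were already clear,
+giving the expected -11.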
+
+Fixes: 1ebb7cc6a583 ("mm: munlock: batch NR_MLOCK zone state updates")
+Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com
+Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
+Reported-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Tested-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Joern Engel <joern@logfs.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Xishi Qiu <qiuxishi@huawei.com>
+Cc: zhongjiang <zhongjiang@huawei.com>
+Cc: Hanjun Guo <guohanjun@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mlock.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -277,7 +277,7 @@ static void __munlock_pagevec(struct pag
+ {
+ int i;
+ int nr = pagevec_count(pvec);
+- int delta_munlocked;
++ int delta_munlocked = -nr;
+ struct pagevec pvec_putback;
+ int pgrescued = 0;
+
+@@ -297,6 +297,8 @@ static void __munlock_pagevec(struct pag
+ continue;
+ else
+ __munlock_isolation_failed(page);
++ } else {
++ delta_munlocked++;
+ }
+
+ /*
+@@ -308,7 +310,6 @@ static void __munlock_pagevec(struct pag
+ pagevec_add(&pvec_putback, pvec->pages[i]);
+ pvec->pages[i] = NULL;
+ }
+- delta_munlocked = -nr + pagevec_count(&pvec_putback);
+ __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
+ spin_unlock_irq(&zone->lru_lock);
+
--- /dev/null
+From 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Fri, 2 Jun 2017 14:46:49 -0700
+Subject: mm: consider memblock reservations for deferred memory initialization sizing
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 upstream.
+
+We have seen an early OOM killer invocation on ppc64 systems with
+crashkernel=4096M:
+
+ kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0
+ kthreadd cpuset=/ mems_allowed=7
+ CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1
+ Call Trace:
+ dump_stack+0xb0/0xf0 (unreliable)
+ dump_header+0xb0/0x258
+ out_of_memory+0x5f0/0x640
+ __alloc_pages_nodemask+0xa8c/0xc80
+ kmem_getpages+0x84/0x1a0
+ fallback_alloc+0x2a4/0x320
+ kmem_cache_alloc_node+0xc0/0x2e0
+ copy_process.isra.25+0x260/0x1b30
+ _do_fork+0x94/0x470
+ kernel_thread+0x48/0x60
+ kthreadd+0x264/0x330
+ ret_from_kernel_thread+0x5c/0xa4
+
+ Mem-Info:
+ active_anon:0 inactive_anon:0 isolated_anon:0
+ active_file:0 inactive_file:0 isolated_file:0
+ unevictable:0 dirty:0 writeback:0 unstable:0
+ slab_reclaimable:5 slab_unreclaimable:73
+ mapped:0 shmem:0 pagetables:0 bounce:0
+ free:0 free_pcp:0 free_cma:0
+ Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
+ lowmem_reserve[]: 0 0 0 0
+ Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
+ 0 total pagecache pages
+ 0 pages in swap cache
+ Swap cache stats: add 0, delete 0, find 0/0
+ Free swap = 0kB
+ Total swap = 0kB
+ 819200 pages RAM
+ 0 pages HighMem/MovableOnly
+ 817481 pages reserved
+ 0 pages cma reserved
+ 0 pages hwpoisoned
+
+The reason is that the managed memory is too low (only 110MB) while the
+rest of the 50GB is still waiting for the deferred initialization to be
+done. update_defer_init estimates the initial amount of memory to
+initialize to be at least 2GB, but it doesn't consider any memory
+already allocated in that range. In this particular case we've had
+
+ Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB)
+
+so the low 2GB is mostly depleted.
+
+Fix this by considering memblock allocations in the initial static
+initialization estimation. Move the max_initialise computation into
+reset_deferred_meminit and implement a simple
+memblock_reserved_memory_within helper which iterates over all reserved
+memblock regions and sums the sizes of those that start below the given
+end address. The cumulative size is then added on top of the initial
+estimation. This is still not ideal because reset_deferred_meminit
+doesn't consider holes, and a reservation might lie above the initial
+estimation, which we ignore, but let's keep the logic simple until we
+really need to handle more complicated cases.
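+
+As a rough worked example for the report above (819200 pages of RAM for
+~50GB means 64K pages, i.e. PAGE_SHIFT == 16, and crashkernel reserves
+4096MB low in the node), the intended estimate works out to roughly:
+
+    2GB floor:        2UL << (30 - 16)            = 32768 pages (2GB)
+    spanned >> 8:     819200 >> 8                 =  3200 pages (200MB)
+    max_initialise  = max(32768, 3200)            = 32768 pages
+    reserved_lowmem ~= 4096MB of crashkernel      = 65536 pages
+    static_init_size = min(32768 + 65536, 819200) = 98304 pages (~6GB)
+
+so the eagerly initialised part of the node now extends well past the
+crashkernel reservation instead of being almost entirely consumed by it.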
+
+Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
+Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/memblock.h | 8 ++++++++
+ include/linux/mmzone.h | 1 +
+ mm/memblock.c | 23 +++++++++++++++++++++++
+ mm/page_alloc.c | 24 ++++++++++++++++++++++--
+ 4 files changed, 54 insertions(+), 2 deletions(-)
+
+--- a/include/linux/memblock.h
++++ b/include/linux/memblock.h
+@@ -408,11 +408,19 @@ static inline void early_memtest(phys_ad
+ }
+ #endif
+
++extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
++ phys_addr_t end_addr);
+ #else
+ static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
+ {
+ return 0;
+ }
++
++static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
++ phys_addr_t end_addr)
++{
++ return 0;
++}
+
+ #endif /* CONFIG_HAVE_MEMBLOCK */
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -688,6 +688,7 @@ typedef struct pglist_data {
+ * is the first PFN that needs to be initialised.
+ */
+ unsigned long first_deferred_pfn;
++ unsigned long static_init_size;
+ #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+ } pg_data_t;
+
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -1634,6 +1634,29 @@ static void __init_memblock memblock_dum
+ }
+ }
+
++extern unsigned long __init_memblock
++memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
++{
++ struct memblock_region *rgn;
++ unsigned long size = 0;
++ int idx;
++
++ for_each_memblock_type((&memblock.reserved), rgn) {
++ phys_addr_t start, end;
++
++ if (rgn->base + rgn->size < start_addr)
++ continue;
++ if (rgn->base > end_addr)
++ continue;
++
++ start = rgn->base;
++ end = start + rgn->size;
++ size += end - start;
++ }
++
++ return size;
++}
++
+ void __init_memblock __memblock_dump_all(void)
+ {
+ pr_info("MEMBLOCK configuration:\n");
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -269,6 +269,26 @@ int page_group_by_mobility_disabled __re
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ static inline void reset_deferred_meminit(pg_data_t *pgdat)
+ {
++ unsigned long max_initialise;
++ unsigned long reserved_lowmem;
++
++ /*
++ * Initialise at least 2G of a node but also take into account that
++ * two large system hashes that can take up 1GB for 0.25TB/node.
++ */
++ max_initialise = max(2UL << (30 - PAGE_SHIFT),
++ (pgdat->node_spanned_pages >> 8));
++
++ /*
++ * Compensate the all the memblock reservations (e.g. crash kernel)
++ * from the initial estimation to make sure we will initialize enough
++ * memory to boot.
++ */
++ reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
++ pgdat->node_start_pfn + max_initialise);
++ max_initialise += reserved_lowmem;
++
++ pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
+ pgdat->first_deferred_pfn = ULONG_MAX;
+ }
+
+@@ -305,7 +325,7 @@ static inline bool update_defer_init(pg_
+
+ /* Initialise at least 2G of the highest zone */
+ (*nr_initialised)++;
+- if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
++ if ((*nr_initialised > pgdat->static_init_size) &&
+ (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+ pgdat->first_deferred_pfn = pfn;
+ return false;
+@@ -5343,7 +5363,6 @@ void __paginginit free_area_init_node(in
+ /* pg_data_t should be reset to zero when it's allocated */
+ WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
+
+- reset_deferred_meminit(pgdat);
+ pgdat->node_id = nid;
+ pgdat->node_start_pfn = node_start_pfn;
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+@@ -5362,6 +5381,7 @@ void __paginginit free_area_init_node(in
+ (unsigned long)pgdat->node_mem_map);
+ #endif
+
++ reset_deferred_meminit(pgdat);
+ free_area_init_core(pgdat);
+ }
+
--- /dev/null
+From 30809f559a0d348c2dfd7ab05e9a451e2384962e Mon Sep 17 00:00:00 2001
+From: Punit Agrawal <punit.agrawal@arm.com>
+Date: Fri, 2 Jun 2017 14:46:40 -0700
+Subject: mm/migrate: fix refcount handling when !hugepage_migration_supported()
+
+From: Punit Agrawal <punit.agrawal@arm.com>
+
+commit 30809f559a0d348c2dfd7ab05e9a451e2384962e upstream.
+
+On failing to migrate a page, soft_offline_huge_page() performs the
+necessary update to the hugepage ref-count.
+
+But when !hugepage_migration_supported(), unmap_and_move_huge_page()
+also decrements the page ref-count for the hugepage. The combined
+behaviour leaves the ref-count in an inconsistent state.
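+
+Roughly, the same hugepage reference ends up being dropped twice (a
+simplified, paraphrased sketch of the pre-fix flow, not verbatim kernel
+code):
+
+    unmap_and_move_huge_page():
+        if (!hugepage_migration_supported(...))
+            /* bails out early and already drops the hugepage ref */
+
+    soft_offline_huge_page(), after migrate_pages() fails:
+        putback_active_hugepage(hpage);    /* drops the ref again */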
+
+This leads to soft lockups when running the overcommitted hugepage test
+from mce-tests suite.
+
+ Soft offlining pfn 0x83ed600 at process virtual address 0x400000000000
+ soft offline: 0x83ed600: migration failed 1, type 1fffc00000008008 (uptodate|head)
+ INFO: rcu_preempt detected stalls on CPUs/tasks:
+ Tasks blocked on level-0 rcu_node (CPUs 0-7): P2715
+ (detected by 7, t=5254 jiffies, g=963, c=962, q=321)
+ thugetlb_overco R running task 0 2715 2685 0x00000008
+ Call trace:
+ dump_backtrace+0x0/0x268
+ show_stack+0x24/0x30
+ sched_show_task+0x134/0x180
+ rcu_print_detail_task_stall_rnp+0x54/0x7c
+ rcu_check_callbacks+0xa74/0xb08
+ update_process_times+0x34/0x60
+ tick_sched_handle.isra.7+0x38/0x70
+ tick_sched_timer+0x4c/0x98
+ __hrtimer_run_queues+0xc0/0x300
+ hrtimer_interrupt+0xac/0x228
+ arch_timer_handler_phys+0x3c/0x50
+ handle_percpu_devid_irq+0x8c/0x290
+ generic_handle_irq+0x34/0x50
+ __handle_domain_irq+0x68/0xc0
+ gic_handle_irq+0x5c/0xb0
+
+Address this by changing the putback_active_hugepage() in
+soft_offline_huge_page() to putback_movable_pages().
+
+This only triggers on systems that enable memory failure handling
+(ARCH_SUPPORTS_MEMORY_FAILURE) but not hugepage migration
+(!ARCH_ENABLE_HUGEPAGE_MIGRATION).
+
+I imagine this wasn't triggered as there aren't many systems running
+this configuration.
+
+[akpm@linux-foundation.org: remove dead comment, per Naoya]
+Link: http://lkml.kernel.org/r/20170525135146.32011-1-punit.agrawal@arm.com
+Reported-by: Manoj Iyer <manoj.iyer@canonical.com>
+Tested-by: Manoj Iyer <manoj.iyer@canonical.com>
+Suggested-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Wanpeng Li <wanpeng.li@hotmail.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1619,12 +1619,8 @@ static int soft_offline_huge_page(struct
+ if (ret) {
+ pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
+ pfn, ret, page->flags);
+- /*
+- * We know that soft_offline_huge_page() tries to migrate
+- * only one hugepage pointed to by hpage, so we need not
+- * run through the pagelist here.
+- */
+- putback_active_hugepage(hpage);
++ if (!list_empty(&pagelist))
++ putback_movable_pages(&pagelist);
+ if (ret > 0)
+ ret = -EIO;
+ } else {
--- /dev/null
+From 4d071c3238987325b9e50e33051a40d1cce311cc Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 23 May 2017 14:18:17 -0500
+Subject: PCI/PM: Add needs_resume flag to avoid suspend complete optimization
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 4d071c3238987325b9e50e33051a40d1cce311cc upstream.
+
+Some drivers - like i915 - may not support the system suspend direct
+complete optimization due to differences in their runtime and system
+suspend sequence. Add a flag that, when set, resumes the device before
+calling the driver's system suspend handlers, which effectively
+disables the optimization.
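+
+A driver that cannot cope with direct complete can then opt out by
+setting the flag on its device, e.g. from its probe routine (an
+illustrative sketch, not part of this patch):
+
+    /* pdev is the struct pci_dev the driver is bound to */
+    pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME;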
+
+Needed by a future patch fixing suspend/resume on i915.
+
+Suggested by Rafael.
+
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/pci.c | 3 ++-
+ include/linux/pci.h | 5 +++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -2079,7 +2079,8 @@ bool pci_dev_keep_suspended(struct pci_d
+
+ if (!pm_runtime_suspended(dev)
+ || pci_target_state(pci_dev) != pci_dev->current_state
+- || platform_pci_need_resume(pci_dev))
++ || platform_pci_need_resume(pci_dev)
++ || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME))
+ return false;
+
+ /*
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -182,6 +182,11 @@ enum pci_dev_flags {
+ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
+ /* Get VPD from function 0 VPD */
+ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
++ /*
++ * Resume before calling the driver's system suspend hooks, disabling
++ * the direct_complete optimization.
++ */
++ PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+ };
+
+ enum pci_irq_reroute_variant {
alsa-hda-apply-stac_9200_dell_m22-quirk-for-dell-latitude-d430.patch
slub-memcg-cure-the-brainless-abuse-of-sysfs-attributes.patch
drm-gma500-psb-actually-use-vbt-mode-when-it-is-found.patch
+mm-migrate-fix-refcount-handling-when-hugepage_migration_supported.patch
+mlock-fix-mlock-count-can-not-decrease-in-race-condition.patch
+pci-pm-add-needs_resume-flag-to-avoid-suspend-complete-optimization.patch
+mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch