--- /dev/null
+From f231fe4235e22e18d847e05cbe705deaca56580a Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Fri, 18 Oct 2019 20:20:05 -0700
+Subject: hugetlbfs: don't access uninitialized memmaps in pfn_range_valid_gigantic()
+
+From: David Hildenbrand <david@redhat.com>
+
+commit f231fe4235e22e18d847e05cbe705deaca56580a upstream.
+
+Uninitialized memmaps contain garbage and in the worst case trigger
+kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get
+touched.
+
+Let's make sure that we only consider online memory (managed by the
+buddy) that has initialized memmaps. ZONE_DEVICE is not applicable.
+
+page_zone() will call page_to_nid(), which will trigger
+VM_BUG_ON_PGFLAGS(PagePoisoned(page), page) with CONFIG_PAGE_POISONING
+and CONFIG_DEBUG_VM_PGFLAGS when called on uninitialized memmaps. This
+can be the case when an offline memory block (e.g., never onlined) is
+spanned by a zone.
+
+Note: As explained by Michal in [1], alloc_contig_range() will verify
+the range. So it boils down to the wrong access in this function.
+
+[1] http://lkml.kernel.org/r/20180423000943.GO17484@dhcp22.suse.cz
+
+Link: http://lkml.kernel.org/r/20191015120717.4858-1-david@redhat.com
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319]
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: Michal Hocko <mhocko@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: <stable@vger.kernel.org> [4.13+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1084,11 +1084,10 @@ static bool pfn_range_valid_gigantic(str
+ struct page *page;
+
+ for (i = start_pfn; i < end_pfn; i++) {
+- if (!pfn_valid(i))
++ page = pfn_to_online_page(i);
++ if (!page)
+ return false;
+
+- page = pfn_to_page(i);
+-
+ if (page_zone(page) != z)
+ return false;
+
--- /dev/null
+From a2e9a5afce080226edbf1882d63d99bf32070e9e Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 14 Oct 2019 14:12:07 -0700
+Subject: mm, compaction: fix wrong pfn handling in __reset_isolation_pfn()
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit a2e9a5afce080226edbf1882d63d99bf32070e9e upstream.
+
+Florian and Dave reported [1] a NULL pointer dereference in
+__reset_isolation_pfn(). While the exact cause is unclear, staring at
+the code revealed two bugs, which might be related.
+
+One bug is that if a zone starts in the middle of a pageblock, block_page
+might correspond to a different pfn than block_pfn, and then the
+pfn_valid_within() checks will check different pfns than those accessed
+via struct page. This might result in accessing an uninitialized page in
+CONFIG_HOLES_IN_ZONE configs.
+
+The other bug is that end_page refers to the first page of the next
+pageblock and not the last page of the current pageblock. The online and
+valid check is then wrong, and with sections the while (page < end_page)
+loop might wander off the actual struct page arrays.
+
+[1] https://lore.kernel.org/linux-xfs/87o8z1fvqu.fsf@mid.deneb.enyo.de/
+
+Link: http://lkml.kernel.org/r/20191008152915.24704-1-vbabka@suse.cz
+Fixes: 6b0868c820ff ("mm/compaction.c: correct zone boundary handling when resetting pageblock skip hints")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Florian Weimer <fw@deneb.enyo.de>
+Reported-by: Dave Chinner <david@fromorbit.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone,
+
+ /* Ensure the start of the pageblock or zone is online and valid */
+ block_pfn = pageblock_start_pfn(pfn);
+- block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
++ block_pfn = max(block_pfn, zone->zone_start_pfn);
++ block_page = pfn_to_online_page(block_pfn);
+ if (block_page) {
+ page = block_page;
+ pfn = block_pfn;
+ }
+
+ /* Ensure the end of the pageblock or zone is online and valid */
+- block_pfn += pageblock_nr_pages;
++ block_pfn = pageblock_end_pfn(pfn) - 1;
+ block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
+ end_page = pfn_to_online_page(block_pfn);
+ if (!end_page)
+@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone,
+
+ page += (1 << PAGE_ALLOC_COSTLY_ORDER);
+ pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
+- } while (page < end_page);
++ } while (page <= end_page);
+
+ return false;
+ }
--- /dev/null
+From f3057ad767542be7bbac44e548cb44017178a163 Mon Sep 17 00:00:00 2001
+From: Mike Rapoport <rppt@linux.ibm.com>
+Date: Fri, 18 Oct 2019 20:20:01 -0700
+Subject: mm: memblock: do not enforce current limit for memblock_phys* family
+
+From: Mike Rapoport <rppt@linux.ibm.com>
+
+commit f3057ad767542be7bbac44e548cb44017178a163 upstream.
+
+Until commit 92d12f9544b7 ("memblock: refactor internal allocation
+functions") the maximal address for memblock allocations was forced to
+memblock.current_limit only for the allocation functions returning a
+virtual address. The changes introduced by that commit moved the limit
+enforcement into the allocation core, and as a result the allocation
+functions returning a physical address also started to limit allocations
+to memblock.current_limit.
+
+This caused breakage of etnaviv GPU driver:
+
+ etnaviv etnaviv: bound 130000.gpu (ops gpu_ops)
+ etnaviv etnaviv: bound 134000.gpu (ops gpu_ops)
+ etnaviv etnaviv: bound 2204000.gpu (ops gpu_ops)
+ etnaviv-gpu 130000.gpu: model: GC2000, revision: 5108
+ etnaviv-gpu 130000.gpu: command buffer outside valid memory window
+ etnaviv-gpu 134000.gpu: model: GC320, revision: 5007
+ etnaviv-gpu 134000.gpu: command buffer outside valid memory window
+ etnaviv-gpu 2204000.gpu: model: GC355, revision: 1215
+ etnaviv-gpu 2204000.gpu: Ignoring GPU with VG and FE2.0
+
+Restore the behaviour of the memblock_phys* family so that these
+functions will not enforce memblock.current_limit.
+
+Link: http://lkml.kernel.org/r/1570915861-17633-1-git-send-email-rppt@kernel.org
+Fixes: 92d12f9544b7 ("memblock: refactor internal allocation functions")
+Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
+Reported-by: Adam Ford <aford173@gmail.com>
+Tested-by: Adam Ford <aford173@gmail.com> [imx6q-logicpd]
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Fabio Estevam <festevam@gmail.com>
+Cc: Lucas Stach <l.stach@pengutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memblock.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -1356,9 +1356,6 @@ static phys_addr_t __init memblock_alloc
+ align = SMP_CACHE_BYTES;
+ }
+
+- if (end > memblock.current_limit)
+- end = memblock.current_limit;
+-
+ again:
+ found = memblock_find_in_range_node(size, align, start, end, nid,
+ flags);
+@@ -1469,6 +1466,9 @@ static void * __init memblock_alloc_inte
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, nid);
+
++ if (max_addr > memblock.current_limit)
++ max_addr = memblock.current_limit;
++
+ alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
+
+ /* retry allocation without lower limit */
--- /dev/null
+From b11edebbc967ebf5c55b8f9e1d5bb6d68ec3a7fd Mon Sep 17 00:00:00 2001
+From: Honglei Wang <honglei.wang@oracle.com>
+Date: Fri, 18 Oct 2019 20:19:58 -0700
+Subject: mm: memcg: get number of pages on the LRU list in memcgroup base on lru_zone_size
+
+From: Honglei Wang <honglei.wang@oracle.com>
+
+commit b11edebbc967ebf5c55b8f9e1d5bb6d68ec3a7fd upstream.
+
+Commit 1a61ab8038e72 ("mm: memcontrol: replace zone summing with
+lruvec_page_state()") made lruvec_page_state() use per-cpu counters
+instead of calculating it directly from lru_zone_size, with the idea
+that this would be more efficient.
+
+Tim has reported that this is not really the case for their database
+benchmark, which shows the opposite result: lruvec_page_state is taking
+up a huge chunk of CPU cycles (about 25% of the system time, which is
+roughly 7% of total CPU cycles) on 5.3 kernels. The workload is running
+on a large machine (96 CPUs), has many cgroups (500) and is heavily
+direct reclaim bound.
+
+Tim Chen said:
+
+: The problem can also be reproduced by running simple multi-threaded
+: pmbench benchmark with a fast Optane SSD swap (see profile below).
+:
+:
+: 6.15% 3.08% pmbench [kernel.vmlinux] [k] lruvec_lru_size
+: |
+: |--3.07%--lruvec_lru_size
+: | |
+: | |--2.11%--cpumask_next
+: | | |
+: | | --1.66%--find_next_bit
+: | |
+: | --0.57%--call_function_interrupt
+: | |
+: | --0.55%--smp_call_function_interrupt
+: |
+: |--1.59%--0x441f0fc3d009
+: | _ops_rdtsc_init_base_freq
+: | access_histogram
+: | page_fault
+: | __do_page_fault
+: | handle_mm_fault
+: | __handle_mm_fault
+: | |
+: | --1.54%--do_swap_page
+: | swapin_readahead
+: | swap_cluster_readahead
+: | |
+: | --1.53%--read_swap_cache_async
+: | __read_swap_cache_async
+: | alloc_pages_vma
+: | __alloc_pages_nodemask
+: | __alloc_pages_slowpath
+: | try_to_free_pages
+: | do_try_to_free_pages
+: | shrink_node
+: | shrink_node_memcg
+: | |
+: | |--0.77%--lruvec_lru_size
+: | |
+: | --0.76%--inactive_list_is_low
+: | |
+: | --0.76%--lruvec_lru_size
+: |
+: --1.50%--measure_read
+: page_fault
+: __do_page_fault
+: handle_mm_fault
+: __handle_mm_fault
+: do_swap_page
+: swapin_readahead
+: swap_cluster_readahead
+: |
+: --1.48%--read_swap_cache_async
+: __read_swap_cache_async
+: alloc_pages_vma
+: __alloc_pages_nodemask
+: __alloc_pages_slowpath
+: try_to_free_pages
+: do_try_to_free_pages
+: shrink_node
+: shrink_node_memcg
+: |
+: |--0.75%--inactive_list_is_low
+: | |
+: | --0.75%--lruvec_lru_size
+: |
+: --0.73%--lruvec_lru_size
+
+The likely culprit is the cache traffic the lruvec_page_state_local
+generates. Dave Hansen says:
+
+: I was thinking purely of the cache footprint. If it's reading
+: pn->lruvec_stat_local->count[idx] is three separate cachelines, so 192
+: bytes of cache *96 CPUs = 18k of data, mostly read-only. 1 cgroup would
+: be 18k of data for the whole system and the caching would be pretty
+: efficient and all 18k would probably survive a tight page fault loop in
+: the L1. 500 cgroups would be ~90k of data per CPU thread which doesn't
+: fit in the L1 and probably wouldn't survive a tight page fault loop if
+: both logical threads were banging on different cgroups.
+:
+: It's just a theory, but it's why I noted the number of cgroups when I
+: initially saw this show up in profiles
+
+Fix the regression by partially reverting the said commit and
+calculating the lru size explicitly.
+
+Link: http://lkml.kernel.org/r/20190905071034.16822-1-honglei.wang@oracle.com
+Fixes: 1a61ab8038e72 ("mm: memcontrol: replace zone summing with lruvec_page_state()")
+Signed-off-by: Honglei Wang <honglei.wang@oracle.com>
+Reported-by: Tim Chen <tim.c.chen@linux.intel.com>
+Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
+Tested-by: Tim Chen <tim.c.chen@linux.intel.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: <stable@vger.kernel.org> [5.2+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -354,12 +354,13 @@ unsigned long zone_reclaimable_pages(str
+ */
+ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
+ {
+- unsigned long lru_size;
++ unsigned long lru_size = 0;
+ int zid;
+
+- if (!mem_cgroup_disabled())
+- lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
+- else
++ if (!mem_cgroup_disabled()) {
++ for (zid = 0; zid < MAX_NR_ZONES; zid++)
++ lru_size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
++ } else
+ lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
+
+ for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
--- /dev/null
+From b749ecfaf6c53ce79d6ab66afd2fc34189a073b1 Mon Sep 17 00:00:00 2001
+From: Roman Gushchin <guro@fb.com>
+Date: Fri, 18 Oct 2019 20:19:44 -0700
+Subject: mm: memcg/slab: fix panic in __free_slab() caused by premature memcg pointer release
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Roman Gushchin <guro@fb.com>
+
+commit b749ecfaf6c53ce79d6ab66afd2fc34189a073b1 upstream.
+
+Karsten reported the following panic in __free_slab() happening on a s390x
+machine:
+
+ Unable to handle kernel pointer dereference in virtual kernel address space
+ Failing address: 0000000000000000 TEID: 0000000000000483
+ Fault in home space mode while using kernel ASCE.
+ AS:00000000017d4007 R3:000000007fbd0007 S:000000007fbff000 P:000000000000003d
+ Oops: 0004 ilc:3 [#1] PREEMPT SMP
+ Modules linked in: tcp_diag inet_diag xt_tcpudp ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_at nf_nat
+ CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-05872-g6133e3e4bada-dirty #14
+ Hardware name: IBM 2964 NC9 702 (z/VM 6.4.0)
+ Krnl PSW : 0704d00180000000 00000000003cadb6 (__free_slab+0x686/0x6b0)
+ R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3
+ Krnl GPRS: 00000000f3a32928 0000000000000000 000000007fbf5d00 000000000117c4b8
+ 0000000000000000 000000009e3291c1 0000000000000000 0000000000000000
+ 0000000000000003 0000000000000008 000000002b478b00 000003d080a97600
+ 0000000000000003 0000000000000008 000000002b478b00 000003d080a97600
+ 000000000117ba00 000003e000057db0 00000000003cabcc 000003e000057c78
+ Krnl Code: 00000000003cada6: e310a1400004 lg %r1,320(%r10)
+ 00000000003cadac: c0e50046c286 brasl %r14,ca32b8
+ #00000000003cadb2: a7f4fe36 brc 15,3caa1e
+ >00000000003cadb6: e32060800024 stg %r2,128(%r6)
+ 00000000003cadbc: a7f4fd9e brc 15,3ca8f8
+ 00000000003cadc0: c0e50046790c brasl %r14,c99fd8
+ 00000000003cadc6: a7f4fe2c brc 15,3caa1e
+ 00000000003cadca: ecb1ffff00d9 aghik %r11,%r1,-1
+ Call Trace:
+ (<00000000003cabcc> __free_slab+0x49c/0x6b0)
+ <00000000001f5886> rcu_core+0x5a6/0x7e0
+ <0000000000ca2dea> __do_softirq+0xf2/0x5c0
+ <0000000000152644> irq_exit+0x104/0x130
+ <000000000010d222> do_IRQ+0x9a/0xf0
+ <0000000000ca2344> ext_int_handler+0x130/0x134
+ <0000000000103648> enabled_wait+0x58/0x128
+ (<0000000000103634> enabled_wait+0x44/0x128)
+ <0000000000103b00> arch_cpu_idle+0x40/0x58
+ <0000000000ca0544> default_idle_call+0x3c/0x68
+ <000000000018eaa4> do_idle+0xec/0x1c0
+ <000000000018ee0e> cpu_startup_entry+0x36/0x40
+ <000000000122df34> arch_call_rest_init+0x5c/0x88
+ <0000000000000000> 0x0
+ INFO: lockdep is turned off.
+ Last Breaking-Event-Address:
+ <00000000003ca8f4> __free_slab+0x1c4/0x6b0
+ Kernel panic - not syncing: Fatal exception in interrupt
+
+The kernel panics on an attempt to dereference the NULL memcg pointer.
+When shutdown_cache() is called from the kmem_cache_destroy() context, a
+memcg kmem_cache might have empty slab pages in a partial list, which are
+still charged to the memory cgroup.
+
+These pages are released by free_partial() at the beginning of
+shutdown_cache(): either directly or by scheduling a RCU-delayed work
+(if the kmem_cache has the SLAB_TYPESAFE_BY_RCU flag). The latter case
+is when the reported panic can happen: memcg_unlink_cache() is called
+immediately after shrinking partial lists, without waiting for scheduled
+RCU works. It sets the kmem_cache->memcg_params.memcg pointer to NULL,
+and the following attempt to dereference it by __free_slab() from the
+RCU work context causes the panic.
+
+To fix the issue, let's postpone the release of the memcg pointer to
+destroy_memcg_params(). It's called from a separate work context by
+slab_caches_to_rcu_destroy_workfn(), which contains a full RCU barrier.
+This guarantees that all scheduled page release RCU works will complete
+before the memcg pointer is zeroed.
+
+Big thanks to Karsten for the perfect report containing all the
+necessary information, for his help with the analysis of the problem,
+and for testing the fix.
+
+Link: http://lkml.kernel.org/r/20191010160549.1584316-1-guro@fb.com
+Fixes: fb2f2b0adb98 ("mm: memcg/slab: reparent memcg kmem_caches on cgroup removal")
+Signed-off-by: Roman Gushchin <guro@fb.com>
+Reported-by: Karsten Graul <kgraul@linux.ibm.com>
+Tested-by: Karsten Graul <kgraul@linux.ibm.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Cc: Karsten Graul <kgraul@linux.ibm.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slab_common.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -178,10 +178,13 @@ static int init_memcg_params(struct kmem
+
+ static void destroy_memcg_params(struct kmem_cache *s)
+ {
+- if (is_root_cache(s))
++ if (is_root_cache(s)) {
+ kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+- else
++ } else {
++ mem_cgroup_put(s->memcg_params.memcg);
++ WRITE_ONCE(s->memcg_params.memcg, NULL);
+ percpu_ref_exit(&s->memcg_params.refcnt);
++ }
+ }
+
+ static void free_memcg_params(struct rcu_head *rcu)
+@@ -253,8 +256,6 @@ static void memcg_unlink_cache(struct km
+ } else {
+ list_del(&s->memcg_params.children_node);
+ list_del(&s->memcg_params.kmem_caches_node);
+- mem_cgroup_put(s->memcg_params.memcg);
+- WRITE_ONCE(s->memcg_params.memcg, NULL);
+ }
+ }
+ #else
--- /dev/null
+From 3d7fed4ad8ccb691d217efbb0f934e6a4df5ef91 Mon Sep 17 00:00:00 2001
+From: Jane Chu <jane.chu@oracle.com>
+Date: Mon, 14 Oct 2019 14:12:29 -0700
+Subject: mm/memory-failure: poison read receives SIGKILL instead of SIGBUS if mmaped more than once
+
+From: Jane Chu <jane.chu@oracle.com>
+
+commit 3d7fed4ad8ccb691d217efbb0f934e6a4df5ef91 upstream.
+
+Mmap /dev/dax more than once, then read the poison location using the
+address from one of the mappings. The other mappings, due to not having
+the page mapped in, will cause SIGKILLs to be delivered to the process.
+SIGKILL takes precedence over SIGBUS, so the user process loses the
+opportunity to handle the UE.
+
+Although one may add MAP_POPULATE to mmap(2) to work around the issue,
+MAP_POPULATE makes mapping 128GB of pmem several orders of magnitude
+slower, so it isn't always an option.
+
+Details -
+
+ ndctl inject-error --block=10 --count=1 namespace6.0
+
+ ./read_poison -x dax6.0 -o 5120 -m 2
+ mmaped address 0x7f5bb6600000
+ mmaped address 0x7f3cf3600000
+ doing local read at address 0x7f3cf3601400
+ Killed
+
+Console messages in instrumented kernel -
+
+ mce: Uncorrected hardware memory error in user-access at edbe201400
+ Memory failure: tk->addr = 7f5bb6601000
+ Memory failure: address edbe201: call dev_pagemap_mapping_shift
+ dev_pagemap_mapping_shift: page edbe201: no PUD
+ Memory failure: tk->size_shift == 0
+ Memory failure: Unable to find user space address edbe201 in read_poison
+ Memory failure: tk->addr = 7f3cf3601000
+ Memory failure: address edbe201: call dev_pagemap_mapping_shift
+ Memory failure: tk->size_shift = 21
+ Memory failure: 0xedbe201: forcibly killing read_poison:22434 because of failure to unmap corrupted page
+ => to deliver SIGKILL
+ Memory failure: 0xedbe201: Killing read_poison:22434 due to hardware memory corruption
+ => to deliver SIGBUS
+
+Link: http://lkml.kernel.org/r/1565112345-28754-3-git-send-email-jane.chu@oracle.com
+Signed-off-by: Jane Chu <jane.chu@oracle.com>
+Suggested-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -199,7 +199,6 @@ struct to_kill {
+ struct task_struct *tsk;
+ unsigned long addr;
+ short size_shift;
+- char addr_valid;
+ };
+
+ /*
+@@ -324,22 +323,27 @@ static void add_to_kill(struct task_stru
+ }
+ }
+ tk->addr = page_address_in_vma(p, vma);
+- tk->addr_valid = 1;
+ if (is_zone_device_page(p))
+ tk->size_shift = dev_pagemap_mapping_shift(p, vma);
+ else
+ tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
+
+ /*
+- * In theory we don't have to kill when the page was
+- * munmaped. But it could be also a mremap. Since that's
+- * likely very rare kill anyways just out of paranoia, but use
+- * a SIGKILL because the error is not contained anymore.
++ * Send SIGKILL if "tk->addr == -EFAULT". Also, as
++ * "tk->size_shift" is always non-zero for !is_zone_device_page(),
++ * so "tk->size_shift == 0" effectively checks no mapping on
++ * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
++ * to a process' address space, it's possible not all N VMAs
++ * contain mappings for the page, but at least one VMA does.
++ * Only deliver SIGBUS with payload derived from the VMA that
++ * has a mapping for the page.
+ */
+- if (tk->addr == -EFAULT || tk->size_shift == 0) {
++ if (tk->addr == -EFAULT) {
+ pr_info("Memory failure: Unable to find user space address %lx in %s\n",
+ page_to_pfn(p), tsk->comm);
+- tk->addr_valid = 0;
++ } else if (tk->size_shift == 0) {
++ kfree(tk);
++ return;
+ }
+ get_task_struct(tsk);
+ tk->tsk = tsk;
+@@ -366,7 +370,7 @@ static void kill_procs(struct list_head
+ * make sure the process doesn't catch the
+ * signal and then access the memory. Just kill it.
+ */
+- if (fail || tk->addr_valid == 0) {
++ if (fail || tk->addr == -EFAULT) {
+ pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
+ do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
--- /dev/null
+From 96c804a6ae8c59a9092b3d5dd581198472063184 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Fri, 18 Oct 2019 20:19:23 -0700
+Subject: mm/memory-failure.c: don't access uninitialized memmaps in memory_failure()
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 96c804a6ae8c59a9092b3d5dd581198472063184 upstream.
+
+We should use pfn_to_online_page() so that we do not access
+uninitialized memmaps. Reshuffle the code so we don't have to duplicate
+the error message.
+
+Link: http://lkml.kernel.org/r/20191009142435.3975-3-david@redhat.com
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319]
+Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: <stable@vger.kernel.org> [4.13+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1253,17 +1253,19 @@ int memory_failure(unsigned long pfn, in
+ if (!sysctl_memory_failure_recovery)
+ panic("Memory failure on page %lx", pfn);
+
+- if (!pfn_valid(pfn)) {
++ p = pfn_to_online_page(pfn);
++ if (!p) {
++ if (pfn_valid(pfn)) {
++ pgmap = get_dev_pagemap(pfn, NULL);
++ if (pgmap)
++ return memory_failure_dev_pagemap(pfn, flags,
++ pgmap);
++ }
+ pr_err("Memory failure: %#lx: memory outside kernel control\n",
+ pfn);
+ return -ENXIO;
+ }
+
+- pgmap = get_dev_pagemap(pfn, NULL);
+- if (pgmap)
+- return memory_failure_dev_pagemap(pfn, flags, pgmap);
+-
+- p = pfn_to_page(pfn);
+ if (PageHuge(p))
+ return memory_failure_hugetlb(pfn, flags);
+ if (TestSetPageHWPoison(p)) {
--- /dev/null
+From 00d6c019b5bc175cee3770e0e659f2b5f4804ea5 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Fri, 18 Oct 2019 20:19:33 -0700
+Subject: mm/memory_hotplug: don't access uninitialized memmaps in shrink_pgdat_span()
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 00d6c019b5bc175cee3770e0e659f2b5f4804ea5 upstream.
+
+We might use the nid of memmaps that were never initialized. For
+example, if the memmap was poisoned, we will crash the kernel in
+pfn_to_nid() right now. Let's use the calculated boundaries of the
+separate zones instead. This now also avoids having to iterate over a
+whole bunch of subsections again, after shrinking one zone.
+
+Before commit d0dc12e86b31 ("mm/memory_hotplug: optimize memory
+hotplug"), the memmap was initialized to 0 and the node was set to the
+right value. After that commit, the node might be garbage.
+
+We'll have to fix shrink_zone_span() next.
+
+Link: http://lkml.kernel.org/r/20191006085646.5768-4-david@redhat.com
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [d0dc12e86b319]
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Wei Yang <richardw.yang@linux.intel.com>
+Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Damian Tometzki <damian.tometzki@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Halil Pasic <pasic@linux.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Ira Weiny <ira.weiny@intel.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Jun Yao <yaojun8558363@gmail.com>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: Pankaj Gupta <pagupta@redhat.com>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Qian Cai <cai@lca.pw>
+Cc: Rich Felker <dalias@libc.org>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Cc: Steve Capper <steve.capper@arm.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org> [4.13+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory_hotplug.c | 74 +++++++++++-----------------------------------------
+ 1 file changed, 16 insertions(+), 58 deletions(-)
+
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -436,67 +436,25 @@ static void shrink_zone_span(struct zone
+ zone_span_writeunlock(zone);
+ }
+
+-static void shrink_pgdat_span(struct pglist_data *pgdat,
+- unsigned long start_pfn, unsigned long end_pfn)
++static void update_pgdat_span(struct pglist_data *pgdat)
+ {
+- unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
+- unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
+- unsigned long pgdat_end_pfn = p;
+- unsigned long pfn;
+- int nid = pgdat->node_id;
+-
+- if (pgdat_start_pfn == start_pfn) {
+- /*
+- * If the section is smallest section in the pgdat, it need
+- * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
+- * In this case, we find second smallest valid mem_section
+- * for shrinking zone.
+- */
+- pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
+- pgdat_end_pfn);
+- if (pfn) {
+- pgdat->node_start_pfn = pfn;
+- pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
+- }
+- } else if (pgdat_end_pfn == end_pfn) {
+- /*
+- * If the section is biggest section in the pgdat, it need
+- * shrink pgdat->node_spanned_pages.
+- * In this case, we find second biggest valid mem_section for
+- * shrinking zone.
+- */
+- pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
+- start_pfn);
+- if (pfn)
+- pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
+- }
+-
+- /*
+- * If the section is not biggest or smallest mem_section in the pgdat,
+- * it only creates a hole in the pgdat. So in this case, we need not
+- * change the pgdat.
+- * But perhaps, the pgdat has only hole data. Thus it check the pgdat
+- * has only hole or not.
+- */
+- pfn = pgdat_start_pfn;
+- for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) {
+- if (unlikely(!pfn_valid(pfn)))
+- continue;
+-
+- if (pfn_to_nid(pfn) != nid)
+- continue;
+-
+- /* Skip range to be removed */
+- if (pfn >= start_pfn && pfn < end_pfn)
+- continue;
++ unsigned long node_start_pfn = 0, node_end_pfn = 0;
++ struct zone *zone;
+
+- /* If we find valid section, we have nothing to do */
+- return;
++ for (zone = pgdat->node_zones;
++ zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
++ unsigned long zone_end_pfn = zone->zone_start_pfn +
++ zone->spanned_pages;
++
++ /* No need to lock the zones, they can't change. */
++ if (zone_end_pfn > node_end_pfn)
++ node_end_pfn = zone_end_pfn;
++ if (zone->zone_start_pfn < node_start_pfn)
++ node_start_pfn = zone->zone_start_pfn;
+ }
+
+- /* The pgdat has no valid section */
+- pgdat->node_start_pfn = 0;
+- pgdat->node_spanned_pages = 0;
++ pgdat->node_start_pfn = node_start_pfn;
++ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
+ }
+
+ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
+@@ -507,7 +465,7 @@ static void __remove_zone(struct zone *z
+
+ pgdat_resize_lock(zone->zone_pgdat, &flags);
+ shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
+- shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
++ update_pgdat_span(pgdat);
+ pgdat_resize_unlock(zone->zone_pgdat, &flags);
+ }
+
--- /dev/null
+From 77e080e7680e1e615587352f70c87b9e98126d03 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Fri, 18 Oct 2019 20:19:39 -0700
+Subject: mm/memunmap: don't access uninitialized memmap in memunmap_pages()
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+commit 77e080e7680e1e615587352f70c87b9e98126d03 upstream.
+
+Patch series "mm/memory_hotplug: Shrink zones before removing memory",
+v6.
+
+This series fixes the access of uninitialized memmaps when shrinking
+zones/nodes and when removing memory. Also, it contains all fixes for
+crashes that can be triggered when removing certain namespaces using
+memunmap_pages() - ZONE_DEVICE, reported by Aneesh.
+
+We stop trying to shrink ZONE_DEVICE, as it's buggy; fixing it would be
+more involved (we don't have SECTION_IS_ONLINE as an indicator), and
+shrinking is only of limited use (set_zone_contiguous() cannot detect
+ZONE_DEVICE as contiguous).
+
+We continue shrinking !ZONE_DEVICE zones; however, I reduced the amount
+of code to a minimum. Shrinking is especially necessary to keep
+zone->contiguous set where possible, especially on memory unplug of
+DIMMs at zone boundaries.
+
+--------------------------------------------------------------------------
+
+Zones are now properly shrunk when offlining memory blocks or when
+onlining failed. This allows zones to be properly shrunk on memory
+unplug even if the separate memory blocks of a DIMM were onlined to
+different zones or re-onlined to a different zone after offlining.
+
+Example:
+
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 0
+ present 0
+ managed 0
+ :/# echo "online_movable" > /sys/devices/system/memory/memory41/state
+ :/# echo "online_movable" > /sys/devices/system/memory/memory43/state
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 98304
+ present 65536
+ managed 65536
+ :/# echo 0 > /sys/devices/system/memory/memory43/online
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 32768
+ present 32768
+ managed 32768
+ :/# echo 0 > /sys/devices/system/memory/memory41/online
+ :/# cat /proc/zoneinfo
+ Node 1, zone Movable
+ spanned 0
+ present 0
+ managed 0
+
+This patch (of 10):
+
+With an altmap, the memmaps falling into the reserved altmap space are
+not initialized and, therefore, contain a garbage NID and a garbage
+zone. Make sure to read the NID/zone from a memmap that was initialized.
+
+This fixes a kernel crash that is observed when destroying a namespace:
+
+ kernel BUG at include/linux/mm.h:1107!
+ cpu 0x1: Vector: 700 (Program Check) at [c000000274087890]
+ pc: c0000000004b9728: memunmap_pages+0x238/0x340
+ lr: c0000000004b9724: memunmap_pages+0x234/0x340
+ ...
+ pid = 3669, comm = ndctl
+ kernel BUG at include/linux/mm.h:1107!
+ devm_action_release+0x30/0x50
+ release_nodes+0x268/0x2d0
+ device_release_driver_internal+0x174/0x240
+ unbind_store+0x13c/0x190
+ drv_attr_store+0x44/0x60
+ sysfs_kf_write+0x70/0xa0
+ kernfs_fop_write+0x1ac/0x290
+ __vfs_write+0x3c/0x70
+ vfs_write+0xe4/0x200
+ ksys_write+0x7c/0x140
+ system_call+0x5c/0x68
+
+The "page_zone(pfn_to_page(pfn)" was introduced by 69324b8f4833 ("mm,
+devm_memremap_pages: add MEMORY_DEVICE_PRIVATE support"), however, I
+think we will never have driver reserved memory with
+MEMORY_DEVICE_PRIVATE (no altmap AFAIKS).
+
+[david@redhat.com: minimze code changes, rephrase description]
+Link: http://lkml.kernel.org/r/20191006085646.5768-2-david@redhat.com
+Fixes: 2c2a5af6fed2 ("mm, memory_hotplug: add nid parameter to arch_remove_memory")
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: Ira Weiny <ira.weiny@intel.com>
+Cc: Damian Tometzki <damian.tometzki@gmail.com>
+Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Halil Pasic <pasic@linux.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jun Yao <yaojun8558363@gmail.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Pankaj Gupta <pagupta@redhat.com>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Qian Cai <cai@lca.pw>
+Cc: Rich Felker <dalias@libc.org>
+Cc: Robin Murphy <robin.murphy@arm.com>
+Cc: Steve Capper <steve.capper@arm.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Wei Yang <richardw.yang@linux.intel.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org> [5.0+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memremap.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/mm/memremap.c
++++ b/mm/memremap.c
+@@ -104,6 +104,7 @@ static void devm_memremap_pages_release(
+ struct dev_pagemap *pgmap = data;
+ struct device *dev = pgmap->dev;
+ struct resource *res = &pgmap->res;
++ struct page *first_page;
+ unsigned long pfn;
+ int nid;
+
+@@ -112,14 +113,16 @@ static void devm_memremap_pages_release(
+ put_page(pfn_to_page(pfn));
+ dev_pagemap_cleanup(pgmap);
+
++ /* make sure to access a memmap that was actually initialized */
++ first_page = pfn_to_page(pfn_first(pgmap));
++
+ /* pages are dead and unused, undo the arch mapping */
+- nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
++ nid = page_to_nid(first_page);
+
+ mem_hotplug_begin();
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+- pfn = PHYS_PFN(res->start);
+- __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
+- PHYS_PFN(resource_size(res)), NULL);
++ __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
++ PHYS_PFN(resource_size(res)), NULL);
+ } else {
+ arch_remove_memory(nid, res->start, resource_size(res),
+ pgmap_altmap(pgmap));
--- /dev/null
+From a26ee565b6cd8dc2bf15ff6aa70bbb28f928b773 Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Fri, 18 Oct 2019 20:19:29 -0700
+Subject: mm/page_owner: don't access uninitialized memmaps when reading /proc/pagetypeinfo
+
+From: Qian Cai <cai@lca.pw>
+
+commit a26ee565b6cd8dc2bf15ff6aa70bbb28f928b773 upstream.
+
+Uninitialized memmaps contain garbage and in the worst case trigger
+kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get
+touched.
+
+For example, when not onlining a memory block that is spanned by a zone
+and reading /proc/pagetypeinfo with CONFIG_DEBUG_VM_PGFLAGS and
+CONFIG_PAGE_POISONING, we can trigger a kernel BUG:
+
+ :/# echo 1 > /sys/devices/system/memory/memory40/online
+ :/# echo 1 > /sys/devices/system/memory/memory42/online
+ :/# cat /proc/pagetypeinfo > test.file
+ page:fffff2c585200000 is uninitialized and poisoned
+ raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff
+ raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff
+ page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p))
+ There is not page extension available.
+ ------------[ cut here ]------------
+ kernel BUG at include/linux/mm.h:1107!
+ invalid opcode: 0000 [#1] SMP NOPTI
+
+Please note that this change does not affect ZONE_DEVICE, because
+pagetypeinfo_showmixedcount_print() is called from
+mm/vmstat.c:pagetypeinfo_showmixedcount() only for populated zones, and
+ZONE_DEVICE is never populated (zone->present_pages always 0).
+
+[david@redhat.com: move check to outer loop, add comment, rephrase description]
+Link: http://lkml.kernel.org/r/20191011140638.8160-1-david@redhat.com
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") # visible after d0dc12e86b319
+Signed-off-by: Qian Cai <cai@lca.pw>
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Cc: Miles Chen <miles.chen@mediatek.com>
+Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Cc: Qian Cai <cai@lca.pw>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: <stable@vger.kernel.org> [4.13+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_owner.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/page_owner.c
++++ b/mm/page_owner.c
+@@ -258,7 +258,8 @@ void pagetypeinfo_showmixedcount_print(s
+ * not matter as the mixed block count will still be correct
+ */
+ for (; pfn < end_pfn; ) {
+- if (!pfn_valid(pfn)) {
++ page = pfn_to_online_page(pfn);
++ if (!page) {
+ pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+ continue;
+ }
+@@ -266,13 +267,13 @@ void pagetypeinfo_showmixedcount_print(s
+ block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+ block_end_pfn = min(block_end_pfn, end_pfn);
+
+- page = pfn_to_page(pfn);
+ pageblock_mt = get_pageblock_migratetype(page);
+
+ for (; pfn < block_end_pfn; pfn++) {
+ if (!pfn_valid_within(pfn))
+ continue;
+
++ /* The pageblock is online, no need to recheck. */
+ page = pfn_to_page(pfn);
+
+ if (page_zone(page) != zone)
--- /dev/null
+From e4f8e513c3d353c134ad4eef9fd0bba12406c7c8 Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Mon, 14 Oct 2019 14:11:51 -0700
+Subject: mm/slub: fix a deadlock in show_slab_objects()
+
+From: Qian Cai <cai@lca.pw>
+
+commit e4f8e513c3d353c134ad4eef9fd0bba12406c7c8 upstream.
+
+A long time ago we fixed a similar deadlock in show_slab_objects() [1].
+However, apparently due to commits like 01fb58bcba63 ("slab: remove
+synchronous synchronize_sched() from memcg cache deactivation path") and
+03afc0e25f7f ("slab: get_online_mems for
+kmem_cache_{create,destroy,shrink}"), this kind of deadlock is back;
+just reading files in /sys/kernel/slab will generate the lockdep splat
+below.
+
+Since the "mem_hotplug_lock" here is only to obtain a stable online node
+mask while racing with NUMA node hotplug, in the worst case, the results
+may me miscalculated while doing NUMA node hotplug, but they shall be
+corrected by later reads of the same files.
+
+ WARNING: possible circular locking dependency detected
+ ------------------------------------------------------
+ cat/5224 is trying to acquire lock:
+ ffff900012ac3120 (mem_hotplug_lock.rw_sem){++++}, at:
+ show_slab_objects+0x94/0x3a8
+
+ but task is already holding lock:
+ b8ff009693eee398 (kn->count#45){++++}, at: kernfs_seq_start+0x44/0xf0
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+
+ -> #2 (kn->count#45){++++}:
+ lock_acquire+0x31c/0x360
+ __kernfs_remove+0x290/0x490
+ kernfs_remove+0x30/0x44
+ sysfs_remove_dir+0x70/0x88
+ kobject_del+0x50/0xb0
+ sysfs_slab_unlink+0x2c/0x38
+ shutdown_cache+0xa0/0xf0
+ kmemcg_cache_shutdown_fn+0x1c/0x34
+ kmemcg_workfn+0x44/0x64
+ process_one_work+0x4f4/0x950
+ worker_thread+0x390/0x4bc
+ kthread+0x1cc/0x1e8
+ ret_from_fork+0x10/0x18
+
+ -> #1 (slab_mutex){+.+.}:
+ lock_acquire+0x31c/0x360
+ __mutex_lock_common+0x16c/0xf78
+ mutex_lock_nested+0x40/0x50
+ memcg_create_kmem_cache+0x38/0x16c
+ memcg_kmem_cache_create_func+0x3c/0x70
+ process_one_work+0x4f4/0x950
+ worker_thread+0x390/0x4bc
+ kthread+0x1cc/0x1e8
+ ret_from_fork+0x10/0x18
+
+ -> #0 (mem_hotplug_lock.rw_sem){++++}:
+ validate_chain+0xd10/0x2bcc
+ __lock_acquire+0x7f4/0xb8c
+ lock_acquire+0x31c/0x360
+ get_online_mems+0x54/0x150
+ show_slab_objects+0x94/0x3a8
+ total_objects_show+0x28/0x34
+ slab_attr_show+0x38/0x54
+ sysfs_kf_seq_show+0x198/0x2d4
+ kernfs_seq_show+0xa4/0xcc
+ seq_read+0x30c/0x8a8
+ kernfs_fop_read+0xa8/0x314
+ __vfs_read+0x88/0x20c
+ vfs_read+0xd8/0x10c
+ ksys_read+0xb0/0x120
+ __arm64_sys_read+0x54/0x88
+ el0_svc_handler+0x170/0x240
+ el0_svc+0x8/0xc
+
+ other info that might help us debug this:
+
+ Chain exists of:
+ mem_hotplug_lock.rw_sem --> slab_mutex --> kn->count#45
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(kn->count#45);
+ lock(slab_mutex);
+ lock(kn->count#45);
+ lock(mem_hotplug_lock.rw_sem);
+
+ *** DEADLOCK ***
+
+ 3 locks held by cat/5224:
+ #0: 9eff00095b14b2a0 (&p->lock){+.+.}, at: seq_read+0x4c/0x8a8
+ #1: 0eff008997041480 (&of->mutex){+.+.}, at: kernfs_seq_start+0x34/0xf0
+ #2: b8ff009693eee398 (kn->count#45){++++}, at:
+ kernfs_seq_start+0x44/0xf0
+
+ stack backtrace:
+ Call trace:
+ dump_backtrace+0x0/0x248
+ show_stack+0x20/0x2c
+ dump_stack+0xd0/0x140
+ print_circular_bug+0x368/0x380
+ check_noncircular+0x248/0x250
+ validate_chain+0xd10/0x2bcc
+ __lock_acquire+0x7f4/0xb8c
+ lock_acquire+0x31c/0x360
+ get_online_mems+0x54/0x150
+ show_slab_objects+0x94/0x3a8
+ total_objects_show+0x28/0x34
+ slab_attr_show+0x38/0x54
+ sysfs_kf_seq_show+0x198/0x2d4
+ kernfs_seq_show+0xa4/0xcc
+ seq_read+0x30c/0x8a8
+ kernfs_fop_read+0xa8/0x314
+ __vfs_read+0x88/0x20c
+ vfs_read+0xd8/0x10c
+ ksys_read+0xb0/0x120
+ __arm64_sys_read+0x54/0x88
+ el0_svc_handler+0x170/0x240
+ el0_svc+0x8/0xc
+
+I think it is important to mention that this doesn't expose
+show_slab_objects() to a use-after-free. There is only a single path
+that might really race here, and that is the slab hotplug notifier
+callback __kmem_cache_shrink() (via slab_mem_going_offline_callback),
+but that path doesn't really destroy kmem_cache_node data structures.
+
+[1] http://lkml.iu.edu/hypermail/linux/kernel/1101.0/02850.html
+
+[akpm@linux-foundation.org: add comment explaining why we don't need mem_hotplug_lock]
+Link: http://lkml.kernel.org/r/1570192309-10132-1-git-send-email-cai@lca.pw
+Fixes: 01fb58bcba63 ("slab: remove synchronous synchronize_sched() from memcg cache deactivation path")
+Fixes: 03afc0e25f7f ("slab: get_online_mems for kmem_cache_{create,destroy,shrink}")
+Signed-off-by: Qian Cai <cai@lca.pw>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -4836,7 +4836,17 @@ static ssize_t show_slab_objects(struct
+ }
+ }
+
+- get_online_mems();
++ /*
++ * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
++ * already held which will conflict with an existing lock order:
++ *
++ * mem_hotplug_lock->slab_mutex->kernfs_mutex
++ *
++ * We don't really need mem_hotplug_lock (to hold off
++ * slab_mem_going_offline_callback) here because slab's memory hot
++ * unplug code doesn't destroy the kmem_cache->node[] data.
++ */
++
+ #ifdef CONFIG_SLUB_DEBUG
+ if (flags & SO_ALL) {
+ struct kmem_cache_node *n;
+@@ -4877,7 +4887,6 @@ static ssize_t show_slab_objects(struct
+ x += sprintf(buf + x, " N%d=%lu",
+ node, nodes[node]);
+ #endif
+- put_online_mems();
+ kfree(nodes);
+ return x + sprintf(buf + x, "\n");
+ }
--- /dev/null
+From c07d0073b9ec80a139d07ebf78e9c30d2a28279e Mon Sep 17 00:00:00 2001
+From: Faiz Abbas <faiz_abbas@ti.com>
+Date: Tue, 15 Oct 2019 00:08:49 +0530
+Subject: mmc: cqhci: Commit descriptors before setting the doorbell
+
+From: Faiz Abbas <faiz_abbas@ti.com>
+
+commit c07d0073b9ec80a139d07ebf78e9c30d2a28279e upstream.
+
+Add a write memory barrier to make sure that descriptors are actually
+written to memory, before ringing the doorbell.
+
+Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/cqhci.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/cqhci.c
++++ b/drivers/mmc/host/cqhci.c
+@@ -611,7 +611,8 @@ static int cqhci_request(struct mmc_host
+ cq_host->slot[tag].flags = 0;
+
+ cq_host->qcnt += 1;
+-
++ /* Make sure descriptors are ready before ringing the doorbell */
++ wmb();
+ cqhci_writel(cq_host, 1 << tag, CQHCI_TDBR);
+ if (!(cqhci_readl(cq_host, CQHCI_TDBR) & (1 << tag)))
+ pr_debug("%s: cqhci: doorbell not set for tag %d\n",
--- /dev/null
+From 2bb9f7566ba7ab3c2154964461e37b52cdc6b91b Mon Sep 17 00:00:00 2001
+From: Sascha Hauer <s.hauer@pengutronix.de>
+Date: Fri, 18 Oct 2019 11:39:34 +0200
+Subject: mmc: mxs: fix flags passed to dmaengine_prep_slave_sg
+
+From: Sascha Hauer <s.hauer@pengutronix.de>
+
+commit 2bb9f7566ba7ab3c2154964461e37b52cdc6b91b upstream.
+
+Since ceeeb99cd821 we no longer abuse the DMA_CTRL_ACK flag for custom
+driver use and introduced the MXS_DMA_CTRL_WAIT4END instead. We have not
+changed all users to this flag though. This patch fixes it for the
+mxs-mmc driver.
+
+Fixes: ceeeb99cd821 ("dmaengine: mxs: rename custom flag")
+Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
+Tested-by: Fabio Estevam <festevam@gmail.com>
+Reported-by: Bruno Thomsen <bruno.thomsen@gmail.com>
+Tested-by: Bruno Thomsen <bruno.thomsen@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/mxs-mmc.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/mmc/host/mxs-mmc.c
++++ b/drivers/mmc/host/mxs-mmc.c
+@@ -17,6 +17,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/dmaengine.h>
++#include <linux/dma/mxs-dma.h>
+ #include <linux/highmem.h>
+ #include <linux/clk.h>
+ #include <linux/err.h>
+@@ -266,7 +267,7 @@ static void mxs_mmc_bc(struct mxs_mmc_ho
+ ssp->ssp_pio_words[2] = cmd1;
+ ssp->dma_dir = DMA_NONE;
+ ssp->slave_dirn = DMA_TRANS_NONE;
+- desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK);
++ desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END);
+ if (!desc)
+ goto out;
+
+@@ -311,7 +312,7 @@ static void mxs_mmc_ac(struct mxs_mmc_ho
+ ssp->ssp_pio_words[2] = cmd1;
+ ssp->dma_dir = DMA_NONE;
+ ssp->slave_dirn = DMA_TRANS_NONE;
+- desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK);
++ desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END);
+ if (!desc)
+ goto out;
+
+@@ -441,7 +442,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_
+ host->data = data;
+ ssp->dma_dir = dma_data_dir;
+ ssp->slave_dirn = slave_dirn;
+- desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
++ desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END);
+ if (!desc)
+ goto out;
+
--- /dev/null
+From feb40824d78eac5e48f56498dca941754dff33d7 Mon Sep 17 00:00:00 2001
+From: Faiz Abbas <faiz_abbas@ti.com>
+Date: Thu, 10 Oct 2019 16:22:30 +0530
+Subject: mmc: sdhci-omap: Fix Tuning procedure for temperatures < -20C
+
+From: Faiz Abbas <faiz_abbas@ti.com>
+
+commit feb40824d78eac5e48f56498dca941754dff33d7 upstream.
+
+According to the App note[1] detailing the tuning algorithm, for
+temperatures < -20C, the initial tuning value should be min(largest value
+in LPW - 24, ceil(13/16 ratio of LPW)). The largest value in LPW is
+(max_window + 4 * (max_len - 1)) and not (max_window + 4 * max_len) itself.
+Fix this implementation.
+
+[1] http://www.ti.com/lit/an/spraca9b/spraca9b.pdf
+
+Fixes: 961de0a856e3 ("mmc: sdhci-omap: Workaround errata regarding SDR104/HS200 tuning failures (i929)")
+Cc: stable@vger.kernel.org
+Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/sdhci-omap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-omap.c
++++ b/drivers/mmc/host/sdhci-omap.c
+@@ -372,7 +372,7 @@ static int sdhci_omap_execute_tuning(str
+ * on temperature
+ */
+ if (temperature < -20000)
+- phase_delay = min(max_window + 4 * max_len - 24,
++ phase_delay = min(max_window + 4 * (max_len - 1) - 24,
+ max_window +
+ DIV_ROUND_UP(13 * max_len, 16) * 4);
+ else if (temperature < 20000)
--- /dev/null
+From ac49303d9ef0ad98b79867a380ef23480e48870b Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Date: Mon, 21 Oct 2019 19:56:00 +0200
+Subject: s390/kaslr: add support for R_390_GLOB_DAT relocation type
+
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+
+commit ac49303d9ef0ad98b79867a380ef23480e48870b upstream.
+
+Commit "bpf: Process in-kernel BTF" in linux-next introduced an undefined
+__weak symbol, which results in an R_390_GLOB_DAT relocation type. That
+is not yet handled by the KASLR relocation code, and the kernel stops with
+the message "Unknown relocation type".
+
+Add code to detect and handle R_390_GLOB_DAT relocation types and undefined
+symbols.
+
+Fixes: 805bc0bc238f ("s390/kernel: build a relocatable kernel")
+Cc: <stable@vger.kernel.org> # v5.2+
+Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/boot/startup.c | 14 +++++++++++---
+ arch/s390/kernel/machine_kexec_reloc.c | 1 +
+ 2 files changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/boot/startup.c
++++ b/arch/s390/boot/startup.c
+@@ -101,10 +101,18 @@ static void handle_relocs(unsigned long
+ dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
+ for (rela = rela_start; rela < rela_end; rela++) {
+ loc = rela->r_offset + offset;
+- val = rela->r_addend + offset;
++ val = rela->r_addend;
+ r_sym = ELF64_R_SYM(rela->r_info);
+- if (r_sym)
+- val += dynsym[r_sym].st_value;
++ if (r_sym) {
++ if (dynsym[r_sym].st_shndx != SHN_UNDEF)
++ val += dynsym[r_sym].st_value + offset;
++ } else {
++ /*
++ * 0 == undefined symbol table index (STN_UNDEF),
++ * used for R_390_RELATIVE, only add KASLR offset
++ */
++ val += offset;
++ }
+ r_type = ELF64_R_TYPE(rela->r_info);
+ rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
+ if (rc)
+--- a/arch/s390/kernel/machine_kexec_reloc.c
++++ b/arch/s390/kernel/machine_kexec_reloc.c
+@@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, voi
+ *(u32 *)loc = val;
+ break;
+ case R_390_64: /* Direct 64 bit. */
++ case R_390_GLOB_DAT:
+ *(u64 *)loc = val;
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
--- /dev/null
+From 388bb19be8eab4674a660e0c97eaf60775362bc7 Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Thu, 10 Oct 2019 15:13:33 +0200
+Subject: s390/zcrypt: fix memleak at release
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 388bb19be8eab4674a660e0c97eaf60775362bc7 upstream.
+
+If a process is interrupted while accessing the crypto device and the
+global ap_perms_mutex is contended, release() could return early and
+fail to free related resources.
+
+Fixes: 00fab2350e6b ("s390/zcrypt: multiple zcrypt device nodes support")
+Cc: <stable@vger.kernel.org> # 4.19
+Cc: Harald Freudenberger <freude@linux.ibm.com>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/crypto/zcrypt_api.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/s390/crypto/zcrypt_api.c
++++ b/drivers/s390/crypto/zcrypt_api.c
+@@ -539,8 +539,7 @@ static int zcrypt_release(struct inode *
+ if (filp->f_inode->i_cdev == &zcrypt_cdev) {
+ struct zcdn_device *zcdndev;
+
+- if (mutex_lock_interruptible(&ap_perms_mutex))
+- return -ERESTARTSYS;
++ mutex_lock(&ap_perms_mutex);
+ zcdndev = find_zcdndev_by_devt(filp->f_inode->i_rdev);
+ mutex_unlock(&ap_perms_mutex);
+ if (zcdndev) {
io_uring-fix-broken-links-with-offloading.patch
io_uring-fix-race-for-sqes-with-userspace.patch
io_uring-used-cached-copies-of-sq-dropped-and-cq-ove.patch
+mmc-mxs-fix-flags-passed-to-dmaengine_prep_slave_sg.patch
+mmc-cqhci-commit-descriptors-before-setting-the-doorbell.patch
+mmc-sdhci-omap-fix-tuning-procedure-for-temperatures-20c.patch
+mm-memory-failure.c-don-t-access-uninitialized-memmaps-in-memory_failure.patch
+mm-slub-fix-a-deadlock-in-show_slab_objects.patch
+mm-page_owner-don-t-access-uninitialized-memmaps-when-reading-proc-pagetypeinfo.patch
+mm-memory_hotplug-don-t-access-uninitialized-memmaps-in-shrink_pgdat_span.patch
+mm-memunmap-don-t-access-uninitialized-memmap-in-memunmap_pages.patch
+mm-memcg-slab-fix-panic-in-__free_slab-caused-by-premature-memcg-pointer-release.patch
+mm-compaction-fix-wrong-pfn-handling-in-__reset_isolation_pfn.patch
+mm-memcg-get-number-of-pages-on-the-lru-list-in-memcgroup-base-on-lru_zone_size.patch
+mm-memblock-do-not-enforce-current-limit-for-memblock_phys-family.patch
+hugetlbfs-don-t-access-uninitialized-memmaps-in-pfn_range_valid_gigantic.patch
+mm-memory-failure-poison-read-receives-sigkill-instead-of-sigbus-if-mmaped-more-than-once.patch
+zram-fix-race-between-backing_dev_show-and-backing_dev_store.patch
+xtensa-drop-export_symbol-for-outs-ins.patch
+xtensa-fix-change_bit-in-exclusive-access-option.patch
+s390-zcrypt-fix-memleak-at-release.patch
+s390-kaslr-add-support-for-r_390_glob_dat-relocation-type.patch
--- /dev/null
+From 8b39da985194aac2998dd9e3a22d00b596cebf1e Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Mon, 14 Oct 2019 15:48:19 -0700
+Subject: xtensa: drop EXPORT_SYMBOL for outs*/ins*
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 8b39da985194aac2998dd9e3a22d00b596cebf1e upstream.
+
+Custom outs*/ins* implementations are long gone from the xtensa port;
+remove the matching EXPORT_SYMBOLs.
+This fixes the following build warnings issued by modpost since commit
+15bfc2348d54 ("modpost: check for static EXPORT_SYMBOL* functions"):
+
+ WARNING: "insb" [vmlinux] is a static EXPORT_SYMBOL
+ WARNING: "insw" [vmlinux] is a static EXPORT_SYMBOL
+ WARNING: "insl" [vmlinux] is a static EXPORT_SYMBOL
+ WARNING: "outsb" [vmlinux] is a static EXPORT_SYMBOL
+ WARNING: "outsw" [vmlinux] is a static EXPORT_SYMBOL
+ WARNING: "outsl" [vmlinux] is a static EXPORT_SYMBOL
+
+Cc: stable@vger.kernel.org
+Fixes: d38efc1f150f ("xtensa: adopt generic io routines")
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/kernel/xtensa_ksyms.c | 7 -------
+ 1 file changed, 7 deletions(-)
+
+--- a/arch/xtensa/kernel/xtensa_ksyms.c
++++ b/arch/xtensa/kernel/xtensa_ksyms.c
+@@ -119,13 +119,6 @@ EXPORT_SYMBOL(__invalidate_icache_range)
+ // FIXME EXPORT_SYMBOL(screen_info);
+ #endif
+
+-EXPORT_SYMBOL(outsb);
+-EXPORT_SYMBOL(outsw);
+-EXPORT_SYMBOL(outsl);
+-EXPORT_SYMBOL(insb);
+-EXPORT_SYMBOL(insw);
+-EXPORT_SYMBOL(insl);
+-
+ extern long common_exception_return;
+ EXPORT_SYMBOL(common_exception_return);
+
--- /dev/null
+From 775fd6bfefc66a8c33e91dd9687ed530643b954d Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Tue, 15 Oct 2019 21:51:43 -0700
+Subject: xtensa: fix change_bit in exclusive access option
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 775fd6bfefc66a8c33e91dd9687ed530643b954d upstream.
+
+The change_bit implementation for the XCHAL_HAVE_EXCLUSIVE case changes
+all bits except the one required, due to a copy-paste error from
+clear_bit.
+
+Cc: stable@vger.kernel.org # v5.2+
+Fixes: f7c34874f04a ("xtensa: add exclusive atomics support")
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/include/asm/bitops.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/xtensa/include/asm/bitops.h
++++ b/arch/xtensa/include/asm/bitops.h
+@@ -148,7 +148,7 @@ static inline void change_bit(unsigned i
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp)
+- : "a" (~mask), "a" (p)
++ : "a" (mask), "a" (p)
+ : "memory");
+ }
+
--- /dev/null
+From f7daefe4231e57381d92c2e2ad905a899c28e402 Mon Sep 17 00:00:00 2001
+From: Chenwandun <chenwandun@huawei.com>
+Date: Fri, 18 Oct 2019 20:20:14 -0700
+Subject: zram: fix race between backing_dev_show and backing_dev_store
+
+From: Chenwandun <chenwandun@huawei.com>
+
+commit f7daefe4231e57381d92c2e2ad905a899c28e402 upstream.
+
+CPU0: CPU1:
+backing_dev_show backing_dev_store
+ ...... ......
+ file = zram->backing_dev;
+ down_read(&zram->init_lock);   down_read(&zram->init_lock)
+ file_path(file, ...); zram->backing_dev = backing_dev;
+ up_read(&zram->init_lock); up_read(&zram->init_lock);
+
+backing_dev_show gets the value of zram->backing_dev too early, which
+results in the value being NULL at the beginning and not NULL later.
+
+backtrace:
+ d_path+0xcc/0x174
+ file_path+0x10/0x18
+ backing_dev_show+0x40/0xb4
+ dev_attr_show+0x20/0x54
+ sysfs_kf_seq_show+0x9c/0x10c
+ kernfs_seq_show+0x28/0x30
+ seq_read+0x184/0x488
+ kernfs_fop_read+0x5c/0x1a4
+ __vfs_read+0x44/0x128
+ vfs_read+0xa0/0x138
+ SyS_read+0x54/0xb4
+
+Link: http://lkml.kernel.org/r/1571046839-16814-1-git-send-email-chenwandun@huawei.com
+Signed-off-by: Chenwandun <chenwandun@huawei.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: <stable@vger.kernel.org> [4.14+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -413,13 +413,14 @@ static void reset_bdev(struct zram *zram
+ static ssize_t backing_dev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
++ struct file *file;
+ struct zram *zram = dev_to_zram(dev);
+- struct file *file = zram->backing_dev;
+ char *p;
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+- if (!zram->backing_dev) {
++ file = zram->backing_dev;
++ if (!file) {
+ memcpy(buf, "none\n", 5);
+ up_read(&zram->init_lock);
+ return 5;