--- /dev/null
+From 3568b88944fef28db3ee989b957da49ffc627ede Mon Sep 17 00:00:00 2001
+From: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Date: Thu, 19 Mar 2020 14:11:38 +0000
+Subject: arm64: compat: Fix syscall number of compat_clock_getres
+
+From: Vincenzo Frascino <vincenzo.frascino@arm.com>
+
+commit 3568b88944fef28db3ee989b957da49ffc627ede upstream.
+
+The syscall number of compat_clock_getres was erroneously set to 247
+(__NR_io_cancel!) instead of 264. This causes the vDSO fallback of
+clock_getres() to land on the wrong syscall for compat tasks.
+
+Fix the numbering.
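+
+For reference, in the AArch32 syscall table (arch/arm/tools/syscall.tbl,
+abridged here), the two numbers map as follows, so the broken fallback
+ended up invoking io_cancel() instead of clock_getres():
+
+  247   common  io_cancel      sys_io_cancel      <-- what was called
+  264   common  clock_getres   sys_clock_getres   <-- what was intended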
+
+Cc: <stable@vger.kernel.org>
+Fixes: 53c489e1dfeb6 ("arm64: compat: Add missing syscall numbers")
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/unistd.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/include/asm/unistd.h
++++ b/arch/arm64/include/asm/unistd.h
+@@ -25,8 +25,8 @@
+ #define __NR_compat_gettimeofday 78
+ #define __NR_compat_sigreturn 119
+ #define __NR_compat_rt_sigreturn 173
+-#define __NR_compat_clock_getres 247
+ #define __NR_compat_clock_gettime 263
++#define __NR_compat_clock_getres 264
+ #define __NR_compat_clock_gettime64 403
+ #define __NR_compat_clock_getres_time64 406
+
--- /dev/null
+From 236ebc20d9afc5e9ff52f3cf3f365a91583aac10 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 10 Mar 2020 12:13:53 +0000
+Subject: btrfs: fix log context list corruption after rename whiteout error
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 236ebc20d9afc5e9ff52f3cf3f365a91583aac10 upstream.
+
+During a rename whiteout, if btrfs_whiteout_for_rename() returns an error
+we can end up returning from btrfs_rename() with the log context object
+still in the root's log context list - this happens if 'sync_log' was
+set to true before we called btrfs_whiteout_for_rename() and it is
+dangerous because we end up with a corrupt linked list (root->log_ctxs)
+as the log context object was allocated on the stack.
+
+After btrfs_rename() returns, any task that is running btrfs_sync_log()
+concurrently can end up crashing because that linked list is traversed by
+btrfs_sync_log() (through btrfs_remove_all_log_ctxs()). That results in
+the same issue that commit e6c617102c7e4 ("Btrfs: fix log context list
+corruption after rename exchange operation") fixed.
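+
+A rough sketch of the hazard (heavily simplified from btrfs_rename()):
+
+  struct btrfs_log_ctx ctx;    /* lives on btrfs_rename()'s stack */
+
+  btrfs_init_log_ctx(&ctx, old_inode);
+  ...                          /* sync_log set, ctx linked into root->log_ctxs */
+  ret = btrfs_whiteout_for_rename(trans, root, old_dir, old_dentry);
+  if (ret)
+          goto out_fail;       /* ctx must be unlinked before returning */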
+
+Fixes: d4682ba03ef618 ("Btrfs: sync log after logging new name")
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10159,6 +10159,10 @@ out_fail:
+ ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
+ if (ret)
+ commit_transaction = true;
++ } else if (sync_log) {
++ mutex_lock(&root->log_mutex);
++ list_del(&ctx.list);
++ mutex_unlock(&root->log_mutex);
+ }
+ if (commit_transaction) {
+ ret = btrfs_commit_transaction(trans);
--- /dev/null
+From 5bbc6604a62814511c32f2e39bc9ffb2c1b92cbe Mon Sep 17 00:00:00 2001
+From: Tom St Denis <tom.stdenis@amd.com>
+Date: Tue, 10 Mar 2020 08:40:41 -0400
+Subject: drm/amd/amdgpu: Fix GPR read from debugfs (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tom St Denis <tom.stdenis@amd.com>
+
+commit 5bbc6604a62814511c32f2e39bc9ffb2c1b92cbe upstream.
+
+The offset into the array was specified in bytes but should
+be in terms of 32-bit words. Also prevent large reads that
+would cause a buffer overread.
+
+v2: Read from correct offset from internal storage buffer.
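+
+To illustrate the units mix-up (example values): a read at *pos = 20
+targets GPR dword 5, but the old code indexed the 32-bit data[] array
+with the byte offset:
+
+  offset = *pos & GENMASK_ULL(11, 0);           /* 20: byte offset, wrong */
+  offset = (*pos & GENMASK_ULL(11, 0)) >> 2;    /*  5: dword index, right */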
+
+Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+@@ -694,11 +694,11 @@ static ssize_t amdgpu_debugfs_gpr_read(s
+ ssize_t result = 0;
+ uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
+
+- if (size & 3 || *pos & 3)
++ if (size > 4096 || size & 3 || *pos & 3)
+ return -EINVAL;
+
+ /* decode offset */
+- offset = *pos & GENMASK_ULL(11, 0);
++ offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
+ se = (*pos & GENMASK_ULL(19, 12)) >> 12;
+ sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
+ cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
+@@ -729,7 +729,7 @@ static ssize_t amdgpu_debugfs_gpr_read(s
+ while (size) {
+ uint32_t value;
+
+- value = data[offset++];
++ value = data[result >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
--- /dev/null
+From b216a8e7908cd750550c0480cf7d2b3a37f06954 Mon Sep 17 00:00:00 2001
+From: Qiujun Huang <hqjagain@gmail.com>
+Date: Wed, 18 Mar 2020 15:53:50 +0800
+Subject: drm/lease: fix WARNING in idr_destroy
+
+From: Qiujun Huang <hqjagain@gmail.com>
+
+commit b216a8e7908cd750550c0480cf7d2b3a37f06954 upstream.
+
+drm_lease_create() takes ownership of the leases idr, which will later
+be released by drm_master_put():
+
+drm_master_put
+ ->drm_master_destroy
+ ->idr_destroy
+
+So we need not call idr_destroy() again.
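+
+Only the failure path still needs to clean up, because ownership is not
+transferred when drm_lease_create() fails (sketch of the fixed flow):
+
+  lessee = drm_lease_create(lessor, &leases);
+  if (IS_ERR(lessee)) {
+          idr_destroy(&leases);  /* ownership was never transferred */
+          ...
+  }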
+
+Reported-and-tested-by: syzbot+05835159fe322770fe3d@syzkaller.appspotmail.com
+Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/1584518030-4173-1-git-send-email-hqjagain@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/drm_lease.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/drm_lease.c
++++ b/drivers/gpu/drm/drm_lease.c
+@@ -542,10 +542,12 @@ int drm_mode_create_lease_ioctl(struct d
+ }
+
+ DRM_DEBUG_LEASE("Creating lease\n");
++ /* lessee will take the ownership of leases */
+ lessee = drm_lease_create(lessor, &leases);
+
+ if (IS_ERR(lessee)) {
+ ret = PTR_ERR(lessee);
++ idr_destroy(&leases);
+ goto out_leases;
+ }
+
+@@ -580,7 +582,6 @@ out_lessee:
+
+ out_leases:
+ put_unused_fd(fd);
+- idr_destroy(&leases);
+
+ DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret);
+ return ret;
--- /dev/null
+From 1b53734bd0b2feed8e7761771b2e76fc9126ea0c Mon Sep 17 00:00:00 2001
+From: Roman Penyaev <rpenyaev@suse.de>
+Date: Sat, 21 Mar 2020 18:22:30 -0700
+Subject: epoll: fix possible lost wakeup on epoll_ctl() path
+
+From: Roman Penyaev <rpenyaev@suse.de>
+
+commit 1b53734bd0b2feed8e7761771b2e76fc9126ea0c upstream.
+
+This fixes a possible lost wakeup introduced by commit a218cc491420.
+Originally, modifications to ep->wq were serialized by ep->wq.lock, but
+in commit a218cc491420 ("epoll: use rwlock in order to reduce
+ep_poll_callback() contention") a new rw lock was introduced in order
+to relax the fd event path, i.e. the callers of ep_poll_callback().
+
+After the change, ep_modify() and ep_insert() (both called on the
+epoll_ctl() path) were switched to ep->lock, but ep_poll() (epoll_wait)
+was still using ep->wq.lock for wqueue list modification.
+
+The bug doesn't lead to any wqueue list corruption, because the wake up
+path and list modifications were still serialized by ep->wq.lock
+internally, but the waitqueue_active() check prior to the wake_up()
+call can be reordered with modifications of the ep ready list, so a
+wake up can be lost.
+
+And yes, it can be healed by an explicit smp_mb():
+
+    list_add_tail(&epi->rdllink, &ep->rdllist);
+    smp_mb();
+    if (waitqueue_active(&ep->wq))
+        wake_up(&ep->wq);
+
+But let's keep it simple: this patch replaces ep->wq.lock with ep->lock
+for wqueue modifications, so the wake up path always observes the
+activeness of the wqueue correctly.
+
+Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention")
+Reported-by: Max Neunhoeffer <max@arangodb.com>
+Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Max Neunhoeffer <max@arangodb.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Christopher Kohlhoff <chris.kohlhoff@clearpool.io>
+Cc: Davidlohr Bueso <dbueso@suse.de>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Jes Sorensen <jes.sorensen@gmail.com>
+Cc: <stable@vger.kernel.org> [5.1+]
+Link: http://lkml.kernel.org/r/20200214170211.561524-1-rpenyaev@suse.de
+References: https://bugzilla.kernel.org/show_bug.cgi?id=205933
+Bisected-by: Max Neunhoeffer <max@arangodb.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/eventpoll.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -1860,9 +1860,9 @@ fetch_events:
+ waiter = true;
+ init_waitqueue_entry(&wait, current);
+
+- spin_lock_irq(&ep->wq.lock);
++ write_lock_irq(&ep->lock);
+ __add_wait_queue_exclusive(&ep->wq, &wait);
+- spin_unlock_irq(&ep->wq.lock);
++ write_unlock_irq(&ep->lock);
+ }
+
+ for (;;) {
+@@ -1910,9 +1910,9 @@ send_events:
+ goto fetch_events;
+
+ if (waiter) {
+- spin_lock_irq(&ep->wq.lock);
++ write_lock_irq(&ep->lock);
+ __remove_wait_queue(&ep->wq, &wait);
+- spin_unlock_irq(&ep->wq.lock);
++ write_unlock_irq(&ep->lock);
+ }
+
+ return res;
--- /dev/null
+From 7d36665a5886c27ca4c4d0afd3ecc50b400f3587 Mon Sep 17 00:00:00 2001
+From: Chunguang Xu <brookxu@tencent.com>
+Date: Sat, 21 Mar 2020 18:22:10 -0700
+Subject: memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chunguang Xu <brookxu@tencent.com>
+
+commit 7d36665a5886c27ca4c4d0afd3ecc50b400f3587 upstream.
+
+When an eventfd that monitors multiple memory thresholds of a cgroup is
+closed, the kernel deletes all events related to that eventfd. If,
+before all those events have been deleted, another eventfd starts
+monitoring a memory threshold of the same cgroup, we get a crash:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000004
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0002) - not-present page
+ PGD 800000033058e067 P4D 800000033058e067 PUD 3355ce067 PMD 0
+ Oops: 0002 [#1] SMP PTI
+ CPU: 2 PID: 14012 Comm: kworker/2:6 Kdump: loaded Not tainted 5.6.0-rc4 #3
+ Hardware name: LENOVO 20AWS01K00/20AWS01K00, BIOS GLET70WW (2.24 ) 05/21/2014
+ Workqueue: events memcg_event_remove
+ RIP: 0010:__mem_cgroup_usage_unregister_event+0xb3/0x190
+ RSP: 0018:ffffb47e01c4fe18 EFLAGS: 00010202
+ RAX: 0000000000000001 RBX: ffff8bb223a8a000 RCX: 0000000000000001
+ RDX: 0000000000000001 RSI: ffff8bb22fb83540 RDI: 0000000000000001
+ RBP: ffffb47e01c4fe48 R08: 0000000000000000 R09: 0000000000000010
+ R10: 000000000000000c R11: 071c71c71c71c71c R12: ffff8bb226aba880
+ R13: ffff8bb223a8a480 R14: 0000000000000000 R15: 0000000000000000
+ FS: 0000000000000000(0000) GS:ffff8bb242680000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000004 CR3: 000000032c29c003 CR4: 00000000001606e0
+ Call Trace:
+ memcg_event_remove+0x32/0x90
+ process_one_work+0x172/0x380
+ worker_thread+0x49/0x3f0
+ kthread+0xf8/0x130
+ ret_from_fork+0x35/0x40
+ CR2: 0000000000000004
+
+We can reproduce this problem in the following ways:
+
+1. We create a new cgroup subdirectory and a new eventfd, and then we
+ monitor multiple memory thresholds of the cgroup through this eventfd.
+
+2. We close this eventfd, and __mem_cgroup_usage_unregister_event()
+   is called multiple times to delete all events related to this
+   eventfd.
+
+The first time __mem_cgroup_usage_unregister_event() is called, the
+kernel clears all items related to this eventfd in
+thresholds->primary.
+
+Since there is currently only one eventfd, thresholds->primary becomes
+empty, so the kernel sets both thresholds->primary and
+thresholds->spare to NULL. If at this point the user creates a new
+eventfd and monitors a memory threshold of this cgroup, the kernel
+re-initializes thresholds->primary.
+
+Then, when __mem_cgroup_usage_unregister_event() is called for the
+second time, because thresholds->primary is not empty, the kernel
+accesses thresholds->spare, but thresholds->spare is NULL, which
+triggers the crash.
+
+In general, the longer it takes to delete all events related to this
+eventfd, the easier it is to trigger this problem.
+
+The solution is to check, when deleting an event, whether the
+thresholds associated with the eventfd have already been cleared. If
+so, we do nothing.
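+
+An illustrative timeline of the race (simplified):
+
+  memcg_event_remove (workqueue)         user
+  -------------------------------------  ------------------------------
+  unregister_event() for eventfd A:
+    primary emptied, primary and
+    spare both set to NULL
+                                         register_event() for eventfd B:
+                                           primary re-initialized
+  unregister_event() for eventfd A again:
+    primary is not empty, so the NULL
+    spare is dereferenced -> crash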
+
+[akpm@linux-foundation.org: fix comment, per Kirill]
+Fixes: 907860ed381a ("cgroups: make cftype.unregister_event() void-returning")
+Signed-off-by: Chunguang Xu <brookxu@tencent.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/077a6f67-aefa-4591-efec-f2f3af2b0b02@gmail.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -4027,7 +4027,7 @@ static void __mem_cgroup_usage_unregiste
+ struct mem_cgroup_thresholds *thresholds;
+ struct mem_cgroup_threshold_ary *new;
+ unsigned long usage;
+- int i, j, size;
++ int i, j, size, entries;
+
+ mutex_lock(&memcg->thresholds_lock);
+
+@@ -4047,14 +4047,20 @@ static void __mem_cgroup_usage_unregiste
+ __mem_cgroup_threshold(memcg, type == _MEMSWAP);
+
+ /* Calculate new number of threshold */
+- size = 0;
++ size = entries = 0;
+ for (i = 0; i < thresholds->primary->size; i++) {
+ if (thresholds->primary->entries[i].eventfd != eventfd)
+ size++;
++ else
++ entries++;
+ }
+
+ new = thresholds->spare;
+
++ /* If no items related to eventfd have been cleared, nothing to do */
++ if (!entries)
++ goto unlock;
++
+ /* Set thresholds array to NULL if we don't have thresholds */
+ if (!size) {
+ kfree(new);
--- /dev/null
+From 12e967fd8e4e6c3d275b4c69c890adc838891300 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Sat, 21 Mar 2020 18:22:26 -0700
+Subject: mm: do not allow MADV_PAGEOUT for CoW pages
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 12e967fd8e4e6c3d275b4c69c890adc838891300 upstream.
+
+Jann has brought up a very interesting point [1]. While shared pages
+are normally excluded from MADV_PAGEOUT, CoW pages can easily be
+reclaimed that way. This can lead to all sorts of hard-to-debug
+problems, e.g. the performance problems outlined by Daniel [2].
+
+There are runtime environments where substantial memory is shared
+among security domains via CoW, and an easy way to reclaim that
+memory, which MADV_{COLD,PAGEOUT} offers, can lead either to
+performance degradation for the parent process, which might be more
+privileged, or even open side channel attacks.
+
+The feasibility of the latter is not really clear to me TBH, but there
+is no real reason for the exposure at this stage. There seems to be no
+real use case that depends on reclaiming CoW memory via madvise, so it
+is much easier to simply disallow it, and that is what this patch
+does. Put simply, MADV_{PAGEOUT,COLD} can operate only on exclusively
+owned memory, which is a straightforward semantic.
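+
+Concretely (an illustrative scenario): after fork(), parent and child
+share all anonymous pages copy-on-write, so page_mapcount() is 2 for
+them; a less privileged child calling madvise(MADV_PAGEOUT) on such a
+range could previously evict the parent's working set. The check added
+below skips every page that is not exclusively owned:
+
+  if (page_mapcount(page) != 1)
+          continue;    /* shared or CoW-shared: leave it alone */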
+
+[1] http://lkml.kernel.org/r/CAG48ez0G3JkMq61gUmyQAaCq=_TwHbi1XKzWRooxZkv08PQKuw@mail.gmail.com
+[2] http://lkml.kernel.org/r/CAKOZueua_v8jHCpmEtTB6f3i9e2YnmX4mqdYVWhV4E=Z-n+zRQ@mail.gmail.com
+
+Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Daniel Colascione <dancol@google.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200312082248.GS23944@dhcp22.suse.cz
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/madvise.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_r
+ }
+
+ page = pmd_page(orig_pmd);
++
++ /* Do not interfere with other mappings of this page */
++ if (page_mapcount(page) != 1)
++ goto huge_unlock;
++
+ if (next - addr != HPAGE_PMD_SIZE) {
+ int err;
+
+- if (page_mapcount(page) != 1)
+- goto huge_unlock;
+-
+ get_page(page);
+ spin_unlock(ptl);
+ lock_page(page);
+@@ -426,6 +428,10 @@ regular_page:
+ continue;
+ }
+
++ /* Do not interfere with other mappings of this page */
++ if (page_mapcount(page) != 1)
++ continue;
++
+ VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+ if (pte_young(ptent)) {
--- /dev/null
+From d41e2f3bd54699f85b3d6f45abd09fa24a222cb9 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Sat, 21 Mar 2020 18:22:13 -0700
+Subject: mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case
+
+From: Baoquan He <bhe@redhat.com>
+
+commit d41e2f3bd54699f85b3d6f45abd09fa24a222cb9 upstream.
+
+In section_deactivate(), pfn_to_page() doesn't work any more once
+ms->section_mem_map has been reset to NULL in the SPARSEMEM|!VMEMMAP
+case. This causes a hot remove failure:
+
+ kernel BUG at mm/page_alloc.c:4806!
+ invalid opcode: 0000 [#1] SMP PTI
+ CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
+ Workqueue: kacpi_hotplug acpi_hotplug_work_fn
+ RIP: 0010:free_pages+0x85/0xa0
+ Call Trace:
+ __remove_pages+0x99/0xc0
+ arch_remove_memory+0x23/0x4d
+ try_remove_memory+0xc8/0x130
+ __remove_memory+0xa/0x11
+ acpi_memory_device_remove+0x72/0x100
+ acpi_bus_trim+0x55/0x90
+ acpi_device_hotplug+0x2eb/0x3d0
+ acpi_hotplug_work_fn+0x1a/0x30
+ process_one_work+0x1a7/0x370
+ worker_thread+0x30/0x380
+ kthread+0x112/0x130
+ ret_from_fork+0x35/0x40
+
+Let's move the ->section_mem_map reset to after
+depopulate_section_memmap() to fix it.
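+
+The dependency is visible in the generic pfn_to_page() used for this
+configuration (roughly, from include/asm-generic/memory_model.h): the
+lookup goes through the section's mem_map pointer, so that pointer must
+stay valid until depopulate_section_memmap() has run:
+
+  #define __pfn_to_page(pfn)                                    \
+  ({    unsigned long __pfn = (pfn);                            \
+        struct mem_section *__sec = __pfn_to_section(__pfn);    \
+        __section_mem_map_addr(__sec) + __pfn;                  \
+  })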
+
+[akpm@linux-foundation.org: remove unneeded initialization, per David]
+Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug")
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Wei Yang <richardw.yang@linux.intel.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200307084229.28251-2-bhe@redhat.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/sparse.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -744,6 +744,7 @@ static void section_deactivate(unsigned
+ struct mem_section *ms = __pfn_to_section(pfn);
+ bool section_is_early = early_section(ms);
+ struct page *memmap = NULL;
++ bool empty;
+ unsigned long *subsection_map = ms->usage
+ ? &ms->usage->subsection_map[0] : NULL;
+
+@@ -774,7 +775,8 @@ static void section_deactivate(unsigned
+ * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
+ */
+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
++ empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
++ if (empty) {
+ unsigned long section_nr = pfn_to_section_nr(pfn);
+
+ /*
+@@ -789,13 +791,15 @@ static void section_deactivate(unsigned
+ ms->usage = NULL;
+ }
+ memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+- ms->section_mem_map = (unsigned long)NULL;
+ }
+
+ if (section_is_early && memmap)
+ free_map_bootmem(memmap);
+ else
+ depopulate_section_memmap(pfn, nr_pages, altmap);
++
++ if (empty)
++ ms->section_mem_map = (unsigned long)NULL;
+ }
+
+ static struct page * __meminit section_activate(int nid, unsigned long pfn,
--- /dev/null
+From d397a45fc741c80c32a14e2de008441e9976f50c Mon Sep 17 00:00:00 2001
+From: Chris Down <chris@chrisdown.name>
+Date: Sat, 21 Mar 2020 18:22:20 -0700
+Subject: mm, memcg: fix corruption on 64-bit divisor in memory.high throttling
+
+From: Chris Down <chris@chrisdown.name>
+
+commit d397a45fc741c80c32a14e2de008441e9976f50c upstream.
+
+Commit 0e4b01df8659 had a bunch of fixups to use the right division
+method. However, it seems that after all that it still wasn't right --
+div_u64 takes a 32-bit divisor.
+
+The headroom is still large (2^32 pages), so on mundane systems you
+won't hit this, but this should definitely be fixed.
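+
+For reference, the two helpers differ only in the divisor width (their
+signatures, from include/linux/math64.h):
+
+  u64 div_u64(u64 dividend, u32 divisor);     /* divisor truncated    */
+  u64 div64_u64(u64 dividend, u64 divisor);   /* full 64-bit divisor  */
+
+so once clamped_high exceeds 2^32 pages, div_u64() silently divides by
+a truncated value.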
+
+Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high")
+Reported-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Chris Down <chris@chrisdown.name>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Nathan Chancellor <natechancellor@gmail.com>
+Cc: <stable@vger.kernel.org> [5.4.x+]
+Link: http://lkml.kernel.org/r/80780887060514967d414b3cd91f9a316a16ab98.1584036142.git.chris@chrisdown.name
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2339,7 +2339,7 @@ void mem_cgroup_handle_over_high(void)
+ */
+ clamped_high = max(high, 1UL);
+
+- overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
++ overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+ clamped_high);
+
+ penalty_jiffies = ((u64)overage * overage * HZ)
--- /dev/null
+From e26733e0d0ec6798eca93daa300bc3f43616127f Mon Sep 17 00:00:00 2001
+From: Chris Down <chris@chrisdown.name>
+Date: Sat, 21 Mar 2020 18:22:23 -0700
+Subject: mm, memcg: throttle allocators based on ancestral memory.high
+
+From: Chris Down <chris@chrisdown.name>
+
+commit e26733e0d0ec6798eca93daa300bc3f43616127f upstream.
+
+Prior to this commit, we only directly check the affected cgroup's
+memory.high against its usage. However, it's possible that we are being
+reclaimed as a result of hitting an ancestor memory.high and should be
+penalised based on that, instead.
+
+This patch changes memory.high overage throttling to use the largest
+overage in its ancestors when considering how many penalty jiffies to
+charge. This makes sure that we penalise poorly behaving cgroups in the
+same way regardless of at what level of the hierarchy memory.high was
+breached.
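+
+A worked example (made-up numbers): a child cgroup with usage at 50M
+and no memory.high of its own, sitting under a parent with memory.high
+set to 40M, previously saw no overage of its own and was never
+throttled; with this change the walk up the hierarchy finds the
+parent's 10M/40M = 25% overage and penalises the allocating task
+accordingly.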
+
+Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high")
+Reported-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Chris Down <chris@chrisdown.name>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Nathan Chancellor <natechancellor@gmail.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org> [5.4.x+]
+Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.name
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 93 ++++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 58 insertions(+), 35 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2297,28 +2297,41 @@ static void high_work_func(struct work_s
+ #define MEMCG_DELAY_SCALING_SHIFT 14
+
+ /*
+- * Scheduled by try_charge() to be executed from the userland return path
+- * and reclaims memory over the high limit.
++ * Get the number of jiffies that we should penalise a mischievous cgroup which
++ * is exceeding its memory.high by checking both it and its ancestors.
+ */
+-void mem_cgroup_handle_over_high(void)
++static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
++ unsigned int nr_pages)
+ {
+- unsigned long usage, high, clamped_high;
+- unsigned long pflags;
+- unsigned long penalty_jiffies, overage;
+- unsigned int nr_pages = current->memcg_nr_pages_over_high;
+- struct mem_cgroup *memcg;
++ unsigned long penalty_jiffies;
++ u64 max_overage = 0;
+
+- if (likely(!nr_pages))
+- return;
++ do {
++ unsigned long usage, high;
++ u64 overage;
++
++ usage = page_counter_read(&memcg->memory);
++ high = READ_ONCE(memcg->high);
++
++ /*
++ * Prevent division by 0 in overage calculation by acting as if
++ * it was a threshold of 1 page
++ */
++ high = max(high, 1UL);
++
++ overage = usage - high;
++ overage <<= MEMCG_DELAY_PRECISION_SHIFT;
++ overage = div64_u64(overage, high);
++
++ if (overage > max_overage)
++ max_overage = overage;
++ } while ((memcg = parent_mem_cgroup(memcg)) &&
++ !mem_cgroup_is_root(memcg));
+
+- memcg = get_mem_cgroup_from_mm(current->mm);
+- reclaim_high(memcg, nr_pages, GFP_KERNEL);
+- current->memcg_nr_pages_over_high = 0;
++ if (!max_overage)
++ return 0;
+
+ /*
+- * memory.high is breached and reclaim is unable to keep up. Throttle
+- * allocators proactively to slow down excessive growth.
+- *
+ * We use overage compared to memory.high to calculate the number of
+ * jiffies to sleep (penalty_jiffies). Ideally this value should be
+ * fairly lenient on small overages, and increasingly harsh when the
+@@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void)
+ * its crazy behaviour, so we exponentially increase the delay based on
+ * overage amount.
+ */
+-
+- usage = page_counter_read(&memcg->memory);
+- high = READ_ONCE(memcg->high);
+-
+- if (usage <= high)
+- goto out;
+-
+- /*
+- * Prevent division by 0 in overage calculation by acting as if it was a
+- * threshold of 1 page
+- */
+- clamped_high = max(high, 1UL);
+-
+- overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+- clamped_high);
+-
+- penalty_jiffies = ((u64)overage * overage * HZ)
+- >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
++ penalty_jiffies = max_overage * max_overage * HZ;
++ penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
++ penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
+
+ /*
+ * Factor in the task's own contribution to the overage, such that four
+@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
+ * application moving forwards and also permit diagnostics, albeit
+ * extremely slowly.
+ */
+- penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
++ return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
++}
++
++/*
++ * Scheduled by try_charge() to be executed from the userland return path
++ * and reclaims memory over the high limit.
++ */
++void mem_cgroup_handle_over_high(void)
++{
++ unsigned long penalty_jiffies;
++ unsigned long pflags;
++ unsigned int nr_pages = current->memcg_nr_pages_over_high;
++ struct mem_cgroup *memcg;
++
++ if (likely(!nr_pages))
++ return;
++
++ memcg = get_mem_cgroup_from_mm(current->mm);
++ reclaim_high(memcg, nr_pages, GFP_KERNEL);
++ current->memcg_nr_pages_over_high = 0;
++
++ /*
++ * memory.high is breached and reclaim is unable to keep up. Throttle
++ * allocators proactively to slow down excessive growth.
++ */
++ penalty_jiffies = calculate_high_delay(memcg, nr_pages);
+
+ /*
+ * Don't sleep if the amount of jiffies this memcg owes us is so low
--- /dev/null
+From 5076190daded2197f62fe92cf69674488be44175 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 17 Mar 2020 11:04:09 -0700
+Subject: mm: slub: be more careful about the double cmpxchg of freelist
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 5076190daded2197f62fe92cf69674488be44175 upstream.
+
+This is just a cleanup addition to Jann's fix to properly update the
+transaction ID for the slub slowpath in commit fd4d9c7d0c71 ("mm: slub:
+add missing TID bump..").
+
+The transaction ID is what protects us against any concurrent accesses,
+but we should really also make sure that the 'freelist' comparison
+itself always uses the same freelist value that we then use as the new
+next free pointer.
+
+Jann points out that if we do all of this carefully, we could skip the
+transaction ID update for all the paths that only remove entries from
+the lists, and only update the TID when adding entries (to avoid the ABA
+issue with cmpxchg and list handling re-adding a previously seen value).
+
+But this patch just does the "make sure to cmpxchg the same value we
+used" rather than trying to be clever.
+
+Acked-by: Jann Horn <jannh@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2978,11 +2978,13 @@ redo:
+ barrier();
+
+ if (likely(page == c->page)) {
+- set_freepointer(s, tail_obj, c->freelist);
++ void **freelist = READ_ONCE(c->freelist);
++
++ set_freepointer(s, tail_obj, freelist);
+
+ if (unlikely(!this_cpu_cmpxchg_double(
+ s->cpu_slab->freelist, s->cpu_slab->tid,
+- c->freelist, tid,
++ freelist, tid,
+ head, next_tid(tid)))) {
+
+ note_cmpxchg_failure("slab_free", s, tid);
--- /dev/null
+From 0715e6c516f106ed553828a671d30ad9a3431536 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Sat, 21 Mar 2020 18:22:37 -0700
+Subject: mm, slub: prevent kmalloc_node crashes and memory leaks
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 0715e6c516f106ed553828a671d30ad9a3431536 upstream.
+
+Sachin reports [1] a crash in SLUB __slab_alloc():
+
+ BUG: Kernel NULL pointer dereference on read at 0x000073b0
+ Faulting instruction address: 0xc0000000003d55f4
+ Oops: Kernel access of bad area, sig: 11 [#1]
+ LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+ Modules linked in:
+ CPU: 19 PID: 1 Comm: systemd Not tainted 5.6.0-rc2-next-20200218-autotest #1
+ NIP: c0000000003d55f4 LR: c0000000003d5b94 CTR: 0000000000000000
+ REGS: c0000008b37836d0 TRAP: 0300 Not tainted (5.6.0-rc2-next-20200218-autotest)
+ MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24004844 XER: 00000000
+ CFAR: c00000000000dec4 DAR: 00000000000073b0 DSISR: 40000000 IRQMASK: 1
+ GPR00: c0000000003d5b94 c0000008b3783960 c00000000155d400 c0000008b301f500
+ GPR04: 0000000000000dc0 0000000000000002 c0000000003443d8 c0000008bb398620
+ GPR08: 00000008ba2f0000 0000000000000001 0000000000000000 0000000000000000
+ GPR12: 0000000024004844 c00000001ec52a00 0000000000000000 0000000000000000
+ GPR16: c0000008a1b20048 c000000001595898 c000000001750c18 0000000000000002
+ GPR20: c000000001750c28 c000000001624470 0000000fffffffe0 5deadbeef0000122
+ GPR24: 0000000000000001 0000000000000dc0 0000000000000002 c0000000003443d8
+ GPR28: c0000008b301f500 c0000008bb398620 0000000000000000 c00c000002287180
+ NIP ___slab_alloc+0x1f4/0x760
+ LR __slab_alloc+0x34/0x60
+ Call Trace:
+ ___slab_alloc+0x334/0x760 (unreliable)
+ __slab_alloc+0x34/0x60
+ __kmalloc_node+0x110/0x490
+ kvmalloc_node+0x58/0x110
+ mem_cgroup_css_online+0x108/0x270
+ online_css+0x48/0xd0
+ cgroup_apply_control_enable+0x2ec/0x4d0
+ cgroup_mkdir+0x228/0x5f0
+ kernfs_iop_mkdir+0x90/0xf0
+ vfs_mkdir+0x110/0x230
+ do_mkdirat+0xb0/0x1a0
+ system_call+0x5c/0x68
+
+This is a PowerPC platform with following NUMA topology:
+
+ available: 2 nodes (0-1)
+ node 0 cpus:
+ node 0 size: 0 MB
+ node 0 free: 0 MB
+ node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ node 1 size: 35247 MB
+ node 1 free: 30907 MB
+ node distances:
+ node 0 1
+ 0: 10 40
+ 1: 40 10
+
+ possible numa nodes: 0-31
+
+This only happens with a mmotm patch "mm/memcontrol.c: allocate
+shrinker_map on appropriate NUMA node" [2] which effectively calls
+kmalloc_node for each possible node. SLUB, however, only allocates
+kmem_cache_node on online N_NORMAL_MEMORY nodes, and relies on
+node_to_mem_node() to return such a valid node for other nodes since commit
+a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating
+on memoryless node"). This is however not true in this configuration
+where the _node_numa_mem_ array is not initialized for nodes 0 and 2-31,
+thus it contains zeroes and get_partial() ends up accessing
+non-allocated kmem_cache_node.
+
+A related issue was reported by Bharata (originally by Ramachandran) [3],
+where a similar PowerPC configuration, but with a mainline kernel
+without patch [2], ends up allocating large amounts of pages from the
+kmalloc-1k and kmalloc-512 caches. This seems to have the same
+underlying issue with node_to_mem_node() not behaving as expected, and
+might also lead to an infinite loop with CONFIG_SLUB_CPU_PARTIAL [4].
+
+This patch should fix both issues by not relying on node_to_mem_node()
+anymore and instead simply falling back to NUMA_NO_NODE, when
+kmalloc_node(node) is attempted for a node that's not online, or has no
+usable memory. The "usable memory" condition is also changed from
+node_present_pages() to N_NORMAL_MEMORY node state, as that is exactly
+the condition that SLUB uses to allocate kmem_cache_node structures.
+The check in get_partial() is removed completely, as the checks in
+___slab_alloc() are now sufficient to prevent get_partial() being
+reached with an invalid node.
+
+[1] https://lore.kernel.org/linux-next/3381CD91-AB3D-4773-BA04-E7A072A63968@linux.vnet.ibm.com/
+[2] https://lore.kernel.org/linux-mm/fff0e636-4c36-ed10-281c-8cdb0687c839@virtuozzo.com/
+[3] https://lore.kernel.org/linux-mm/20200317092624.GB22538@in.ibm.com/
+[4] https://lore.kernel.org/linux-mm/088b5996-faae-8a56-ef9c-5b567125ae54@suse.cz/
+
+Fixes: a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node")
+Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
+Reported-by: PUVICHAKRAVARTHY RAMACHANDRAN <puvichakravarthy@in.ibm.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
+Tested-by: Bharata B Rao <bharata@linux.ibm.com>
+Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Christopher Lameter <cl@linux.com>
+Cc: linuxppc-dev@lists.ozlabs.org
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200320115533.9604-1-vbabka@suse.cz
+Debugged-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c | 26 +++++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1954,8 +1954,6 @@ static void *get_partial(struct kmem_cac
+
+ if (node == NUMA_NO_NODE)
+ searchnode = numa_mem_id();
+- else if (!node_present_pages(node))
+- searchnode = node_to_mem_node(node);
+
+ object = get_partial_node(s, get_node(s, searchnode), c, flags);
+ if (object || node != NUMA_NO_NODE)
+@@ -2544,17 +2542,27 @@ static void *___slab_alloc(struct kmem_c
+ struct page *page;
+
+ page = c->page;
+- if (!page)
++ if (!page) {
++ /*
++ * if the node is not online or has no normal memory, just
++ * ignore the node constraint
++ */
++ if (unlikely(node != NUMA_NO_NODE &&
++ !node_state(node, N_NORMAL_MEMORY)))
++ node = NUMA_NO_NODE;
+ goto new_slab;
++ }
+ redo:
+
+ if (unlikely(!node_match(page, node))) {
+- int searchnode = node;
+-
+- if (node != NUMA_NO_NODE && !node_present_pages(node))
+- searchnode = node_to_mem_node(node);
+-
+- if (unlikely(!node_match(page, searchnode))) {
++ /*
++ * same as above but node_match() being false already
++ * implies node != NUMA_NO_NODE
++ */
++ if (!node_state(node, N_NORMAL_MEMORY)) {
++ node = NUMA_NO_NODE;
++ goto redo;
++ } else {
+ stat(s, ALLOC_NODE_MISMATCH);
+ deactivate_slab(s, page, c->freelist, c);
+ goto new_slab;
--- /dev/null
+From d72520ad004a8ce18a6ba6cde317f0081b27365a Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Sat, 21 Mar 2020 18:22:17 -0700
+Subject: page-flags: fix a crash at SetPageError(THP_SWAP)
+
+From: Qian Cai <cai@lca.pw>
+
+commit d72520ad004a8ce18a6ba6cde317f0081b27365a upstream.
+
+Commit bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped
+out") added support for writing THP to a swap device but forgot to
+update an older commit df8c94d13c7e ("page-flags: define behavior of
+FS/IO-related flags on compound pages"), which could trigger a crash
+during THP swap-out with DEBUG_VM_PGFLAGS=y:
+
+ kernel BUG at include/linux/page-flags.h:317!
+
+ page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page))
+ page:fffff3b2ec3a8000 refcount:512 mapcount:0 mapping:000000009eb0338c index:0x7f6e58200 head:fffff3b2ec3a8000 order:9 compound_mapcount:0 compound_pincount:0
+ anon flags: 0x45fffe0000d8454(uptodate|lru|workingset|owner_priv_1|writeback|head|reclaim|swapbacked)
+
+ end_swap_bio_write()
+ SetPageError(page)
+ VM_BUG_ON_PAGE(1 && PageCompound(page))
+
+ <IRQ>
+ bio_endio+0x297/0x560
+ dec_pending+0x218/0x430 [dm_mod]
+ clone_endio+0xe4/0x2c0 [dm_mod]
+ bio_endio+0x297/0x560
+ blk_update_request+0x201/0x920
+ scsi_end_request+0x6b/0x4b0
+ scsi_io_completion+0x509/0x7e0
+ scsi_finish_command+0x1ed/0x2a0
+ scsi_softirq_done+0x1c9/0x1d0
+ __blk_mqnterrupt+0xf/0x20
+ </IRQ>
+
+Fix by checking PF_NO_TAIL in those places instead.
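+
+The difference between the two policies (roughly, from the macros in
+include/linux/page-flags.h): PF_NO_COMPOUND BUGs on any compound page,
+while PF_NO_TAIL only rejects tail pages and redirects the operation
+to the head page, which is what SetPageError() on a THP needs:
+
+  #define PF_NO_TAIL(page, enforce) ({                                 \
+          VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);          \
+          PF_POISONED_CHECK(compound_head(page)); })
+  #define PF_NO_COMPOUND(page, enforce) ({                             \
+          VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page);      \
+          PF_POISONED_CHECK(page); })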
+
+Fixes: bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out")
+Signed-off-by: Qian Cai <cai@lca.pw>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: "Huang, Ying" <ying.huang@intel.com>
+Acked-by: Rafael Aquini <aquini@redhat.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200310235846.1319-1-cai@lca.pw
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/page-flags.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -311,7 +311,7 @@ static inline int TestClearPage##uname(s
+
+ __PAGEFLAG(Locked, locked, PF_NO_TAIL)
+ PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
+-PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
++PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
+ PAGEFLAG(Referenced, referenced, PF_HEAD)
+ TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
+ __SETPAGEFLAG(Referenced, referenced, PF_HEAD)
--- /dev/null
+From 5d892919fdd0cefd361697472d4e1b174a594991 Mon Sep 17 00:00:00 2001
+From: Corentin Labbe <clabbe@baylibre.com>
+Date: Wed, 18 Mar 2020 15:26:49 +0000
+Subject: rtc: max8907: add missing select REGMAP_IRQ
+
+From: Corentin Labbe <clabbe@baylibre.com>
+
+commit 5d892919fdd0cefd361697472d4e1b174a594991 upstream.
+
+I have hit the following build error:
+
+ armv7a-hardfloat-linux-gnueabi-ld: drivers/rtc/rtc-max8907.o: in function `max8907_rtc_probe':
+ rtc-max8907.c:(.text+0x400): undefined reference to `regmap_irq_get_virq'
+
+The max8907 driver should select REGMAP_IRQ.
+
+Fixes: 94c01ab6d7544 ("rtc: add MAX8907 RTC driver")
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/rtc/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/rtc/Kconfig
++++ b/drivers/rtc/Kconfig
+@@ -327,6 +327,7 @@ config RTC_DRV_MAX6900
+ config RTC_DRV_MAX8907
+ tristate "Maxim MAX8907"
+ depends on MFD_MAX8907 || COMPILE_TEST
++ select REGMAP_IRQ
+ help
+ If you say yes here you will get support for the
+ RTC of Maxim MAX8907 PMIC.
intel_th-fix-user-visible-error-codes.patch
intel_th-pci-add-elkhart-lake-cpu-support.patch
modpost-move-the-namespace-field-in-module.symvers-last.patch
+rtc-max8907-add-missing-select-regmap_irq.patch
+arm64-compat-fix-syscall-number-of-compat_clock_getres.patch
+xhci-do-not-open-code-__print_symbolic-in-xhci-trace-events.patch
+btrfs-fix-log-context-list-corruption-after-rename-whiteout-error.patch
+drm-amd-amdgpu-fix-gpr-read-from-debugfs-v2.patch
+drm-lease-fix-warning-in-idr_destroy.patch
+stm-class-sys-t-fix-the-use-of-time_after.patch
+memcg-fix-null-pointer-dereference-in-__mem_cgroup_usage_unregister_event.patch
+mm-memcg-fix-corruption-on-64-bit-divisor-in-memory.high-throttling.patch
+mm-memcg-throttle-allocators-based-on-ancestral-memory.high.patch
+mm-hotplug-fix-hot-remove-failure-in-sparsemem-vmemmap-case.patch
+mm-do-not-allow-madv_pageout-for-cow-pages.patch
+epoll-fix-possible-lost-wakeup-on-epoll_ctl-path.patch
+mm-slub-be-more-careful-about-the-double-cmpxchg-of-freelist.patch
+mm-slub-prevent-kmalloc_node-crashes-and-memory-leaks.patch
+page-flags-fix-a-crash-at-setpageerror-thp_swap.patch
+x86-mm-split-vmalloc_sync_all.patch
--- /dev/null
+From 283f87c0d5d32b4a5c22636adc559bca82196ed3 Mon Sep 17 00:00:00 2001
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Date: Tue, 17 Mar 2020 08:22:11 +0200
+Subject: stm class: sys-t: Fix the use of time_after()
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+commit 283f87c0d5d32b4a5c22636adc559bca82196ed3 upstream.
+
+The operands of time_after() are in the wrong order in both instances
+in the sys-t driver. Fix that.
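+
+For reference, time_after(a, b) is true when time a is after time b;
+its definition (from include/linux/jiffies.h, typechecks omitted) is
+wrap-safe:
+
+  #define time_after(a,b)   ((long)((b) - (a)) < 0)
+
+so an "interval expired" check must read
+time_after(jiffies, last + interval), not the reverse.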
+
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Fixes: 39f10239df75 ("stm class: p_sys-t: Add support for CLOCKSYNC packets")
+Fixes: d69d5e83110f ("stm class: Add MIPI SyS-T protocol support")
+Cc: stable@vger.kernel.org # v4.20+
+Link: https://lore.kernel.org/r/20200317062215.15598-3-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hwtracing/stm/p_sys-t.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/hwtracing/stm/p_sys-t.c
++++ b/drivers/hwtracing/stm/p_sys-t.c
+@@ -238,7 +238,7 @@ static struct configfs_attribute *sys_t_
+ static inline bool sys_t_need_ts(struct sys_t_output *op)
+ {
+ if (op->node.ts_interval &&
+- time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) {
++ time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) {
+ op->ts_jiffies = jiffies;
+
+ return true;
+@@ -250,8 +250,8 @@ static inline bool sys_t_need_ts(struct
+ static bool sys_t_need_clock_sync(struct sys_t_output *op)
+ {
+ if (op->node.clocksync_interval &&
+- time_after(op->clocksync_jiffies + op->node.clocksync_interval,
+- jiffies)) {
++ time_after(jiffies,
++ op->clocksync_jiffies + op->node.clocksync_interval)) {
+ op->clocksync_jiffies = jiffies;
+
+ return true;
--- /dev/null
+From 763802b53a427ed3cbd419dbba255c414fdd9e7c Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Sat, 21 Mar 2020 18:22:41 -0700
+Subject: x86/mm: split vmalloc_sync_all()
+
+From: Joerg Roedel <jroedel@suse.de>
+
+commit 763802b53a427ed3cbd419dbba255c414fdd9e7c upstream.
+
+Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in
+__purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in
+the vunmap() code-path. While this change was necessary to maintain
+correctness on x86-32-pae kernels, it also adds additional cycles for
+architectures that don't need it.
+
+Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported
+severe performance regressions in micro-benchmarks because it now also
+calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But
+the vmalloc_sync_all() implementation on x86-64 is only needed for newly
+created mappings.
+
+To avoid the unnecessary work on x86-64 and to gain the performance
+back, split up vmalloc_sync_all() into two functions:
+
+ * vmalloc_sync_mappings(), and
+ * vmalloc_sync_unmappings()
+
+Most call-sites of vmalloc_sync_all() only care about new mappings
+being synchronized. The only exception is the new call-site added in
+the above-mentioned commit.
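+
+After the split, the call-sites map as follows (as in the hunks below):
+
+  register_die_notifier()   -> vmalloc_sync_mappings()
+  ghes_estatus_pool_init()  -> vmalloc_sync_mappings()
+  __purge_vmap_area_lazy()  -> vmalloc_sync_unmappings()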
+
+Shile Zhang directed us to a report of an 80% regression in reaim
+throughput.
+
+Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Reported-by: Shile Zhang <shile.zhang@linux.alibaba.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Borislav Petkov <bp@suse.de>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [GHES]
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org
+Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/
+Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c | 26 ++++++++++++++++++++++++--
+ drivers/acpi/apei/ghes.c | 2 +-
+ include/linux/vmalloc.h | 5 +++--
+ kernel/notifier.c | 2 +-
+ mm/nommu.c | 10 +++++++---
+ mm/vmalloc.c | 11 +++++++----
+ 6 files changed, 43 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -189,7 +189,7 @@ static inline pmd_t *vmalloc_sync_one(pg
+ return pmd_k;
+ }
+
+-void vmalloc_sync_all(void)
++static void vmalloc_sync(void)
+ {
+ unsigned long address;
+
+@@ -216,6 +216,16 @@ void vmalloc_sync_all(void)
+ }
+ }
+
++void vmalloc_sync_mappings(void)
++{
++ vmalloc_sync();
++}
++
++void vmalloc_sync_unmappings(void)
++{
++ vmalloc_sync();
++}
++
+ /*
+ * 32-bit:
+ *
+@@ -318,11 +328,23 @@ out:
+
+ #else /* CONFIG_X86_64: */
+
+-void vmalloc_sync_all(void)
++void vmalloc_sync_mappings(void)
+ {
++ /*
++ * 64-bit mappings might allocate new p4d/pud pages
++ * that need to be propagated to all tasks' PGDs.
++ */
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+ }
+
++void vmalloc_sync_unmappings(void)
++{
++ /*
++ * Unmappings never allocate or free p4d/pud pages.
++ * No work is required here.
++ */
++}
++
+ /*
+ * 64-bit:
+ *
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
+ * New allocation must be visible in all pgd before it can be found by
+ * an NMI allocating from the pool.
+ */
+- vmalloc_sync_all();
++ vmalloc_sync_mappings();
+
+ rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
+ if (rc)
+--- a/include/linux/vmalloc.h
++++ b/include/linux/vmalloc.h
+@@ -139,8 +139,9 @@ extern int remap_vmalloc_range_partial(s
+
+ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+ unsigned long pgoff);
+-void vmalloc_sync_all(void);
+-
++void vmalloc_sync_mappings(void);
++void vmalloc_sync_unmappings(void);
++
+ /*
+ * Lowlevel-APIs (not for driver use!)
+ */
+--- a/kernel/notifier.c
++++ b/kernel/notifier.c
+@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
+
+ int register_die_notifier(struct notifier_block *nb)
+ {
+- vmalloc_sync_all();
++ vmalloc_sync_mappings();
+ return atomic_notifier_chain_register(&die_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(register_die_notifier);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -370,10 +370,14 @@ void vm_unmap_aliases(void)
+ EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
+ /*
+- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
+- * have one.
++ * Implement a stub for vmalloc_sync_[un]mapping() if the architecture
++ * chose not to have one.
+ */
+-void __weak vmalloc_sync_all(void)
++void __weak vmalloc_sync_mappings(void)
++{
++}
++
++void __weak vmalloc_sync_unmappings(void)
+ {
+ }
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1287,7 +1287,7 @@ static bool __purge_vmap_area_lazy(unsig
+ * First make sure the mappings are removed from all page-tables
+ * before they are freed.
+ */
+- vmalloc_sync_all();
++ vmalloc_sync_unmappings();
+
+ /*
+ * TODO: to calculate a flush range without looping.
+@@ -3120,16 +3120,19 @@ int remap_vmalloc_range(struct vm_area_s
+ EXPORT_SYMBOL(remap_vmalloc_range);
+
+ /*
+- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
+- * have one.
++ * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
++ * not to have one.
+ *
+ * The purpose of this function is to make sure the vmalloc area
+ * mappings are identical in all page-tables in the system.
+ */
+-void __weak vmalloc_sync_all(void)
++void __weak vmalloc_sync_mappings(void)
+ {
+ }
+
++void __weak vmalloc_sync_unmappings(void)
++{
++}
+
+ static int f(pte_t *pte, unsigned long addr, void *data)
+ {
--- /dev/null
+From 045706bff837ee89c13f1ace173db71922c1c40b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Fri, 6 Mar 2020 17:08:57 +0200
+Subject: xhci: Do not open code __print_symbolic() in xhci trace events
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 045706bff837ee89c13f1ace173db71922c1c40b upstream.
+
+libtraceevent (used by perf and trace-cmd) failed to parse the
+xhci_urb_dequeue trace event. This is because the user space trace
+event format parser is not a full C compiler. It can handle some basic
+logic, but is not meant to be able to handle everything C can do.
+
+In cases where a trace event field needs to be converted from a number
+to a string, there's the __print_symbolic() macro that should be used:
+
+ See samples/trace_events/trace-events-sample.h
+
+Some xhci trace events open coded the equivalent of __print_symbolic(),
+causing the user space tools to fail to parse them. Replace the
+open-coded switch statements with __print_symbolic().
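+
+The general form, which libtraceevent can evaluate because the
+value/name table is exported in the event's format file, is:
+
+  __print_symbolic(value,
+                   { VALUE_A, "name-a" },
+                   { VALUE_B, "name-b" })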
+
+CC: stable@vger.kernel.org
+Reported-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206531
+Fixes: 5abdc2e6e12ff ("usb: host: xhci: add urb_enqueue/dequeue/giveback tracers")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20200306150858.21904-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/host/xhci-trace.h | 23 ++++++-----------------
+ 1 file changed, 6 insertions(+), 17 deletions(-)
+
+--- a/drivers/usb/host/xhci-trace.h
++++ b/drivers/usb/host/xhci-trace.h
+@@ -289,23 +289,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb,
+ ),
+ TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x",
+ __entry->epnum, __entry->dir_in ? "in" : "out",
+- ({ char *s;
+- switch (__entry->type) {
+- case USB_ENDPOINT_XFER_INT:
+- s = "intr";
+- break;
+- case USB_ENDPOINT_XFER_CONTROL:
+- s = "control";
+- break;
+- case USB_ENDPOINT_XFER_BULK:
+- s = "bulk";
+- break;
+- case USB_ENDPOINT_XFER_ISOC:
+- s = "isoc";
+- break;
+- default:
+- s = "UNKNOWN";
+- } s; }), __entry->urb, __entry->pipe, __entry->slot_id,
++ __print_symbolic(__entry->type,
++ { USB_ENDPOINT_XFER_INT, "intr" },
++ { USB_ENDPOINT_XFER_CONTROL, "control" },
++ { USB_ENDPOINT_XFER_BULK, "bulk" },
++ { USB_ENDPOINT_XFER_ISOC, "isoc" }),
++ __entry->urb, __entry->pipe, __entry->slot_id,
+ __entry->actual, __entry->length, __entry->num_mapped_sgs,
+ __entry->num_sgs, __entry->stream, __entry->flags
+ )