--- /dev/null
+From 3568b88944fef28db3ee989b957da49ffc627ede Mon Sep 17 00:00:00 2001
+From: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Date: Thu, 19 Mar 2020 14:11:38 +0000
+Subject: arm64: compat: Fix syscall number of compat_clock_getres
+
+From: Vincenzo Frascino <vincenzo.frascino@arm.com>
+
+commit 3568b88944fef28db3ee989b957da49ffc627ede upstream.
+
+The syscall number of compat_clock_getres was erroneously set to 247
+(__NR_io_cancel!) instead of 264. This causes the vDSO fallback of
+clock_getres() to land on the wrong syscall for compat tasks.
+
+Fix the numbering.
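+
+For reference, in the AArch32 syscall table (arch/arm/tools/syscall.tbl,
+abridged here), the two numbers map as follows, so the broken fallback
+ended up invoking io_cancel() instead of clock_getres():
+
+  247   common  io_cancel      sys_io_cancel      <-- what was called
+  264   common  clock_getres   sys_clock_getres   <-- what was intended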
+
+Cc: <stable@vger.kernel.org>
+Fixes: 53c489e1dfeb6 ("arm64: compat: Add missing syscall numbers")
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/unistd.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/include/asm/unistd.h
++++ b/arch/arm64/include/asm/unistd.h
+@@ -25,8 +25,8 @@
+ #define __NR_compat_gettimeofday 78
+ #define __NR_compat_sigreturn 119
+ #define __NR_compat_rt_sigreturn 173
+-#define __NR_compat_clock_getres 247
+ #define __NR_compat_clock_gettime 263
++#define __NR_compat_clock_getres 264
+ #define __NR_compat_clock_gettime64 403
+ #define __NR_compat_clock_getres_time64 406
+
--- /dev/null
+From 236ebc20d9afc5e9ff52f3cf3f365a91583aac10 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 10 Mar 2020 12:13:53 +0000
+Subject: btrfs: fix log context list corruption after rename whiteout error
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 236ebc20d9afc5e9ff52f3cf3f365a91583aac10 upstream.
+
+During a rename whiteout, if btrfs_whiteout_for_rename() returns an error
+we can end up returning from btrfs_rename() with the log context object
+still in the root's log context list - this happens if 'sync_log' was
+set to true before we called btrfs_whiteout_for_rename() and it is
+dangerous because we end up with a corrupt linked list (root->log_ctxs)
+as the log context object was allocated on the stack.
+
+After btrfs_rename() returns, any task that is running btrfs_sync_log()
+concurrently can end up crashing because that linked list is traversed by
+btrfs_sync_log() (through btrfs_remove_all_log_ctxs()). That results in
+the same issue that commit e6c617102c7e4 ("Btrfs: fix log context list
+corruption after rename exchange operation") fixed.
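+
+A rough sketch of the hazard (heavily simplified from btrfs_rename()):
+
+  struct btrfs_log_ctx ctx;    /* lives on btrfs_rename()'s stack */
+
+  btrfs_init_log_ctx(&ctx, old_inode);
+  ...                          /* sync_log set, ctx linked into root->log_ctxs */
+  ret = btrfs_whiteout_for_rename(trans, root, old_dir, old_dentry);
+  if (ret)
+          goto out_fail;       /* ctx must be unlinked before returning */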
+
+Fixes: d4682ba03ef618 ("Btrfs: sync log after logging new name")
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10159,6 +10159,10 @@ out_fail:
+ ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
+ if (ret)
+ commit_transaction = true;
++ } else if (sync_log) {
++ mutex_lock(&root->log_mutex);
++ list_del(&ctx.list);
++ mutex_unlock(&root->log_mutex);
+ }
+ if (commit_transaction) {
+ ret = btrfs_commit_transaction(trans);
--- /dev/null
+From 5bbc6604a62814511c32f2e39bc9ffb2c1b92cbe Mon Sep 17 00:00:00 2001
+From: Tom St Denis <tom.stdenis@amd.com>
+Date: Tue, 10 Mar 2020 08:40:41 -0400
+Subject: drm/amd/amdgpu: Fix GPR read from debugfs (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tom St Denis <tom.stdenis@amd.com>
+
+commit 5bbc6604a62814511c32f2e39bc9ffb2c1b92cbe upstream.
+
+The offset into the array was specified in bytes but should
+be in terms of 32-bit words. Also prevent large reads that
+would cause a buffer overread.
+
+v2: Read from correct offset from internal storage buffer.
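+
+To illustrate the units mix-up (example values): a read at *pos = 20
+targets GPR dword 5, but the old code indexed the 32-bit data[] array
+with the byte offset:
+
+  offset = *pos & GENMASK_ULL(11, 0);           /* 20: byte offset, wrong */
+  offset = (*pos & GENMASK_ULL(11, 0)) >> 2;    /*  5: dword index, right */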
+
+Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+@@ -694,11 +694,11 @@ static ssize_t amdgpu_debugfs_gpr_read(s
+ ssize_t result = 0;
+ uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
+
+- if (size & 3 || *pos & 3)
++ if (size > 4096 || size & 3 || *pos & 3)
+ return -EINVAL;
+
+ /* decode offset */
+- offset = *pos & GENMASK_ULL(11, 0);
++ offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
+ se = (*pos & GENMASK_ULL(19, 12)) >> 12;
+ sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
+ cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
+@@ -729,7 +729,7 @@ static ssize_t amdgpu_debugfs_gpr_read(s
+ while (size) {
+ uint32_t value;
+
+- value = data[offset++];
++ value = data[result >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
--- /dev/null
+From b216a8e7908cd750550c0480cf7d2b3a37f06954 Mon Sep 17 00:00:00 2001
+From: Qiujun Huang <hqjagain@gmail.com>
+Date: Wed, 18 Mar 2020 15:53:50 +0800
+Subject: drm/lease: fix WARNING in idr_destroy
+
+From: Qiujun Huang <hqjagain@gmail.com>
+
+commit b216a8e7908cd750550c0480cf7d2b3a37f06954 upstream.
+
+drm_lease_create() takes ownership of the leases idr, which will later
+be released by drm_master_put():
+
+drm_master_put
+ ->drm_master_destroy
+ ->idr_destroy
+
+So we need not call idr_destroy() again.
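+
+Only the failure path still needs to clean up, because ownership is not
+transferred when drm_lease_create() fails (sketch of the fixed flow):
+
+  lessee = drm_lease_create(lessor, &leases);
+  if (IS_ERR(lessee)) {
+          idr_destroy(&leases);  /* ownership was never transferred */
+          ...
+  }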
+
+Reported-and-tested-by: syzbot+05835159fe322770fe3d@syzkaller.appspotmail.com
+Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/1584518030-4173-1-git-send-email-hqjagain@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/drm_lease.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/drm_lease.c
++++ b/drivers/gpu/drm/drm_lease.c
+@@ -542,10 +542,12 @@ int drm_mode_create_lease_ioctl(struct d
+ }
+
+ DRM_DEBUG_LEASE("Creating lease\n");
++ /* lessee will take the ownership of leases */
+ lessee = drm_lease_create(lessor, &leases);
+
+ if (IS_ERR(lessee)) {
+ ret = PTR_ERR(lessee);
++ idr_destroy(&leases);
+ goto out_leases;
+ }
+
+@@ -580,7 +582,6 @@ out_lessee:
+
+ out_leases:
+ put_unused_fd(fd);
+- idr_destroy(&leases);
+
+ DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret);
+ return ret;
--- /dev/null
+From 1b53734bd0b2feed8e7761771b2e76fc9126ea0c Mon Sep 17 00:00:00 2001
+From: Roman Penyaev <rpenyaev@suse.de>
+Date: Sat, 21 Mar 2020 18:22:30 -0700
+Subject: epoll: fix possible lost wakeup on epoll_ctl() path
+
+From: Roman Penyaev <rpenyaev@suse.de>
+
+commit 1b53734bd0b2feed8e7761771b2e76fc9126ea0c upstream.
+
+This fixes a possible lost wakeup introduced by commit a218cc491420.
+Originally, modifications to ep->wq were serialized by ep->wq.lock, but
+in commit a218cc491420 ("epoll: use rwlock in order to reduce
+ep_poll_callback() contention") a new rw lock was introduced in order
+to relax the fd event path, i.e. the callers of ep_poll_callback().
+
+After the change, ep_modify() and ep_insert() (both called on the
+epoll_ctl() path) were switched to ep->lock, but ep_poll() (epoll_wait)
+was still using ep->wq.lock for wqueue list modification.
+
+The bug doesn't lead to any wqueue list corruption, because the wake up
+path and list modifications were still serialized by ep->wq.lock
+internally, but the waitqueue_active() check prior to the wake_up()
+call can be reordered with modifications of the ep ready list, so a
+wake up can be lost.
+
+And yes, it can be healed by an explicit smp_mb():
+
+    list_add_tail(&epi->rdllink, &ep->rdllist);
+    smp_mb();
+    if (waitqueue_active(&ep->wq))
+        wake_up(&ep->wq);
+
+But let's keep it simple: this patch replaces ep->wq.lock with ep->lock
+for wqueue modifications, so the wake up path always observes the
+activeness of the wqueue correctly.
+
+Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention")
+Reported-by: Max Neunhoeffer <max@arangodb.com>
+Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Max Neunhoeffer <max@arangodb.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Christopher Kohlhoff <chris.kohlhoff@clearpool.io>
+Cc: Davidlohr Bueso <dbueso@suse.de>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Jes Sorensen <jes.sorensen@gmail.com>
+Cc: <stable@vger.kernel.org> [5.1+]
+Link: http://lkml.kernel.org/r/20200214170211.561524-1-rpenyaev@suse.de
+References: https://bugzilla.kernel.org/show_bug.cgi?id=205933
+Bisected-by: Max Neunhoeffer <max@arangodb.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/eventpoll.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -1860,9 +1860,9 @@ fetch_events:
+ waiter = true;
+ init_waitqueue_entry(&wait, current);
+
+- spin_lock_irq(&ep->wq.lock);
++ write_lock_irq(&ep->lock);
+ __add_wait_queue_exclusive(&ep->wq, &wait);
+- spin_unlock_irq(&ep->wq.lock);
++ write_unlock_irq(&ep->lock);
+ }
+
+ for (;;) {
+@@ -1910,9 +1910,9 @@ send_events:
+ goto fetch_events;
+
+ if (waiter) {
+- spin_lock_irq(&ep->wq.lock);
++ write_lock_irq(&ep->lock);
+ __remove_wait_queue(&ep->wq, &wait);
+- spin_unlock_irq(&ep->wq.lock);
++ write_unlock_irq(&ep->lock);
+ }
+
+ return res;
--- /dev/null
+From 7d36665a5886c27ca4c4d0afd3ecc50b400f3587 Mon Sep 17 00:00:00 2001
+From: Chunguang Xu <brookxu@tencent.com>
+Date: Sat, 21 Mar 2020 18:22:10 -0700
+Subject: memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chunguang Xu <brookxu@tencent.com>
+
+commit 7d36665a5886c27ca4c4d0afd3ecc50b400f3587 upstream.
+
+When an eventfd that monitors multiple memory thresholds of a cgroup is
+closed, the kernel deletes all events related to that eventfd. If,
+before all those events have been deleted, another eventfd starts
+monitoring a memory threshold of the same cgroup, we get a crash:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000004
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0002) - not-present page
+ PGD 800000033058e067 P4D 800000033058e067 PUD 3355ce067 PMD 0
+ Oops: 0002 [#1] SMP PTI
+ CPU: 2 PID: 14012 Comm: kworker/2:6 Kdump: loaded Not tainted 5.6.0-rc4 #3
+ Hardware name: LENOVO 20AWS01K00/20AWS01K00, BIOS GLET70WW (2.24 ) 05/21/2014
+ Workqueue: events memcg_event_remove
+ RIP: 0010:__mem_cgroup_usage_unregister_event+0xb3/0x190
+ RSP: 0018:ffffb47e01c4fe18 EFLAGS: 00010202
+ RAX: 0000000000000001 RBX: ffff8bb223a8a000 RCX: 0000000000000001
+ RDX: 0000000000000001 RSI: ffff8bb22fb83540 RDI: 0000000000000001
+ RBP: ffffb47e01c4fe48 R08: 0000000000000000 R09: 0000000000000010
+ R10: 000000000000000c R11: 071c71c71c71c71c R12: ffff8bb226aba880
+ R13: ffff8bb223a8a480 R14: 0000000000000000 R15: 0000000000000000
+ FS: 0000000000000000(0000) GS:ffff8bb242680000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000004 CR3: 000000032c29c003 CR4: 00000000001606e0
+ Call Trace:
+ memcg_event_remove+0x32/0x90
+ process_one_work+0x172/0x380
+ worker_thread+0x49/0x3f0
+ kthread+0xf8/0x130
+ ret_from_fork+0x35/0x40
+ CR2: 0000000000000004
+
+We can reproduce this problem in the following ways:
+
+1. We create a new cgroup subdirectory and a new eventfd, and then we
+ monitor multiple memory thresholds of the cgroup through this eventfd.
+
+2. We close this eventfd, and __mem_cgroup_usage_unregister_event()
+   is called multiple times to delete all events related to this
+   eventfd.
+
+The first time __mem_cgroup_usage_unregister_event() is called, the
+kernel clears all items related to this eventfd in
+thresholds->primary.
+
+Since there is currently only one eventfd, thresholds->primary becomes
+empty, so the kernel sets both thresholds->primary and
+thresholds->spare to NULL. If at this point the user creates a new
+eventfd and monitors a memory threshold of this cgroup, the kernel
+re-initializes thresholds->primary.
+
+Then, when __mem_cgroup_usage_unregister_event() is called for the
+second time, because thresholds->primary is not empty, the kernel
+accesses thresholds->spare, but thresholds->spare is NULL, which
+triggers the crash.
+
+In general, the longer it takes to delete all events related to this
+eventfd, the easier it is to trigger this problem.
+
+The solution is to check, when deleting an event, whether the
+thresholds associated with the eventfd have already been cleared. If
+so, we do nothing.
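+
+An illustrative timeline of the race (simplified):
+
+  memcg_event_remove (workqueue)         user
+  -------------------------------------  ------------------------------
+  unregister_event() for eventfd A:
+    primary emptied, primary and
+    spare both set to NULL
+                                         register_event() for eventfd B:
+                                           primary re-initialized
+  unregister_event() for eventfd A again:
+    primary is not empty, so the NULL
+    spare is dereferenced -> crash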
+
+[akpm@linux-foundation.org: fix comment, per Kirill]
+Fixes: 907860ed381a ("cgroups: make cftype.unregister_event() void-returning")
+Signed-off-by: Chunguang Xu <brookxu@tencent.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/077a6f67-aefa-4591-efec-f2f3af2b0b02@gmail.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -4027,7 +4027,7 @@ static void __mem_cgroup_usage_unregiste
+ struct mem_cgroup_thresholds *thresholds;
+ struct mem_cgroup_threshold_ary *new;
+ unsigned long usage;
+- int i, j, size;
++ int i, j, size, entries;
+
+ mutex_lock(&memcg->thresholds_lock);
+
+@@ -4047,14 +4047,20 @@ static void __mem_cgroup_usage_unregiste
+ __mem_cgroup_threshold(memcg, type == _MEMSWAP);
+
+ /* Calculate new number of threshold */
+- size = 0;
++ size = entries = 0;
+ for (i = 0; i < thresholds->primary->size; i++) {
+ if (thresholds->primary->entries[i].eventfd != eventfd)
+ size++;
++ else
++ entries++;
+ }
+
+ new = thresholds->spare;
+
++ /* If no items related to eventfd have been cleared, nothing to do */
++ if (!entries)
++ goto unlock;
++
+ /* Set thresholds array to NULL if we don't have thresholds */
+ if (!size) {
+ kfree(new);
--- /dev/null
+From 12e967fd8e4e6c3d275b4c69c890adc838891300 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Sat, 21 Mar 2020 18:22:26 -0700
+Subject: mm: do not allow MADV_PAGEOUT for CoW pages
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 12e967fd8e4e6c3d275b4c69c890adc838891300 upstream.
+
+Jann has brought up a very interesting point [1]. While shared pages
+are normally excluded from MADV_PAGEOUT, CoW pages can easily be
+reclaimed that way. This can lead to all sorts of hard-to-debug
+problems, e.g. the performance problems outlined by Daniel [2].
+
+There are runtime environments where substantial memory is shared
+among security domains via CoW, and an easy way to reclaim that
+memory, which MADV_{COLD,PAGEOUT} offers, can lead either to
+performance degradation for the parent process, which might be more
+privileged, or even open side channel attacks.
+
+The feasibility of the latter is not really clear to me TBH, but there
+is no real reason for the exposure at this stage. There seems to be no
+real use case that depends on reclaiming CoW memory via madvise, so it
+is much easier to simply disallow it, and that is what this patch
+does. Put simply, MADV_{PAGEOUT,COLD} can operate only on exclusively
+owned memory, which is a straightforward semantic.
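+
+Concretely (an illustrative scenario): after fork(), parent and child
+share all anonymous pages copy-on-write, so page_mapcount() is 2 for
+them; a less privileged child calling madvise(MADV_PAGEOUT) on such a
+range could previously evict the parent's working set. The check added
+below skips every page that is not exclusively owned:
+
+  if (page_mapcount(page) != 1)
+          continue;    /* shared or CoW-shared: leave it alone */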
+
+[1] http://lkml.kernel.org/r/CAG48ez0G3JkMq61gUmyQAaCq=_TwHbi1XKzWRooxZkv08PQKuw@mail.gmail.com
+[2] http://lkml.kernel.org/r/CAKOZueua_v8jHCpmEtTB6f3i9e2YnmX4mqdYVWhV4E=Z-n+zRQ@mail.gmail.com
+
+Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Daniel Colascione <dancol@google.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200312082248.GS23944@dhcp22.suse.cz
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/madvise.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_r
+ }
+
+ page = pmd_page(orig_pmd);
++
++ /* Do not interfere with other mappings of this page */
++ if (page_mapcount(page) != 1)
++ goto huge_unlock;
++
+ if (next - addr != HPAGE_PMD_SIZE) {
+ int err;
+
+- if (page_mapcount(page) != 1)
+- goto huge_unlock;
+-
+ get_page(page);
+ spin_unlock(ptl);
+ lock_page(page);
+@@ -426,6 +428,10 @@ regular_page:
+ continue;
+ }
+
++ /* Do not interfere with other mappings of this page */
++ if (page_mapcount(page) != 1)
++ continue;
++
+ VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+ if (pte_young(ptent)) {
--- /dev/null
+From d41e2f3bd54699f85b3d6f45abd09fa24a222cb9 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Sat, 21 Mar 2020 18:22:13 -0700
+Subject: mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case
+
+From: Baoquan He <bhe@redhat.com>
+
+commit d41e2f3bd54699f85b3d6f45abd09fa24a222cb9 upstream.
+
+In section_deactivate(), pfn_to_page() doesn't work any more once
+ms->section_mem_map has been reset to NULL in the SPARSEMEM|!VMEMMAP
+case. This causes a hot remove failure:
+
+ kernel BUG at mm/page_alloc.c:4806!
+ invalid opcode: 0000 [#1] SMP PTI
+ CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
+ Workqueue: kacpi_hotplug acpi_hotplug_work_fn
+ RIP: 0010:free_pages+0x85/0xa0
+ Call Trace:
+ __remove_pages+0x99/0xc0
+ arch_remove_memory+0x23/0x4d
+ try_remove_memory+0xc8/0x130
+ __remove_memory+0xa/0x11
+ acpi_memory_device_remove+0x72/0x100
+ acpi_bus_trim+0x55/0x90
+ acpi_device_hotplug+0x2eb/0x3d0
+ acpi_hotplug_work_fn+0x1a/0x30
+ process_one_work+0x1a7/0x370
+ worker_thread+0x30/0x380
+ kthread+0x112/0x130
+ ret_from_fork+0x35/0x40
+
+Let's move the ->section_mem_map reset to after
+depopulate_section_memmap() to fix it.
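+
+The dependency is visible in the generic pfn_to_page() used for this
+configuration (roughly, from include/asm-generic/memory_model.h): the
+lookup goes through the section's mem_map pointer, so that pointer must
+stay valid until depopulate_section_memmap() has run:
+
+  #define __pfn_to_page(pfn)                                    \
+  ({    unsigned long __pfn = (pfn);                            \
+        struct mem_section *__sec = __pfn_to_section(__pfn);    \
+        __section_mem_map_addr(__sec) + __pfn;                  \
+  })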
+
+[akpm@linux-foundation.org: remove unneeded initialization, per David]
+Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug")
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Wei Yang <richardw.yang@linux.intel.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200307084229.28251-2-bhe@redhat.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/sparse.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -744,6 +744,7 @@ static void section_deactivate(unsigned
+ struct mem_section *ms = __pfn_to_section(pfn);
+ bool section_is_early = early_section(ms);
+ struct page *memmap = NULL;
++ bool empty;
+ unsigned long *subsection_map = ms->usage
+ ? &ms->usage->subsection_map[0] : NULL;
+
+@@ -774,7 +775,8 @@ static void section_deactivate(unsigned
+ * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
+ */
+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
++ empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
++ if (empty) {
+ unsigned long section_nr = pfn_to_section_nr(pfn);
+
+ /*
+@@ -789,13 +791,15 @@ static void section_deactivate(unsigned
+ ms->usage = NULL;
+ }
+ memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+- ms->section_mem_map = (unsigned long)NULL;
+ }
+
+ if (section_is_early && memmap)
+ free_map_bootmem(memmap);
+ else
+ depopulate_section_memmap(pfn, nr_pages, altmap);
++
++ if (empty)
++ ms->section_mem_map = (unsigned long)NULL;
+ }
+
+ static struct page * __meminit section_activate(int nid, unsigned long pfn,
--- /dev/null
+From d397a45fc741c80c32a14e2de008441e9976f50c Mon Sep 17 00:00:00 2001
+From: Chris Down <chris@chrisdown.name>
+Date: Sat, 21 Mar 2020 18:22:20 -0700
+Subject: mm, memcg: fix corruption on 64-bit divisor in memory.high throttling
+
+From: Chris Down <chris@chrisdown.name>
+
+commit d397a45fc741c80c32a14e2de008441e9976f50c upstream.
+
+Commit 0e4b01df8659 had a bunch of fixups to use the right division
+method. However, it seems that after all that it still wasn't right --
+div_u64 takes a 32-bit divisor.
+
+The headroom is still large (2^32 pages), so on mundane systems you
+won't hit this, but this should definitely be fixed.
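+
+For reference, the two helpers differ only in the divisor width (their
+signatures, from include/linux/math64.h):
+
+  u64 div_u64(u64 dividend, u32 divisor);     /* divisor truncated    */
+  u64 div64_u64(u64 dividend, u64 divisor);   /* full 64-bit divisor  */
+
+so once clamped_high exceeds 2^32 pages, div_u64() silently divides by
+a truncated value.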
+
+Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high")
+Reported-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Chris Down <chris@chrisdown.name>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Nathan Chancellor <natechancellor@gmail.com>
+Cc: <stable@vger.kernel.org> [5.4.x+]
+Link: http://lkml.kernel.org/r/80780887060514967d414b3cd91f9a316a16ab98.1584036142.git.chris@chrisdown.name
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2339,7 +2339,7 @@ void mem_cgroup_handle_over_high(void)
+ */
+ clamped_high = max(high, 1UL);
+
+- overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
++ overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+ clamped_high);
+
+ penalty_jiffies = ((u64)overage * overage * HZ)
--- /dev/null
+From e26733e0d0ec6798eca93daa300bc3f43616127f Mon Sep 17 00:00:00 2001
+From: Chris Down <chris@chrisdown.name>
+Date: Sat, 21 Mar 2020 18:22:23 -0700
+Subject: mm, memcg: throttle allocators based on ancestral memory.high
+
+From: Chris Down <chris@chrisdown.name>
+
+commit e26733e0d0ec6798eca93daa300bc3f43616127f upstream.
+
+Prior to this commit, we only directly check the affected cgroup's
+memory.high against its usage. However, it's possible that we are being
+reclaimed as a result of hitting an ancestor memory.high and should be
+penalised based on that, instead.
+
+This patch changes memory.high overage throttling to use the largest
+overage in its ancestors when considering how many penalty jiffies to
+charge. This makes sure that we penalise poorly behaving cgroups in the
+same way regardless of at what level of the hierarchy memory.high was
+breached.
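+
+A worked example (made-up numbers): a child cgroup with usage at 50M
+and no memory.high of its own, sitting under a parent with memory.high
+set to 40M, previously saw no overage of its own and was never
+throttled; with this change the walk up the hierarchy finds the
+parent's 10M/40M = 25% overage and penalises the allocating task
+accordingly.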
+
+Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high")
+Reported-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Chris Down <chris@chrisdown.name>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Nathan Chancellor <natechancellor@gmail.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: <stable@vger.kernel.org> [5.4.x+]
+Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.name
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 93 ++++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 58 insertions(+), 35 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2297,28 +2297,41 @@ static void high_work_func(struct work_s
+ #define MEMCG_DELAY_SCALING_SHIFT 14
+
+ /*
+- * Scheduled by try_charge() to be executed from the userland return path
+- * and reclaims memory over the high limit.
++ * Get the number of jiffies that we should penalise a mischievous cgroup which
++ * is exceeding its memory.high by checking both it and its ancestors.
+ */
+-void mem_cgroup_handle_over_high(void)
++static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
++ unsigned int nr_pages)
+ {
+- unsigned long usage, high, clamped_high;
+- unsigned long pflags;
+- unsigned long penalty_jiffies, overage;
+- unsigned int nr_pages = current->memcg_nr_pages_over_high;
+- struct mem_cgroup *memcg;
++ unsigned long penalty_jiffies;
++ u64 max_overage = 0;
+
+- if (likely(!nr_pages))
+- return;
++ do {
++ unsigned long usage, high;
++ u64 overage;
++
++ usage = page_counter_read(&memcg->memory);
++ high = READ_ONCE(memcg->high);
++
++ /*
++ * Prevent division by 0 in overage calculation by acting as if
++ * it was a threshold of 1 page
++ */
++ high = max(high, 1UL);
++
++ overage = usage - high;
++ overage <<= MEMCG_DELAY_PRECISION_SHIFT;
++ overage = div64_u64(overage, high);
++
++ if (overage > max_overage)
++ max_overage = overage;
++ } while ((memcg = parent_mem_cgroup(memcg)) &&
++ !mem_cgroup_is_root(memcg));
+
+- memcg = get_mem_cgroup_from_mm(current->mm);
+- reclaim_high(memcg, nr_pages, GFP_KERNEL);
+- current->memcg_nr_pages_over_high = 0;
++ if (!max_overage)
++ return 0;
+
+ /*
+- * memory.high is breached and reclaim is unable to keep up. Throttle
+- * allocators proactively to slow down excessive growth.
+- *
+ * We use overage compared to memory.high to calculate the number of
+ * jiffies to sleep (penalty_jiffies). Ideally this value should be
+ * fairly lenient on small overages, and increasingly harsh when the
+@@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void)
+ * its crazy behaviour, so we exponentially increase the delay based on
+ * overage amount.
+ */
+-
+- usage = page_counter_read(&memcg->memory);
+- high = READ_ONCE(memcg->high);
+-
+- if (usage <= high)
+- goto out;
+-
+- /*
+- * Prevent division by 0 in overage calculation by acting as if it was a
+- * threshold of 1 page
+- */
+- clamped_high = max(high, 1UL);
+-
+- overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+- clamped_high);
+-
+- penalty_jiffies = ((u64)overage * overage * HZ)
+- >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
++ penalty_jiffies = max_overage * max_overage * HZ;
++ penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
++ penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
+
+ /*
+ * Factor in the task's own contribution to the overage, such that four
+@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
+ * application moving forwards and also permit diagnostics, albeit
+ * extremely slowly.
+ */
+- penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
++ return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
++}
++
++/*
++ * Scheduled by try_charge() to be executed from the userland return path
++ * and reclaims memory over the high limit.
++ */
++void mem_cgroup_handle_over_high(void)
++{
++ unsigned long penalty_jiffies;
++ unsigned long pflags;
++ unsigned int nr_pages = current->memcg_nr_pages_over_high;
++ struct mem_cgroup *memcg;
++
++ if (likely(!nr_pages))
++ return;
++
++ memcg = get_mem_cgroup_from_mm(current->mm);
++ reclaim_high(memcg, nr_pages, GFP_KERNEL);
++ current->memcg_nr_pages_over_high = 0;
++
++ /*
++ * memory.high is breached and reclaim is unable to keep up. Throttle
++ * allocators proactively to slow down excessive growth.
++ */
++ penalty_jiffies = calculate_high_delay(memcg, nr_pages);
+
+ /*
+ * Don't sleep if the amount of jiffies this memcg owes us is so low
--- /dev/null
+From 5076190daded2197f62fe92cf69674488be44175 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 17 Mar 2020 11:04:09 -0700
+Subject: mm: slub: be more careful about the double cmpxchg of freelist
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 5076190daded2197f62fe92cf69674488be44175 upstream.
+
+This is just a cleanup addition to Jann's fix to properly update the
+transaction ID for the slub slowpath in commit fd4d9c7d0c71 ("mm: slub:
+add missing TID bump..").
+
+The transaction ID is what protects us against any concurrent accesses,
+but we should really also make sure that the 'freelist' comparison
+itself always uses the same freelist value that we then use as the new
+next free pointer.
+
+Jann points out that if we do all of this carefully, we could skip the
+transaction ID update for all the paths that only remove entries from
+the lists, and only update the TID when adding entries (to avoid the ABA
+issue with cmpxchg and list handling re-adding a previously seen value).
+
+But this patch just does the "make sure to cmpxchg the same value we
+used" rather than trying to be clever.
+
+Acked-by: Jann Horn <jannh@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2978,11 +2978,13 @@ redo:
+ barrier();
+
+ if (likely(page == c->page)) {
+- set_freepointer(s, tail_obj, c->freelist);
++ void **freelist = READ_ONCE(c->freelist);
++
++ set_freepointer(s, tail_obj, freelist);
+
+ if (unlikely(!this_cpu_cmpxchg_double(
+ s->cpu_slab->freelist, s->cpu_slab->tid,
+- c->freelist, tid,
++ freelist, tid,
+ head, next_tid(tid)))) {
+
+ note_cmpxchg_failure("slab_free", s, tid);
--- /dev/null
+From 0715e6c516f106ed553828a671d30ad9a3431536 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Sat, 21 Mar 2020 18:22:37 -0700
+Subject: mm, slub: prevent kmalloc_node crashes and memory leaks
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 0715e6c516f106ed553828a671d30ad9a3431536 upstream.
+
+Sachin reports [1] a crash in SLUB __slab_alloc():
+
+ BUG: Kernel NULL pointer dereference on read at 0x000073b0
+ Faulting instruction address: 0xc0000000003d55f4
+ Oops: Kernel access of bad area, sig: 11 [#1]
+ LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+ Modules linked in:
+ CPU: 19 PID: 1 Comm: systemd Not tainted 5.6.0-rc2-next-20200218-autotest #1
+ NIP: c0000000003d55f4 LR: c0000000003d5b94 CTR: 0000000000000000
+ REGS: c0000008b37836d0 TRAP: 0300 Not tainted (5.6.0-rc2-next-20200218-autotest)
+ MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24004844 XER: 00000000
+ CFAR: c00000000000dec4 DAR: 00000000000073b0 DSISR: 40000000 IRQMASK: 1
+ GPR00: c0000000003d5b94 c0000008b3783960 c00000000155d400 c0000008b301f500
+ GPR04: 0000000000000dc0 0000000000000002 c0000000003443d8 c0000008bb398620
+ GPR08: 00000008ba2f0000 0000000000000001 0000000000000000 0000000000000000
+ GPR12: 0000000024004844 c00000001ec52a00 0000000000000000 0000000000000000
+ GPR16: c0000008a1b20048 c000000001595898 c000000001750c18 0000000000000002
+ GPR20: c000000001750c28 c000000001624470 0000000fffffffe0 5deadbeef0000122
+ GPR24: 0000000000000001 0000000000000dc0 0000000000000002 c0000000003443d8
+ GPR28: c0000008b301f500 c0000008bb398620 0000000000000000 c00c000002287180
+ NIP ___slab_alloc+0x1f4/0x760
+ LR __slab_alloc+0x34/0x60
+ Call Trace:
+ ___slab_alloc+0x334/0x760 (unreliable)
+ __slab_alloc+0x34/0x60
+ __kmalloc_node+0x110/0x490
+ kvmalloc_node+0x58/0x110
+ mem_cgroup_css_online+0x108/0x270
+ online_css+0x48/0xd0
+ cgroup_apply_control_enable+0x2ec/0x4d0
+ cgroup_mkdir+0x228/0x5f0
+ kernfs_iop_mkdir+0x90/0xf0
+ vfs_mkdir+0x110/0x230
+ do_mkdirat+0xb0/0x1a0
+ system_call+0x5c/0x68
+
+This is a PowerPC platform with following NUMA topology:
+
+ available: 2 nodes (0-1)
+ node 0 cpus:
+ node 0 size: 0 MB
+ node 0 free: 0 MB
+ node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ node 1 size: 35247 MB
+ node 1 free: 30907 MB
+ node distances:
+ node 0 1
+ 0: 10 40
+ 1: 40 10
+
+ possible numa nodes: 0-31
+
+This only happens with a mmotm patch "mm/memcontrol.c: allocate
+shrinker_map on appropriate NUMA node" [2] which effectively calls
+kmalloc_node for each possible node. SLUB, however, only allocates
+kmem_cache_node on online N_NORMAL_MEMORY nodes, and relies on
+node_to_mem_node() to return such a valid node for other nodes since commit
+a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating
+on memoryless node"). This is however not true in this configuration
+where the _node_numa_mem_ array is not initialized for nodes 0 and 2-31,
+thus it contains zeroes and get_partial() ends up accessing
+non-allocated kmem_cache_node.
+
+A related issue was reported by Bharata (originally by Ramachandran) [3],
+where a similar PowerPC configuration, but with a mainline kernel
+without patch [2], ends up allocating large amounts of pages from the
+kmalloc-1k and kmalloc-512 caches. This seems to have the same
+underlying issue with node_to_mem_node() not behaving as expected, and
+might also lead to an infinite loop with CONFIG_SLUB_CPU_PARTIAL [4].
+
+This patch should fix both issues by not relying on node_to_mem_node()
+anymore and instead simply falling back to NUMA_NO_NODE, when
+kmalloc_node(node) is attempted for a node that's not online, or has no
+usable memory. The "usable memory" condition is also changed from
+node_present_pages() to N_NORMAL_MEMORY node state, as that is exactly
+the condition that SLUB uses to allocate kmem_cache_node structures.
+The check in get_partial() is removed completely, as the checks in
+___slab_alloc() are now sufficient to prevent get_partial() being
+reached with an invalid node.
+
+[1] https://lore.kernel.org/linux-next/3381CD91-AB3D-4773-BA04-E7A072A63968@linux.vnet.ibm.com/
+[2] https://lore.kernel.org/linux-mm/fff0e636-4c36-ed10-281c-8cdb0687c839@virtuozzo.com/
+[3] https://lore.kernel.org/linux-mm/20200317092624.GB22538@in.ibm.com/
+[4] https://lore.kernel.org/linux-mm/088b5996-faae-8a56-ef9c-5b567125ae54@suse.cz/
+
+Fixes: a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node")
+Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
+Reported-by: PUVICHAKRAVARTHY RAMACHANDRAN <puvichakravarthy@in.ibm.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
+Tested-by: Bharata B Rao <bharata@linux.ibm.com>
+Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Christopher Lameter <cl@linux.com>
+Cc: linuxppc-dev@lists.ozlabs.org
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200320115533.9604-1-vbabka@suse.cz
+Debugged-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/slub.c | 26 +++++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1954,8 +1954,6 @@ static void *get_partial(struct kmem_cac
+
+ if (node == NUMA_NO_NODE)
+ searchnode = numa_mem_id();
+- else if (!node_present_pages(node))
+- searchnode = node_to_mem_node(node);
+
+ object = get_partial_node(s, get_node(s, searchnode), c, flags);
+ if (object || node != NUMA_NO_NODE)
+@@ -2544,17 +2542,27 @@ static void *___slab_alloc(struct kmem_c
+ struct page *page;
+
+ page = c->page;
+- if (!page)
++ if (!page) {
++ /*
++ * if the node is not online or has no normal memory, just
++ * ignore the node constraint
++ */
++ if (unlikely(node != NUMA_NO_NODE &&
++ !node_state(node, N_NORMAL_MEMORY)))
++ node = NUMA_NO_NODE;
+ goto new_slab;
++ }
+ redo:
+
+ if (unlikely(!node_match(page, node))) {
+- int searchnode = node;
+-
+- if (node != NUMA_NO_NODE && !node_present_pages(node))
+- searchnode = node_to_mem_node(node);
+-
+- if (unlikely(!node_match(page, searchnode))) {
++ /*
++ * same as above but node_match() being false already
++ * implies node != NUMA_NO_NODE
++ */
++ if (!node_state(node, N_NORMAL_MEMORY)) {
++ node = NUMA_NO_NODE;
++ goto redo;
++ } else {
+ stat(s, ALLOC_NODE_MISMATCH);
+ deactivate_slab(s, page, c->freelist, c);
+ goto new_slab;
--- /dev/null
+From d72520ad004a8ce18a6ba6cde317f0081b27365a Mon Sep 17 00:00:00 2001
+From: Qian Cai <cai@lca.pw>
+Date: Sat, 21 Mar 2020 18:22:17 -0700
+Subject: page-flags: fix a crash at SetPageError(THP_SWAP)
+
+From: Qian Cai <cai@lca.pw>
+
+commit d72520ad004a8ce18a6ba6cde317f0081b27365a upstream.
+
+Commit bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped
+out") added support for writing THP to a swap device but forgot to
+update an older commit df8c94d13c7e ("page-flags: define behavior of
+FS/IO-related flags on compound pages"), which could trigger a crash
+during THP swap-out with DEBUG_VM_PGFLAGS=y:
+
+ kernel BUG at include/linux/page-flags.h:317!
+
+ page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page))
+ page:fffff3b2ec3a8000 refcount:512 mapcount:0 mapping:000000009eb0338c index:0x7f6e58200 head:fffff3b2ec3a8000 order:9 compound_mapcount:0 compound_pincount:0
+ anon flags: 0x45fffe0000d8454(uptodate|lru|workingset|owner_priv_1|writeback|head|reclaim|swapbacked)
+
+ end_swap_bio_write()
+ SetPageError(page)
+ VM_BUG_ON_PAGE(1 && PageCompound(page))
+
+ <IRQ>
+ bio_endio+0x297/0x560
+ dec_pending+0x218/0x430 [dm_mod]
+ clone_endio+0xe4/0x2c0 [dm_mod]
+ bio_endio+0x297/0x560
+ blk_update_request+0x201/0x920
+ scsi_end_request+0x6b/0x4b0
+ scsi_io_completion+0x509/0x7e0
+ scsi_finish_command+0x1ed/0x2a0
+ scsi_softirq_done+0x1c9/0x1d0
+ __blk_mqnterrupt+0xf/0x20
+ </IRQ>
+
+Fix by checking PF_NO_TAIL in those places instead.
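+
+The difference between the two policies (roughly, from the macros in
+include/linux/page-flags.h): PF_NO_COMPOUND BUGs on any compound page,
+while PF_NO_TAIL only rejects tail pages and redirects the operation
+to the head page, which is what SetPageError() on a THP needs:
+
+  #define PF_NO_TAIL(page, enforce) ({                                 \
+          VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);          \
+          PF_POISONED_CHECK(compound_head(page)); })
+  #define PF_NO_COMPOUND(page, enforce) ({                             \
+          VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page);      \
+          PF_POISONED_CHECK(page); })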
+
+Fixes: bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out")
+Signed-off-by: Qian Cai <cai@lca.pw>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: "Huang, Ying" <ying.huang@intel.com>
+Acked-by: Rafael Aquini <aquini@redhat.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200310235846.1319-1-cai@lca.pw
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/page-flags.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -311,7 +311,7 @@ static inline int TestClearPage##uname(s
+
+ __PAGEFLAG(Locked, locked, PF_NO_TAIL)
+ PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
+-PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
++PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
+ PAGEFLAG(Referenced, referenced, PF_HEAD)
+ TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
+ __SETPAGEFLAG(Referenced, referenced, PF_HEAD)
--- /dev/null
+From 5d892919fdd0cefd361697472d4e1b174a594991 Mon Sep 17 00:00:00 2001
+From: Corentin Labbe <clabbe@baylibre.com>
+Date: Wed, 18 Mar 2020 15:26:49 +0000
+Subject: rtc: max8907: add missing select REGMAP_IRQ
+
+From: Corentin Labbe <clabbe@baylibre.com>
+
+commit 5d892919fdd0cefd361697472d4e1b174a594991 upstream.
+
+I have hit the following build error:
+
+ armv7a-hardfloat-linux-gnueabi-ld: drivers/rtc/rtc-max8907.o: in function `max8907_rtc_probe':
+ rtc-max8907.c:(.text+0x400): undefined reference to `regmap_irq_get_virq'
+
+The max8907 driver should select REGMAP_IRQ.
+
+Fixes: 94c01ab6d7544 ("rtc: add MAX8907 RTC driver")
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/rtc/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/rtc/Kconfig
++++ b/drivers/rtc/Kconfig
+@@ -327,6 +327,7 @@ config RTC_DRV_MAX6900
+ config RTC_DRV_MAX8907
+ tristate "Maxim MAX8907"
+ depends on MFD_MAX8907 || COMPILE_TEST
++ select REGMAP_IRQ
+ help
+ If you say yes here you will get support for the
+ RTC of Maxim MAX8907 PMIC.
intel_th-fix-user-visible-error-codes.patch
intel_th-pci-add-elkhart-lake-cpu-support.patch
modpost-move-the-namespace-field-in-module.symvers-last.patch
+rtc-max8907-add-missing-select-regmap_irq.patch
+arm64-compat-fix-syscall-number-of-compat_clock_getres.patch
+xhci-do-not-open-code-__print_symbolic-in-xhci-trace-events.patch
+btrfs-fix-log-context-list-corruption-after-rename-whiteout-error.patch
+drm-amd-amdgpu-fix-gpr-read-from-debugfs-v2.patch
+drm-lease-fix-warning-in-idr_destroy.patch
+stm-class-sys-t-fix-the-use-of-time_after.patch
+memcg-fix-null-pointer-dereference-in-__mem_cgroup_usage_unregister_event.patch
+mm-memcg-fix-corruption-on-64-bit-divisor-in-memory.high-throttling.patch
+mm-memcg-throttle-allocators-based-on-ancestral-memory.high.patch
+mm-hotplug-fix-hot-remove-failure-in-sparsemem-vmemmap-case.patch
+mm-do-not-allow-madv_pageout-for-cow-pages.patch
+epoll-fix-possible-lost-wakeup-on-epoll_ctl-path.patch
+mm-slub-be-more-careful-about-the-double-cmpxchg-of-freelist.patch
+mm-slub-prevent-kmalloc_node-crashes-and-memory-leaks.patch
+page-flags-fix-a-crash-at-setpageerror-thp_swap.patch
+x86-mm-split-vmalloc_sync_all.patch
--- /dev/null
+From 283f87c0d5d32b4a5c22636adc559bca82196ed3 Mon Sep 17 00:00:00 2001
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Date: Tue, 17 Mar 2020 08:22:11 +0200
+Subject: stm class: sys-t: Fix the use of time_after()
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+commit 283f87c0d5d32b4a5c22636adc559bca82196ed3 upstream.
+
+The operands of time_after() are in the wrong order in both instances
+in the sys-t driver. Fix that.
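+
+For reference, time_after(a, b) is true when time a is after time b;
+its definition (from include/linux/jiffies.h, typechecks omitted) is
+wrap-safe:
+
+  #define time_after(a,b)   ((long)((b) - (a)) < 0)
+
+so an "interval expired" check must read
+time_after(jiffies, last + interval), not the reverse.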
+
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Fixes: 39f10239df75 ("stm class: p_sys-t: Add support for CLOCKSYNC packets")
+Fixes: d69d5e83110f ("stm class: Add MIPI SyS-T protocol support")
+Cc: stable@vger.kernel.org # v4.20+
+Link: https://lore.kernel.org/r/20200317062215.15598-3-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hwtracing/stm/p_sys-t.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/hwtracing/stm/p_sys-t.c
++++ b/drivers/hwtracing/stm/p_sys-t.c
+@@ -238,7 +238,7 @@ static struct configfs_attribute *sys_t_
+ static inline bool sys_t_need_ts(struct sys_t_output *op)
+ {
+ if (op->node.ts_interval &&
+- time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) {
++ time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) {
+ op->ts_jiffies = jiffies;
+
+ return true;
+@@ -250,8 +250,8 @@ static inline bool sys_t_need_ts(struct
+ static bool sys_t_need_clock_sync(struct sys_t_output *op)
+ {
+ if (op->node.clocksync_interval &&
+- time_after(op->clocksync_jiffies + op->node.clocksync_interval,
+- jiffies)) {
++ time_after(jiffies,
++ op->clocksync_jiffies + op->node.clocksync_interval)) {
+ op->clocksync_jiffies = jiffies;
+
+ return true;
--- /dev/null
+From 763802b53a427ed3cbd419dbba255c414fdd9e7c Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Sat, 21 Mar 2020 18:22:41 -0700
+Subject: x86/mm: split vmalloc_sync_all()
+
+From: Joerg Roedel <jroedel@suse.de>
+
+commit 763802b53a427ed3cbd419dbba255c414fdd9e7c upstream.
+
+Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in
+__purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in
+the vunmap() code-path. While this change was necessary to maintain
+correctness on x86-32-pae kernels, it also adds additional cycles for
+architectures that don't need it.
+
+Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported
+severe performance regressions in micro-benchmarks because it now also
+calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But
+the vmalloc_sync_all() implementation on x86-64 is only needed for newly
+created mappings.
+
+To avoid the unnecessary work on x86-64 and to gain the performance
+back, split up vmalloc_sync_all() into two functions:
+
+ * vmalloc_sync_mappings(), and
+ * vmalloc_sync_unmappings()
+
+Most call-sites of vmalloc_sync_all() only care about new mappings
+being synchronized. The only exception is the new call-site added in
+the above-mentioned commit.
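+
+After the split, the call-sites map as follows (as in the hunks below):
+
+  register_die_notifier()   -> vmalloc_sync_mappings()
+  ghes_estatus_pool_init()  -> vmalloc_sync_mappings()
+  __purge_vmap_area_lazy()  -> vmalloc_sync_unmappings()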
+
+Shile Zhang directed us to a report of an 80% regression in reaim
+throughput.
+
+Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Reported-by: Shile Zhang <shile.zhang@linux.alibaba.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Tested-by: Borislav Petkov <bp@suse.de>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [GHES]
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org
+Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/
+Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c | 26 ++++++++++++++++++++++++--
+ drivers/acpi/apei/ghes.c | 2 +-
+ include/linux/vmalloc.h | 5 +++--
+ kernel/notifier.c | 2 +-
+ mm/nommu.c | 10 +++++++---
+ mm/vmalloc.c | 11 +++++++----
+ 6 files changed, 43 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -189,7 +189,7 @@ static inline pmd_t *vmalloc_sync_one(pg
+ return pmd_k;
+ }
+
+-void vmalloc_sync_all(void)
++static void vmalloc_sync(void)
+ {
+ unsigned long address;
+
+@@ -216,6 +216,16 @@ void vmalloc_sync_all(void)
+ }
+ }
+
++void vmalloc_sync_mappings(void)
++{
++ vmalloc_sync();
++}
++
++void vmalloc_sync_unmappings(void)
++{
++ vmalloc_sync();
++}
++
+ /*
+ * 32-bit:
+ *
+@@ -318,11 +328,23 @@ out:
+
+ #else /* CONFIG_X86_64: */
+
+-void vmalloc_sync_all(void)
++void vmalloc_sync_mappings(void)
+ {
++ /*
++ * 64-bit mappings might allocate new p4d/pud pages
++ * that need to be propagated to all tasks' PGDs.
++ */
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+ }
+
++void vmalloc_sync_unmappings(void)
++{
++ /*
++ * Unmappings never allocate or free p4d/pud pages.
++ * No work is required here.
++ */
++}
++
+ /*
+ * 64-bit:
+ *
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
+ * New allocation must be visible in all pgd before it can be found by
+ * an NMI allocating from the pool.
+ */
+- vmalloc_sync_all();
++ vmalloc_sync_mappings();
+
+ rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
+ if (rc)
+--- a/include/linux/vmalloc.h
++++ b/include/linux/vmalloc.h
+@@ -139,8 +139,9 @@ extern int remap_vmalloc_range_partial(s
+
+ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+ unsigned long pgoff);
+-void vmalloc_sync_all(void);
+-
++void vmalloc_sync_mappings(void);
++void vmalloc_sync_unmappings(void);
++
+ /*
+ * Lowlevel-APIs (not for driver use!)
+ */
+--- a/kernel/notifier.c
++++ b/kernel/notifier.c
+@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
+
+ int register_die_notifier(struct notifier_block *nb)
+ {
+- vmalloc_sync_all();
++ vmalloc_sync_mappings();
+ return atomic_notifier_chain_register(&die_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(register_die_notifier);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -370,10 +370,14 @@ void vm_unmap_aliases(void)
+ EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
+ /*
+- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
+- * have one.
++ * Implement a stub for vmalloc_sync_[un]mapping() if the architecture
++ * chose not to have one.
+ */
+-void __weak vmalloc_sync_all(void)
++void __weak vmalloc_sync_mappings(void)
++{
++}
++
++void __weak vmalloc_sync_unmappings(void)
+ {
+ }
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1287,7 +1287,7 @@ static bool __purge_vmap_area_lazy(unsig
+ * First make sure the mappings are removed from all page-tables
+ * before they are freed.
+ */
+- vmalloc_sync_all();
++ vmalloc_sync_unmappings();
+
+ /*
+ * TODO: to calculate a flush range without looping.
+@@ -3120,16 +3120,19 @@ int remap_vmalloc_range(struct vm_area_s
+ EXPORT_SYMBOL(remap_vmalloc_range);
+
+ /*
+- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
+- * have one.
++ * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
++ * not to have one.
+ *
+ * The purpose of this function is to make sure the vmalloc area
+ * mappings are identical in all page-tables in the system.
+ */
+-void __weak vmalloc_sync_all(void)
++void __weak vmalloc_sync_mappings(void)
+ {
+ }
+
++void __weak vmalloc_sync_unmappings(void)
++{
++}
+
+ static int f(pte_t *pte, unsigned long addr, void *data)
+ {
--- /dev/null
+From 045706bff837ee89c13f1ace173db71922c1c40b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Fri, 6 Mar 2020 17:08:57 +0200
+Subject: xhci: Do not open code __print_symbolic() in xhci trace events
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 045706bff837ee89c13f1ace173db71922c1c40b upstream.
+
+libtraceevent (used by perf and trace-cmd) failed to parse the
+xhci_urb_dequeue trace event. This is because the user space trace
+event format parser is not a full C compiler. It can handle some basic
+logic, but is not meant to be able to handle everything C can do.
+
+In cases where a trace event field needs to be converted from a number
+to a string, there's the __print_symbolic() macro that should be used:
+
+ See samples/trace_events/trace-events-sample.h
+
+Some xhci trace events open coded the equivalent of __print_symbolic(),
+causing the user space tools to fail to parse them. Replace the
+open-coded switch statements with __print_symbolic().
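+
+The general form, which libtraceevent can evaluate because the
+value/name table is exported in the event's format file, is:
+
+  __print_symbolic(value,
+                   { VALUE_A, "name-a" },
+                   { VALUE_B, "name-b" })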
+
+CC: stable@vger.kernel.org
+Reported-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206531
+Fixes: 5abdc2e6e12ff ("usb: host: xhci: add urb_enqueue/dequeue/giveback tracers")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20200306150858.21904-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/host/xhci-trace.h | 23 ++++++-----------------
+ 1 file changed, 6 insertions(+), 17 deletions(-)
+
+--- a/drivers/usb/host/xhci-trace.h
++++ b/drivers/usb/host/xhci-trace.h
+@@ -289,23 +289,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb,
+ ),
+ TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x",
+ __entry->epnum, __entry->dir_in ? "in" : "out",
+- ({ char *s;
+- switch (__entry->type) {
+- case USB_ENDPOINT_XFER_INT:
+- s = "intr";
+- break;
+- case USB_ENDPOINT_XFER_CONTROL:
+- s = "control";
+- break;
+- case USB_ENDPOINT_XFER_BULK:
+- s = "bulk";
+- break;
+- case USB_ENDPOINT_XFER_ISOC:
+- s = "isoc";
+- break;
+- default:
+- s = "UNKNOWN";
+- } s; }), __entry->urb, __entry->pipe, __entry->slot_id,
++ __print_symbolic(__entry->type,
++ { USB_ENDPOINT_XFER_INT, "intr" },
++ { USB_ENDPOINT_XFER_CONTROL, "control" },
++ { USB_ENDPOINT_XFER_BULK, "bulk" },
++ { USB_ENDPOINT_XFER_ISOC, "isoc" }),
++ __entry->urb, __entry->pipe, __entry->slot_id,
+ __entry->actual, __entry->length, __entry->num_mapped_sgs,
+ __entry->num_sgs, __entry->stream, __entry->flags
+ )