--- /dev/null
+From 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 Mon Sep 17 00:00:00 2001
+From: Chris Co <chrco@microsoft.com>
+Date: Tue, 10 Nov 2020 19:01:18 +0000
+Subject: Drivers: hv: vmbus: Allow cleanup of VMBUS_CONNECT_CPU if disconnected
+
+From: Chris Co <chrco@microsoft.com>
+
+commit 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 upstream.
+
+When invoking kexec() on a Linux guest running on a Hyper-V host, the
+kernel panics.
+
+ RIP: 0010:cpuhp_issue_call+0x137/0x140
+ Call Trace:
+ __cpuhp_remove_state_cpuslocked+0x99/0x100
+ __cpuhp_remove_state+0x1c/0x30
+ hv_kexec_handler+0x23/0x30 [hv_vmbus]
+ hv_machine_shutdown+0x1e/0x30
+ machine_shutdown+0x10/0x20
+ kernel_kexec+0x6d/0x96
+ __do_sys_reboot+0x1ef/0x230
+ __x64_sys_reboot+0x1d/0x20
+ do_syscall_64+0x6b/0x3d8
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+This was due to the hv_synic_cleanup() callback returning -EBUSY to
+cpuhp_issue_call() when tearing down the VMBUS_CONNECT_CPU, even when
+vmbus_connection.conn_state == DISCONNECTED. hv_synic_cleanup()
+should succeed in the case where vmbus_connection.conn_state
+is DISCONNECTED.
+
+The fix is to add an extra condition to test for
+vmbus_connection.conn_state == CONNECTED on the VMBUS_CONNECT_CPU and
+only return early if true. This way the kexec() path can still shut
+everything down while preserving the initial behavior of preventing
+CPU offlining on the VMBUS_CONNECT_CPU while the VM is running.
+
+Fixes: 8a857c55420f29 ("Drivers: hv: vmbus: Always handle the VMBus messages on CPU0")
+Signed-off-by: Chris Co <chrco@microsoft.com>
+Reviewed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201110190118.15596-1-chrco@linux.microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hv/hv.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/hv/hv.c
++++ b/drivers/hv/hv.c
+@@ -251,9 +251,13 @@ int hv_synic_cleanup(unsigned int cpu)
+
+ /*
+ * Hyper-V does not provide a way to change the connect CPU once
+- * it is set; we must prevent the connect CPU from going offline.
++ * it is set; we must prevent the connect CPU from going offline
++ * while the VM is running normally. But in the panic or kexec()
++ * path where the vmbus is already disconnected, the CPU must be
++ * allowed to shut down.
+ */
+- if (cpu == VMBUS_CONNECT_CPU)
++ if (cpu == VMBUS_CONNECT_CPU &&
++ vmbus_connection.conn_state == CONNECTED)
+ return -EBUSY;
+
+ /*
--- /dev/null
+From 728321e53045d2668bf2b8627a8d61bc2c480d3b Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 13 Nov 2020 02:21:19 -0500
+Subject: drm/amd/display: Add missing pflip irq for dcn2.0
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 728321e53045d2668bf2b8627a8d61bc2c480d3b upstream.
+
+If we have more than 4 displays, we will run
+into dummy irq calls or flip timeout issues.
+
+Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
++++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
+@@ -299,8 +299,8 @@ irq_source_info_dcn20[DAL_IRQ_SOURCES_NU
+ pflip_int_entry(1),
+ pflip_int_entry(2),
+ pflip_int_entry(3),
+- [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(),
+- [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(),
++ pflip_int_entry(4),
++ pflip_int_entry(5),
+ [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(),
+ gpio_pad_int_entry(0),
+ gpio_pad_int_entry(1),
--- /dev/null
+From d2e3fce9ddafe689c6f7cb355f23560637e30b9d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Tue, 10 Nov 2020 23:04:47 +0200
+Subject: drm/i915: Handle max_bpc==16
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit d2e3fce9ddafe689c6f7cb355f23560637e30b9d upstream.
+
+EDID can declare the maximum supported bpc up to 16,
+and apparently there are displays that do so. Currently
+we assume 12 bpc is the max. Fix the assumption and
+toss in a MISSING_CASE() for any other value we don't
+expect to see.
+
+This fixes modesets with a display with EDID max bpc > 12.
+Previously any modeset would just silently fail on platforms
+that didn't otherwise limit this via the max_bpc property.
+In particular we don't add the max_bpc property to HDMI
+ports on gmch platforms, and thus we would see the raw
+max_bpc coming from the EDID.
+
+I suppose we could already adjust this to also allow 16bpc,
+but seeing as no current platform supports that there is
+little point.
+
+Cc: stable@vger.kernel.org
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2632
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20201110210447.27454-1-ville.syrjala@linux.intel.com
+Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
+(cherry picked from commit 2ca5a7b85b0c2b97ef08afbd7799b022e29f192e)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/display/intel_display.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -11893,10 +11893,11 @@ compute_sink_pipe_bpp(const struct drm_c
+ case 10 ... 11:
+ bpp = 10 * 3;
+ break;
+- case 12:
++ case 12 ... 16:
+ bpp = 12 * 3;
+ break;
+ default:
++ MISSING_CASE(conn_state->max_bpc);
+ return -EINVAL;
+ }
+
--- /dev/null
+From 7bc40aedf24d31d8bea80e1161e996ef4299fb10 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Thu, 12 Nov 2020 11:22:04 +0100
+Subject: mac80211: free sta in sta_info_insert_finish() on errors
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 7bc40aedf24d31d8bea80e1161e996ef4299fb10 upstream.
+
+If sta_info_insert_finish() fails, we currently keep the station
+around and free it only in the caller, but there's only one such
+caller and it always frees it immediately.
+
+As syzbot found, another consequence of this split is that things
+that sleep can only be put into __cleanup_single_sta() and not into
+sta_info_free(), yet this error path is now the only place that
+requires such behaviour of sta_info_free().
+
+Change this to free the station in sta_info_insert_finish(), in
+which case we can still sleep. This will also let us unify the
+cleanup code later.
+
+Cc: stable@vger.kernel.org
+Fixes: dcd479e10a05 ("mac80211: always wind down STA state")
+Reported-by: syzbot+32c6c38c4812d22f2f0b@syzkaller.appspotmail.com
+Reported-by: syzbot+4c81fe92e372d26c4246@syzkaller.appspotmail.com
+Reported-by: syzbot+6a7fe9faf0d1d61bc24a@syzkaller.appspotmail.com
+Reported-by: syzbot+abed06851c5ffe010921@syzkaller.appspotmail.com
+Reported-by: syzbot+b7aeb9318541a1c709f1@syzkaller.appspotmail.com
+Reported-by: syzbot+d5a9416c6cafe53b5dd0@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/r/20201112112201.ee6b397b9453.I9c31d667a0ea2151441cc64ed6613d36c18a48e0@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/sta_info.c | 14 ++++----------
+ 1 file changed, 4 insertions(+), 10 deletions(-)
+
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -688,7 +688,7 @@ static int sta_info_insert_finish(struct
+ out_drop_sta:
+ local->num_sta--;
+ synchronize_net();
+- __cleanup_single_sta(sta);
++ cleanup_single_sta(sta);
+ out_err:
+ mutex_unlock(&local->sta_mtx);
+ kfree(sinfo);
+@@ -707,19 +707,13 @@ int sta_info_insert_rcu(struct sta_info
+
+ err = sta_info_insert_check(sta);
+ if (err) {
++ sta_info_free(local, sta);
+ mutex_unlock(&local->sta_mtx);
+ rcu_read_lock();
+- goto out_free;
++ return err;
+ }
+
+- err = sta_info_insert_finish(sta);
+- if (err)
+- goto out_free;
+-
+- return 0;
+- out_free:
+- sta_info_free(local, sta);
+- return err;
++ return sta_info_insert_finish(sta);
+ }
+
+ int sta_info_insert(struct sta_info *sta)
--- /dev/null
+From b2911a84396f72149dce310a3b64d8948212c1b3 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 11 Nov 2020 19:33:59 +0100
+Subject: mac80211: minstrel: fix tx status processing corner case
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit b2911a84396f72149dce310a3b64d8948212c1b3 upstream.
+
+Some drivers fill the status rate list without setting the rate index of
+the entry after the final rate to -1. minstrel_ht already deals with this,
+but minstrel doesn't, which causes it to get stuck at the lowest rate on
+these drivers.
+
+Fix this by checking the count as well.
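+
+For illustration, a minimal sketch (with hypothetical values, not taken
+from any specific driver) of a status rate array as such a driver might
+leave it:
+
+    struct ieee80211_tx_rate ar[IEEE80211_TX_MAX_RATES] = {
+        { .idx = 2, .count = 3 },   /* rate actually tried */
+        { .idx = 0, .count = 0 },   /* unused slot left zeroed:
+                                     * idx is 0, not -1 */
+    };
+
+With only the ar[i].idx < 0 check, such zeroed entries are never
+recognized as the end of the list; the additional !ar[i].count check
+makes the loop stop at the first unused slot.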
+
+Cc: stable@vger.kernel.org
+Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm")
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20201111183359.43528-3-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/rc80211_minstrel.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mac80211/rc80211_minstrel.c
++++ b/net/mac80211/rc80211_minstrel.c
+@@ -270,7 +270,7 @@ minstrel_tx_status(void *priv, struct ie
+ success = !!(info->flags & IEEE80211_TX_STAT_ACK);
+
+ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+- if (ar[i].idx < 0)
++ if (ar[i].idx < 0 || !ar[i].count)
+ break;
+
+ ndx = rix_to_ndx(mi, ar[i].idx);
--- /dev/null
+From 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 11 Nov 2020 19:33:58 +0100
+Subject: mac80211: minstrel: remove deferred sampling code
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d upstream.
+
+Deferring sampling attempts to the second stage has some bad interactions
+with drivers that process the rate table in hardware and use the probe flag
+to indicate probing packets (e.g. most mt76 drivers). On affected drivers
+it can lead to probing not working at all.
+
+If the link conditions turn worse, it might not be such a good idea to
+do a lot of sampling for lower rates.
+
+Fix this by simply skipping the sample attempt instead of deferring it,
+but keep the checks that allow a rate to be sampled anyway if it was
+skipped too often, provided it has less than 95% success probability.
+
+Also ensure that IEEE80211_TX_CTL_RATE_CTRL_PROBE is set for all probing
+packets.
+
+Cc: stable@vger.kernel.org
+Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm")
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Link: https://lore.kernel.org/r/20201111183359.43528-2-nbd@nbd.name
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/rc80211_minstrel.c | 25 ++++---------------------
+ net/mac80211/rc80211_minstrel.h | 1 -
+ 2 files changed, 4 insertions(+), 22 deletions(-)
+
+--- a/net/mac80211/rc80211_minstrel.c
++++ b/net/mac80211/rc80211_minstrel.c
+@@ -283,12 +283,6 @@ minstrel_tx_status(void *priv, struct ie
+ mi->r[ndx].stats.success += success;
+ }
+
+- if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
+- mi->sample_packets++;
+-
+- if (mi->sample_deferred > 0)
+- mi->sample_deferred--;
+-
+ if (time_after(jiffies, mi->last_stats_update +
+ (mp->update_interval * HZ) / 1000))
+ minstrel_update_stats(mp, mi);
+@@ -363,7 +357,7 @@ minstrel_get_rate(void *priv, struct iee
+ return;
+
+ delta = (mi->total_packets * sampling_ratio / 100) -
+- (mi->sample_packets + mi->sample_deferred / 2);
++ mi->sample_packets;
+
+ /* delta < 0: no sampling required */
+ prev_sample = mi->prev_sample;
+@@ -372,7 +366,6 @@ minstrel_get_rate(void *priv, struct iee
+ return;
+
+ if (mi->total_packets >= 10000) {
+- mi->sample_deferred = 0;
+ mi->sample_packets = 0;
+ mi->total_packets = 0;
+ } else if (delta > mi->n_rates * 2) {
+@@ -397,19 +390,8 @@ minstrel_get_rate(void *priv, struct iee
+ * rate sampling method should be used.
+ * Respect such rates that are not sampled for 20 interations.
+ */
+- if (mrr_capable &&
+- msr->perfect_tx_time > mr->perfect_tx_time &&
+- msr->stats.sample_skipped < 20) {
+- /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
+- * packets that have the sampling rate deferred to the
+- * second MRR stage. Increase the sample counter only
+- * if the deferred sample rate was actually used.
+- * Use the sample_deferred counter to make sure that
+- * the sampling is not done in large bursts */
+- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+- rate++;
+- mi->sample_deferred++;
+- } else {
++ if (msr->perfect_tx_time < mr->perfect_tx_time ||
++ msr->stats.sample_skipped >= 20) {
+ if (!msr->sample_limit)
+ return;
+
+@@ -429,6 +411,7 @@ minstrel_get_rate(void *priv, struct iee
+
+ rate->idx = mi->r[ndx].rix;
+ rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
++ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+ }
+
+
+--- a/net/mac80211/rc80211_minstrel.h
++++ b/net/mac80211/rc80211_minstrel.h
+@@ -79,7 +79,6 @@ struct minstrel_sta_info {
+ u8 max_prob_rate;
+ unsigned int total_packets;
+ unsigned int sample_packets;
+- int sample_deferred;
+
+ unsigned int sample_row;
+ unsigned int sample_column;
--- /dev/null
+From 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Sat, 21 Nov 2020 22:17:12 -0800
+Subject: mm: memcg/slab: fix root memcg vmstats
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb upstream.
+
+If we reparent the slab objects to the root memcg, when we free the slab
+object, we need to update the per-memcg vmstats to keep it correct for
+the root memcg. Currently this at least affects the NR_KERNEL_STACK_KB
+vmstat for !CONFIG_VMAP_STACK when the thread stack size is smaller
+than PAGE_SIZE.
+
+David said:
+ "I assume that without this fix that the root memcg's vmstat would
+ always be inflated if we reparented"
+
+Fixes: ec9f02384f60 ("mm: workingset: fix vmstat counters for shadow nodes")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: David Rientjes <rientjes@google.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Christopher Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yafang Shao <laoar.shao@gmail.com>
+Cc: Chris Down <chris@chrisdown.name>
+Cc: <stable@vger.kernel.org> [5.3+]
+Link: https://lkml.kernel.org/r/20201110031015.15715-1-songmuchun@bytedance.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -776,8 +776,13 @@ void __mod_lruvec_slab_state(void *p, en
+ rcu_read_lock();
+ memcg = memcg_from_slab_page(page);
+
+- /* Untracked pages have no memcg, no lruvec. Update only the node */
+- if (!memcg || memcg == root_mem_cgroup) {
++ /*
++ * Untracked pages have no memcg, no lruvec. Update only the
++ * node. If we reparent the slab objects to the root memcg,
++ * when we free the slab object, we need to update the per-memcg
++ * vmstats to keep it correct for the root memcg.
++ */
++ if (!memcg) {
+ __mod_node_page_state(pgdat, idx, val);
+ } else {
+ lruvec = mem_cgroup_lruvec(pgdat, memcg);
--- /dev/null
+From f80b08fc44536a311a9f3182e50f318b79076425 Mon Sep 17 00:00:00 2001
+From: Charan Teja Reddy <charante@codeaurora.org>
+Date: Thu, 6 Aug 2020 23:25:24 -0700
+Subject: mm, page_alloc: skip ->watermark_boost for atomic order-0 allocations
+
+From: Charan Teja Reddy <charante@codeaurora.org>
+
+commit f80b08fc44536a311a9f3182e50f318b79076425 upstream.
+
+When boosting is enabled, it is observed that the rate of atomic order-0
+allocation failures is high because free levels in the system are
+checked with the ->watermark_boost offset applied. This is not a problem
+for sleepable allocations, but for atomic allocations it looks like a
+regression.
+
+This problem is seen frequently on an Android kernel running on
+Snapdragon hardware with 4GB of RAM. When no extfrag event has occurred
+in the system, the ->watermark_boost factor is zero, and thus the
+watermark configuration in the system is:
+
+ _watermark = (
+ [WMARK_MIN] = 1272, --> ~5MB
+ [WMARK_LOW] = 9067, --> ~36MB
+ [WMARK_HIGH] = 9385), --> ~38MB
+ watermark_boost = 0
+
+Launching some memory-hungry applications in Android can cause enough
+extfrag events in the system that ->watermark_boost is set to its max,
+i.e. the default boost factor raises it to 150% of the high watermark:
+
+ _watermark = (
+ [WMARK_MIN] = 1272, --> ~5MB
+ [WMARK_LOW] = 9067, --> ~36MB
+ [WMARK_HIGH] = 9385), --> ~38MB
+ watermark_boost = 14077, -->~57MB
+
+With the default system configuration, ~2MB of free memory suffices for
+an atomic order-0 allocation to succeed. But boosting raises the min
+watermark to ~61MB, so for an atomic order-0 allocation to succeed the
+system should have a minimum of ~23MB of free memory (from the
+calculations in zone_watermark_ok(), min = 3/4(min/2)). Yet failures
+are observed even though the system has ~20MB of free memory. In
+testing, this is reproducible as early as the first 300 seconds since
+boot, and with lower-RAM configurations (<2GB) it is observed as early
+as the first 150 seconds since boot.
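+
+Working through the quoted numbers (4K pages; a rough sketch of the
+arithmetic, rounded as in the figures above):
+
+  boosted min watermark = 1272 + 14077 = 15349 pages  ~= 61MB
+  atomic order-0 check  = 3/4 * (min/2) = 3/8 * 15349 ~= 5756 pages ~= 23MB
+  without boost         = 3/8 * 1272                  ~=  477 pages ~=  2MB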
+
+These failures can be avoided by excluding ->watermark_boost from the
+watermark calculations for atomic order-0 allocations.
+
+[akpm@linux-foundation.org: fix comment grammar, reflow comment]
+[charante@codeaurora.org: fix suggested by Mel Gorman]
+ Link: http://lkml.kernel.org/r/31556793-57b1-1c21-1a9d-22674d9bd938@codeaurora.org
+
+Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Vinayak Menon <vinmenon@codeaurora.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Link: http://lkml.kernel.org/r/1589882284-21010-1-git-send-email-charante@codeaurora.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ralph Siemsen <ralph.siemsen@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c | 25 +++++++++++++++++++++----
+ 1 file changed, 21 insertions(+), 4 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3484,7 +3484,8 @@ bool zone_watermark_ok(struct zone *z, u
+ }
+
+ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
+- unsigned long mark, int classzone_idx, unsigned int alloc_flags)
++ unsigned long mark, int classzone_idx,
++ unsigned int alloc_flags, gfp_t gfp_mask)
+ {
+ long free_pages = zone_page_state(z, NR_FREE_PAGES);
+ long cma_pages = 0;
+@@ -3505,8 +3506,23 @@ static inline bool zone_watermark_fast(s
+ if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+ return true;
+
+- return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+- free_pages);
++ if (__zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
++ free_pages))
++ return true;
++ /*
++ * Ignore watermark boosting for GFP_ATOMIC order-0 allocations
++ * when checking the min watermark. The min watermark is the
++ * point where boosting is ignored so that kswapd is woken up
++ * when below the low watermark.
++ */
++ if (unlikely(!order && (gfp_mask & __GFP_ATOMIC) && z->watermark_boost
++ && ((alloc_flags & ALLOC_WMARK_MASK) == WMARK_MIN))) {
++ mark = z->_watermark[WMARK_MIN];
++ return __zone_watermark_ok(z, order, mark, classzone_idx,
++ alloc_flags, free_pages);
++ }
++
++ return false;
+ }
+
+ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+@@ -3647,7 +3663,8 @@ retry:
+
+ mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
+ if (!zone_watermark_fast(zone, order, mark,
+- ac_classzone_idx(ac), alloc_flags)) {
++ ac_classzone_idx(ac), alloc_flags,
++ gfp_mask)) {
+ int ret;
+
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
--- /dev/null
+From bfe8cc1db02ab243c62780f17fc57f65bde0afe1 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Date: Sat, 21 Nov 2020 22:17:15 -0800
+Subject: mm/userfaultfd: do not access vma->vm_mm after calling handle_userfault()
+
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+
+commit bfe8cc1db02ab243c62780f17fc57f65bde0afe1 upstream.
+
+Alexander reported a syzkaller / KASAN finding on s390, see below for
+complete output.
+
+In do_huge_pmd_anonymous_page(), the pre-allocated pagetable will be
+freed in some cases. In the case of userfaultfd_missing(), this will
+happen after calling handle_userfault(), which might have released the
+mmap_lock. Therefore, the following pte_free(vma->vm_mm, pgtable) will
+access an unstable vma->vm_mm, which could have been freed or re-used
+already.
+
+For all architectures other than s390 this will go without any negative
+impact, because pte_free() simply frees the page and ignores the
+passed-in mm. The implementation for SPARC32 would also access
+mm->page_table_lock for pte_free(), but there is no THP support in
+SPARC32, so the buggy code path will not be used there.
+
+For s390, the mm->context.pgtable_list is being used to maintain the 2K
+pagetable fragments, and operating on an already freed or even re-used
+mm could result in various more or less subtle bugs due to list /
+pagetable corruption.
+
+Fix this by calling pte_free() before handle_userfault(), similar to how
+it is already done in __do_huge_pmd_anonymous_page() for the WRITE /
+non-huge_zero_page case.
+
+Commit 6b251fc96cf2c ("userfaultfd: call handle_userfault() for
+userfaultfd_missing() faults") actually introduced both the
+do_huge_pmd_anonymous_page() and the __do_huge_pmd_anonymous_page()
+changes w.r.t. calling handle_userfault(), but only in the latter case
+did it put the pte_free() before calling handle_userfault().
+
+ BUG: KASAN: use-after-free in do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744
+ Read of size 8 at addr 00000000962d6988 by task syz-executor.0/9334
+
+ CPU: 1 PID: 9334 Comm: syz-executor.0 Not tainted 5.10.0-rc1-syzkaller-07083-g4c9720875573 #0
+ Hardware name: IBM 3906 M04 701 (KVM/Linux)
+ Call Trace:
+ do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744
+ create_huge_pmd mm/memory.c:4256 [inline]
+ __handle_mm_fault+0xe6e/0x1068 mm/memory.c:4480
+ handle_mm_fault+0x288/0x748 mm/memory.c:4607
+ do_exception+0x394/0xae0 arch/s390/mm/fault.c:479
+ do_dat_exception+0x34/0x80 arch/s390/mm/fault.c:567
+ pgm_check_handler+0x1da/0x22c arch/s390/kernel/entry.S:706
+ copy_from_user_mvcos arch/s390/lib/uaccess.c:111 [inline]
+ raw_copy_from_user+0x3a/0x88 arch/s390/lib/uaccess.c:174
+ _copy_from_user+0x48/0xa8 lib/usercopy.c:16
+ copy_from_user include/linux/uaccess.h:192 [inline]
+ __do_sys_sigaltstack kernel/signal.c:4064 [inline]
+ __s390x_sys_sigaltstack+0xc8/0x240 kernel/signal.c:4060
+ system_call+0xe0/0x28c arch/s390/kernel/entry.S:415
+
+ Allocated by task 9334:
+ slab_alloc_node mm/slub.c:2891 [inline]
+ slab_alloc mm/slub.c:2899 [inline]
+ kmem_cache_alloc+0x118/0x348 mm/slub.c:2904
+ vm_area_dup+0x9c/0x2b8 kernel/fork.c:356
+ __split_vma+0xba/0x560 mm/mmap.c:2742
+ split_vma+0xca/0x108 mm/mmap.c:2800
+ mlock_fixup+0x4ae/0x600 mm/mlock.c:550
+ apply_vma_lock_flags+0x2c6/0x398 mm/mlock.c:619
+ do_mlock+0x1aa/0x718 mm/mlock.c:711
+ __do_sys_mlock2 mm/mlock.c:738 [inline]
+ __s390x_sys_mlock2+0x86/0xa8 mm/mlock.c:728
+ system_call+0xe0/0x28c arch/s390/kernel/entry.S:415
+
+ Freed by task 9333:
+ slab_free mm/slub.c:3142 [inline]
+ kmem_cache_free+0x7c/0x4b8 mm/slub.c:3158
+ __vma_adjust+0x7b2/0x2508 mm/mmap.c:960
+ vma_merge+0x87e/0xce0 mm/mmap.c:1209
+ userfaultfd_release+0x412/0x6b8 fs/userfaultfd.c:868
+ __fput+0x22c/0x7a8 fs/file_table.c:281
+ task_work_run+0x200/0x320 kernel/task_work.c:151
+ tracehook_notify_resume include/linux/tracehook.h:188 [inline]
+ do_notify_resume+0x100/0x148 arch/s390/kernel/signal.c:538
+ system_call+0xe6/0x28c arch/s390/kernel/entry.S:416
+
+ The buggy address belongs to the object at 00000000962d6948 which belongs to the cache vm_area_struct of size 200
+ The buggy address is located 64 bytes inside of 200-byte region [00000000962d6948, 00000000962d6a10)
+ The buggy address belongs to the page: page:00000000313a09fe refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x962d6 flags: 0x3ffff00000000200(slab)
+ raw: 3ffff00000000200 000040000257e080 0000000c0000000c 000000008020ba00
+ raw: 0000000000000000 000f001e00000000 ffffffff00000001 0000000096959501
+ page dumped because: kasan: bad access detected
+ page->mem_cgroup:0000000096959501
+
+ Memory state around the buggy address:
+ 00000000962d6880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 00000000962d6900: 00 fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb
+ >00000000962d6980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ 00000000962d6a00: fb fb fc fc fc fc fc fc fc fc 00 00 00 00 00 00
+ 00000000962d6a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ==================================================================
+
+Fixes: 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults")
+Reported-by: Alexander Egorenkov <egorenar@linux.ibm.com>
+Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: <stable@vger.kernel.org> [4.3+]
+Link: https://lkml.kernel.org/r/20201110190329.11920-1-gerald.schaefer@linux.ibm.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -722,7 +722,6 @@ vm_fault_t do_huge_pmd_anonymous_page(st
+ transparent_hugepage_use_zero_page()) {
+ pgtable_t pgtable;
+ struct page *zero_page;
+- bool set;
+ vm_fault_t ret;
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (unlikely(!pgtable))
+@@ -735,25 +734,25 @@ vm_fault_t do_huge_pmd_anonymous_page(st
+ }
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ ret = 0;
+- set = false;
+ if (pmd_none(*vmf->pmd)) {
+ ret = check_stable_address_space(vma->vm_mm);
+ if (ret) {
+ spin_unlock(vmf->ptl);
++ pte_free(vma->vm_mm, pgtable);
+ } else if (userfaultfd_missing(vma)) {
+ spin_unlock(vmf->ptl);
++ pte_free(vma->vm_mm, pgtable);
+ ret = handle_userfault(vmf, VM_UFFD_MISSING);
+ VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+ } else {
+ set_huge_zero_page(pgtable, vma->vm_mm, vma,
+ haddr, vmf->pmd, zero_page);
+ spin_unlock(vmf->ptl);
+- set = true;
+ }
+- } else
++ } else {
+ spin_unlock(vmf->ptl);
+- if (!set)
+ pte_free(vma->vm_mm, pgtable);
++ }
+ return ret;
+ }
+ gfp = alloc_hugepage_direct_gfpmask(vma);
--- /dev/null
+From 60d53566100abde4acc5504b524bc97f89015690 Mon Sep 17 00:00:00 2001
+From: Adrian Hunter <adrian.hunter@intel.com>
+Date: Thu, 12 Nov 2020 15:36:56 +0200
+Subject: mmc: sdhci-pci: Prefer SDR25 timing for High Speed mode for BYT-based Intel controllers
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+commit 60d53566100abde4acc5504b524bc97f89015690 upstream.
+
+A UHS setting of SDR25 can give better results for High Speed mode.
+This is because there is no setting corresponding to high speed. Currently
+SDHCI sets no value, which means zero, which is also the setting for SDR12.
+There was an attempt to change this in sdhci.c but it caused problems for
+some drivers, so it was reverted and the change was made to sdhci-brcmstb
+in commit 2fefc7c5f7d16e ("mmc: sdhci-brcmstb: Fix incorrect switch to HS
+mode"). Several other drivers also do this.
+
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
+Cc: stable@vger.kernel.org # v5.4+
+Link: https://lore.kernel.org/r/20201112133656.20317-1-adrian.hunter@intel.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/sdhci-pci-core.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/host/sdhci-pci-core.c
++++ b/drivers/mmc/host/sdhci-pci-core.c
+@@ -669,6 +669,15 @@ static void sdhci_intel_set_power(struct
+ }
+ }
+
++static void sdhci_intel_set_uhs_signaling(struct sdhci_host *host,
++ unsigned int timing)
++{
++ /* Set UHS timing to SDR25 for High Speed mode */
++ if (timing == MMC_TIMING_MMC_HS || timing == MMC_TIMING_SD_HS)
++ timing = MMC_TIMING_UHS_SDR25;
++ sdhci_set_uhs_signaling(host, timing);
++}
++
+ #define INTEL_HS400_ES_REG 0x78
+ #define INTEL_HS400_ES_BIT BIT(0)
+
+@@ -725,7 +734,7 @@ static const struct sdhci_ops sdhci_inte
+ .enable_dma = sdhci_pci_enable_dma,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+- .set_uhs_signaling = sdhci_set_uhs_signaling,
++ .set_uhs_signaling = sdhci_intel_set_uhs_signaling,
+ .hw_reset = sdhci_pci_hw_reset,
+ };
+
+@@ -735,7 +744,7 @@ static const struct sdhci_ops sdhci_inte
+ .enable_dma = sdhci_pci_enable_dma,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_cqhci_reset,
+- .set_uhs_signaling = sdhci_set_uhs_signaling,
++ .set_uhs_signaling = sdhci_intel_set_uhs_signaling,
+ .hw_reset = sdhci_pci_hw_reset,
+ .irq = sdhci_cqhci_irq,
+ };
--- /dev/null
+From cf23705244c947151179f929774fabf71e239eee Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@linux.microsoft.com>
+Date: Fri, 30 Oct 2020 13:38:48 +0100
+Subject: ptrace: Set PF_SUPERPRIV when checking capability
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@linux.microsoft.com>
+
+commit cf23705244c947151179f929774fabf71e239eee upstream.
+
+Commit 69f594a38967 ("ptrace: do not audit capability check when outputing
+/proc/pid/stat") replaced the use of ns_capable() with
+has_ns_capability{,_noaudit}() which doesn't set PF_SUPERPRIV.
+
+Commit 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in
+ptrace_has_cap()") replaced has_ns_capability{,_noaudit}() with
+security_capable(), which doesn't set PF_SUPERPRIV either.
+
+Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a
+new ns_capable_noaudit() helper is available. Let's use it!
+
+As a result, the signature of ptrace_has_cap() is restored to its original one.
+
+Cc: Christian Brauner <christian.brauner@ubuntu.com>
+Cc: Eric Paris <eparis@redhat.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Serge E. Hallyn <serge@hallyn.com>
+Cc: Tyler Hicks <tyhicks@linux.microsoft.com>
+Cc: stable@vger.kernel.org
+Fixes: 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()")
+Fixes: 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat")
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Reviewed-by: Jann Horn <jannh@google.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20201030123849.770769-2-mic@digikod.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/ptrace.c | 16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -264,17 +264,11 @@ static int ptrace_check_attach(struct ta
+ return ret;
+ }
+
+-static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns,
+- unsigned int mode)
++static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+ {
+- int ret;
+-
+ if (mode & PTRACE_MODE_NOAUDIT)
+- ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT);
+- else
+- ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE);
+-
+- return ret == 0;
++ return ns_capable_noaudit(ns, CAP_SYS_PTRACE);
++ return ns_capable(ns, CAP_SYS_PTRACE);
+ }
+
+ /* Returns 0 on success, -errno on denial. */
+@@ -326,7 +320,7 @@ static int __ptrace_may_access(struct ta
+ gid_eq(caller_gid, tcred->sgid) &&
+ gid_eq(caller_gid, tcred->gid))
+ goto ok;
+- if (ptrace_has_cap(cred, tcred->user_ns, mode))
++ if (ptrace_has_cap(tcred->user_ns, mode))
+ goto ok;
+ rcu_read_unlock();
+ return -EPERM;
+@@ -345,7 +339,7 @@ ok:
+ mm = task->mm;
+ if (mm &&
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+- !ptrace_has_cap(cred, mm->user_ns, mode)))
++ !ptrace_has_cap(mm->user_ns, mode)))
+ return -EPERM;
+
+ return security_ptrace_access_check(task, mode);
--- /dev/null
+From 78d732e1f326f74f240d416af9484928303d9951 Mon Sep 17 00:00:00 2001
+From: Thomas Richter <tmricht@linux.ibm.com>
+Date: Wed, 11 Nov 2020 16:26:25 +0100
+Subject: s390/cpum_sf.c: fix file permission for cpum_sfb_size
+
+From: Thomas Richter <tmricht@linux.ibm.com>
+
+commit 78d732e1f326f74f240d416af9484928303d9951 upstream.
+
+This file is installed by the s390 CPU Measurement sampling
+facility device driver to export supported minimum and
+maximum sample buffer sizes.
+This file is read by the lscpumf tool to display the details
+of the device driver capabilities. The lscpumf tool might
+be invoked by a non-root user, in which case it does not
+print anything because the file contents cannot be read.
+
+Fix this by allowing read access for all users. Reading
+the file contents is fine; changing the file contents is
+left to the root user only.
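+
+In octal terms the change is 0640 (rw-r-----) to 0644 (rw-r--r--):
+world read access is added so that a non-root lscpumf invocation can
+read the file, while write access stays restricted to the root owner.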
+
+For further reference and details see:
+ [1] https://github.com/ibm-s390-tools/s390-tools/issues/97
+
+Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur")
+Cc: <stable@vger.kernel.org> # 3.14
+Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
+Acked-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/perf_cpum_sf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/perf_cpum_sf.c
++++ b/arch/s390/kernel/perf_cpum_sf.c
+@@ -2217,4 +2217,4 @@ out:
+ }
+
+ arch_initcall(init_cpum_sampling_pmu);
+-core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
++core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
--- /dev/null
+From 6f117cb854a44a79898d844e6ae3fd23bd94e786 Mon Sep 17 00:00:00 2001
+From: Stefan Haberland <sth@linux.ibm.com>
+Date: Mon, 16 Nov 2020 16:23:47 +0100
+Subject: s390/dasd: fix null pointer dereference for ERP requests
+
+From: Stefan Haberland <sth@linux.ibm.com>
+
+commit 6f117cb854a44a79898d844e6ae3fd23bd94e786 upstream.
+
+When requeueing all requests on the device request queue to the blocklayer
+we might get to an ERP (error recovery) request that is a copy of an
+original CQR.
+
+Those requests do not have blocklayer request information or a pointer to
+the dasd_queue set. Trying to access that data leads to a null pointer
+dereference in dasd_requeue_all_requests().
+
+Fix by checking if the request is an ERP request that can simply be
+ignored. The blocklayer request will be requeued by the original CQR that
+is on the device queue right behind the ERP request.
+
+Fixes: 9487cfd3430d ("s390/dasd: fix handling of internal requests")
+Cc: <stable@vger.kernel.org> #4.16
+Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
+Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/block/dasd.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/s390/block/dasd.c
++++ b/drivers/s390/block/dasd.c
+@@ -2980,6 +2980,12 @@ static int _dasd_requeue_request(struct
+
+ if (!block)
+ return -EINVAL;
++ /*
++ * If the request is an ERP request there is nothing to requeue.
++ * This will be done with the remaining original request.
++ */
++ if (cqr->refers)
++ return 0;
+ spin_lock_irq(&cqr->dq->lock);
+ req = (struct request *) cqr->callback_data;
+ blk_mq_requeue_request(req, false);
--- /dev/null
+From fb14528e443646dd3fd02df4437fcf5265b66baa Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@linux.microsoft.com>
+Date: Fri, 30 Oct 2020 13:38:49 +0100
+Subject: seccomp: Set PF_SUPERPRIV when checking capability
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@linux.microsoft.com>
+
+commit fb14528e443646dd3fd02df4437fcf5265b66baa upstream.
+
+Replace the use of security_capable(current_cred(), ...) with
+ns_capable_noaudit() which sets PF_SUPERPRIV.
+
+Since commit 98f368e9e263 ("kernel: Add noaudit variant of
+ns_capable()"), a new ns_capable_noaudit() helper is available. Let's
+use it!
+
+Cc: Jann Horn <jannh@google.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Tyler Hicks <tyhicks@linux.microsoft.com>
+Cc: Will Drewry <wad@chromium.org>
+Cc: stable@vger.kernel.org
+Fixes: e2cfabdfd075 ("seccomp: add system call filtering using BPF")
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Reviewed-by: Jann Horn <jannh@google.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20201030123849.770769-3-mic@digikod.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/seccomp.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -37,7 +37,7 @@
+ #include <linux/filter.h>
+ #include <linux/pid.h>
+ #include <linux/ptrace.h>
+-#include <linux/security.h>
++#include <linux/capability.h>
+ #include <linux/tracehook.h>
+ #include <linux/uaccess.h>
+ #include <linux/anon_inodes.h>
+@@ -453,8 +453,7 @@ static struct seccomp_filter *seccomp_pr
+ * behavior of privileged children.
+ */
+ if (!task_no_new_privs(current) &&
+- security_capable(current_cred(), current_user_ns(),
+- CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0)
++ !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+ return ERR_PTR(-EACCES);
+
+ /* Allocate a new seccomp_filter */
regulator-fix-memory-leak-with-repeated-set_machine_constraints.patch
regulator-avoid-resolve_supply-infinite-recursion.patch
regulator-workaround-self-referent-regulators.patch
+xtensa-fix-tlbtemp-area-placement.patch
+xtensa-disable-preemption-around-cache-alias-management-calls.patch
+mac80211-minstrel-remove-deferred-sampling-code.patch
+mac80211-minstrel-fix-tx-status-processing-corner-case.patch
+mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch
+s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch
+s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch
+drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch
+drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch
+drm-i915-handle-max_bpc-16.patch
+mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch
+ptrace-set-pf_superpriv-when-checking-capability.patch
+seccomp-set-pf_superpriv-when-checking-capability.patch
+x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch
+mm-memcg-slab-fix-root-memcg-vmstats.patch
+mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch
+mm-page_alloc-skip-watermark_boost-for-atomic-order-0-allocations.patch
--- /dev/null
+From 1a371e67dc77125736cc56d3a0893f06b75855b6 Mon Sep 17 00:00:00 2001
+From: Chen Yu <yu.c.chen@intel.com>
+Date: Fri, 13 Nov 2020 09:59:23 +0800
+Subject: x86/microcode/intel: Check patch signature before saving microcode for early loading
+
+From: Chen Yu <yu.c.chen@intel.com>
+
+commit 1a371e67dc77125736cc56d3a0893f06b75855b6 upstream.
+
+Currently, scan_microcode() leverages microcode_matches() to check
+if the microcode matches the CPU by comparing the family and model.
+However, the processor stepping and flags of the microcode signature
+should also be considered when saving a microcode patch for early
+update.
+
+Use find_matching_signature() in scan_microcode() and get rid of the
+now-unused microcode_matches() which is a good cleanup in itself.
+
+Complete the verification of the patch being saved for early loading in
+save_microcode_patch() directly. This needs to be done there as well,
+because save_mc_for_early() also calls save_microcode_patch().
+
+The second reason why this needs to be done is that the loader still
+tries to support, at least hypothetically, mixed-stepping systems and
+thus adds all patches to the cache that belong to the same CPU model,
+albeit with different steppings.
+
+For example:
+
+ microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6
+ microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23
+ microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27
+ microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23
+ microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27
+ microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23
+
+The patch which is being saved for early loading, however, can only be
+the one which fits the CPU this runs on, so do the signature verification
+before saving.
+
+ [ bp: Do signature verification in save_microcode_patch()
+ and rewrite commit message. ]
+
+Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU")
+Signed-off-by: Chen Yu <yu.c.chen@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535
+Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/microcode/intel.c | 63 +++++-----------------------------
+ 1 file changed, 10 insertions(+), 53 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc,
+ return find_matching_signature(mc, csig, cpf);
+ }
+
+-/*
+- * Given CPU signature and a microcode patch, this function finds if the
+- * microcode patch has matching family and model with the CPU.
+- *
+- * %true - if there's a match
+- * %false - otherwise
+- */
+-static bool microcode_matches(struct microcode_header_intel *mc_header,
+- unsigned long sig)
+-{
+- unsigned long total_size = get_totalsize(mc_header);
+- unsigned long data_size = get_datasize(mc_header);
+- struct extended_sigtable *ext_header;
+- unsigned int fam_ucode, model_ucode;
+- struct extended_signature *ext_sig;
+- unsigned int fam, model;
+- int ext_sigcount, i;
+-
+- fam = x86_family(sig);
+- model = x86_model(sig);
+-
+- fam_ucode = x86_family(mc_header->sig);
+- model_ucode = x86_model(mc_header->sig);
+-
+- if (fam == fam_ucode && model == model_ucode)
+- return true;
+-
+- /* Look for ext. headers: */
+- if (total_size <= data_size + MC_HEADER_SIZE)
+- return false;
+-
+- ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE;
+- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+- ext_sigcount = ext_header->count;
+-
+- for (i = 0; i < ext_sigcount; i++) {
+- fam_ucode = x86_family(ext_sig->sig);
+- model_ucode = x86_model(ext_sig->sig);
+-
+- if (fam == fam_ucode && model == model_ucode)
+- return true;
+-
+- ext_sig++;
+- }
+- return false;
+-}
+-
+ static struct ucode_patch *memdup_patch(void *data, unsigned int size)
+ {
+ struct ucode_patch *p;
+@@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(
+ return p;
+ }
+
+-static void save_microcode_patch(void *data, unsigned int size)
++static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size)
+ {
+ struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
+ struct ucode_patch *iter, *tmp, *p = NULL;
+@@ -210,6 +163,9 @@ static void save_microcode_patch(void *d
+ if (!p)
+ return;
+
++ if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf))
++ return;
++
+ /*
+ * Save for early loading. On 32-bit, that needs to be a physical
+ * address as the APs are running from physical addresses, before
+@@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size,
+
+ size -= mc_size;
+
+- if (!microcode_matches(mc_header, uci->cpu_sig.sig)) {
++ if (!find_matching_signature(data, uci->cpu_sig.sig,
++ uci->cpu_sig.pf)) {
+ data += mc_size;
+ continue;
+ }
+
+ if (save) {
+- save_microcode_patch(data, mc_size);
++ save_microcode_patch(uci, data, mc_size);
+ goto next;
+ }
+
+@@ -483,14 +440,14 @@ static void show_saved_mc(void)
+ * Save this microcode patch. It will be loaded early when a CPU is
+ * hot-added or resumes.
+ */
+-static void save_mc_for_early(u8 *mc, unsigned int size)
++static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)
+ {
+ /* Synchronization during CPU hotplug. */
+ static DEFINE_MUTEX(x86_cpu_microcode_mutex);
+
+ mutex_lock(&x86_cpu_microcode_mutex);
+
+- save_microcode_patch(mc, size);
++ save_microcode_patch(uci, mc, size);
+ show_saved_mc();
+
+ mutex_unlock(&x86_cpu_microcode_mutex);
+@@ -934,7 +891,7 @@ static enum ucode_state generic_load_mic
+ * permanent memory. So it will be loaded early when a CPU is hot added
+ * or resumes.
+ */
+- save_mc_for_early(new_mc, new_mc_size);
++ save_mc_for_early(uci, new_mc, new_mc_size);
+
+ pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
--- /dev/null
+From 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Mon, 16 Nov 2020 01:38:59 -0800
+Subject: xtensa: disable preemption around cache alias management calls
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 upstream.
+
+Although cache alias management calls set up and tear down TLB entries,
+and fast_second_level_miss is able to restore a TLB entry should it be
+evicted, they absolutely cannot preempt each other because they use the
+same TLBTEMP area for different purposes.
+Disable preemption around all cache alias management calls to enforce
+that.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/mm/cache.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/arch/xtensa/mm/cache.c
++++ b/arch/xtensa/mm/cache.c
+@@ -71,8 +71,10 @@ static inline void kmap_invalidate_coher
+ kvaddr = TLBTEMP_BASE_1 +
+ (page_to_phys(page) & DCACHE_ALIAS_MASK);
+
++ preempt_disable();
+ __invalidate_dcache_page_alias(kvaddr,
+ page_to_phys(page));
++ preempt_enable();
+ }
+ }
+ }
+@@ -157,6 +159,7 @@ void flush_dcache_page(struct page *page
+ if (!alias && !mapping)
+ return;
+
++ preempt_disable();
+ virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
+ __flush_invalidate_dcache_page_alias(virt, phys);
+
+@@ -167,6 +170,7 @@ void flush_dcache_page(struct page *page
+
+ if (mapping)
+ __invalidate_icache_page_alias(virt, phys);
++ preempt_enable();
+ }
+
+ /* There shouldn't be an entry in the cache for this page anymore. */
+@@ -200,8 +204,10 @@ void local_flush_cache_page(struct vm_ar
+ unsigned long phys = page_to_phys(pfn_to_page(pfn));
+ unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK);
+
++ preempt_disable();
+ __flush_invalidate_dcache_page_alias(virt, phys);
+ __invalidate_icache_page_alias(virt, phys);
++ preempt_enable();
+ }
+ EXPORT_SYMBOL(local_flush_cache_page);
+
+@@ -228,11 +234,13 @@ update_mmu_cache(struct vm_area_struct *
+ unsigned long phys = page_to_phys(page);
+ unsigned long tmp;
+
++ preempt_disable();
+ tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
+ __flush_invalidate_dcache_page_alias(tmp, phys);
+ tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
+ __flush_invalidate_dcache_page_alias(tmp, phys);
+ __invalidate_icache_page_alias(tmp, phys);
++ preempt_enable();
+
+ clear_bit(PG_arch_1, &page->flags);
+ }
+@@ -266,7 +274,9 @@ void copy_to_user_page(struct vm_area_st
+
+ if (alias) {
+ unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
++ preempt_disable();
+ __flush_invalidate_dcache_page_alias(t, phys);
++ preempt_enable();
+ }
+
+ /* Copy data */
+@@ -281,9 +291,11 @@ void copy_to_user_page(struct vm_area_st
+ if (alias) {
+ unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+
++ preempt_disable();
+ __flush_invalidate_dcache_range((unsigned long) dst, len);
+ if ((vma->vm_flags & VM_EXEC) != 0)
+ __invalidate_icache_page_alias(t, phys);
++ preempt_enable();
+
+ } else if ((vma->vm_flags & VM_EXEC) != 0) {
+ __flush_dcache_range((unsigned long)dst,len);
+@@ -305,7 +317,9 @@ extern void copy_from_user_page(struct v
+
+ if (alias) {
+ unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
++ preempt_disable();
+ __flush_invalidate_dcache_page_alias(t, phys);
++ preempt_enable();
+ }
+
+ memcpy(dst, src, len);
--- /dev/null
+From 481535c5b41d191b22775a6873de5ec0e1cdced1 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Mon, 16 Nov 2020 01:25:56 -0800
+Subject: xtensa: fix TLBTEMP area placement
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 481535c5b41d191b22775a6873de5ec0e1cdced1 upstream.
+
+The fast_second_level_miss handler for the TLBTEMP area assumes that
+the page table directory entry for the TLBTEMP address range is 0. For
+this to be true the TLBTEMP area must be aligned to a 4MB boundary and
+must not share its 4MB region with anything that may use a page table.
+This is currently not the case: TLBTEMP shares space with the vmalloc
+space, which results in the following kinds of runtime errors when
+fast_second_level_miss loads the page table directory entry for the
+vmalloc space instead of fixing up the TLBTEMP area:
+
+ Unable to handle kernel paging request at virtual address c7ff0e00
+ pc = d0009275, ra = 90009478
+ Oops: sig: 9 [#1] PREEMPT
+ CPU: 1 PID: 61 Comm: kworker/u9:2 Not tainted 5.10.0-rc3-next-20201110-00007-g1fe4962fa983-dirty #58
+ Workqueue: xprtiod xs_stream_data_receive_workfn
+ a00: 90009478 d11e1dc0 c7ff0e00 00000020 c7ff0000 00000001 7f8b8107 00000000
+ a08: 900c5992 d11e1d90 d0cc88b8 5506e97c 00000000 5506e97c d06c8074 d11e1d90
+ pc: d0009275, ps: 00060310, depc: 00000014, excvaddr: c7ff0e00
+ lbeg: d0009275, lend: d0009287 lcount: 00000003, sar: 00000010
+ Call Trace:
+ xs_stream_data_receive_workfn+0x43c/0x770
+ process_one_work+0x1a1/0x324
+ worker_thread+0x1cc/0x3c0
+ kthread+0x10d/0x124
+ ret_from_kernel_thread+0xc/0x18
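+
+A quick address check against the default MMUv2 layout (VMALLOC_START =
+0xc0000000, vmalloc area 128MB - 64KB) shows the overlap:
+
+  old: TLBTEMP_BASE_1 = VMALLOC_END + 1       = 0xc7ff0000
+       enclosing 4MB region                   = 0xc7c00000..0xc7ffffff
+       vmalloc space reaches into that region, up to 0xc7feffff
+  new: TLBTEMP_BASE_1 = VMALLOC_START + 128MB = 0xc8000000
+       4MB-aligned and entirely past the vmalloc space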
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/xtensa/mmu.rst | 9 ++++++---
+ arch/xtensa/include/asm/pgtable.h | 2 +-
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/Documentation/xtensa/mmu.rst
++++ b/Documentation/xtensa/mmu.rst
+@@ -82,7 +82,8 @@ Default MMUv2-compatible layout::
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+- | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE
++ +------------------+
++ | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+@@ -124,7 +125,8 @@ Default MMUv2-compatible layout::
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+- | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE
++ +------------------+
++ | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+@@ -167,7 +169,8 @@ Default MMUv2-compatible layout::
+ +------------------+
+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB
+ +------------------+ VMALLOC_END
+- | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE
++ +------------------+
++ | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE
+ | remap area 1 |
+ +------------------+
+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
+--- a/arch/xtensa/include/asm/pgtable.h
++++ b/arch/xtensa/include/asm/pgtable.h
+@@ -70,7 +70,7 @@
+ */
+ #define VMALLOC_START (XCHAL_KSEG_CACHED_VADDR - 0x10000000)
+ #define VMALLOC_END (VMALLOC_START + 0x07FEFFFF)
+-#define TLBTEMP_BASE_1 (VMALLOC_END + 1)
++#define TLBTEMP_BASE_1 (VMALLOC_START + 0x08000000)
+ #define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE)
+ #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE
+ #define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE)