From 58c0c99a20c50cfc313ea25a0e72b720283143fe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Nov 2020 10:56:51 +0100 Subject: [PATCH] 5.4-stable patches added patches: drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch drm-i915-handle-max_bpc-16.patch mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch mac80211-minstrel-fix-tx-status-processing-corner-case.patch mac80211-minstrel-remove-deferred-sampling-code.patch mm-memcg-slab-fix-root-memcg-vmstats.patch mm-page_alloc-skip-waternark_boost-for-atomic-order-0-allocations.patch mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch ptrace-set-pf_superpriv-when-checking-capability.patch s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch seccomp-set-pf_superpriv-when-checking-capability.patch x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch xtensa-disable-preemption-around-cache-alias-management-calls.patch xtensa-fix-tlbtemp-area-placement.patch --- ...of-vmbus_connect_cpu-if-disconnected.patch | 67 +++++++ ...lay-add-missing-pflip-irq-for-dcn2.0.patch | 34 ++++ queue-5.4/drm-i915-handle-max_bpc-16.patch | 57 ++++++ ...-in-sta_info_insert_finish-on-errors.patch | 72 ++++++++ ...fix-tx-status-processing-corner-case.patch | 37 ++++ ...nstrel-remove-deferred-sampling-code.patch | 108 +++++++++++ ...mm-memcg-slab-fix-root-memcg-vmstats.patch | 62 +++++++ ...boost-for-atomic-order-0-allocations.patch | 113 ++++++++++++ ...vm_mm-after-calling-handle_userfault.patch | 159 ++++++++++++++++ ...mode-for-byt-based-intel-controllers.patch | 63 +++++++ ...f_superpriv-when-checking-capability.patch | 86 +++++++++ ...ix-file-permission-for-cpum_sfb_size.patch | 43 +++++ ...pointer-dereference-for-erp-requests.patch | 47 +++++ ...f_superpriv-when-checking-capability.patch | 56 ++++++ queue-5.4/series | 17 ++ ...e-saving-microcode-for-early-loading.patch | 173 ++++++++++++++++++ ...-around-cache-alias-management-calls.patch | 110 +++++++++++ .../xtensa-fix-tlbtemp-area-placement.patch | 86 +++++++++ 18 files changed, 1390 insertions(+) create mode 100644 queue-5.4/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch create mode 100644 queue-5.4/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch create mode 100644 queue-5.4/drm-i915-handle-max_bpc-16.patch create mode 100644 queue-5.4/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch create mode 100644 queue-5.4/mac80211-minstrel-fix-tx-status-processing-corner-case.patch create mode 100644 queue-5.4/mac80211-minstrel-remove-deferred-sampling-code.patch create mode 100644 queue-5.4/mm-memcg-slab-fix-root-memcg-vmstats.patch create mode 100644 queue-5.4/mm-page_alloc-skip-waternark_boost-for-atomic-order-0-allocations.patch create mode 100644 queue-5.4/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch create mode 100644 queue-5.4/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch create mode 100644 queue-5.4/ptrace-set-pf_superpriv-when-checking-capability.patch create mode 100644 queue-5.4/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch create mode 100644 queue-5.4/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch create mode 100644 
queue-5.4/seccomp-set-pf_superpriv-when-checking-capability.patch create mode 100644 queue-5.4/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch create mode 100644 queue-5.4/xtensa-disable-preemption-around-cache-alias-management-calls.patch create mode 100644 queue-5.4/xtensa-fix-tlbtemp-area-placement.patch diff --git a/queue-5.4/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch b/queue-5.4/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch new file mode 100644 index 00000000000..86326314233 --- /dev/null +++ b/queue-5.4/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch @@ -0,0 +1,67 @@ +From 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 Mon Sep 17 00:00:00 2001 +From: Chris Co +Date: Tue, 10 Nov 2020 19:01:18 +0000 +Subject: Drivers: hv: vmbus: Allow cleanup of VMBUS_CONNECT_CPU if disconnected + +From: Chris Co + +commit 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 upstream. + +When invoking kexec() on a Linux guest running on a Hyper-V host, the +kernel panics. + + RIP: 0010:cpuhp_issue_call+0x137/0x140 + Call Trace: + __cpuhp_remove_state_cpuslocked+0x99/0x100 + __cpuhp_remove_state+0x1c/0x30 + hv_kexec_handler+0x23/0x30 [hv_vmbus] + hv_machine_shutdown+0x1e/0x30 + machine_shutdown+0x10/0x20 + kernel_kexec+0x6d/0x96 + __do_sys_reboot+0x1ef/0x230 + __x64_sys_reboot+0x1d/0x20 + do_syscall_64+0x6b/0x3d8 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +This was due to hv_synic_cleanup() callback returning -EBUSY to +cpuhp_issue_call() when tearing down the VMBUS_CONNECT_CPU, even +if the vmbus_connection.conn_state = DISCONNECTED. hv_synic_cleanup() +should succeed in the case where vmbus_connection.conn_state +is DISCONNECTED. + +Fix is to add an extra condition to test for +vmbus_connection.conn_state == CONNECTED on the VMBUS_CONNECT_CPU and +only return early if true. This way the kexec() path can still shut +everything down while preserving the initial behavior of preventing +CPU offlining on the VMBUS_CONNECT_CPU while the VM is running. + +Fixes: 8a857c55420f29 ("Drivers: hv: vmbus: Always handle the VMBus messages on CPU0") +Signed-off-by: Chris Co +Reviewed-by: Andrea Parri (Microsoft) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20201110190118.15596-1-chrco@linux.microsoft.com +Signed-off-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hv/hv.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -251,9 +251,13 @@ int hv_synic_cleanup(unsigned int cpu) + + /* + * Hyper-V does not provide a way to change the connect CPU once +- * it is set; we must prevent the connect CPU from going offline. ++ * it is set; we must prevent the connect CPU from going offline ++ * while the VM is running normally. But in the panic or kexec() ++ * path where the vmbus is already disconnected, the CPU must be ++ * allowed to shut down. 
+ */ +- if (cpu == VMBUS_CONNECT_CPU) ++ if (cpu == VMBUS_CONNECT_CPU && ++ vmbus_connection.conn_state == CONNECTED) + return -EBUSY; + + /* diff --git a/queue-5.4/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch b/queue-5.4/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch new file mode 100644 index 00000000000..60fecce5f93 --- /dev/null +++ b/queue-5.4/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch @@ -0,0 +1,34 @@ +From 728321e53045d2668bf2b8627a8d61bc2c480d3b Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Fri, 13 Nov 2020 02:21:19 -0500 +Subject: drm/amd/display: Add missing pflip irq for dcn2.0 + +From: Alex Deucher + +commit 728321e53045d2668bf2b8627a8d61bc2c480d3b upstream. + +If we have more than 4 displays we will run +into dummy irq calls or flip timeout issues. + +Reviewed-by: Nicholas Kazlauskas +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c ++++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c +@@ -299,8 +299,8 @@ irq_source_info_dcn20[DAL_IRQ_SOURCES_NU + pflip_int_entry(1), + pflip_int_entry(2), + pflip_int_entry(3), +- [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(), +- [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(), ++ pflip_int_entry(4), ++ pflip_int_entry(5), + [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(), + gpio_pad_int_entry(0), + gpio_pad_int_entry(1), diff --git a/queue-5.4/drm-i915-handle-max_bpc-16.patch b/queue-5.4/drm-i915-handle-max_bpc-16.patch new file mode 100644 index 00000000000..cc521ab6255 --- /dev/null +++ b/queue-5.4/drm-i915-handle-max_bpc-16.patch @@ -0,0 +1,57 @@ +From d2e3fce9ddafe689c6f7cb355f23560637e30b9d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Tue, 10 Nov 2020 23:04:47 +0200 +Subject: drm/i915: Handle max_bpc==16 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit d2e3fce9ddafe689c6f7cb355f23560637e30b9d upstream. + +EDID can declare the maximum supported bpc up to 16, +and apparently there are displays that do so. Currently +we assume 12 bpc is the max. Fix the assumption and +toss in a MISSING_CASE() for any other value we don't +expect to see. + +This fixes modesets with a display with EDID max bpc > 12. +Previously any modeset would just silently fail on platforms +that didn't otherwise limit this via the max_bpc property. +In particular we don't add the max_bpc property to HDMI +ports on gmch platforms, and thus we would see the raw +max_bpc coming from the EDID. + +I suppose we could already adjust this to also allow 16bpc, +but seeing as no current platform supports that there is +little point.
+ +Cc: stable@vger.kernel.org +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2632 +Signed-off-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20201110210447.27454-1-ville.syrjala@linux.intel.com +Reviewed-by: José Roberto de Souza +(cherry picked from commit 2ca5a7b85b0c2b97ef08afbd7799b022e29f192e) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/display/intel_display.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/display/intel_display.c ++++ b/drivers/gpu/drm/i915/display/intel_display.c +@@ -11893,10 +11893,11 @@ compute_sink_pipe_bpp(const struct drm_c + case 10 ... 11: + bpp = 10 * 3; + break; +- case 12: ++ case 12 ... 16: + bpp = 12 * 3; + break; + default: ++ MISSING_CASE(conn_state->max_bpc); + return -EINVAL; + } + diff --git a/queue-5.4/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch b/queue-5.4/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch new file mode 100644 index 00000000000..234f584fb69 --- /dev/null +++ b/queue-5.4/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch @@ -0,0 +1,72 @@ +From 7bc40aedf24d31d8bea80e1161e996ef4299fb10 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Thu, 12 Nov 2020 11:22:04 +0100 +Subject: mac80211: free sta in sta_info_insert_finish() on errors + +From: Johannes Berg + +commit 7bc40aedf24d31d8bea80e1161e996ef4299fb10 upstream. + +If sta_info_insert_finish() fails, we currently keep the station +around and free it only in the caller, but there's only one such +caller and it always frees it immediately. + +As syzbot found, another consequence of this split is that we can +put things that sleep only into __cleanup_single_sta() and not in +sta_info_free(), but this is the only place that requires such of +sta_info_free() now. + +Change this to free the station in sta_info_insert_finish(), in +which case we can still sleep. This will also let us unify the +cleanup code later. 
+ +Cc: stable@vger.kernel.org +Fixes: dcd479e10a05 ("mac80211: always wind down STA state") +Reported-by: syzbot+32c6c38c4812d22f2f0b@syzkaller.appspotmail.com +Reported-by: syzbot+4c81fe92e372d26c4246@syzkaller.appspotmail.com +Reported-by: syzbot+6a7fe9faf0d1d61bc24a@syzkaller.appspotmail.com +Reported-by: syzbot+abed06851c5ffe010921@syzkaller.appspotmail.com +Reported-by: syzbot+b7aeb9318541a1c709f1@syzkaller.appspotmail.com +Reported-by: syzbot+d5a9416c6cafe53b5dd0@syzkaller.appspotmail.com +Link: https://lore.kernel.org/r/20201112112201.ee6b397b9453.I9c31d667a0ea2151441cc64ed6613d36c18a48e0@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/sta_info.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +--- a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -688,7 +688,7 @@ static int sta_info_insert_finish(struct + out_drop_sta: + local->num_sta--; + synchronize_net(); +- __cleanup_single_sta(sta); ++ cleanup_single_sta(sta); + out_err: + mutex_unlock(&local->sta_mtx); + kfree(sinfo); +@@ -707,19 +707,13 @@ int sta_info_insert_rcu(struct sta_info + + err = sta_info_insert_check(sta); + if (err) { ++ sta_info_free(local, sta); + mutex_unlock(&local->sta_mtx); + rcu_read_lock(); +- goto out_free; ++ return err; + } + +- err = sta_info_insert_finish(sta); +- if (err) +- goto out_free; +- +- return 0; +- out_free: +- sta_info_free(local, sta); +- return err; ++ return sta_info_insert_finish(sta); + } + + int sta_info_insert(struct sta_info *sta) diff --git a/queue-5.4/mac80211-minstrel-fix-tx-status-processing-corner-case.patch b/queue-5.4/mac80211-minstrel-fix-tx-status-processing-corner-case.patch new file mode 100644 index 00000000000..5b684e405f5 --- /dev/null +++ b/queue-5.4/mac80211-minstrel-fix-tx-status-processing-corner-case.patch @@ -0,0 +1,37 @@ +From b2911a84396f72149dce310a3b64d8948212c1b3 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Wed, 11 Nov 2020 19:33:59 +0100 +Subject: mac80211: minstrel: fix tx status processing corner case + +From: Felix Fietkau + +commit b2911a84396f72149dce310a3b64d8948212c1b3 upstream. + +Some drivers fill the status rate list without setting the rate index after +the final rate to -1. minstrel_ht already deals with this, but minstrel +doesn't, which causes it to get stuck at the lowest rate on these drivers. + +Fix this by checking the count as well. 
+ +Cc: stable@vger.kernel.org +Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") +Signed-off-by: Felix Fietkau +Link: https://lore.kernel.org/r/20201111183359.43528-3-nbd@nbd.name +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/rc80211_minstrel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mac80211/rc80211_minstrel.c ++++ b/net/mac80211/rc80211_minstrel.c +@@ -270,7 +270,7 @@ minstrel_tx_status(void *priv, struct ie + success = !!(info->flags & IEEE80211_TX_STAT_ACK); + + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { +- if (ar[i].idx < 0) ++ if (ar[i].idx < 0 || !ar[i].count) + break; + + ndx = rix_to_ndx(mi, ar[i].idx); diff --git a/queue-5.4/mac80211-minstrel-remove-deferred-sampling-code.patch b/queue-5.4/mac80211-minstrel-remove-deferred-sampling-code.patch new file mode 100644 index 00000000000..46d6ff41bfb --- /dev/null +++ b/queue-5.4/mac80211-minstrel-remove-deferred-sampling-code.patch @@ -0,0 +1,108 @@ +From 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Wed, 11 Nov 2020 19:33:58 +0100 +Subject: mac80211: minstrel: remove deferred sampling code + +From: Felix Fietkau + +commit 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d upstream. + +Deferring sampling attempts to the second stage has some bad interactions +with drivers that process the rate table in hardware and use the probe flag +to indicate probing packets (e.g. most mt76 drivers). On affected drivers +it can lead to probing not working at all. + +If the link conditions turn worse, it might not be such a good idea to +do a lot of sampling for lower rates in this case. + +Fix this by simply skipping the sample attempt instead of deferring it, +but keep the checks that would allow it to be sampled if it was skipped +too often, but only if it has less than 95% success probability. + +Also ensure that IEEE80211_TX_CTL_RATE_CTRL_PROBE is set for all probing +packets. + +Cc: stable@vger.kernel.org +Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") +Signed-off-by: Felix Fietkau +Link: https://lore.kernel.org/r/20201111183359.43528-2-nbd@nbd.name +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/rc80211_minstrel.c | 25 ++++--------------------- + net/mac80211/rc80211_minstrel.h | 1 - + 2 files changed, 4 insertions(+), 22 deletions(-) + +--- a/net/mac80211/rc80211_minstrel.c ++++ b/net/mac80211/rc80211_minstrel.c +@@ -283,12 +283,6 @@ minstrel_tx_status(void *priv, struct ie + mi->r[ndx].stats.success += success; + } + +- if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) +- mi->sample_packets++; +- +- if (mi->sample_deferred > 0) +- mi->sample_deferred--; +- + if (time_after(jiffies, mi->last_stats_update + + (mp->update_interval * HZ) / 1000)) + minstrel_update_stats(mp, mi); +@@ -363,7 +357,7 @@ minstrel_get_rate(void *priv, struct iee + return; + + delta = (mi->total_packets * sampling_ratio / 100) - +- (mi->sample_packets + mi->sample_deferred / 2); ++ mi->sample_packets; + + /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; +@@ -372,7 +366,6 @@ minstrel_get_rate(void *priv, struct iee + return; + + if (mi->total_packets >= 10000) { +- mi->sample_deferred = 0; + mi->sample_packets = 0; + mi->total_packets = 0; + } else if (delta > mi->n_rates * 2) { +@@ -397,19 +390,8 @@ minstrel_get_rate(void *priv, struct iee + * rate sampling method should be used. 
+ * Respect such rates that are not sampled for 20 interations. + */ +- if (mrr_capable && +- msr->perfect_tx_time > mr->perfect_tx_time && +- msr->stats.sample_skipped < 20) { +- /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark +- * packets that have the sampling rate deferred to the +- * second MRR stage. Increase the sample counter only +- * if the deferred sample rate was actually used. +- * Use the sample_deferred counter to make sure that +- * the sampling is not done in large bursts */ +- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; +- rate++; +- mi->sample_deferred++; +- } else { ++ if (msr->perfect_tx_time < mr->perfect_tx_time || ++ msr->stats.sample_skipped >= 20) { + if (!msr->sample_limit) + return; + +@@ -429,6 +411,7 @@ minstrel_get_rate(void *priv, struct iee + + rate->idx = mi->r[ndx].rix; + rate->count = minstrel_get_retry_count(&mi->r[ndx], info); ++ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } + + +--- a/net/mac80211/rc80211_minstrel.h ++++ b/net/mac80211/rc80211_minstrel.h +@@ -79,7 +79,6 @@ struct minstrel_sta_info { + u8 max_prob_rate; + unsigned int total_packets; + unsigned int sample_packets; +- int sample_deferred; + + unsigned int sample_row; + unsigned int sample_column; diff --git a/queue-5.4/mm-memcg-slab-fix-root-memcg-vmstats.patch b/queue-5.4/mm-memcg-slab-fix-root-memcg-vmstats.patch new file mode 100644 index 00000000000..7a80b78b06a --- /dev/null +++ b/queue-5.4/mm-memcg-slab-fix-root-memcg-vmstats.patch @@ -0,0 +1,62 @@ +From 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Sat, 21 Nov 2020 22:17:12 -0800 +Subject: mm: memcg/slab: fix root memcg vmstats + +From: Muchun Song + +commit 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb upstream. + +If we reparent the slab objects to the root memcg, when we free the slab +object, we need to update the per-memcg vmstats to keep it correct for +the root memcg. Now this at least affects the vmstat of +NR_KERNEL_STACK_KB for !CONFIG_VMAP_STACK when the thread stack size is +smaller than the PAGE_SIZE. + +David said: + "I assume that without this fix that the root memcg's vmstat would + always be inflated if we reparented" + +Fixes: ec9f02384f60 ("mm: workingset: fix vmstat counters for shadow nodes") +Signed-off-by: Muchun Song +Signed-off-by: Andrew Morton +Reviewed-by: Shakeel Butt +Acked-by: Roman Gushchin +Acked-by: Johannes Weiner +Acked-by: David Rientjes +Cc: Michal Hocko +Cc: Vladimir Davydov +Cc: Christopher Lameter +Cc: Pekka Enberg +Cc: Joonsoo Kim +Cc: Roman Gushchin +Cc: Vlastimil Babka +Cc: Yafang Shao +Cc: Chris Down +Cc: [5.3+] +Link: https://lkml.kernel.org/r/20201110031015.15715-1-songmuchun@bytedance.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memcontrol.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -776,8 +776,13 @@ void __mod_lruvec_slab_state(void *p, en + rcu_read_lock(); + memcg = memcg_from_slab_page(page); + +- /* Untracked pages have no memcg, no lruvec. Update only the node */ +- if (!memcg || memcg == root_mem_cgroup) { ++ /* ++ * Untracked pages have no memcg, no lruvec. Update only the ++ * node. If we reparent the slab objects to the root memcg, ++ * when we free the slab object, we need to update the per-memcg ++ * vmstats to keep it correct for the root memcg. 
++ */ + if (!memcg) { + __mod_node_page_state(pgdat, idx, val); + } else { + lruvec = mem_cgroup_lruvec(pgdat, memcg); diff --git a/queue-5.4/mm-page_alloc-skip-waternark_boost-for-atomic-order-0-allocations.patch b/queue-5.4/mm-page_alloc-skip-waternark_boost-for-atomic-order-0-allocations.patch new file mode 100644 index 00000000000..4efcc0f0a67 --- /dev/null +++ b/queue-5.4/mm-page_alloc-skip-waternark_boost-for-atomic-order-0-allocations.patch @@ -0,0 +1,113 @@ +From f80b08fc44536a311a9f3182e50f318b79076425 Mon Sep 17 00:00:00 2001 +From: Charan Teja Reddy +Date: Thu, 6 Aug 2020 23:25:24 -0700 +Subject: mm, page_alloc: skip ->waternark_boost for atomic order-0 allocations + +From: Charan Teja Reddy + +commit f80b08fc44536a311a9f3182e50f318b79076425 upstream. + +When boosting is enabled, it is observed that the rate of atomic order-0 +allocation failures is high due to the fact that free levels in the +system are checked with the ->watermark_boost offset. This is not a problem +for sleepable allocations, but for atomic allocations it looks like a +regression. + +This problem is seen frequently on an Android kernel setup running +on Snapdragon hardware with 4GB RAM. When no extfrag event has occurred +in the system, the ->watermark_boost factor is zero, thus the watermark +configurations in the system are: + + _watermark = ( + [WMARK_MIN] = 1272, --> ~5MB + [WMARK_LOW] = 9067, --> ~36MB + [WMARK_HIGH] = 9385), --> ~38MB + watermark_boost = 0 + +After launching some memory-hungry applications in Android which can cause +extfrag events in the system to an extent that ->watermark_boost can be +set to the max, i.e. the default boost factor makes it 150% of the high watermark. + + _watermark = ( + [WMARK_MIN] = 1272, --> ~5MB + [WMARK_LOW] = 9067, --> ~36MB + [WMARK_HIGH] = 9385), --> ~38MB + watermark_boost = 14077, -->~57MB + +With the default system configuration, for an atomic order-0 allocation to +succeed, having free memory of ~2MB will suffice. But boosting raises the +min_wmark to ~61MB, thus for an atomic order-0 allocation to be successful the +system should have a minimum of ~23MB of free memory (from calculations of +zone_watermark_ok(), min = 3/4(min/2)). But failures are observed despite the +system having ~20MB of free memory. In testing, this is +reproducible as early as the first 300secs since boot, and with further low-RAM +configurations (<2GB) it is observed as early as the first 150secs since boot. + +These failures can be avoided by excluding the ->watermark_boost in +watermark calculations for atomic order-0 allocations.
+ +[akpm@linux-foundation.org: fix comment grammar, reflow comment] +[charante@codeaurora.org: fix suggested by Mel Gorman] + Link: http://lkml.kernel.org/r/31556793-57b1-1c21-1a9d-22674d9bd938@codeaurora.org + +Signed-off-by: Charan Teja Reddy +Signed-off-by: Andrew Morton +Acked-by: Vlastimil Babka +Cc: Vinayak Menon +Cc: Mel Gorman +Link: http://lkml.kernel.org/r/1589882284-21010-1-git-send-email-charante@codeaurora.org +Signed-off-by: Linus Torvalds +Signed-off-by: Ralph Siemsen +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3484,7 +3484,8 @@ bool zone_watermark_ok(struct zone *z, u + } + + static inline bool zone_watermark_fast(struct zone *z, unsigned int order, +- unsigned long mark, int classzone_idx, unsigned int alloc_flags) ++ unsigned long mark, int classzone_idx, ++ unsigned int alloc_flags, gfp_t gfp_mask) + { + long free_pages = zone_page_state(z, NR_FREE_PAGES); + long cma_pages = 0; +@@ -3505,8 +3506,23 @@ static inline bool zone_watermark_fast(s + if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx]) + return true; + +- return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, +- free_pages); ++ if (__zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, ++ free_pages)) ++ return true; ++ /* ++ * Ignore watermark boosting for GFP_ATOMIC order-0 allocations ++ * when checking the min watermark. The min watermark is the ++ * point where boosting is ignored so that kswapd is woken up ++ * when below the low watermark. ++ */ ++ if (unlikely(!order && (gfp_mask & __GFP_ATOMIC) && z->watermark_boost ++ && ((alloc_flags & ALLOC_WMARK_MASK) == WMARK_MIN))) { ++ mark = z->_watermark[WMARK_MIN]; ++ return __zone_watermark_ok(z, order, mark, classzone_idx, ++ alloc_flags, free_pages); ++ } ++ ++ return false; + } + + bool zone_watermark_ok_safe(struct zone *z, unsigned int order, +@@ -3647,7 +3663,8 @@ retry: + + mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK); + if (!zone_watermark_fast(zone, order, mark, +- ac_classzone_idx(ac), alloc_flags)) { ++ ac_classzone_idx(ac), alloc_flags, ++ gfp_mask)) { + int ret; + + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT diff --git a/queue-5.4/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch b/queue-5.4/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch new file mode 100644 index 00000000000..36b1deddc7a --- /dev/null +++ b/queue-5.4/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch @@ -0,0 +1,159 @@ +From bfe8cc1db02ab243c62780f17fc57f65bde0afe1 Mon Sep 17 00:00:00 2001 +From: Gerald Schaefer +Date: Sat, 21 Nov 2020 22:17:15 -0800 +Subject: mm/userfaultfd: do not access vma->vm_mm after calling handle_userfault() + +From: Gerald Schaefer + +commit bfe8cc1db02ab243c62780f17fc57f65bde0afe1 upstream. + +Alexander reported a syzkaller / KASAN finding on s390, see below for +complete output. + +In do_huge_pmd_anonymous_page(), the pre-allocated pagetable will be +freed in some cases. In the case of userfaultfd_missing(), this will +happen after calling handle_userfault(), which might have released the +mmap_lock. Therefore, the following pte_free(vma->vm_mm, pgtable) will +access an unstable vma->vm_mm, which could have been freed or re-used +already. 
+ +For all architectures other than s390 this will go w/o any negative +impact, because pte_free() simply frees the page and ignores the +passed-in mm. The implementation for SPARC32 would also access +mm->page_table_lock for pte_free(), but there is no THP support in +SPARC32, so the buggy code path will not be used there. + +For s390, the mm->context.pgtable_list is being used to maintain the 2K +pagetable fragments, and operating on an already freed or even re-used +mm could result in various more or less subtle bugs due to list / +pagetable corruption. + +Fix this by calling pte_free() before handle_userfault(), similar to how +it is already done in __do_huge_pmd_anonymous_page() for the WRITE / +non-huge_zero_page case. + +Commit 6b251fc96cf2c ("userfaultfd: call handle_userfault() for +userfaultfd_missing() faults") actually introduced both, the +do_huge_pmd_anonymous_page() and also __do_huge_pmd_anonymous_page() +changes wrt to calling handle_userfault(), but only in the latter case +it put the pte_free() before calling handle_userfault(). + + BUG: KASAN: use-after-free in do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 + Read of size 8 at addr 00000000962d6988 by task syz-executor.0/9334 + + CPU: 1 PID: 9334 Comm: syz-executor.0 Not tainted 5.10.0-rc1-syzkaller-07083-g4c9720875573 #0 + Hardware name: IBM 3906 M04 701 (KVM/Linux) + Call Trace: + do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 + create_huge_pmd mm/memory.c:4256 [inline] + __handle_mm_fault+0xe6e/0x1068 mm/memory.c:4480 + handle_mm_fault+0x288/0x748 mm/memory.c:4607 + do_exception+0x394/0xae0 arch/s390/mm/fault.c:479 + do_dat_exception+0x34/0x80 arch/s390/mm/fault.c:567 + pgm_check_handler+0x1da/0x22c arch/s390/kernel/entry.S:706 + copy_from_user_mvcos arch/s390/lib/uaccess.c:111 [inline] + raw_copy_from_user+0x3a/0x88 arch/s390/lib/uaccess.c:174 + _copy_from_user+0x48/0xa8 lib/usercopy.c:16 + copy_from_user include/linux/uaccess.h:192 [inline] + __do_sys_sigaltstack kernel/signal.c:4064 [inline] + __s390x_sys_sigaltstack+0xc8/0x240 kernel/signal.c:4060 + system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 + + Allocated by task 9334: + slab_alloc_node mm/slub.c:2891 [inline] + slab_alloc mm/slub.c:2899 [inline] + kmem_cache_alloc+0x118/0x348 mm/slub.c:2904 + vm_area_dup+0x9c/0x2b8 kernel/fork.c:356 + __split_vma+0xba/0x560 mm/mmap.c:2742 + split_vma+0xca/0x108 mm/mmap.c:2800 + mlock_fixup+0x4ae/0x600 mm/mlock.c:550 + apply_vma_lock_flags+0x2c6/0x398 mm/mlock.c:619 + do_mlock+0x1aa/0x718 mm/mlock.c:711 + __do_sys_mlock2 mm/mlock.c:738 [inline] + __s390x_sys_mlock2+0x86/0xa8 mm/mlock.c:728 + system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 + + Freed by task 9333: + slab_free mm/slub.c:3142 [inline] + kmem_cache_free+0x7c/0x4b8 mm/slub.c:3158 + __vma_adjust+0x7b2/0x2508 mm/mmap.c:960 + vma_merge+0x87e/0xce0 mm/mmap.c:1209 + userfaultfd_release+0x412/0x6b8 fs/userfaultfd.c:868 + __fput+0x22c/0x7a8 fs/file_table.c:281 + task_work_run+0x200/0x320 kernel/task_work.c:151 + tracehook_notify_resume include/linux/tracehook.h:188 [inline] + do_notify_resume+0x100/0x148 arch/s390/kernel/signal.c:538 + system_call+0xe6/0x28c arch/s390/kernel/entry.S:416 + + The buggy address belongs to the object at 00000000962d6948 which belongs to the cache vm_area_struct of size 200 + The buggy address is located 64 bytes inside of 200-byte region [00000000962d6948, 00000000962d6a10) + The buggy address belongs to the page: page:00000000313a09fe refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x962d6 
flags: 0x3ffff00000000200(slab) + raw: 3ffff00000000200 000040000257e080 0000000c0000000c 000000008020ba00 + raw: 0000000000000000 000f001e00000000 ffffffff00000001 0000000096959501 + page dumped because: kasan: bad access detected + page->mem_cgroup:0000000096959501 + + Memory state around the buggy address: + 00000000962d6880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00000000962d6900: 00 fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb + >00000000962d6980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + 00000000962d6a00: fb fb fc fc fc fc fc fc fc fc 00 00 00 00 00 00 + 00000000962d6a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ================================================================== + +Fixes: 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults") +Reported-by: Alexander Egorenkov +Signed-off-by: Gerald Schaefer +Signed-off-by: Andrew Morton +Cc: Andrea Arcangeli +Cc: Heiko Carstens +Cc: [4.3+] +Link: https://lkml.kernel.org/r/20201110190329.11920-1-gerald.schaefer@linux.ibm.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -722,7 +722,6 @@ vm_fault_t do_huge_pmd_anonymous_page(st + transparent_hugepage_use_zero_page()) { + pgtable_t pgtable; + struct page *zero_page; +- bool set; + vm_fault_t ret; + pgtable = pte_alloc_one(vma->vm_mm); + if (unlikely(!pgtable)) +@@ -735,25 +734,25 @@ vm_fault_t do_huge_pmd_anonymous_page(st + } + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + ret = 0; +- set = false; + if (pmd_none(*vmf->pmd)) { + ret = check_stable_address_space(vma->vm_mm); + if (ret) { + spin_unlock(vmf->ptl); ++ pte_free(vma->vm_mm, pgtable); + } else if (userfaultfd_missing(vma)) { + spin_unlock(vmf->ptl); ++ pte_free(vma->vm_mm, pgtable); + ret = handle_userfault(vmf, VM_UFFD_MISSING); + VM_BUG_ON(ret & VM_FAULT_FALLBACK); + } else { + set_huge_zero_page(pgtable, vma->vm_mm, vma, + haddr, vmf->pmd, zero_page); + spin_unlock(vmf->ptl); +- set = true; + } +- } else ++ } else { + spin_unlock(vmf->ptl); +- if (!set) + pte_free(vma->vm_mm, pgtable); ++ } + return ret; + } + gfp = alloc_hugepage_direct_gfpmask(vma); diff --git a/queue-5.4/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch b/queue-5.4/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch new file mode 100644 index 00000000000..b0095986bf2 --- /dev/null +++ b/queue-5.4/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch @@ -0,0 +1,63 @@ +From 60d53566100abde4acc5504b524bc97f89015690 Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Thu, 12 Nov 2020 15:36:56 +0200 +Subject: mmc: sdhci-pci: Prefer SDR25 timing for High Speed mode for BYT-based Intel controllers + +From: Adrian Hunter + +commit 60d53566100abde4acc5504b524bc97f89015690 upstream. + +A UHS setting of SDR25 can give better results for High Speed mode. +This is because there is no setting corresponding to high speed. Currently +SDHCI sets no value, which means zero which is also the setting for SDR12. +There was an attempt to change this in sdhci.c but it caused problems for +some drivers, so it was reverted and the change was made to sdhci-brcmstb +in commit 2fefc7c5f7d16e ("mmc: sdhci-brcmstb: Fix incorrect switch to HS +mode"). Several other drivers also do this. 
Signed-off-by: Adrian Hunter +Cc: stable@vger.kernel.org # v5.4+ +Link: https://lore.kernel.org/r/20201112133656.20317-1-adrian.hunter@intel.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/sdhci-pci-core.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/host/sdhci-pci-core.c ++++ b/drivers/mmc/host/sdhci-pci-core.c +@@ -669,6 +669,15 @@ static void sdhci_intel_set_power(struct + } + } + ++static void sdhci_intel_set_uhs_signaling(struct sdhci_host *host, ++ unsigned int timing) ++{ ++ /* Set UHS timing to SDR25 for High Speed mode */ ++ if (timing == MMC_TIMING_MMC_HS || timing == MMC_TIMING_SD_HS) ++ timing = MMC_TIMING_UHS_SDR25; ++ sdhci_set_uhs_signaling(host, timing); ++} ++ + #define INTEL_HS400_ES_REG 0x78 + #define INTEL_HS400_ES_BIT BIT(0) + +@@ -725,7 +734,7 @@ static const struct sdhci_ops sdhci_inte + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, +- .set_uhs_signaling = sdhci_set_uhs_signaling, ++ .set_uhs_signaling = sdhci_intel_set_uhs_signaling, + .hw_reset = sdhci_pci_hw_reset, + }; + +@@ -735,7 +744,7 @@ static const struct sdhci_ops sdhci_inte + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_cqhci_reset, +- .set_uhs_signaling = sdhci_set_uhs_signaling, ++ .set_uhs_signaling = sdhci_intel_set_uhs_signaling, + .hw_reset = sdhci_pci_hw_reset, + .irq = sdhci_cqhci_irq, + }; diff --git a/queue-5.4/ptrace-set-pf_superpriv-when-checking-capability.patch b/queue-5.4/ptrace-set-pf_superpriv-when-checking-capability.patch new file mode 100644 index 00000000000..ab0efe8b438 --- /dev/null +++ b/queue-5.4/ptrace-set-pf_superpriv-when-checking-capability.patch @@ -0,0 +1,86 @@ +From cf23705244c947151179f929774fabf71e239eee Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Fri, 30 Oct 2020 13:38:48 +0100 +Subject: ptrace: Set PF_SUPERPRIV when checking capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit cf23705244c947151179f929774fabf71e239eee upstream. + +Commit 69f594a38967 ("ptrace: do not audit capability check when outputing +/proc/pid/stat") replaced the use of ns_capable() with +has_ns_capability{,_noaudit}() which doesn't set PF_SUPERPRIV. + +Commit 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in +ptrace_has_cap()") replaced has_ns_capability{,_noaudit}() with +security_capable(), which doesn't set PF_SUPERPRIV either. + +Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a +new ns_capable_noaudit() helper is available. Let's use it! + +As a result, the signature of ptrace_has_cap() is restored to its original one. + +Cc: Christian Brauner +Cc: Eric Paris +Cc: Jann Horn +Cc: Kees Cook +Cc: Oleg Nesterov +Cc: Serge E. 
Hallyn +Cc: Tyler Hicks +Cc: stable@vger.kernel.org +Fixes: 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()") +Fixes: 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") +Signed-off-by: Mickaël Salaün +Reviewed-by: Jann Horn +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20201030123849.770769-2-mic@digikod.net +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/ptrace.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -264,17 +264,11 @@ static int ptrace_check_attach(struct ta + return ret; + } + +-static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, +- unsigned int mode) ++static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) + { +- int ret; +- + if (mode & PTRACE_MODE_NOAUDIT) +- ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT); +- else +- ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE); +- +- return ret == 0; ++ return ns_capable_noaudit(ns, CAP_SYS_PTRACE); ++ return ns_capable(ns, CAP_SYS_PTRACE); + } + + /* Returns 0 on success, -errno on denial. */ +@@ -326,7 +320,7 @@ static int __ptrace_may_access(struct ta + gid_eq(caller_gid, tcred->sgid) && + gid_eq(caller_gid, tcred->gid)) + goto ok; +- if (ptrace_has_cap(cred, tcred->user_ns, mode)) ++ if (ptrace_has_cap(tcred->user_ns, mode)) + goto ok; + rcu_read_unlock(); + return -EPERM; +@@ -345,7 +339,7 @@ ok: + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && +- !ptrace_has_cap(cred, mm->user_ns, mode))) ++ !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; + + return security_ptrace_access_check(task, mode); diff --git a/queue-5.4/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch b/queue-5.4/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch new file mode 100644 index 00000000000..0e555ea88df --- /dev/null +++ b/queue-5.4/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch @@ -0,0 +1,43 @@ +From 78d732e1f326f74f240d416af9484928303d9951 Mon Sep 17 00:00:00 2001 +From: Thomas Richter +Date: Wed, 11 Nov 2020 16:26:25 +0100 +Subject: s390/cpum_sf.c: fix file permission for cpum_sfb_size + +From: Thomas Richter + +commit 78d732e1f326f74f240d416af9484928303d9951 upstream. + +This file is installed by the s390 CPU Measurement sampling +facility device driver to export supported minimum and +maximum sample buffer sizes. +This file is read by lscpumf tool to display the details +of the device driver capabilities. The lscpumf tool might +be invoked by a non-root user. In this case it does not +print anything because the file contents can not be read. + +Fix this by allowing read access for all users. Reading +the file contents is ok, changing the file contents is +left to the root user only. 
+ +For further reference and details see: + [1] https://github.com/ibm-s390-tools/s390-tools/issues/97 + +Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur") +Cc: # 3.14 +Signed-off-by: Thomas Richter +Acked-by: Sumanth Korikkar +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kernel/perf_cpum_sf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/kernel/perf_cpum_sf.c ++++ b/arch/s390/kernel/perf_cpum_sf.c +@@ -2217,4 +2217,4 @@ out: + } + + arch_initcall(init_cpum_sampling_pmu); +-core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); ++core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/queue-5.4/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch b/queue-5.4/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch new file mode 100644 index 00000000000..66c91f50f52 --- /dev/null +++ b/queue-5.4/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch @@ -0,0 +1,47 @@ +From 6f117cb854a44a79898d844e6ae3fd23bd94e786 Mon Sep 17 00:00:00 2001 +From: Stefan Haberland +Date: Mon, 16 Nov 2020 16:23:47 +0100 +Subject: s390/dasd: fix null pointer dereference for ERP requests + +From: Stefan Haberland + +commit 6f117cb854a44a79898d844e6ae3fd23bd94e786 upstream. + +When requeueing all requests on the device request queue to the blocklayer +we might get to an ERP (error recovery) request that is a copy of an +original CQR. + +Those requests do not have blocklayer request information or a pointer to +the dasd_queue set. When trying to access this data it will lead to a +null pointer dereference in dasd_requeue_all_requests(). + +Fix by checking if the request is an ERP request that can simply be +ignored. The blocklayer request will be requeued by the original CQR that +is on the device queue right behind the ERP request. + +Fixes: 9487cfd3430d ("s390/dasd: fix handling of internal requests") +Cc: #4.16 +Signed-off-by: Stefan Haberland +Reviewed-by: Jan Hoeppner +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/block/dasd.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/s390/block/dasd.c ++++ b/drivers/s390/block/dasd.c +@@ -2980,6 +2980,12 @@ static int _dasd_requeue_request(struct + + if (!block) + return -EINVAL; ++ /* ++ * If the request is an ERP request there is nothing to requeue. ++ * This will be done with the remaining original request. ++ */ ++ if (cqr->refers) ++ return 0; + spin_lock_irq(&cqr->dq->lock); + req = (struct request *) cqr->callback_data; + blk_mq_requeue_request(req, false); diff --git a/queue-5.4/seccomp-set-pf_superpriv-when-checking-capability.patch b/queue-5.4/seccomp-set-pf_superpriv-when-checking-capability.patch new file mode 100644 index 00000000000..49c99fc693d --- /dev/null +++ b/queue-5.4/seccomp-set-pf_superpriv-when-checking-capability.patch @@ -0,0 +1,56 @@ +From fb14528e443646dd3fd02df4437fcf5265b66baa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Fri, 30 Oct 2020 13:38:49 +0100 +Subject: seccomp: Set PF_SUPERPRIV when checking capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit fb14528e443646dd3fd02df4437fcf5265b66baa upstream. + +Replace the use of security_capable(current_cred(), ...) with +ns_capable_noaudit() which sets PF_SUPERPRIV. 
+ +Since commit 98f368e9e263 ("kernel: Add noaudit variant of +ns_capable()"), a new ns_capable_noaudit() helper is available. Let's +use it! + +Cc: Jann Horn +Cc: Kees Cook +Cc: Tyler Hicks +Cc: Will Drewry +Cc: stable@vger.kernel.org +Fixes: e2cfabdfd075 ("seccomp: add system call filtering using BPF") +Signed-off-by: Mickaël Salaün +Reviewed-by: Jann Horn +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20201030123849.770769-3-mic@digikod.net +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/seccomp.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -37,7 +37,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -453,8 +453,7 @@ static struct seccomp_filter *seccomp_pr + * behavior of privileged children. + */ + if (!task_no_new_privs(current) && +- security_capable(current_cred(), current_user_ns(), +- CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) ++ !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) + return ERR_PTR(-EACCES); + + /* Allocate a new seccomp_filter */ diff --git a/queue-5.4/series b/queue-5.4/series index 9b64d898207..17f4db08e49 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -138,3 +138,20 @@ regulator-pfuze100-limit-pfuze-support-disable-sw-to-pfuze-100-200.patch regulator-fix-memory-leak-with-repeated-set_machine_constraints.patch regulator-avoid-resolve_supply-infinite-recursion.patch regulator-workaround-self-referent-regulators.patch +xtensa-fix-tlbtemp-area-placement.patch +xtensa-disable-preemption-around-cache-alias-management-calls.patch +mac80211-minstrel-remove-deferred-sampling-code.patch +mac80211-minstrel-fix-tx-status-processing-corner-case.patch +mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch +s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch +s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch +drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch +drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch +drm-i915-handle-max_bpc-16.patch +mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch +ptrace-set-pf_superpriv-when-checking-capability.patch +seccomp-set-pf_superpriv-when-checking-capability.patch +x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch +mm-memcg-slab-fix-root-memcg-vmstats.patch +mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch +mm-page_alloc-skip-waternark_boost-for-atomic-order-0-allocations.patch diff --git a/queue-5.4/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch b/queue-5.4/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch new file mode 100644 index 00000000000..dd3b1e8857b --- /dev/null +++ b/queue-5.4/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch @@ -0,0 +1,173 @@ +From 1a371e67dc77125736cc56d3a0893f06b75855b6 Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Fri, 13 Nov 2020 09:59:23 +0800 +Subject: x86/microcode/intel: Check patch signature before saving microcode for early loading + +From: Chen Yu + +commit 1a371e67dc77125736cc56d3a0893f06b75855b6 upstream. + +Currently, scan_microcode() leverages microcode_matches() to check +if the microcode matches the CPU by comparing the family and model. 
+However, the processor stepping and flags of the microcode signature +should also be considered when saving a microcode patch for early +update. + +Use find_matching_signature() in scan_microcode() and get rid of the +now-unused microcode_matches() which is a good cleanup in itself. + +Complete the verification of the patch being saved for early loading in +save_microcode_patch() directly. This needs to be done there too because +save_mc_for_early() will call save_microcode_patch() too. + +The second reason why this needs to be done is because the loader still +tries to support, at least hypothetically, mixed-steppings systems and +thus adds all patches to the cache that belong to the same CPU model +albeit with different steppings. + +For example: + + microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6 + microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23 + microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 + microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23 + microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 + microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23 + +The patch which is being saved for early loading, however, can only be +the one which fits the CPU this runs on so do the signature verification +before saving. + + [ bp: Do signature verification in save_microcode_patch() + and rewrite commit message. ] + +Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") +Signed-off-by: Chen Yu +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535 +Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/microcode/intel.c | 63 +++++----------------------------- + 1 file changed, 10 insertions(+), 53 deletions(-) + +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, + return find_matching_signature(mc, csig, cpf); + } + +-/* +- * Given CPU signature and a microcode patch, this function finds if the +- * microcode patch has matching family and model with the CPU. +- * +- * %true - if there's a match +- * %false - otherwise +- */ +-static bool microcode_matches(struct microcode_header_intel *mc_header, +- unsigned long sig) +-{ +- unsigned long total_size = get_totalsize(mc_header); +- unsigned long data_size = get_datasize(mc_header); +- struct extended_sigtable *ext_header; +- unsigned int fam_ucode, model_ucode; +- struct extended_signature *ext_sig; +- unsigned int fam, model; +- int ext_sigcount, i; +- +- fam = x86_family(sig); +- model = x86_model(sig); +- +- fam_ucode = x86_family(mc_header->sig); +- model_ucode = x86_model(mc_header->sig); +- +- if (fam == fam_ucode && model == model_ucode) +- return true; +- +- /* Look for ext. 
headers: */ +- if (total_size <= data_size + MC_HEADER_SIZE) +- return false; +- +- ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; +- ext_sig = (void *)ext_header + EXT_HEADER_SIZE; +- ext_sigcount = ext_header->count; +- +- for (i = 0; i < ext_sigcount; i++) { +- fam_ucode = x86_family(ext_sig->sig); +- model_ucode = x86_model(ext_sig->sig); +- +- if (fam == fam_ucode && model == model_ucode) +- return true; +- +- ext_sig++; +- } +- return false; +-} +- + static struct ucode_patch *memdup_patch(void *data, unsigned int size) + { + struct ucode_patch *p; +@@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch( + return p; + } + +-static void save_microcode_patch(void *data, unsigned int size) ++static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) + { + struct microcode_header_intel *mc_hdr, *mc_saved_hdr; + struct ucode_patch *iter, *tmp, *p = NULL; +@@ -210,6 +163,9 @@ static void save_microcode_patch(void *d + if (!p) + return; + ++ if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) ++ return; ++ + /* + * Save for early loading. On 32-bit, that needs to be a physical + * address as the APs are running from physical addresses, before +@@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, + + size -= mc_size; + +- if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { ++ if (!find_matching_signature(data, uci->cpu_sig.sig, ++ uci->cpu_sig.pf)) { + data += mc_size; + continue; + } + + if (save) { +- save_microcode_patch(data, mc_size); ++ save_microcode_patch(uci, data, mc_size); + goto next; + } + +@@ -483,14 +440,14 @@ static void show_saved_mc(void) + * Save this microcode patch. It will be loaded early when a CPU is + * hot-added or resumes. + */ +-static void save_mc_for_early(u8 *mc, unsigned int size) ++static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) + { + /* Synchronization during CPU hotplug. */ + static DEFINE_MUTEX(x86_cpu_microcode_mutex); + + mutex_lock(&x86_cpu_microcode_mutex); + +- save_microcode_patch(mc, size); ++ save_microcode_patch(uci, mc, size); + show_saved_mc(); + + mutex_unlock(&x86_cpu_microcode_mutex); +@@ -934,7 +891,7 @@ static enum ucode_state generic_load_mic + * permanent memory. So it will be loaded early when a CPU is hot added + * or resumes. + */ +- save_mc_for_early(new_mc, new_mc_size); ++ save_mc_for_early(uci, new_mc, new_mc_size); + + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); diff --git a/queue-5.4/xtensa-disable-preemption-around-cache-alias-management-calls.patch b/queue-5.4/xtensa-disable-preemption-around-cache-alias-management-calls.patch new file mode 100644 index 00000000000..733fb5ffde2 --- /dev/null +++ b/queue-5.4/xtensa-disable-preemption-around-cache-alias-management-calls.patch @@ -0,0 +1,110 @@ +From 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Mon, 16 Nov 2020 01:38:59 -0800 +Subject: xtensa: disable preemption around cache alias management calls + +From: Max Filippov + +commit 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 upstream. + +Although cache alias management calls set up and tear down TLB entries +and fast_second_level_miss is able to restore TLB entry should it be +evicted they absolutely cannot preempt each other because they use the +same TLBTEMP area for different purposes. +Disable preemption around all cache alias management calls to enforce +that. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/xtensa/mm/cache.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/arch/xtensa/mm/cache.c ++++ b/arch/xtensa/mm/cache.c +@@ -71,8 +71,10 @@ static inline void kmap_invalidate_coher + kvaddr = TLBTEMP_BASE_1 + + (page_to_phys(page) & DCACHE_ALIAS_MASK); + ++ preempt_disable(); + __invalidate_dcache_page_alias(kvaddr, + page_to_phys(page)); ++ preempt_enable(); + } + } + } +@@ -157,6 +159,7 @@ void flush_dcache_page(struct page *page + if (!alias && !mapping) + return; + ++ preempt_disable(); + virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(virt, phys); + +@@ -167,6 +170,7 @@ void flush_dcache_page(struct page *page + + if (mapping) + __invalidate_icache_page_alias(virt, phys); ++ preempt_enable(); + } + + /* There shouldn't be an entry in the cache for this page anymore. */ +@@ -200,8 +204,10 @@ void local_flush_cache_page(struct vm_ar + unsigned long phys = page_to_phys(pfn_to_page(pfn)); + unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + ++ preempt_disable(); + __flush_invalidate_dcache_page_alias(virt, phys); + __invalidate_icache_page_alias(virt, phys); ++ preempt_enable(); + } + EXPORT_SYMBOL(local_flush_cache_page); + +@@ -228,11 +234,13 @@ update_mmu_cache(struct vm_area_struct * + unsigned long phys = page_to_phys(page); + unsigned long tmp; + ++ preempt_disable(); + tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(tmp, phys); + tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(tmp, phys); + __invalidate_icache_page_alias(tmp, phys); ++ preempt_enable(); + + clear_bit(PG_arch_1, &page->flags); + } +@@ -266,7 +274,9 @@ void copy_to_user_page(struct vm_area_st + + if (alias) { + unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); ++ preempt_disable(); + __flush_invalidate_dcache_page_alias(t, phys); ++ preempt_enable(); + } + + /* Copy data */ +@@ -281,9 +291,11 @@ void copy_to_user_page(struct vm_area_st + if (alias) { + unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + ++ preempt_disable(); + __flush_invalidate_dcache_range((unsigned long) dst, len); + if ((vma->vm_flags & VM_EXEC) != 0) + __invalidate_icache_page_alias(t, phys); ++ preempt_enable(); + + } else if ((vma->vm_flags & VM_EXEC) != 0) { + __flush_dcache_range((unsigned long)dst,len); +@@ -305,7 +317,9 @@ extern void copy_from_user_page(struct v + + if (alias) { + unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); ++ preempt_disable(); + __flush_invalidate_dcache_page_alias(t, phys); ++ preempt_enable(); + } + + memcpy(dst, src, len); diff --git a/queue-5.4/xtensa-fix-tlbtemp-area-placement.patch b/queue-5.4/xtensa-fix-tlbtemp-area-placement.patch new file mode 100644 index 00000000000..a2f2b5e9ce4 --- /dev/null +++ b/queue-5.4/xtensa-fix-tlbtemp-area-placement.patch @@ -0,0 +1,86 @@ +From 481535c5b41d191b22775a6873de5ec0e1cdced1 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Mon, 16 Nov 2020 01:25:56 -0800 +Subject: xtensa: fix TLBTEMP area placement + +From: Max Filippov + +commit 481535c5b41d191b22775a6873de5ec0e1cdced1 upstream. + +fast_second_level_miss handler for the TLBTEMP area has an assumption +that page table directory entry for the TLBTEMP address range is 0. For +it to be true the TLBTEMP area must be aligned to 4MB boundary and not +share its 4MB region with anything that may use a page table. 
This is +not true currently: TLBTEMP shares space with vmalloc space which +results in the following kinds of runtime errors when +fast_second_level_miss loads page table directory entry for the vmalloc +space instead of fixing up the TLBTEMP area: + + Unable to handle kernel paging request at virtual address c7ff0e00 + pc = d0009275, ra = 90009478 + Oops: sig: 9 [#1] PREEMPT + CPU: 1 PID: 61 Comm: kworker/u9:2 Not tainted 5.10.0-rc3-next-20201110-00007-g1fe4962fa983-dirty #58 + Workqueue: xprtiod xs_stream_data_receive_workfn + a00: 90009478 d11e1dc0 c7ff0e00 00000020 c7ff0000 00000001 7f8b8107 00000000 + a08: 900c5992 d11e1d90 d0cc88b8 5506e97c 00000000 5506e97c d06c8074 d11e1d90 + pc: d0009275, ps: 00060310, depc: 00000014, excvaddr: c7ff0e00 + lbeg: d0009275, lend: d0009287 lcount: 00000003, sar: 00000010 + Call Trace: + xs_stream_data_receive_workfn+0x43c/0x770 + process_one_work+0x1a1/0x324 + worker_thread+0x1cc/0x3c0 + kthread+0x10d/0x124 + ret_from_kernel_thread+0xc/0x18 + +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/xtensa/mmu.rst | 9 ++++++--- + arch/xtensa/include/asm/pgtable.h | 2 +- + 2 files changed, 7 insertions(+), 4 deletions(-) + +--- a/Documentation/xtensa/mmu.rst ++++ b/Documentation/xtensa/mmu.rst +@@ -82,7 +82,8 @@ Default MMUv2-compatible layout:: + +------------------+ + | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB + +------------------+ VMALLOC_END +- | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE ++ +------------------+ ++ | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE + | remap area 1 | + +------------------+ + | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE +@@ -124,7 +125,8 @@ Default MMUv2-compatible layout:: + +------------------+ + | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB + +------------------+ VMALLOC_END +- | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE ++ +------------------+ ++ | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE + | remap area 1 | + +------------------+ + | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE +@@ -167,7 +169,8 @@ Default MMUv2-compatible layout:: + +------------------+ + | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB + +------------------+ VMALLOC_END +- | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE ++ +------------------+ ++ | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE + | remap area 1 | + +------------------+ + | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE +--- a/arch/xtensa/include/asm/pgtable.h ++++ b/arch/xtensa/include/asm/pgtable.h +@@ -70,7 +70,7 @@ + */ + #define VMALLOC_START (XCHAL_KSEG_CACHED_VADDR - 0x10000000) + #define VMALLOC_END (VMALLOC_START + 0x07FEFFFF) +-#define TLBTEMP_BASE_1 (VMALLOC_END + 1) ++#define TLBTEMP_BASE_1 (VMALLOC_START + 0x08000000) + #define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) + #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE + #define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE) -- 2.47.3