From: Greg Kroah-Hartman Date: Mon, 23 Nov 2020 10:05:05 +0000 (+0100) Subject: 5.9-stable patches X-Git-Tag: v4.4.246~5 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a1acb334d3007799cdc2274fa740905019595c72;p=thirdparty%2Fkernel%2Fstable-queue.git 5.9-stable patches added patches: blk-cgroup-fix-a-hd_struct-leak-in-blkcg_fill_root_iostats.patch drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch drm-i915-handle-max_bpc-16.patch drm-i915-tgl-fix-media-power-gate-sequence.patch fanotify-fix-logic-of-reporting-name-info-with-watched-parent.patch gfs2-fix-regression-in-freeze_go_sync.patch io_uring-don-t-double-complete-failed-reissue-request.patch mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch mac80211-minstrel-fix-tx-status-processing-corner-case.patch mac80211-minstrel-remove-deferred-sampling-code.patch mm-fix-readahead_page_batch-for-retry-entries.patch mm-memcg-slab-fix-root-memcg-vmstats.patch mm-never-attempt-async-page-lock-if-we-ve-transferred-data-already.patch mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch mmc-sdhci-of-arasan-allow-configuring-zero-tap-values.patch mmc-sdhci-of-arasan-issue-dll-reset-explicitly.patch mmc-sdhci-of-arasan-use-mask-writes-for-tap-delays.patch mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch ptrace-set-pf_superpriv-when-checking-capability.patch s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch s390-fix-system-call-exit-path.patch seccomp-set-pf_superpriv-when-checking-capability.patch x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch xtensa-disable-preemption-around-cache-alias-management-calls.patch xtensa-fix-tlbtemp-area-placement.patch --- diff --git a/queue-5.9/blk-cgroup-fix-a-hd_struct-leak-in-blkcg_fill_root_iostats.patch b/queue-5.9/blk-cgroup-fix-a-hd_struct-leak-in-blkcg_fill_root_iostats.patch new file mode 100644 index 00000000000..c50c5e9b394 --- /dev/null +++ b/queue-5.9/blk-cgroup-fix-a-hd_struct-leak-in-blkcg_fill_root_iostats.patch @@ -0,0 +1,31 @@ +From b7131ee0bac5e5df73e4098e77bbddb3a31d06ff Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Sat, 14 Nov 2020 19:12:46 +0100 +Subject: blk-cgroup: fix a hd_struct leak in blkcg_fill_root_iostats + +From: Christoph Hellwig + +commit b7131ee0bac5e5df73e4098e77bbddb3a31d06ff upstream. + +disk_get_part needs to be paired with a disk_put_part. 
+ +Cc: stable@vger.kernel.org +Fixes: ef45fe470e1 ("blk-cgroup: show global disk stats in root cgroup io.stat") +Signed-off-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-cgroup.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -840,6 +840,7 @@ static void blkcg_fill_root_iostats(void + blkg_iostat_set(&blkg->iostat.cur, &tmp); + u64_stats_update_end(&blkg->iostat.sync); + } ++ disk_put_part(part); + } + } + diff --git a/queue-5.9/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch b/queue-5.9/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch new file mode 100644 index 00000000000..6d102939e23 --- /dev/null +++ b/queue-5.9/drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch @@ -0,0 +1,67 @@ +From 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 Mon Sep 17 00:00:00 2001 +From: Chris Co +Date: Tue, 10 Nov 2020 19:01:18 +0000 +Subject: Drivers: hv: vmbus: Allow cleanup of VMBUS_CONNECT_CPU if disconnected + +From: Chris Co + +commit 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 upstream. + +When invoking kexec() on a Linux guest running on a Hyper-V host, the +kernel panics. + + RIP: 0010:cpuhp_issue_call+0x137/0x140 + Call Trace: + __cpuhp_remove_state_cpuslocked+0x99/0x100 + __cpuhp_remove_state+0x1c/0x30 + hv_kexec_handler+0x23/0x30 [hv_vmbus] + hv_machine_shutdown+0x1e/0x30 + machine_shutdown+0x10/0x20 + kernel_kexec+0x6d/0x96 + __do_sys_reboot+0x1ef/0x230 + __x64_sys_reboot+0x1d/0x20 + do_syscall_64+0x6b/0x3d8 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +This was due to hv_synic_cleanup() callback returning -EBUSY to +cpuhp_issue_call() when tearing down the VMBUS_CONNECT_CPU, even +if the vmbus_connection.conn_state = DISCONNECTED. hv_synic_cleanup() +should succeed in the case where vmbus_connection.conn_state +is DISCONNECTED. + +Fix is to add an extra condition to test for +vmbus_connection.conn_state == CONNECTED on the VMBUS_CONNECT_CPU and +only return early if true. This way the kexec() path can still shut +everything down while preserving the initial behavior of preventing +CPU offlining on the VMBUS_CONNECT_CPU while the VM is running. + +Fixes: 8a857c55420f29 ("Drivers: hv: vmbus: Always handle the VMBus messages on CPU0") +Signed-off-by: Chris Co +Reviewed-by: Andrea Parri (Microsoft) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20201110190118.15596-1-chrco@linux.microsoft.com +Signed-off-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hv/hv.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -244,9 +244,13 @@ int hv_synic_cleanup(unsigned int cpu) + + /* + * Hyper-V does not provide a way to change the connect CPU once +- * it is set; we must prevent the connect CPU from going offline. ++ * it is set; we must prevent the connect CPU from going offline ++ * while the VM is running normally. But in the panic or kexec() ++ * path where the vmbus is already disconnected, the CPU must be ++ * allowed to shut down. 
+ */ +- if (cpu == VMBUS_CONNECT_CPU) ++ if (cpu == VMBUS_CONNECT_CPU && ++ vmbus_connection.conn_state == CONNECTED) + return -EBUSY; + + /* diff --git a/queue-5.9/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch b/queue-5.9/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch new file mode 100644 index 00000000000..60fecce5f93 --- /dev/null +++ b/queue-5.9/drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch @@ -0,0 +1,34 @@ +From 728321e53045d2668bf2b8627a8d61bc2c480d3b Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Fri, 13 Nov 2020 02:21:19 -0500 +Subject: drm/amd/display: Add missing pflip irq for dcn2.0 + +From: Alex Deucher + +commit 728321e53045d2668bf2b8627a8d61bc2c480d3b upstream. + +If we have more than 4 displays we will run +into dummy irq calls or flip timout issues. + +Reviewed-by: Nicholas Kazlauskas +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c ++++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c +@@ -299,8 +299,8 @@ irq_source_info_dcn20[DAL_IRQ_SOURCES_NU + pflip_int_entry(1), + pflip_int_entry(2), + pflip_int_entry(3), +- [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(), +- [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(), ++ pflip_int_entry(4), ++ pflip_int_entry(5), + [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(), + gpio_pad_int_entry(0), + gpio_pad_int_entry(1), diff --git a/queue-5.9/drm-i915-handle-max_bpc-16.patch b/queue-5.9/drm-i915-handle-max_bpc-16.patch new file mode 100644 index 00000000000..838af58d25e --- /dev/null +++ b/queue-5.9/drm-i915-handle-max_bpc-16.patch @@ -0,0 +1,57 @@ +From d2e3fce9ddafe689c6f7cb355f23560637e30b9d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Tue, 10 Nov 2020 23:04:47 +0200 +Subject: drm/i915: Handle max_bpc==16 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit d2e3fce9ddafe689c6f7cb355f23560637e30b9d upstream. + +EDID can declare the maximum supported bpc up to 16, +and apparently there are displays that do so. Currently +we assume 12 bpc is tha max. Fix the assumption and +toss in a MISSING_CASE() for any other value we don't +expect to see. + +This fixes modesets with a display with EDID max bpc > 12. +Previously any modeset would just silently fail on platforms +that didn't otherwise limit this via the max_bpc property. +In particular we don't add the max_bpc property to HDMI +ports on gmch platforms, and thus we would see the raw +max_bpc coming from the EDID. + +I suppose we could already adjust this to also allow 16bpc, +but seeing as no current platform supports that there is +little point. 
+ +Cc: stable@vger.kernel.org +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2632 +Signed-off-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20201110210447.27454-1-ville.syrjala@linux.intel.com +Reviewed-by: José Roberto de Souza +(cherry picked from commit 2ca5a7b85b0c2b97ef08afbd7799b022e29f192e) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/display/intel_display.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/display/intel_display.c ++++ b/drivers/gpu/drm/i915/display/intel_display.c +@@ -12819,10 +12819,11 @@ compute_sink_pipe_bpp(const struct drm_c + case 10 ... 11: + bpp = 10 * 3; + break; +- case 12: ++ case 12 ... 16: + bpp = 12 * 3; + break; + default: ++ MISSING_CASE(conn_state->max_bpc); + return -EINVAL; + } + diff --git a/queue-5.9/drm-i915-tgl-fix-media-power-gate-sequence.patch b/queue-5.9/drm-i915-tgl-fix-media-power-gate-sequence.patch new file mode 100644 index 00000000000..7a3b534f644 --- /dev/null +++ b/queue-5.9/drm-i915-tgl-fix-media-power-gate-sequence.patch @@ -0,0 +1,133 @@ +From 85a12d7eb8fe449cf38f1aa9ead5ca744729a98f Mon Sep 17 00:00:00 2001 +From: Rodrigo Vivi +Date: Wed, 11 Nov 2020 09:09:36 -0500 +Subject: drm/i915/tgl: Fix Media power gate sequence. + +From: Rodrigo Vivi + +commit 85a12d7eb8fe449cf38f1aa9ead5ca744729a98f upstream. + +Some media power gates are disabled by default. commit 5d86923060fc +("drm/i915/tgl: Enable VD HCP/MFX sub-pipe power gating") +tried to enable it, but it duplicated an existent register. +So, the main PG setup sequences ended up overwriting it. + +So, let's now merge this to the main PG setup sequence. + +v2: (Chris): s/BIT/REG_BIT, remove useless comment, + remove useless =0, use the right gt, + remove rc6 sequence doubt from commit message. 
+ +Fixes: 5d86923060fc ("drm/i915/tgl: Enable VD HCP/MFX sub-pipe power gating") +Cc: Lucas De Marchi +Cc: stable@vger.kernel.org#v5.5+ +Cc: Dale B Stimson +Signed-off-by: Rodrigo Vivi +Cc: Chris Wilson +Reviewed-by: Chris Wilson +Signed-off-by: Chris Wilson +Link: https://patchwork.freedesktop.org/patch/msgid/20201111072859.1186070-1-rodrigo.vivi@intel.com +(cherry picked from commit 695dc55b573985569259e18f8e6261a77924342b) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gt/intel_rc6.c | 22 +++++++++++++++++----- + drivers/gpu/drm/i915/i915_reg.h | 12 +++++------- + drivers/gpu/drm/i915/intel_pm.c | 13 ------------- + 3 files changed, 22 insertions(+), 25 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_rc6.c ++++ b/drivers/gpu/drm/i915/gt/intel_rc6.c +@@ -56,9 +56,12 @@ static inline void set(struct intel_unco + + static void gen11_rc6_enable(struct intel_rc6 *rc6) + { +- struct intel_uncore *uncore = rc6_to_uncore(rc6); ++ struct intel_gt *gt = rc6_to_gt(rc6); ++ struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; ++ u32 pg_enable; ++ int i; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); +@@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct inte + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); + +- set(uncore, GEN9_PG_ENABLE, +- GEN9_RENDER_PG_ENABLE | +- GEN9_MEDIA_PG_ENABLE | +- GEN11_MEDIA_SAMPLER_PG_ENABLE); ++ pg_enable = ++ GEN9_RENDER_PG_ENABLE | ++ GEN9_MEDIA_PG_ENABLE | ++ GEN11_MEDIA_SAMPLER_PG_ENABLE; ++ ++ if (INTEL_GEN(gt->i915) >= 12) { ++ for (i = 0; i < I915_MAX_VCS; i++) ++ if (HAS_ENGINE(gt, _VCS(i))) ++ pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | ++ VDN_MFX_POWERGATE_ENABLE(i)); ++ } ++ ++ set(uncore, GEN9_PG_ENABLE, pg_enable); + } + + static void gen9_rc6_enable(struct intel_rc6 *rc6) +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -8974,10 +8974,6 @@ enum { + #define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) + #define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) + +-#define POWERGATE_ENABLE _MMIO(0xa210) +-#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3) +-#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4) +- + #define GTFIFODBG _MMIO(0x120000) + #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) + #define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13) +@@ -9117,9 +9113,11 @@ enum { + #define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4) + #define GEN9_RENDER_PG_IDLE_HYSTERESIS _MMIO(0xA0C8) + #define GEN9_PG_ENABLE _MMIO(0xA210) +-#define GEN9_RENDER_PG_ENABLE REG_BIT(0) +-#define GEN9_MEDIA_PG_ENABLE REG_BIT(1) +-#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2) ++#define GEN9_RENDER_PG_ENABLE REG_BIT(0) ++#define GEN9_MEDIA_PG_ENABLE REG_BIT(1) ++#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2) ++#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) ++#define VDN_MFX_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) + #define GEN8_PUSHBUS_CONTROL _MMIO(0xA248) + #define GEN8_PUSHBUS_ENABLE _MMIO(0xA250) + #define GEN8_PUSHBUS_SHIFT _MMIO(0xA25C) +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -7124,23 +7124,10 @@ static void icl_init_clock_gating(struct + + static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) + { +- u32 vd_pg_enable = 0; +- unsigned int i; +- + /* Wa_1409120013:tgl */ + I915_WRITE(ILK_DPFC_CHICKEN, + ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); + +- /* This is not a WA. 
Enable VD HCP & MFX_ENC powergate */ +- for (i = 0; i < I915_MAX_VCS; i++) { +- if (HAS_ENGINE(&dev_priv->gt, _VCS(i))) +- vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) | +- VDN_MFX_POWERGATE_ENABLE(i); +- } +- +- I915_WRITE(POWERGATE_ENABLE, +- I915_READ(POWERGATE_ENABLE) | vd_pg_enable); +- + /* Wa_1409825376:tgl (pre-prod)*/ + if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0)) + I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) | diff --git a/queue-5.9/fanotify-fix-logic-of-reporting-name-info-with-watched-parent.patch b/queue-5.9/fanotify-fix-logic-of-reporting-name-info-with-watched-parent.patch new file mode 100644 index 00000000000..5cfcaec5472 --- /dev/null +++ b/queue-5.9/fanotify-fix-logic-of-reporting-name-info-with-watched-parent.patch @@ -0,0 +1,91 @@ +From 7372e79c9eb9d7034e498721eb2861ae4fdbc618 Mon Sep 17 00:00:00 2001 +From: Amir Goldstein +Date: Sun, 8 Nov 2020 12:59:06 +0200 +Subject: fanotify: fix logic of reporting name info with watched parent + +From: Amir Goldstein + +commit 7372e79c9eb9d7034e498721eb2861ae4fdbc618 upstream. + +The victim inode's parent and name info is required when an event +needs to be delivered to a group interested in filename info OR +when the inode's parent is interested in an event on its children. + +Let us call the first condition 'parent_needed' and the second +condition 'parent_interested'. + +In fsnotify_parent(), the condition where the inode's parent is +interested in some events on its children, but not necessarily +interested the specific event is called 'parent_watched'. + +fsnotify_parent() tests the condition (!parent_watched && !parent_needed) +for sending the event without parent and name info, which is correct. + +It then wrongly assumes that parent_watched implies !parent_needed +and tests the condition (parent_watched && !parent_interested) +for sending the event without parent and name info, which is wrong, +because parent may still be needed by some group. + +For example, after initializing a group with FAN_REPORT_DFID_NAME and +adding a FAN_MARK_MOUNT with FAN_OPEN mask, open events on non-directory +children of "testdir" are delivered with file name info. + +After adding another mark to the same group on the parent "testdir" +with FAN_CLOSE|FAN_EVENT_ON_CHILD mask, open events on non-directory +children of "testdir" are no longer delivered with file name info. + +Fix the logic and use auxiliary variables to clarify the conditions. 
+ +Fixes: 9b93f33105f5 ("fsnotify: send event with parent/name info to sb/mount/non-dir marks") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20201108105906.8493-1-amir73il@gmail.com +Signed-off-by: Amir Goldstein +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/notify/fsnotify.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/fs/notify/fsnotify.c ++++ b/fs/notify/fsnotify.c +@@ -178,6 +178,7 @@ int __fsnotify_parent(struct dentry *den + struct inode *inode = d_inode(dentry); + struct dentry *parent; + bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; ++ bool parent_needed, parent_interested; + __u32 p_mask; + struct inode *p_inode = NULL; + struct name_snapshot name; +@@ -193,7 +194,8 @@ int __fsnotify_parent(struct dentry *den + return 0; + + parent = NULL; +- if (!parent_watched && !fsnotify_event_needs_parent(inode, mnt, mask)) ++ parent_needed = fsnotify_event_needs_parent(inode, mnt, mask); ++ if (!parent_watched && !parent_needed) + goto notify; + + /* Does parent inode care about events on children? */ +@@ -205,17 +207,17 @@ int __fsnotify_parent(struct dentry *den + + /* + * Include parent/name in notification either if some notification +- * groups require parent info (!parent_watched case) or the parent is +- * interested in this event. ++ * groups require parent info or the parent is interested in this event. + */ +- if (!parent_watched || (mask & p_mask & ALL_FSNOTIFY_EVENTS)) { ++ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; ++ if (parent_needed || parent_interested) { + /* When notifying parent, child should be passed as data */ + WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); + + /* Notify both parent and child with child name info */ + take_dentry_name_snapshot(&name, dentry); + file_name = &name.name; +- if (parent_watched) ++ if (parent_interested) + mask |= FS_EVENT_ON_CHILD; + } + diff --git a/queue-5.9/gfs2-fix-regression-in-freeze_go_sync.patch b/queue-5.9/gfs2-fix-regression-in-freeze_go_sync.patch new file mode 100644 index 00000000000..7c7625e87e7 --- /dev/null +++ b/queue-5.9/gfs2-fix-regression-in-freeze_go_sync.patch @@ -0,0 +1,60 @@ +From 20b329129009caf1c646152abe09b697227e1c37 Mon Sep 17 00:00:00 2001 +From: Bob Peterson +Date: Wed, 18 Nov 2020 08:54:31 -0500 +Subject: gfs2: Fix regression in freeze_go_sync + +From: Bob Peterson + +commit 20b329129009caf1c646152abe09b697227e1c37 upstream. + +Patch 541656d3a513 ("gfs2: freeze should work on read-only mounts") changed +the check for glock state in function freeze_go_sync() from "gl->gl_state +== LM_ST_SHARED" to "gl->gl_req == LM_ST_EXCLUSIVE". That's wrong and it +regressed gfs2's freeze/thaw mechanism because it caused only the freezing +node (which requests the glock in EX) to queue freeze work. + +All nodes go through this go_sync code path during the freeze to drop their +SHared hold on the freeze glock, allowing the freezing node to acquire it +in EXclusive mode. But all the nodes must freeze access to the file system +locally, so they ALL must queue freeze work. The freeze_work calls +freeze_func, which makes a request to reacquire the freeze glock in SH, +effectively blocking until the thaw from the EX holder. Once thawed, the +freezing node drops its EX hold on the freeze glock, then the (blocked) +freeze_func reacquires the freeze glock in SH again (on all nodes, including +the freezer) so all nodes go back to a thawed state. 
+ +This patch changes the check back to gl_state == LM_ST_SHARED like it was +prior to 541656d3a513. + +Fixes: 541656d3a513 ("gfs2: freeze should work on read-only mounts") +Cc: stable@vger.kernel.org # v5.8+ +Signed-off-by: Bob Peterson +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman + +--- + fs/gfs2/glops.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/fs/gfs2/glops.c ++++ b/fs/gfs2/glops.c +@@ -540,7 +540,18 @@ static int freeze_go_sync(struct gfs2_gl + int error = 0; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + +- if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) { ++ /* ++ * We need to check gl_state == LM_ST_SHARED here and not gl_req == ++ * LM_ST_EXCLUSIVE. That's because when any node does a freeze, ++ * all the nodes should have the freeze glock in SH mode and they all ++ * call do_xmote: One for EX and the others for UN. They ALL must ++ * freeze locally, and they ALL must queue freeze work. The freeze_work ++ * calls freeze_func, which tries to reacquire the freeze glock in SH, ++ * effectively waiting for the thaw on the node who holds it in EX. ++ * Once thawed, the work func acquires the freeze glock in ++ * SH and everybody goes back to thawed. ++ */ ++ if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp)) { + atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); + error = freeze_super(sdp->sd_vfs); + if (error) { diff --git a/queue-5.9/io_uring-don-t-double-complete-failed-reissue-request.patch b/queue-5.9/io_uring-don-t-double-complete-failed-reissue-request.patch new file mode 100644 index 00000000000..388979b7fc3 --- /dev/null +++ b/queue-5.9/io_uring-don-t-double-complete-failed-reissue-request.patch @@ -0,0 +1,35 @@ +From c993df5a688975bf9ce899706ca13d2bc8d6be25 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 17 Nov 2020 07:59:16 -0700 +Subject: io_uring: don't double complete failed reissue request + +From: Jens Axboe + +commit c993df5a688975bf9ce899706ca13d2bc8d6be25 upstream. + +Zorro reports that an xfstest test case is failing, and it turns out that +for the reissue path we can potentially issue a double completion on the +request for the failure path. There's an issue around the retry as well, +but for now, at least just make sure that we handle the error path +correctly. + +Cc: stable@vger.kernel.org +Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") +Reported-by: Zorro Lang +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + fs/io_uring.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2359,7 +2359,6 @@ static bool io_resubmit_prep(struct io_k + } + end_req: + req_set_fail_links(req); +- io_req_complete(req, ret); + return false; + } + #endif diff --git a/queue-5.9/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch b/queue-5.9/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch new file mode 100644 index 00000000000..9223cb9b367 --- /dev/null +++ b/queue-5.9/mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch @@ -0,0 +1,72 @@ +From 7bc40aedf24d31d8bea80e1161e996ef4299fb10 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Thu, 12 Nov 2020 11:22:04 +0100 +Subject: mac80211: free sta in sta_info_insert_finish() on errors + +From: Johannes Berg + +commit 7bc40aedf24d31d8bea80e1161e996ef4299fb10 upstream. 
+ +If sta_info_insert_finish() fails, we currently keep the station +around and free it only in the caller, but there's only one such +caller and it always frees it immediately. + +As syzbot found, another consequence of this split is that we can +put things that sleep only into __cleanup_single_sta() and not in +sta_info_free(), but this is the only place that requires such of +sta_info_free() now. + +Change this to free the station in sta_info_insert_finish(), in +which case we can still sleep. This will also let us unify the +cleanup code later. + +Cc: stable@vger.kernel.org +Fixes: dcd479e10a05 ("mac80211: always wind down STA state") +Reported-by: syzbot+32c6c38c4812d22f2f0b@syzkaller.appspotmail.com +Reported-by: syzbot+4c81fe92e372d26c4246@syzkaller.appspotmail.com +Reported-by: syzbot+6a7fe9faf0d1d61bc24a@syzkaller.appspotmail.com +Reported-by: syzbot+abed06851c5ffe010921@syzkaller.appspotmail.com +Reported-by: syzbot+b7aeb9318541a1c709f1@syzkaller.appspotmail.com +Reported-by: syzbot+d5a9416c6cafe53b5dd0@syzkaller.appspotmail.com +Link: https://lore.kernel.org/r/20201112112201.ee6b397b9453.I9c31d667a0ea2151441cc64ed6613d36c18a48e0@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/sta_info.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +--- a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -705,7 +705,7 @@ static int sta_info_insert_finish(struct + out_drop_sta: + local->num_sta--; + synchronize_net(); +- __cleanup_single_sta(sta); ++ cleanup_single_sta(sta); + out_err: + mutex_unlock(&local->sta_mtx); + kfree(sinfo); +@@ -724,19 +724,13 @@ int sta_info_insert_rcu(struct sta_info + + err = sta_info_insert_check(sta); + if (err) { ++ sta_info_free(local, sta); + mutex_unlock(&local->sta_mtx); + rcu_read_lock(); +- goto out_free; ++ return err; + } + +- err = sta_info_insert_finish(sta); +- if (err) +- goto out_free; +- +- return 0; +- out_free: +- sta_info_free(local, sta); +- return err; ++ return sta_info_insert_finish(sta); + } + + int sta_info_insert(struct sta_info *sta) diff --git a/queue-5.9/mac80211-minstrel-fix-tx-status-processing-corner-case.patch b/queue-5.9/mac80211-minstrel-fix-tx-status-processing-corner-case.patch new file mode 100644 index 00000000000..827e0345648 --- /dev/null +++ b/queue-5.9/mac80211-minstrel-fix-tx-status-processing-corner-case.patch @@ -0,0 +1,37 @@ +From b2911a84396f72149dce310a3b64d8948212c1b3 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Wed, 11 Nov 2020 19:33:59 +0100 +Subject: mac80211: minstrel: fix tx status processing corner case + +From: Felix Fietkau + +commit b2911a84396f72149dce310a3b64d8948212c1b3 upstream. + +Some drivers fill the status rate list without setting the rate index after +the final rate to -1. minstrel_ht already deals with this, but minstrel +doesn't, which causes it to get stuck at the lowest rate on these drivers. + +Fix this by checking the count as well. 
+ +Cc: stable@vger.kernel.org +Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") +Signed-off-by: Felix Fietkau +Link: https://lore.kernel.org/r/20201111183359.43528-3-nbd@nbd.name +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/rc80211_minstrel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mac80211/rc80211_minstrel.c ++++ b/net/mac80211/rc80211_minstrel.c +@@ -274,7 +274,7 @@ minstrel_tx_status(void *priv, struct ie + success = !!(info->flags & IEEE80211_TX_STAT_ACK); + + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { +- if (ar[i].idx < 0) ++ if (ar[i].idx < 0 || !ar[i].count) + break; + + ndx = rix_to_ndx(mi, ar[i].idx); diff --git a/queue-5.9/mac80211-minstrel-remove-deferred-sampling-code.patch b/queue-5.9/mac80211-minstrel-remove-deferred-sampling-code.patch new file mode 100644 index 00000000000..436cd382ebe --- /dev/null +++ b/queue-5.9/mac80211-minstrel-remove-deferred-sampling-code.patch @@ -0,0 +1,108 @@ +From 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Wed, 11 Nov 2020 19:33:58 +0100 +Subject: mac80211: minstrel: remove deferred sampling code + +From: Felix Fietkau + +commit 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d upstream. + +Deferring sampling attempts to the second stage has some bad interactions +with drivers that process the rate table in hardware and use the probe flag +to indicate probing packets (e.g. most mt76 drivers). On affected drivers +it can lead to probing not working at all. + +If the link conditions turn worse, it might not be such a good idea to +do a lot of sampling for lower rates in this case. + +Fix this by simply skipping the sample attempt instead of deferring it, +but keep the checks that would allow it to be sampled if it was skipped +too often, but only if it has less than 95% success probability. + +Also ensure that IEEE80211_TX_CTL_RATE_CTRL_PROBE is set for all probing +packets. + +Cc: stable@vger.kernel.org +Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") +Signed-off-by: Felix Fietkau +Link: https://lore.kernel.org/r/20201111183359.43528-2-nbd@nbd.name +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/rc80211_minstrel.c | 25 ++++--------------------- + net/mac80211/rc80211_minstrel.h | 1 - + 2 files changed, 4 insertions(+), 22 deletions(-) + +--- a/net/mac80211/rc80211_minstrel.c ++++ b/net/mac80211/rc80211_minstrel.c +@@ -287,12 +287,6 @@ minstrel_tx_status(void *priv, struct ie + mi->r[ndx].stats.success += success; + } + +- if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) +- mi->sample_packets++; +- +- if (mi->sample_deferred > 0) +- mi->sample_deferred--; +- + if (time_after(jiffies, mi->last_stats_update + + mp->update_interval / (mp->new_avg ? 2 : 1))) + minstrel_update_stats(mp, mi); +@@ -367,7 +361,7 @@ minstrel_get_rate(void *priv, struct iee + return; + + delta = (mi->total_packets * sampling_ratio / 100) - +- (mi->sample_packets + mi->sample_deferred / 2); ++ mi->sample_packets; + + /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; +@@ -376,7 +370,6 @@ minstrel_get_rate(void *priv, struct iee + return; + + if (mi->total_packets >= 10000) { +- mi->sample_deferred = 0; + mi->sample_packets = 0; + mi->total_packets = 0; + } else if (delta > mi->n_rates * 2) { +@@ -401,19 +394,8 @@ minstrel_get_rate(void *priv, struct iee + * rate sampling method should be used. 
+ * Respect such rates that are not sampled for 20 interations. + */ +- if (mrr_capable && +- msr->perfect_tx_time > mr->perfect_tx_time && +- msr->stats.sample_skipped < 20) { +- /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark +- * packets that have the sampling rate deferred to the +- * second MRR stage. Increase the sample counter only +- * if the deferred sample rate was actually used. +- * Use the sample_deferred counter to make sure that +- * the sampling is not done in large bursts */ +- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; +- rate++; +- mi->sample_deferred++; +- } else { ++ if (msr->perfect_tx_time < mr->perfect_tx_time || ++ msr->stats.sample_skipped >= 20) { + if (!msr->sample_limit) + return; + +@@ -433,6 +415,7 @@ minstrel_get_rate(void *priv, struct iee + + rate->idx = mi->r[ndx].rix; + rate->count = minstrel_get_retry_count(&mi->r[ndx], info); ++ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } + + +--- a/net/mac80211/rc80211_minstrel.h ++++ b/net/mac80211/rc80211_minstrel.h +@@ -126,7 +126,6 @@ struct minstrel_sta_info { + u8 max_prob_rate; + unsigned int total_packets; + unsigned int sample_packets; +- int sample_deferred; + + unsigned int sample_row; + unsigned int sample_column; diff --git a/queue-5.9/mm-fix-readahead_page_batch-for-retry-entries.patch b/queue-5.9/mm-fix-readahead_page_batch-for-retry-entries.patch new file mode 100644 index 00000000000..a4d80a72b12 --- /dev/null +++ b/queue-5.9/mm-fix-readahead_page_batch-for-retry-entries.patch @@ -0,0 +1,66 @@ +From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Sat, 21 Nov 2020 22:17:08 -0800 +Subject: mm: fix readahead_page_batch for retry entries + +From: Matthew Wilcox (Oracle) + +commit 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d upstream. + +Both btrfs and fuse have reported faults caused by seeing a retry entry +instead of the page they were looking for. This was caused by a missing +check in the iterator. + +As can be seen in the below panic log, the accessing 0x402 causes a +panic. In the xarray.h, 0x402 means RETRY_ENTRY. 
+ + BUG: kernel NULL pointer dereference, address: 0000000000000402 + CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1 + Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020 + RIP: 0010:fuse_readahead+0x152/0x470 [fuse] + Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01 + RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246 + RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002 + RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60 + RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0 + R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68 + FS: 00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0 + Call Trace: + read_pages+0x83/0x270 + page_cache_readahead_unbounded+0x197/0x230 + generic_file_buffered_read+0x57a/0xa20 + new_sync_read+0x112/0x1a0 + vfs_read+0xf8/0x180 + ksys_read+0x5f/0xe0 + do_syscall_64+0x33/0x80 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: 042124cc64c3 ("mm: add new readahead_control API") +Reported-by: David Sterba +Reported-by: Wonhyuk Yang +Signed-off-by: Matthew Wilcox (Oracle) +Signed-off-by: Andrew Morton +Cc: +Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org +Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/pagemap.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -794,6 +794,8 @@ static inline unsigned int __readahead_b + xas_set(&xas, rac->_index); + rcu_read_lock(); + xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { ++ if (xas_retry(&xas, page)) ++ continue; + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageTail(page), page); + array[i++] = page; diff --git a/queue-5.9/mm-memcg-slab-fix-root-memcg-vmstats.patch b/queue-5.9/mm-memcg-slab-fix-root-memcg-vmstats.patch new file mode 100644 index 00000000000..a61a6715269 --- /dev/null +++ b/queue-5.9/mm-memcg-slab-fix-root-memcg-vmstats.patch @@ -0,0 +1,62 @@ +From 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Sat, 21 Nov 2020 22:17:12 -0800 +Subject: mm: memcg/slab: fix root memcg vmstats + +From: Muchun Song + +commit 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb upstream. + +If we reparent the slab objects to the root memcg, when we free the slab +object, we need to update the per-memcg vmstats to keep it correct for +the root memcg. Now this at least affects the vmstat of +NR_KERNEL_STACK_KB for !CONFIG_VMAP_STACK when the thread stack size is +smaller than the PAGE_SIZE. 
+ +David said: + "I assume that without this fix that the root memcg's vmstat would + always be inflated if we reparented" + +Fixes: ec9f02384f60 ("mm: workingset: fix vmstat counters for shadow nodes") +Signed-off-by: Muchun Song +Signed-off-by: Andrew Morton +Reviewed-by: Shakeel Butt +Acked-by: Roman Gushchin +Acked-by: Johannes Weiner +Acked-by: David Rientjes +Cc: Michal Hocko +Cc: Vladimir Davydov +Cc: Christopher Lameter +Cc: Pekka Enberg +Cc: Joonsoo Kim +Cc: Roman Gushchin +Cc: Vlastimil Babka +Cc: Yafang Shao +Cc: Chris Down +Cc: [5.3+] +Link: https://lkml.kernel.org/r/20201110031015.15715-1-songmuchun@bytedance.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memcontrol.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -872,8 +872,13 @@ void __mod_lruvec_slab_state(void *p, en + rcu_read_lock(); + memcg = mem_cgroup_from_obj(p); + +- /* Untracked pages have no memcg, no lruvec. Update only the node */ +- if (!memcg || memcg == root_mem_cgroup) { ++ /* ++ * Untracked pages have no memcg, no lruvec. Update only the ++ * node. If we reparent the slab objects to the root memcg, ++ * when we free the slab object, we need to update the per-memcg ++ * vmstats to keep it correct for the root memcg. ++ */ ++ if (!memcg) { + __mod_node_page_state(pgdat, idx, val); + } else { + lruvec = mem_cgroup_lruvec(memcg, pgdat); diff --git a/queue-5.9/mm-never-attempt-async-page-lock-if-we-ve-transferred-data-already.patch b/queue-5.9/mm-never-attempt-async-page-lock-if-we-ve-transferred-data-already.patch new file mode 100644 index 00000000000..fe023546dd0 --- /dev/null +++ b/queue-5.9/mm-never-attempt-async-page-lock-if-we-ve-transferred-data-already.patch @@ -0,0 +1,62 @@ +From 0abed7c69b956d135cb6d320c350b2adb213e7d8 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 16 Nov 2020 13:36:24 -0700 +Subject: mm: never attempt async page lock if we've transferred data already + +From: Jens Axboe + +commit 0abed7c69b956d135cb6d320c350b2adb213e7d8 upstream. + +We catch the case where we enter generic_file_buffered_read() with data +already transferred, but we also need to be careful not to allow an async +page lock if we're looping transferring data. If not, we could be +returning -EIOCBQUEUED instead of the transferred amount, and it could +result in double waitqueue additions as well. + +Cc: stable@vger.kernel.org # v5.9 +Fixes: 1a0a7853b901 ("mm: support async buffered reads in generic_file_buffered_read()") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + mm/filemap.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -2327,10 +2327,15 @@ page_ok: + + page_not_up_to_date: + /* Get exclusive access to the page ... 
*/ +- if (iocb->ki_flags & IOCB_WAITQ) ++ if (iocb->ki_flags & IOCB_WAITQ) { ++ if (written) { ++ put_page(page); ++ goto out; ++ } + error = lock_page_async(page, iocb->ki_waitq); +- else ++ } else { + error = lock_page_killable(page); ++ } + if (unlikely(error)) + goto readpage_error; + +@@ -2373,10 +2378,15 @@ readpage: + } + + if (!PageUptodate(page)) { +- if (iocb->ki_flags & IOCB_WAITQ) ++ if (iocb->ki_flags & IOCB_WAITQ) { ++ if (written) { ++ put_page(page); ++ goto out; ++ } + error = lock_page_async(page, iocb->ki_waitq); +- else ++ } else { + error = lock_page_killable(page); ++ } + + if (unlikely(error)) + goto readpage_error; diff --git a/queue-5.9/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch b/queue-5.9/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch new file mode 100644 index 00000000000..c974d39fd09 --- /dev/null +++ b/queue-5.9/mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch @@ -0,0 +1,159 @@ +From bfe8cc1db02ab243c62780f17fc57f65bde0afe1 Mon Sep 17 00:00:00 2001 +From: Gerald Schaefer +Date: Sat, 21 Nov 2020 22:17:15 -0800 +Subject: mm/userfaultfd: do not access vma->vm_mm after calling handle_userfault() + +From: Gerald Schaefer + +commit bfe8cc1db02ab243c62780f17fc57f65bde0afe1 upstream. + +Alexander reported a syzkaller / KASAN finding on s390, see below for +complete output. + +In do_huge_pmd_anonymous_page(), the pre-allocated pagetable will be +freed in some cases. In the case of userfaultfd_missing(), this will +happen after calling handle_userfault(), which might have released the +mmap_lock. Therefore, the following pte_free(vma->vm_mm, pgtable) will +access an unstable vma->vm_mm, which could have been freed or re-used +already. + +For all architectures other than s390 this will go w/o any negative +impact, because pte_free() simply frees the page and ignores the +passed-in mm. The implementation for SPARC32 would also access +mm->page_table_lock for pte_free(), but there is no THP support in +SPARC32, so the buggy code path will not be used there. + +For s390, the mm->context.pgtable_list is being used to maintain the 2K +pagetable fragments, and operating on an already freed or even re-used +mm could result in various more or less subtle bugs due to list / +pagetable corruption. + +Fix this by calling pte_free() before handle_userfault(), similar to how +it is already done in __do_huge_pmd_anonymous_page() for the WRITE / +non-huge_zero_page case. + +Commit 6b251fc96cf2c ("userfaultfd: call handle_userfault() for +userfaultfd_missing() faults") actually introduced both, the +do_huge_pmd_anonymous_page() and also __do_huge_pmd_anonymous_page() +changes wrt to calling handle_userfault(), but only in the latter case +it put the pte_free() before calling handle_userfault(). 
+ + BUG: KASAN: use-after-free in do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 + Read of size 8 at addr 00000000962d6988 by task syz-executor.0/9334 + + CPU: 1 PID: 9334 Comm: syz-executor.0 Not tainted 5.10.0-rc1-syzkaller-07083-g4c9720875573 #0 + Hardware name: IBM 3906 M04 701 (KVM/Linux) + Call Trace: + do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 + create_huge_pmd mm/memory.c:4256 [inline] + __handle_mm_fault+0xe6e/0x1068 mm/memory.c:4480 + handle_mm_fault+0x288/0x748 mm/memory.c:4607 + do_exception+0x394/0xae0 arch/s390/mm/fault.c:479 + do_dat_exception+0x34/0x80 arch/s390/mm/fault.c:567 + pgm_check_handler+0x1da/0x22c arch/s390/kernel/entry.S:706 + copy_from_user_mvcos arch/s390/lib/uaccess.c:111 [inline] + raw_copy_from_user+0x3a/0x88 arch/s390/lib/uaccess.c:174 + _copy_from_user+0x48/0xa8 lib/usercopy.c:16 + copy_from_user include/linux/uaccess.h:192 [inline] + __do_sys_sigaltstack kernel/signal.c:4064 [inline] + __s390x_sys_sigaltstack+0xc8/0x240 kernel/signal.c:4060 + system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 + + Allocated by task 9334: + slab_alloc_node mm/slub.c:2891 [inline] + slab_alloc mm/slub.c:2899 [inline] + kmem_cache_alloc+0x118/0x348 mm/slub.c:2904 + vm_area_dup+0x9c/0x2b8 kernel/fork.c:356 + __split_vma+0xba/0x560 mm/mmap.c:2742 + split_vma+0xca/0x108 mm/mmap.c:2800 + mlock_fixup+0x4ae/0x600 mm/mlock.c:550 + apply_vma_lock_flags+0x2c6/0x398 mm/mlock.c:619 + do_mlock+0x1aa/0x718 mm/mlock.c:711 + __do_sys_mlock2 mm/mlock.c:738 [inline] + __s390x_sys_mlock2+0x86/0xa8 mm/mlock.c:728 + system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 + + Freed by task 9333: + slab_free mm/slub.c:3142 [inline] + kmem_cache_free+0x7c/0x4b8 mm/slub.c:3158 + __vma_adjust+0x7b2/0x2508 mm/mmap.c:960 + vma_merge+0x87e/0xce0 mm/mmap.c:1209 + userfaultfd_release+0x412/0x6b8 fs/userfaultfd.c:868 + __fput+0x22c/0x7a8 fs/file_table.c:281 + task_work_run+0x200/0x320 kernel/task_work.c:151 + tracehook_notify_resume include/linux/tracehook.h:188 [inline] + do_notify_resume+0x100/0x148 arch/s390/kernel/signal.c:538 + system_call+0xe6/0x28c arch/s390/kernel/entry.S:416 + + The buggy address belongs to the object at 00000000962d6948 which belongs to the cache vm_area_struct of size 200 + The buggy address is located 64 bytes inside of 200-byte region [00000000962d6948, 00000000962d6a10) + The buggy address belongs to the page: page:00000000313a09fe refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x962d6 flags: 0x3ffff00000000200(slab) + raw: 3ffff00000000200 000040000257e080 0000000c0000000c 000000008020ba00 + raw: 0000000000000000 000f001e00000000 ffffffff00000001 0000000096959501 + page dumped because: kasan: bad access detected + page->mem_cgroup:0000000096959501 + + Memory state around the buggy address: + 00000000962d6880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00000000962d6900: 00 fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb + >00000000962d6980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + 00000000962d6a00: fb fb fc fc fc fc fc fc fc fc 00 00 00 00 00 00 + 00000000962d6a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ================================================================== + +Fixes: 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults") +Reported-by: Alexander Egorenkov +Signed-off-by: Gerald Schaefer +Signed-off-by: Andrew Morton +Cc: Andrea Arcangeli +Cc: Heiko Carstens +Cc: [4.3+] +Link: https://lkml.kernel.org/r/20201110190329.11920-1-gerald.schaefer@linux.ibm.com 
+Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -710,7 +710,6 @@ vm_fault_t do_huge_pmd_anonymous_page(st + transparent_hugepage_use_zero_page()) { + pgtable_t pgtable; + struct page *zero_page; +- bool set; + vm_fault_t ret; + pgtable = pte_alloc_one(vma->vm_mm); + if (unlikely(!pgtable)) +@@ -723,25 +722,25 @@ vm_fault_t do_huge_pmd_anonymous_page(st + } + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + ret = 0; +- set = false; + if (pmd_none(*vmf->pmd)) { + ret = check_stable_address_space(vma->vm_mm); + if (ret) { + spin_unlock(vmf->ptl); ++ pte_free(vma->vm_mm, pgtable); + } else if (userfaultfd_missing(vma)) { + spin_unlock(vmf->ptl); ++ pte_free(vma->vm_mm, pgtable); + ret = handle_userfault(vmf, VM_UFFD_MISSING); + VM_BUG_ON(ret & VM_FAULT_FALLBACK); + } else { + set_huge_zero_page(pgtable, vma->vm_mm, vma, + haddr, vmf->pmd, zero_page); + spin_unlock(vmf->ptl); +- set = true; + } +- } else ++ } else { + spin_unlock(vmf->ptl); +- if (!set) + pte_free(vma->vm_mm, pgtable); ++ } + return ret; + } + gfp = alloc_hugepage_direct_gfpmask(vma); diff --git a/queue-5.9/mmc-sdhci-of-arasan-allow-configuring-zero-tap-values.patch b/queue-5.9/mmc-sdhci-of-arasan-allow-configuring-zero-tap-values.patch new file mode 100644 index 00000000000..a22f41a8908 --- /dev/null +++ b/queue-5.9/mmc-sdhci-of-arasan-allow-configuring-zero-tap-values.patch @@ -0,0 +1,96 @@ +From 9e9534329306fcd7ea1b84f14860a3c04ebe7f1a Mon Sep 17 00:00:00 2001 +From: Manish Narani +Date: Mon, 16 Nov 2020 14:02:43 +0530 +Subject: mmc: sdhci-of-arasan: Allow configuring zero tap values + +From: Manish Narani + +commit 9e9534329306fcd7ea1b84f14860a3c04ebe7f1a upstream. + +Allow configuring the Output and Input tap values with zero to avoid +failures in some cases (one of them is SD boot mode) where the output +and input tap values may be already set to non-zero. + +Fixes: a5c8b2ae2e51 ("mmc: sdhci-of-arasan: Add support for ZynqMP Platform Tap Delays Setup") +Signed-off-by: Sai Krishna Potthuri +Signed-off-by: Manish Narani +Acked-by: Michal Simek +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1605515565-117562-2-git-send-email-manish.narani@xilinx.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/sdhci-of-arasan.c | 40 +++++++------------------------------ + 1 file changed, 8 insertions(+), 32 deletions(-) + +--- a/drivers/mmc/host/sdhci-of-arasan.c ++++ b/drivers/mmc/host/sdhci-of-arasan.c +@@ -600,14 +600,8 @@ static int sdhci_zynqmp_sdcardclk_set_ph + u8 tap_delay, tap_max = 0; + int ret; + +- /* +- * This is applicable for SDHCI_SPEC_300 and above +- * ZynqMP does not set phase for <=25MHz clock. +- * If degrees is zero, no need to do anything. +- */ +- if (host->version < SDHCI_SPEC_300 || +- host->timing == MMC_TIMING_LEGACY || +- host->timing == MMC_TIMING_UHS_SDR12 || !degrees) ++ /* This is applicable for SDHCI_SPEC_300 and above */ ++ if (host->version < SDHCI_SPEC_300) + return 0; + + switch (host->timing) { +@@ -668,14 +662,8 @@ static int sdhci_zynqmp_sampleclk_set_ph + u8 tap_delay, tap_max = 0; + int ret; + +- /* +- * This is applicable for SDHCI_SPEC_300 and above +- * ZynqMP does not set phase for <=25MHz clock. +- * If degrees is zero, no need to do anything. 
+- */ +- if (host->version < SDHCI_SPEC_300 || +- host->timing == MMC_TIMING_LEGACY || +- host->timing == MMC_TIMING_UHS_SDR12 || !degrees) ++ /* This is applicable for SDHCI_SPEC_300 and above */ ++ if (host->version < SDHCI_SPEC_300) + return 0; + + switch (host->timing) { +@@ -733,14 +721,8 @@ static int sdhci_versal_sdcardclk_set_ph + struct sdhci_host *host = sdhci_arasan->host; + u8 tap_delay, tap_max = 0; + +- /* +- * This is applicable for SDHCI_SPEC_300 and above +- * Versal does not set phase for <=25MHz clock. +- * If degrees is zero, no need to do anything. +- */ +- if (host->version < SDHCI_SPEC_300 || +- host->timing == MMC_TIMING_LEGACY || +- host->timing == MMC_TIMING_UHS_SDR12 || !degrees) ++ /* This is applicable for SDHCI_SPEC_300 and above */ ++ if (host->version < SDHCI_SPEC_300) + return 0; + + switch (host->timing) { +@@ -804,14 +786,8 @@ static int sdhci_versal_sampleclk_set_ph + struct sdhci_host *host = sdhci_arasan->host; + u8 tap_delay, tap_max = 0; + +- /* +- * This is applicable for SDHCI_SPEC_300 and above +- * Versal does not set phase for <=25MHz clock. +- * If degrees is zero, no need to do anything. +- */ +- if (host->version < SDHCI_SPEC_300 || +- host->timing == MMC_TIMING_LEGACY || +- host->timing == MMC_TIMING_UHS_SDR12 || !degrees) ++ /* This is applicable for SDHCI_SPEC_300 and above */ ++ if (host->version < SDHCI_SPEC_300) + return 0; + + switch (host->timing) { diff --git a/queue-5.9/mmc-sdhci-of-arasan-issue-dll-reset-explicitly.patch b/queue-5.9/mmc-sdhci-of-arasan-issue-dll-reset-explicitly.patch new file mode 100644 index 00000000000..13b4df0dbd2 --- /dev/null +++ b/queue-5.9/mmc-sdhci-of-arasan-issue-dll-reset-explicitly.patch @@ -0,0 +1,52 @@ +From d06d60d52ec0b0eef702dd3e7b4699f0b589ad0f Mon Sep 17 00:00:00 2001 +From: Manish Narani +Date: Mon, 16 Nov 2020 14:02:45 +0530 +Subject: mmc: sdhci-of-arasan: Issue DLL reset explicitly + +From: Manish Narani + +commit d06d60d52ec0b0eef702dd3e7b4699f0b589ad0f upstream. + +In the current implementation DLL reset will be issued for +each ITAP and OTAP setting inside ATF, this is creating issues +in some scenarios and this sequence is not inline with the TRM. +To fix the issue, DLL reset should be removed from the ATF and +host driver will request it explicitly. +This patch update host driver to explicitly request for DLL reset +before ITAP (assert DLL) and after OTAP (release DLL) settings. 
+ +Fixes: a5c8b2ae2e51 ("mmc: sdhci-of-arasan: Add support for ZynqMP Platform Tap Delays Setup") +Signed-off-by: Sai Krishna Potthuri +Signed-off-by: Manish Narani +Acked-by: Michal Simek +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1605515565-117562-4-git-send-email-manish.narani@xilinx.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/sdhci-of-arasan.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/mmc/host/sdhci-of-arasan.c ++++ b/drivers/mmc/host/sdhci-of-arasan.c +@@ -635,6 +635,9 @@ static int sdhci_zynqmp_sdcardclk_set_ph + if (ret) + pr_err("Error setting Output Tap Delay\n"); + ++ /* Release DLL Reset */ ++ zynqmp_pm_sd_dll_reset(node_id, PM_DLL_RESET_RELEASE); ++ + return ret; + } + +@@ -669,6 +672,9 @@ static int sdhci_zynqmp_sampleclk_set_ph + if (host->version < SDHCI_SPEC_300) + return 0; + ++ /* Assert DLL Reset */ ++ zynqmp_pm_sd_dll_reset(node_id, PM_DLL_RESET_ASSERT); ++ + switch (host->timing) { + case MMC_TIMING_MMC_HS: + case MMC_TIMING_SD_HS: diff --git a/queue-5.9/mmc-sdhci-of-arasan-use-mask-writes-for-tap-delays.patch b/queue-5.9/mmc-sdhci-of-arasan-use-mask-writes-for-tap-delays.patch new file mode 100644 index 00000000000..16f3fb244ee --- /dev/null +++ b/queue-5.9/mmc-sdhci-of-arasan-use-mask-writes-for-tap-delays.patch @@ -0,0 +1,54 @@ +From d338c6d01dc614cad253d6c042501fa0eb242d5c Mon Sep 17 00:00:00 2001 +From: Manish Narani +Date: Mon, 16 Nov 2020 14:02:44 +0530 +Subject: mmc: sdhci-of-arasan: Use Mask writes for Tap delays + +From: Manish Narani + +commit d338c6d01dc614cad253d6c042501fa0eb242d5c upstream. + +Mask the ITAP and OTAP delay bits before updating with the new +tap value for Versal platform. + +Fixes: 1a470721c8f5 ("sdhci: arasan: Add support for Versal Tap Delays") +Signed-off-by: Sai Krishna Potthuri +Signed-off-by: Manish Narani +Acked-by: Michal Simek +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1605515565-117562-3-git-send-email-manish.narani@xilinx.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/sdhci-of-arasan.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/mmc/host/sdhci-of-arasan.c ++++ b/drivers/mmc/host/sdhci-of-arasan.c +@@ -30,7 +30,10 @@ + #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 + + #define SDHCI_ARASAN_ITAPDLY_REGISTER 0xF0F8 ++#define SDHCI_ARASAN_ITAPDLY_SEL_MASK 0xFF ++ + #define SDHCI_ARASAN_OTAPDLY_REGISTER 0xF0FC ++#define SDHCI_ARASAN_OTAPDLY_SEL_MASK 0x3F + + #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200 + #define VENDOR_ENHANCED_STROBE BIT(0) +@@ -755,6 +758,7 @@ static int sdhci_versal_sdcardclk_set_ph + regval = sdhci_readl(host, SDHCI_ARASAN_OTAPDLY_REGISTER); + regval |= SDHCI_OTAPDLY_ENABLE; + sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER); ++ regval &= ~SDHCI_ARASAN_OTAPDLY_SEL_MASK; + regval |= tap_delay; + sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER); + } +@@ -822,6 +826,7 @@ static int sdhci_versal_sampleclk_set_ph + sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER); + regval |= SDHCI_ITAPDLY_ENABLE; + sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER); ++ regval &= ~SDHCI_ARASAN_ITAPDLY_SEL_MASK; + regval |= tap_delay; + sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER); + regval &= ~SDHCI_ITAPDLY_CHGWIN; diff --git a/queue-5.9/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch b/queue-5.9/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch new 
file mode 100644 index 00000000000..13543508c8e --- /dev/null +++ b/queue-5.9/mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch @@ -0,0 +1,63 @@ +From 60d53566100abde4acc5504b524bc97f89015690 Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Thu, 12 Nov 2020 15:36:56 +0200 +Subject: mmc: sdhci-pci: Prefer SDR25 timing for High Speed mode for BYT-based Intel controllers + +From: Adrian Hunter + +commit 60d53566100abde4acc5504b524bc97f89015690 upstream. + +A UHS setting of SDR25 can give better results for High Speed mode. +This is because there is no setting corresponding to high speed. Currently +SDHCI sets no value, which means zero which is also the setting for SDR12. +There was an attempt to change this in sdhci.c but it caused problems for +some drivers, so it was reverted and the change was made to sdhci-brcmstb +in commit 2fefc7c5f7d16e ("mmc: sdhci-brcmstb: Fix incorrect switch to HS +mode"). Several other drivers also do this. + +Signed-off-by: Adrian Hunter +Cc: stable@vger.kernel.org # v5.4+ +Link: https://lore.kernel.org/r/20201112133656.20317-1-adrian.hunter@intel.com +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/sdhci-pci-core.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/host/sdhci-pci-core.c ++++ b/drivers/mmc/host/sdhci-pci-core.c +@@ -665,6 +665,15 @@ static void sdhci_intel_set_power(struct + } + } + ++static void sdhci_intel_set_uhs_signaling(struct sdhci_host *host, ++ unsigned int timing) ++{ ++ /* Set UHS timing to SDR25 for High Speed mode */ ++ if (timing == MMC_TIMING_MMC_HS || timing == MMC_TIMING_SD_HS) ++ timing = MMC_TIMING_UHS_SDR25; ++ sdhci_set_uhs_signaling(host, timing); ++} ++ + #define INTEL_HS400_ES_REG 0x78 + #define INTEL_HS400_ES_BIT BIT(0) + +@@ -721,7 +730,7 @@ static const struct sdhci_ops sdhci_inte + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, +- .set_uhs_signaling = sdhci_set_uhs_signaling, ++ .set_uhs_signaling = sdhci_intel_set_uhs_signaling, + .hw_reset = sdhci_pci_hw_reset, + }; + +@@ -731,7 +740,7 @@ static const struct sdhci_ops sdhci_inte + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_cqhci_reset, +- .set_uhs_signaling = sdhci_set_uhs_signaling, ++ .set_uhs_signaling = sdhci_intel_set_uhs_signaling, + .hw_reset = sdhci_pci_hw_reset, + .irq = sdhci_cqhci_irq, + }; diff --git a/queue-5.9/ptrace-set-pf_superpriv-when-checking-capability.patch b/queue-5.9/ptrace-set-pf_superpriv-when-checking-capability.patch new file mode 100644 index 00000000000..ab0efe8b438 --- /dev/null +++ b/queue-5.9/ptrace-set-pf_superpriv-when-checking-capability.patch @@ -0,0 +1,86 @@ +From cf23705244c947151179f929774fabf71e239eee Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Fri, 30 Oct 2020 13:38:48 +0100 +Subject: ptrace: Set PF_SUPERPRIV when checking capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit cf23705244c947151179f929774fabf71e239eee upstream. + +Commit 69f594a38967 ("ptrace: do not audit capability check when outputing +/proc/pid/stat") replaced the use of ns_capable() with +has_ns_capability{,_noaudit}() which doesn't set PF_SUPERPRIV. 
+ +Commit 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in +ptrace_has_cap()") replaced has_ns_capability{,_noaudit}() with +security_capable(), which doesn't set PF_SUPERPRIV neither. + +Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a +new ns_capable_noaudit() helper is available. Let's use it! + +As a result, the signature of ptrace_has_cap() is restored to its original one. + +Cc: Christian Brauner +Cc: Eric Paris +Cc: Jann Horn +Cc: Kees Cook +Cc: Oleg Nesterov +Cc: Serge E. Hallyn +Cc: Tyler Hicks +Cc: stable@vger.kernel.org +Fixes: 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()") +Fixes: 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") +Signed-off-by: Mickaël Salaün +Reviewed-by: Jann Horn +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20201030123849.770769-2-mic@digikod.net +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/ptrace.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -264,17 +264,11 @@ static int ptrace_check_attach(struct ta + return ret; + } + +-static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, +- unsigned int mode) ++static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) + { +- int ret; +- + if (mode & PTRACE_MODE_NOAUDIT) +- ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT); +- else +- ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE); +- +- return ret == 0; ++ return ns_capable_noaudit(ns, CAP_SYS_PTRACE); ++ return ns_capable(ns, CAP_SYS_PTRACE); + } + + /* Returns 0 on success, -errno on denial. */ +@@ -326,7 +320,7 @@ static int __ptrace_may_access(struct ta + gid_eq(caller_gid, tcred->sgid) && + gid_eq(caller_gid, tcred->gid)) + goto ok; +- if (ptrace_has_cap(cred, tcred->user_ns, mode)) ++ if (ptrace_has_cap(tcred->user_ns, mode)) + goto ok; + rcu_read_unlock(); + return -EPERM; +@@ -345,7 +339,7 @@ ok: + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && +- !ptrace_has_cap(cred, mm->user_ns, mode))) ++ !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; + + return security_ptrace_access_check(task, mode); diff --git a/queue-5.9/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch b/queue-5.9/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch new file mode 100644 index 00000000000..9352685456b --- /dev/null +++ b/queue-5.9/s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch @@ -0,0 +1,43 @@ +From 78d732e1f326f74f240d416af9484928303d9951 Mon Sep 17 00:00:00 2001 +From: Thomas Richter +Date: Wed, 11 Nov 2020 16:26:25 +0100 +Subject: s390/cpum_sf.c: fix file permission for cpum_sfb_size + +From: Thomas Richter + +commit 78d732e1f326f74f240d416af9484928303d9951 upstream. + +This file is installed by the s390 CPU Measurement sampling +facility device driver to export supported minimum and +maximum sample buffer sizes. +This file is read by lscpumf tool to display the details +of the device driver capabilities. The lscpumf tool might +be invoked by a non-root user. In this case it does not +print anything because the file contents can not be read. + +Fix this by allowing read access for all users. Reading +the file contents is ok, changing the file contents is +left to the root user only. 
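(In octal terms the change only widens read access; write stays root-only:

     0640  rw- r-- ---   owner root may read/write, group may read, others get -EACCES
     0644  rw- r-- r--   owner root may read/write, everyone may read
)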
+ +For further reference and details see: + [1] https://github.com/ibm-s390-tools/s390-tools/issues/97 + +Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur") +Cc: # 3.14 +Signed-off-by: Thomas Richter +Acked-by: Sumanth Korikkar +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kernel/perf_cpum_sf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/kernel/perf_cpum_sf.c ++++ b/arch/s390/kernel/perf_cpum_sf.c +@@ -2228,4 +2228,4 @@ out: + } + + arch_initcall(init_cpum_sampling_pmu); +-core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); ++core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/queue-5.9/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch b/queue-5.9/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch new file mode 100644 index 00000000000..66c91f50f52 --- /dev/null +++ b/queue-5.9/s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch @@ -0,0 +1,47 @@ +From 6f117cb854a44a79898d844e6ae3fd23bd94e786 Mon Sep 17 00:00:00 2001 +From: Stefan Haberland +Date: Mon, 16 Nov 2020 16:23:47 +0100 +Subject: s390/dasd: fix null pointer dereference for ERP requests + +From: Stefan Haberland + +commit 6f117cb854a44a79898d844e6ae3fd23bd94e786 upstream. + +When requeueing all requests on the device request queue to the blocklayer +we might get to an ERP (error recovery) request that is a copy of an +original CQR. + +Those requests do not have blocklayer request information or a pointer to +the dasd_queue set. When trying to access those data it will lead to a +null pointer dereference in dasd_requeue_all_requests(). + +Fix by checking if the request is an ERP request that can simply be +ignored. The blocklayer request will be requeued by the original CQR that +is on the device queue right behind the ERP request. + +Fixes: 9487cfd3430d ("s390/dasd: fix handling of internal requests") +Cc: #4.16 +Signed-off-by: Stefan Haberland +Reviewed-by: Jan Hoeppner +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/block/dasd.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/s390/block/dasd.c ++++ b/drivers/s390/block/dasd.c +@@ -2980,6 +2980,12 @@ static int _dasd_requeue_request(struct + + if (!block) + return -EINVAL; ++ /* ++ * If the request is an ERP request there is nothing to requeue. ++ * This will be done with the remaining original request. ++ */ ++ if (cqr->refers) ++ return 0; + spin_lock_irq(&cqr->dq->lock); + req = (struct request *) cqr->callback_data; + blk_mq_requeue_request(req, false); diff --git a/queue-5.9/s390-fix-system-call-exit-path.patch b/queue-5.9/s390-fix-system-call-exit-path.patch new file mode 100644 index 00000000000..b66cf3e01f9 --- /dev/null +++ b/queue-5.9/s390-fix-system-call-exit-path.patch @@ -0,0 +1,52 @@ +From ce9dfafe29bed86fe3cda330ac6072ce84e1ff81 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Tue, 3 Nov 2020 16:55:43 +0100 +Subject: s390: fix system call exit path + +From: Heiko Carstens + +commit ce9dfafe29bed86fe3cda330ac6072ce84e1ff81 upstream. + +The system call exit path is running with interrupts enabled while +checking for TIF/PIF/CIF bits which require special handling. If all +bits have been checked interrupts are disabled and the kernel exits to +user space. +The problem is that after checking all bits and before interrupts are +disabled bits can be set already again, due to interrupt handling. 
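(Rendered as C rather than the real entry.S assembly, and with illustrative helper names, the pre-fix window looks roughly like this:

     if (!(ti_flags & _TIF_WORK) && !(pt_flags & _PIF_WORK)) {
             /* interrupts are still enabled here, so an IRQ can set e.g.
              * TIF_NEED_RESCHED after the check has already passed ...   */
             local_irq_disable();
             return_to_user();   /* ... and the new bit is only noticed on
                                  * the next kernel entry                  */
     }
)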
+ +This means that the kernel can exit to user space with some +TIF/PIF/CIF bits set, which should never happen. E.g. TIF_NEED_RESCHED +might be set, which might lead to additional latencies, since that bit +will only be recognized with next exit to user space. + +Fix this by checking the corresponding bits only when interrupts are +disabled. + +Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") +Cc: # 5.8 +Acked-by: Sven Schnelle +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kernel/entry.S | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -422,6 +422,7 @@ ENTRY(system_call) + #endif + LOCKDEP_SYS_EXIT + .Lsysc_tif: ++ DISABLE_INTS + TSTMSK __PT_FLAGS(%r11),_PIF_WORK + jnz .Lsysc_work + TSTMSK __TI_flags(%r12),_TIF_WORK +@@ -446,6 +447,7 @@ ENTRY(system_call) + # One of the work bits is on. Find out which one. + # + .Lsysc_work: ++ ENABLE_INTS + TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED + jo .Lsysc_reschedule + TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART diff --git a/queue-5.9/seccomp-set-pf_superpriv-when-checking-capability.patch b/queue-5.9/seccomp-set-pf_superpriv-when-checking-capability.patch new file mode 100644 index 00000000000..4aa9d137baf --- /dev/null +++ b/queue-5.9/seccomp-set-pf_superpriv-when-checking-capability.patch @@ -0,0 +1,56 @@ +From fb14528e443646dd3fd02df4437fcf5265b66baa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= +Date: Fri, 30 Oct 2020 13:38:49 +0100 +Subject: seccomp: Set PF_SUPERPRIV when checking capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +commit fb14528e443646dd3fd02df4437fcf5265b66baa upstream. + +Replace the use of security_capable(current_cred(), ...) with +ns_capable_noaudit() which set PF_SUPERPRIV. + +Since commit 98f368e9e263 ("kernel: Add noaudit variant of +ns_capable()"), a new ns_capable_noaudit() helper is available. Let's +use it! + +Cc: Jann Horn +Cc: Kees Cook +Cc: Tyler Hicks +Cc: Will Drewry +Cc: stable@vger.kernel.org +Fixes: e2cfabdfd075 ("seccomp: add system call filtering using BPF") +Signed-off-by: Mickaël Salaün +Reviewed-by: Jann Horn +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20201030123849.770769-3-mic@digikod.net +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/seccomp.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -38,7 +38,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -554,8 +554,7 @@ static struct seccomp_filter *seccomp_pr + * behavior of privileged children. 
+ */ + if (!task_no_new_privs(current) && +- security_capable(current_cred(), current_user_ns(), +- CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) ++ !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) + return ERR_PTR(-EACCES); + + /* Allocate a new seccomp_filter */ diff --git a/queue-5.9/series b/queue-5.9/series index fe8338585a9..312da0d9835 100644 --- a/queue-5.9/series +++ b/queue-5.9/series @@ -223,3 +223,30 @@ regulator-pfuze100-limit-pfuze-support-disable-sw-to-pfuze-100-200.patch regulator-fix-memory-leak-with-repeated-set_machine_constraints.patch regulator-avoid-resolve_supply-infinite-recursion.patch regulator-workaround-self-referent-regulators.patch +gfs2-fix-regression-in-freeze_go_sync.patch +xtensa-fix-tlbtemp-area-placement.patch +xtensa-disable-preemption-around-cache-alias-management-calls.patch +mac80211-minstrel-remove-deferred-sampling-code.patch +mac80211-minstrel-fix-tx-status-processing-corner-case.patch +mac80211-free-sta-in-sta_info_insert_finish-on-errors.patch +s390-fix-system-call-exit-path.patch +s390-cpum_sf.c-fix-file-permission-for-cpum_sfb_size.patch +s390-dasd-fix-null-pointer-dereference-for-erp-requests.patch +drivers-hv-vmbus-allow-cleanup-of-vmbus_connect_cpu-if-disconnected.patch +drm-amd-display-add-missing-pflip-irq-for-dcn2.0.patch +drm-i915-handle-max_bpc-16.patch +drm-i915-tgl-fix-media-power-gate-sequence.patch +io_uring-don-t-double-complete-failed-reissue-request.patch +mmc-sdhci-pci-prefer-sdr25-timing-for-high-speed-mode-for-byt-based-intel-controllers.patch +mmc-sdhci-of-arasan-allow-configuring-zero-tap-values.patch +mmc-sdhci-of-arasan-use-mask-writes-for-tap-delays.patch +mmc-sdhci-of-arasan-issue-dll-reset-explicitly.patch +blk-cgroup-fix-a-hd_struct-leak-in-blkcg_fill_root_iostats.patch +ptrace-set-pf_superpriv-when-checking-capability.patch +seccomp-set-pf_superpriv-when-checking-capability.patch +fanotify-fix-logic-of-reporting-name-info-with-watched-parent.patch +x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch +mm-never-attempt-async-page-lock-if-we-ve-transferred-data-already.patch +mm-fix-readahead_page_batch-for-retry-entries.patch +mm-memcg-slab-fix-root-memcg-vmstats.patch +mm-userfaultfd-do-not-access-vma-vm_mm-after-calling-handle_userfault.patch diff --git a/queue-5.9/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch b/queue-5.9/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch new file mode 100644 index 00000000000..c04fccdfacf --- /dev/null +++ b/queue-5.9/x86-microcode-intel-check-patch-signature-before-saving-microcode-for-early-loading.patch @@ -0,0 +1,173 @@ +From 1a371e67dc77125736cc56d3a0893f06b75855b6 Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Fri, 13 Nov 2020 09:59:23 +0800 +Subject: x86/microcode/intel: Check patch signature before saving microcode for early loading + +From: Chen Yu + +commit 1a371e67dc77125736cc56d3a0893f06b75855b6 upstream. + +Currently, scan_microcode() leverages microcode_matches() to check +if the microcode matches the CPU by comparing the family and model. +However, the processor stepping and flags of the microcode signature +should also be considered when saving a microcode patch for early +update. + +Use find_matching_signature() in scan_microcode() and get rid of the +now-unused microcode_matches() which is a good cleanup in itself. + +Complete the verification of the patch being saved for early loading in +save_microcode_patch() directly. 
This needs to be done there too because +save_mc_for_early() will call save_microcode_patch() too. + +The second reason why this needs to be done is because the loader still +tries to support, at least hypothetically, mixed-steppings systems and +thus adds all patches to the cache that belong to the same CPU model +albeit with different steppings. + +For example: + + microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6 + microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23 + microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 + microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23 + microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 + microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23 + +The patch which is being saved for early loading, however, can only be +the one which fits the CPU this runs on so do the signature verification +before saving. + + [ bp: Do signature verification in save_microcode_patch() + and rewrite commit message. ] + +Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") +Signed-off-by: Chen Yu +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535 +Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/microcode/intel.c | 63 +++++----------------------------- + 1 file changed, 10 insertions(+), 53 deletions(-) + +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, + return find_matching_signature(mc, csig, cpf); + } + +-/* +- * Given CPU signature and a microcode patch, this function finds if the +- * microcode patch has matching family and model with the CPU. +- * +- * %true - if there's a match +- * %false - otherwise +- */ +-static bool microcode_matches(struct microcode_header_intel *mc_header, +- unsigned long sig) +-{ +- unsigned long total_size = get_totalsize(mc_header); +- unsigned long data_size = get_datasize(mc_header); +- struct extended_sigtable *ext_header; +- unsigned int fam_ucode, model_ucode; +- struct extended_signature *ext_sig; +- unsigned int fam, model; +- int ext_sigcount, i; +- +- fam = x86_family(sig); +- model = x86_model(sig); +- +- fam_ucode = x86_family(mc_header->sig); +- model_ucode = x86_model(mc_header->sig); +- +- if (fam == fam_ucode && model == model_ucode) +- return true; +- +- /* Look for ext. 
headers: */ +- if (total_size <= data_size + MC_HEADER_SIZE) +- return false; +- +- ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; +- ext_sig = (void *)ext_header + EXT_HEADER_SIZE; +- ext_sigcount = ext_header->count; +- +- for (i = 0; i < ext_sigcount; i++) { +- fam_ucode = x86_family(ext_sig->sig); +- model_ucode = x86_model(ext_sig->sig); +- +- if (fam == fam_ucode && model == model_ucode) +- return true; +- +- ext_sig++; +- } +- return false; +-} +- + static struct ucode_patch *memdup_patch(void *data, unsigned int size) + { + struct ucode_patch *p; +@@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch( + return p; + } + +-static void save_microcode_patch(void *data, unsigned int size) ++static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) + { + struct microcode_header_intel *mc_hdr, *mc_saved_hdr; + struct ucode_patch *iter, *tmp, *p = NULL; +@@ -210,6 +163,9 @@ static void save_microcode_patch(void *d + if (!p) + return; + ++ if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) ++ return; ++ + /* + * Save for early loading. On 32-bit, that needs to be a physical + * address as the APs are running from physical addresses, before +@@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, + + size -= mc_size; + +- if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { ++ if (!find_matching_signature(data, uci->cpu_sig.sig, ++ uci->cpu_sig.pf)) { + data += mc_size; + continue; + } + + if (save) { +- save_microcode_patch(data, mc_size); ++ save_microcode_patch(uci, data, mc_size); + goto next; + } + +@@ -483,14 +440,14 @@ static void show_saved_mc(void) + * Save this microcode patch. It will be loaded early when a CPU is + * hot-added or resumes. + */ +-static void save_mc_for_early(u8 *mc, unsigned int size) ++static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) + { + /* Synchronization during CPU hotplug. */ + static DEFINE_MUTEX(x86_cpu_microcode_mutex); + + mutex_lock(&x86_cpu_microcode_mutex); + +- save_microcode_patch(mc, size); ++ save_microcode_patch(uci, mc, size); + show_saved_mc(); + + mutex_unlock(&x86_cpu_microcode_mutex); +@@ -935,7 +892,7 @@ static enum ucode_state generic_load_mic + * permanent memory. So it will be loaded early when a CPU is hot added + * or resumes. + */ +- save_mc_for_early(new_mc, new_mc_size); ++ save_mc_for_early(uci, new_mc, new_mc_size); + + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); diff --git a/queue-5.9/xtensa-disable-preemption-around-cache-alias-management-calls.patch b/queue-5.9/xtensa-disable-preemption-around-cache-alias-management-calls.patch new file mode 100644 index 00000000000..f38ca0c8eb1 --- /dev/null +++ b/queue-5.9/xtensa-disable-preemption-around-cache-alias-management-calls.patch @@ -0,0 +1,110 @@ +From 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Mon, 16 Nov 2020 01:38:59 -0800 +Subject: xtensa: disable preemption around cache alias management calls + +From: Max Filippov + +commit 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 upstream. + +Although cache alias management calls set up and tear down TLB entries +and fast_second_level_miss is able to restore TLB entry should it be +evicted they absolutely cannot preempt each other because they use the +same TLBTEMP area for different purposes. +Disable preemption around all cache alias management calls to enforce +that. 
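(The pattern every hunk below establishes, sketched in isolation with phys standing for the page's physical address:

     preempt_disable();                      /* callers share the TLBTEMP window    */
     t = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
     __flush_invalidate_dcache_page_alias(t, phys);  /* sets up and uses a TLBTEMP
                                                      * mapping for this page        */
     preempt_enable();                       /* only now may another task map the
                                              * same window for a different page     */
)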
+ +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/xtensa/mm/cache.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/arch/xtensa/mm/cache.c ++++ b/arch/xtensa/mm/cache.c +@@ -70,8 +70,10 @@ static inline void kmap_invalidate_coher + kvaddr = TLBTEMP_BASE_1 + + (page_to_phys(page) & DCACHE_ALIAS_MASK); + ++ preempt_disable(); + __invalidate_dcache_page_alias(kvaddr, + page_to_phys(page)); ++ preempt_enable(); + } + } + } +@@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page + if (!alias && !mapping) + return; + ++ preempt_disable(); + virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(virt, phys); + +@@ -166,6 +169,7 @@ void flush_dcache_page(struct page *page + + if (mapping) + __invalidate_icache_page_alias(virt, phys); ++ preempt_enable(); + } + + /* There shouldn't be an entry in the cache for this page anymore. */ +@@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_ar + unsigned long phys = page_to_phys(pfn_to_page(pfn)); + unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + ++ preempt_disable(); + __flush_invalidate_dcache_page_alias(virt, phys); + __invalidate_icache_page_alias(virt, phys); ++ preempt_enable(); + } + EXPORT_SYMBOL(local_flush_cache_page); + +@@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * + unsigned long phys = page_to_phys(page); + unsigned long tmp; + ++ preempt_disable(); + tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(tmp, phys); + tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(tmp, phys); + __invalidate_icache_page_alias(tmp, phys); ++ preempt_enable(); + + clear_bit(PG_arch_1, &page->flags); + } +@@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_st + + if (alias) { + unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); ++ preempt_disable(); + __flush_invalidate_dcache_page_alias(t, phys); ++ preempt_enable(); + } + + /* Copy data */ +@@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_st + if (alias) { + unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + ++ preempt_disable(); + __flush_invalidate_dcache_range((unsigned long) dst, len); + if ((vma->vm_flags & VM_EXEC) != 0) + __invalidate_icache_page_alias(t, phys); ++ preempt_enable(); + + } else if ((vma->vm_flags & VM_EXEC) != 0) { + __flush_dcache_range((unsigned long)dst,len); +@@ -304,7 +316,9 @@ extern void copy_from_user_page(struct v + + if (alias) { + unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); ++ preempt_disable(); + __flush_invalidate_dcache_page_alias(t, phys); ++ preempt_enable(); + } + + memcpy(dst, src, len); diff --git a/queue-5.9/xtensa-fix-tlbtemp-area-placement.patch b/queue-5.9/xtensa-fix-tlbtemp-area-placement.patch new file mode 100644 index 00000000000..5c5e7393564 --- /dev/null +++ b/queue-5.9/xtensa-fix-tlbtemp-area-placement.patch @@ -0,0 +1,86 @@ +From 481535c5b41d191b22775a6873de5ec0e1cdced1 Mon Sep 17 00:00:00 2001 +From: Max Filippov +Date: Mon, 16 Nov 2020 01:25:56 -0800 +Subject: xtensa: fix TLBTEMP area placement + +From: Max Filippov + +commit 481535c5b41d191b22775a6873de5ec0e1cdced1 upstream. + +fast_second_level_miss handler for the TLBTEMP area has an assumption +that page table directory entry for the TLBTEMP address range is 0. For +it to be true the TLBTEMP area must be aligned to 4MB boundary and not +share its 4MB region with anything that may use a page table. 
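(For the default MMUv2 layout the constants from the mmu.rst and pgtable.h hunks below work out to:

     VMALLOC_START  = 0xc0000000
     VMALLOC_END    = VMALLOC_START + 0x07FEFFFF = 0xc7feffff
     TLBTEMP_BASE_1 = VMALLOC_END + 1            = 0xc7ff0000

so TLBTEMP_BASE_1 is neither 4MB-aligned nor alone in its 4MB region 0xc7c00000..0xc7ffffff, whose lower part still belongs to the vmalloc area.)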
This is +not true currently: TLBTEMP shares space with vmalloc space which +results in the following kinds of runtime errors when +fast_second_level_miss loads page table directory entry for the vmalloc +space instead of fixing up the TLBTEMP area: + + Unable to handle kernel paging request at virtual address c7ff0e00 + pc = d0009275, ra = 90009478 + Oops: sig: 9 [#1] PREEMPT + CPU: 1 PID: 61 Comm: kworker/u9:2 Not tainted 5.10.0-rc3-next-20201110-00007-g1fe4962fa983-dirty #58 + Workqueue: xprtiod xs_stream_data_receive_workfn + a00: 90009478 d11e1dc0 c7ff0e00 00000020 c7ff0000 00000001 7f8b8107 00000000 + a08: 900c5992 d11e1d90 d0cc88b8 5506e97c 00000000 5506e97c d06c8074 d11e1d90 + pc: d0009275, ps: 00060310, depc: 00000014, excvaddr: c7ff0e00 + lbeg: d0009275, lend: d0009287 lcount: 00000003, sar: 00000010 + Call Trace: + xs_stream_data_receive_workfn+0x43c/0x770 + process_one_work+0x1a1/0x324 + worker_thread+0x1cc/0x3c0 + kthread+0x10d/0x124 + ret_from_kernel_thread+0xc/0x18 + +Cc: stable@vger.kernel.org +Signed-off-by: Max Filippov +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/xtensa/mmu.rst | 9 ++++++--- + arch/xtensa/include/asm/pgtable.h | 2 +- + 2 files changed, 7 insertions(+), 4 deletions(-) + +--- a/Documentation/xtensa/mmu.rst ++++ b/Documentation/xtensa/mmu.rst +@@ -82,7 +82,8 @@ Default MMUv2-compatible layout:: + +------------------+ + | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB + +------------------+ VMALLOC_END +- | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE ++ +------------------+ ++ | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE + | remap area 1 | + +------------------+ + | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE +@@ -124,7 +125,8 @@ Default MMUv2-compatible layout:: + +------------------+ + | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB + +------------------+ VMALLOC_END +- | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE ++ +------------------+ ++ | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE + | remap area 1 | + +------------------+ + | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE +@@ -167,7 +169,8 @@ Default MMUv2-compatible layout:: + +------------------+ + | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB + +------------------+ VMALLOC_END +- | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE ++ +------------------+ ++ | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE + | remap area 1 | + +------------------+ + | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE +--- a/arch/xtensa/include/asm/pgtable.h ++++ b/arch/xtensa/include/asm/pgtable.h +@@ -69,7 +69,7 @@ + */ + #define VMALLOC_START (XCHAL_KSEG_CACHED_VADDR - 0x10000000) + #define VMALLOC_END (VMALLOC_START + 0x07FEFFFF) +-#define TLBTEMP_BASE_1 (VMALLOC_END + 1) ++#define TLBTEMP_BASE_1 (VMALLOC_START + 0x08000000) + #define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) + #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE + #define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE)