From 4b0ac53a9e07289f9986e27ca9f1d933b5feeae7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 17 Nov 2024 21:37:48 +0100 Subject: [PATCH] 5.15-stable patches added patches: drm-bridge-tc358768-fix-dsi-command-tx.patch kvm-nvmx-treat-vpid01-as-current-if-l2-is-active-but-with-vpid-disabled.patch kvm-vmx-bury-intel-pt-virtualization-guest-host-mode-behind-config_broken.patch mm-fix-null-pointer-dereference-in-alloc_pages_bulk_noprof.patch mmc-sunxi-mmc-fix-a100-compatible-description.patch nilfs2-fix-null-ptr-deref-in-block_dirty_buffer-tracepoint.patch nilfs2-fix-null-ptr-deref-in-block_touch_buffer-tracepoint.patch ocfs2-fix-ubsan-warning-in-ocfs2_verify_volume.patch ocfs2-uncache-inode-which-has-failed-entering-the-group.patch revert-mmc-dw_mmc-fix-idmac-operation-with-pages-bigger-than-4k.patch vdpa-mlx5-fix-pa-offset-with-unaligned-starting-iotlb-map.patch x86-mm-fix-a-kdump-kernel-failure-on-sme-system-when-config_ima_kexec-y.patch --- ...m-bridge-tc358768-fix-dsi-command-tx.patch | 72 ++++++++++ ...-l2-is-active-but-with-vpid-disabled.patch | 110 +++++++++++++++ ...guest-host-mode-behind-config_broken.patch | 59 +++++++++ ...reference-in-alloc_pages_bulk_noprof.patch | 59 +++++++++ ...-mmc-fix-a100-compatible-description.patch | 57 ++++++++ ...ref-in-block_dirty_buffer-tracepoint.patch | 100 ++++++++++++++ ...ref-in-block_touch_buffer-tracepoint.patch | 63 +++++++++ ...ubsan-warning-in-ocfs2_verify_volume.patch | 125 ++++++++++++++++++ ...-which-has-failed-entering-the-group.patch | 91 +++++++++++++ ...-operation-with-pages-bigger-than-4k.patch | 47 +++++++ queue-5.15/series | 12 ++ ...et-with-unaligned-starting-iotlb-map.patch | 54 ++++++++ ...n-sme-system-when-config_ima_kexec-y.patch | 117 ++++++++++++++++ 13 files changed, 966 insertions(+) create mode 100644 queue-5.15/drm-bridge-tc358768-fix-dsi-command-tx.patch create mode 100644 queue-5.15/kvm-nvmx-treat-vpid01-as-current-if-l2-is-active-but-with-vpid-disabled.patch create mode 100644 
queue-5.15/kvm-vmx-bury-intel-pt-virtualization-guest-host-mode-behind-config_broken.patch create mode 100644 queue-5.15/mm-fix-null-pointer-dereference-in-alloc_pages_bulk_noprof.patch create mode 100644 queue-5.15/mmc-sunxi-mmc-fix-a100-compatible-description.patch create mode 100644 queue-5.15/nilfs2-fix-null-ptr-deref-in-block_dirty_buffer-tracepoint.patch create mode 100644 queue-5.15/nilfs2-fix-null-ptr-deref-in-block_touch_buffer-tracepoint.patch create mode 100644 queue-5.15/ocfs2-fix-ubsan-warning-in-ocfs2_verify_volume.patch create mode 100644 queue-5.15/ocfs2-uncache-inode-which-has-failed-entering-the-group.patch create mode 100644 queue-5.15/revert-mmc-dw_mmc-fix-idmac-operation-with-pages-bigger-than-4k.patch create mode 100644 queue-5.15/vdpa-mlx5-fix-pa-offset-with-unaligned-starting-iotlb-map.patch create mode 100644 queue-5.15/x86-mm-fix-a-kdump-kernel-failure-on-sme-system-when-config_ima_kexec-y.patch diff --git a/queue-5.15/drm-bridge-tc358768-fix-dsi-command-tx.patch b/queue-5.15/drm-bridge-tc358768-fix-dsi-command-tx.patch new file mode 100644 index 00000000000..42d85f3a211 --- /dev/null +++ b/queue-5.15/drm-bridge-tc358768-fix-dsi-command-tx.patch @@ -0,0 +1,72 @@ +From 32c4514455b2b8fde506f8c0962f15c7e4c26f1d Mon Sep 17 00:00:00 2001 +From: Francesco Dolcini +Date: Thu, 26 Sep 2024 16:12:46 +0200 +Subject: drm/bridge: tc358768: Fix DSI command tx + +From: Francesco Dolcini + +commit 32c4514455b2b8fde506f8c0962f15c7e4c26f1d upstream. + +Wait for the command transmission to be completed in the DSI transfer +function polling for the dc_start bit to go back to idle state after the +transmission is started. + +This is documented in the datasheet and failures to do so lead to +commands corruption. 
+ +Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") +Cc: stable@vger.kernel.org +Signed-off-by: Francesco Dolcini +Reviewed-by: Neil Armstrong +Link: https://lore.kernel.org/r/20240926141246.48282-1-francesco@dolcini.it +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20240926141246.48282-1-francesco@dolcini.it +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/bridge/tc358768.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/bridge/tc358768.c ++++ b/drivers/gpu/drm/bridge/tc358768.c +@@ -124,6 +124,9 @@ + #define TC358768_DSI_CONFW_MODE_CLR (6 << 29) + #define TC358768_DSI_CONFW_ADDR_DSI_CONTROL (0x3 << 24) + ++/* TC358768_DSICMD_TX (0x0600) register */ ++#define TC358768_DSI_CMDTX_DC_START BIT(0) ++ + static const char * const tc358768_supplies[] = { + "vddc", "vddmipi", "vddio" + }; +@@ -227,6 +230,21 @@ static void tc358768_update_bits(struct + tc358768_write(priv, reg, tmp); + } + ++static void tc358768_dsicmd_tx(struct tc358768_priv *priv) ++{ ++ u32 val; ++ ++ /* start transfer */ ++ tc358768_write(priv, TC358768_DSICMD_TX, TC358768_DSI_CMDTX_DC_START); ++ if (priv->error) ++ return; ++ ++ /* wait transfer completion */ ++ priv->error = regmap_read_poll_timeout(priv->regmap, TC358768_DSICMD_TX, val, ++ (val & TC358768_DSI_CMDTX_DC_START) == 0, ++ 100, 100000); ++} ++ + static int tc358768_sw_reset(struct tc358768_priv *priv) + { + /* Assert Reset */ +@@ -507,8 +525,7 @@ static ssize_t tc358768_dsi_host_transfe + } + } + +- /* start transfer */ +- tc358768_write(priv, TC358768_DSICMD_TX, 1); ++ tc358768_dsicmd_tx(priv); + + ret = tc358768_clear_error(priv); + if (ret) diff --git a/queue-5.15/kvm-nvmx-treat-vpid01-as-current-if-l2-is-active-but-with-vpid-disabled.patch b/queue-5.15/kvm-nvmx-treat-vpid01-as-current-if-l2-is-active-but-with-vpid-disabled.patch new file mode 100644 index 00000000000..af96c8c7f02 --- /dev/null +++ 
b/queue-5.15/kvm-nvmx-treat-vpid01-as-current-if-l2-is-active-but-with-vpid-disabled.patch @@ -0,0 +1,110 @@ +From 2657b82a78f18528bef56dc1b017158490970873 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 31 Oct 2024 13:20:11 -0700 +Subject: KVM: nVMX: Treat vpid01 as current if L2 is active, but with VPID disabled + +From: Sean Christopherson + +commit 2657b82a78f18528bef56dc1b017158490970873 upstream. + +When getting the current VPID, e.g. to emulate a guest TLB flush, return +vpid01 if L2 is running but with VPID disabled, i.e. if VPID is disabled +in vmcs12. Architecturally, if VPID is disabled, then the guest and host +effectively share VPID=0. KVM emulates this behavior by using vpid01 when +running an L2 with VPID disabled (see prepare_vmcs02_early_rare()), and so +KVM must also treat vpid01 as the current VPID while L2 is active. + +Unconditionally treating vpid02 as the current VPID when L2 is active +causes KVM to flush TLB entries for vpid02 instead of vpid01, which +results in TLB entries from L1 being incorrectly preserved across nested +VM-Enter to L2 (L2=>L1 isn't problematic, because the TLB flush after +nested VM-Exit flushes vpid01). + +The bug manifests as failures in the vmx_apicv_test KVM-Unit-Test, as KVM +incorrectly retains TLB entries for the APIC-access page across a nested +VM-Enter. + +Opportunisticaly add comments at various touchpoints to explain the +architectural requirements, and also why KVM uses vpid01 instead of vpid02. + +All credit goes to Chao, who root caused the issue and identified the fix. + +Link: https://lore.kernel.org/all/ZwzczkIlYGX+QXJz@intel.com +Fixes: 2b4a5a5d5688 ("KVM: nVMX: Flush current VPID (L1 vs. 
L2) for KVM_REQ_TLB_FLUSH_GUEST") +Cc: stable@vger.kernel.org +Cc: Like Xu +Debugged-by: Chao Gao +Reviewed-by: Chao Gao +Tested-by: Chao Gao +Link: https://lore.kernel.org/r/20241031202011.1580522-1-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 30 +++++++++++++++++++++++++----- + arch/x86/kvm/vmx/vmx.c | 2 +- + 2 files changed, 26 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -1158,11 +1158,14 @@ static void nested_vmx_transition_tlb_fl + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* +- * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings +- * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a +- * full TLB flush from the guest's perspective. This is required even +- * if VPID is disabled in the host as KVM may need to synchronize the +- * MMU in response to the guest TLB flush. ++ * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the ++ * same VPID as the host, and so architecturally, linear and combined ++ * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM ++ * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2, ++ * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This ++ * is required if VPID is disabled in KVM, as a TLB flush (there are no ++ * VPIDs) still occurs from L1's perspective, and KVM may need to ++ * synchronize the MMU in response to the guest TLB flush. + * + * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use. + * EPT is a special snowflake, as guest-physical mappings aren't +@@ -2189,6 +2192,17 @@ static void prepare_vmcs02_early_rare(st + + vmcs_write64(VMCS_LINK_POINTER, -1ull); + ++ /* ++ * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the ++ * same VPID as the host. Emulate this behavior by using vpid01 for L2 ++ * if VPID is disabled in vmcs12. 
Note, if VPID is disabled, VM-Enter ++ * and VM-Exit are architecturally required to flush VPID=0, but *only* ++ * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the ++ * required flushes), but doing so would cause KVM to over-flush. E.g. ++ * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled, ++ * and then runs L2 X again, then KVM can and should retain TLB entries ++ * for VPID12=1. ++ */ + if (enable_vpid) { + if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); +@@ -5591,6 +5605,12 @@ static int handle_invvpid(struct kvm_vcp + return nested_vmx_fail(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + ++ /* ++ * Always flush the effective vpid02, i.e. never flush the current VPID ++ * and never explicitly flush vpid01. INVVPID targets a VPID, not a ++ * VMCS, and so whether or not the current vmcs12 has VPID enabled is ++ * irrelevant (and there may not be a loaded vmcs12). ++ */ + vpid02 = nested_get_vpid02(vcpu); + switch (type) { + case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -3019,7 +3019,7 @@ static void vmx_flush_tlb_all(struct kvm + + static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) + { +- if (is_guest_mode(vcpu)) ++ if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu))) + return nested_get_vpid02(vcpu); + return to_vmx(vcpu)->vpid; + } diff --git a/queue-5.15/kvm-vmx-bury-intel-pt-virtualization-guest-host-mode-behind-config_broken.patch b/queue-5.15/kvm-vmx-bury-intel-pt-virtualization-guest-host-mode-behind-config_broken.patch new file mode 100644 index 00000000000..5db17fe53db --- /dev/null +++ b/queue-5.15/kvm-vmx-bury-intel-pt-virtualization-guest-host-mode-behind-config_broken.patch @@ -0,0 +1,59 @@ +From aa0d42cacf093a6fcca872edc954f6f812926a17 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 1 Nov 2024 11:50:30 -0700 +Subject: KVM: VMX: Bury Intel PT 
virtualization (guest/host mode) behind CONFIG_BROKEN + +From: Sean Christopherson + +commit aa0d42cacf093a6fcca872edc954f6f812926a17 upstream. + +Hide KVM's pt_mode module param behind CONFIG_BROKEN, i.e. disable support +for virtualizing Intel PT via guest/host mode unless BROKEN=y. There are +myriad bugs in the implementation, some of which are fatal to the guest, +and others which put the stability and health of the host at risk. + +For guest fatalities, the most glaring issue is that KVM fails to ensure +tracing is disabled, and *stays* disabled prior to VM-Enter, which is +necessary as hardware disallows loading (the guest's) RTIT_CTL if tracing +is enabled (enforced via a VMX consistency check). Per the SDM: + + If the logical processor is operating with Intel PT enabled (if + IA32_RTIT_CTL.TraceEn = 1) at the time of VM entry, the "load + IA32_RTIT_CTL" VM-entry control must be 0. + +On the host side, KVM doesn't validate the guest CPUID configuration +provided by userspace, and even worse, uses the guest configuration to +decide what MSRs to save/load at VM-Enter and VM-Exit. E.g. configuring +guest CPUID to enumerate more address ranges than are supported in hardware +will result in KVM trying to passthrough, save, and load non-existent MSRs, +which generates a variety of WARNs, ToPA ERRORs in the host, a potential +deadlock, etc. 
+ +Fixes: f99e3daf94ff ("KVM: x86: Add Intel PT virtualization work mode") +Cc: stable@vger.kernel.org +Cc: Adrian Hunter +Signed-off-by: Sean Christopherson +Reviewed-by: Xiaoyao Li +Tested-by: Adrian Hunter +Message-ID: <20241101185031.1799556-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -200,9 +200,11 @@ module_param(ple_window_shrink, uint, 04 + static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; + module_param(ple_window_max, uint, 0444); + +-/* Default is SYSTEM mode, 1 for host-guest mode */ ++/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */ + int __read_mostly pt_mode = PT_MODE_SYSTEM; ++#ifdef CONFIG_BROKEN + module_param(pt_mode, int, S_IRUGO); ++#endif + + static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); + static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); diff --git a/queue-5.15/mm-fix-null-pointer-dereference-in-alloc_pages_bulk_noprof.patch b/queue-5.15/mm-fix-null-pointer-dereference-in-alloc_pages_bulk_noprof.patch new file mode 100644 index 00000000000..979189461e1 --- /dev/null +++ b/queue-5.15/mm-fix-null-pointer-dereference-in-alloc_pages_bulk_noprof.patch @@ -0,0 +1,59 @@ +From 8ce41b0f9d77cca074df25afd39b86e2ee3aa68e Mon Sep 17 00:00:00 2001 +From: Jinjiang Tu +Date: Wed, 13 Nov 2024 16:32:35 +0800 +Subject: mm: fix NULL pointer dereference in alloc_pages_bulk_noprof + +From: Jinjiang Tu + +commit 8ce41b0f9d77cca074df25afd39b86e2ee3aa68e upstream. + +We triggered a NULL pointer dereference for ac.preferred_zoneref->zone in +alloc_pages_bulk_noprof() when the task is migrated between cpusets. + +When cpuset is enabled, in prepare_alloc_pages(), ac->nodemask may be +&current->mems_allowed.
when first_zones_zonelist() is called to find +preferred_zoneref, the ac->nodemask may be modified concurrently if the +task is migrated between different cpusets. Assuming we have 2 NUMA Node, +when traversing Node1 in ac->zonelist, the nodemask is 2, and when +traversing Node2 in ac->zonelist, the nodemask is 1. As a result, the +ac->preferred_zoneref points to NULL zone. + +In alloc_pages_bulk_noprof(), for_each_zone_zonelist_nodemask() finds a +allowable zone and calls zonelist_node_idx(ac.preferred_zoneref), leading +to NULL pointer dereference. + +__alloc_pages_noprof() fixes this issue by checking NULL pointer in commit +ea57485af8f4 ("mm, page_alloc: fix check for NULL preferred_zone") and +commit df76cee6bbeb ("mm, page_alloc: remove redundant checks from alloc +fastpath"). + +To fix it, check NULL pointer for preferred_zoneref->zone. + +Link: https://lkml.kernel.org/r/20241113083235.166798-1-tujinjiang@huawei.com +Fixes: 387ba26fb1cb ("mm/page_alloc: add a bulk page allocator") +Signed-off-by: Jinjiang Tu +Reviewed-by: Vlastimil Babka +Cc: Alexander Lobakin +Cc: David Hildenbrand +Cc: Kefeng Wang +Cc: Mel Gorman +Cc: Nanyong Sun +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -5336,7 +5336,8 @@ unsigned long __alloc_pages_bulk(gfp_t g + gfp = alloc_gfp; + + /* Find an allowed local zone that meets the low watermark. 
*/ +- for_each_zone_zonelist_nodemask(zone, z, ac.zonelist, ac.highest_zoneidx, ac.nodemask) { ++ z = ac.preferred_zoneref; ++ for_next_zone_zonelist_nodemask(zone, z, ac.highest_zoneidx, ac.nodemask) { + unsigned long mark; + + if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) && diff --git a/queue-5.15/mmc-sunxi-mmc-fix-a100-compatible-description.patch b/queue-5.15/mmc-sunxi-mmc-fix-a100-compatible-description.patch new file mode 100644 index 00000000000..d867ff89f39 --- /dev/null +++ b/queue-5.15/mmc-sunxi-mmc-fix-a100-compatible-description.patch @@ -0,0 +1,57 @@ +From 85b580afc2c215394e08974bf033de9face94955 Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Thu, 7 Nov 2024 01:42:40 +0000 +Subject: mmc: sunxi-mmc: Fix A100 compatible description + +From: Andre Przywara + +commit 85b580afc2c215394e08974bf033de9face94955 upstream. + +It turns out that the Allwinner A100/A133 SoC only supports 8K DMA +blocks (13 bits wide), for both the SD/SDIO and eMMC instances. +And while this alone would make a trivial fix, the H616 falls back to +the A100 compatible string, so we have to now match the H616 compatible +string explicitly against the description advertising 64K DMA blocks. + +As the A100 is now compatible with the D1 description, let the A100 +compatible string point to that block instead, and introduce an explicit +match against the H616 string, pointing to the old description. +Also remove the redundant setting of clk_delays to NULL on the way. 
+ +Fixes: 3536b82e5853 ("mmc: sunxi: add support for A100 mmc controller") +Cc: stable@vger.kernel.org +Signed-off-by: Andre Przywara +Tested-by: Parthiban Nallathambi +Reviewed-by: Chen-Yu Tsai +Message-ID: <20241107014240.24669-1-andre.przywara@arm.com> +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sunxi-mmc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/mmc/host/sunxi-mmc.c ++++ b/drivers/mmc/host/sunxi-mmc.c +@@ -1183,10 +1183,9 @@ static const struct sunxi_mmc_cfg sun50i + .needs_new_timings = true, + }; + +-static const struct sunxi_mmc_cfg sun50i_a100_cfg = { ++static const struct sunxi_mmc_cfg sun50i_h616_cfg = { + .idma_des_size_bits = 16, + .idma_des_shift = 2, +- .clk_delays = NULL, + .can_calibrate = true, + .mask_data0 = true, + .needs_new_timings = true, +@@ -1208,8 +1207,9 @@ static const struct of_device_id sunxi_m + { .compatible = "allwinner,sun9i-a80-mmc", .data = &sun9i_a80_cfg }, + { .compatible = "allwinner,sun50i-a64-mmc", .data = &sun50i_a64_cfg }, + { .compatible = "allwinner,sun50i-a64-emmc", .data = &sun50i_a64_emmc_cfg }, +- { .compatible = "allwinner,sun50i-a100-mmc", .data = &sun50i_a100_cfg }, ++ { .compatible = "allwinner,sun50i-a100-mmc", .data = &sun20i_d1_cfg }, + { .compatible = "allwinner,sun50i-a100-emmc", .data = &sun50i_a100_emmc_cfg }, ++ { .compatible = "allwinner,sun50i-h616-mmc", .data = &sun50i_h616_cfg }, + { /* sentinel */ } + }; + MODULE_DEVICE_TABLE(of, sunxi_mmc_of_match); diff --git a/queue-5.15/nilfs2-fix-null-ptr-deref-in-block_dirty_buffer-tracepoint.patch b/queue-5.15/nilfs2-fix-null-ptr-deref-in-block_dirty_buffer-tracepoint.patch new file mode 100644 index 00000000000..e517ccdf2c7 --- /dev/null +++ b/queue-5.15/nilfs2-fix-null-ptr-deref-in-block_dirty_buffer-tracepoint.patch @@ -0,0 +1,100 @@ +From 2026559a6c4ce34db117d2db8f710fe2a9420d5a Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Thu, 7 Nov 2024 01:07:33 +0900 +Subject: 
nilfs2: fix null-ptr-deref in block_dirty_buffer tracepoint + +From: Ryusuke Konishi + +commit 2026559a6c4ce34db117d2db8f710fe2a9420d5a upstream. + +When using the "block:block_dirty_buffer" tracepoint, mark_buffer_dirty() +may cause a NULL pointer dereference, or a general protection fault when +KASAN is enabled. + +This happens because, since the tracepoint was added in +mark_buffer_dirty(), it references the dev_t member bh->b_bdev->bd_dev +regardless of whether the buffer head has a pointer to a block_device +structure. + +In the current implementation, nilfs_grab_buffer(), which grabs a buffer +to read (or create) a block of metadata, including b-tree node blocks, +does not set the block device, but instead does so only if the buffer is +not in the "uptodate" state for each of its caller block reading +functions. However, if the uptodate flag is set on a folio/page, and the +buffer heads are detached from it by try_to_free_buffers(), and new buffer +heads are then attached by create_empty_buffers(), the uptodate flag may +be restored to each buffer without the block device being set to +bh->b_bdev, and mark_buffer_dirty() may be called later in that state, +resulting in the bug mentioned above. + +Fix this issue by making nilfs_grab_buffer() always set the block device +of the super block structure to the buffer head, regardless of the state +of the buffer's uptodate flag. 
+ +Link: https://lkml.kernel.org/r/20241106160811.3316-3-konishi.ryusuke@gmail.com +Fixes: 5305cb830834 ("block: add block_{touch|dirty}_buffer tracepoint") +Signed-off-by: Ryusuke Konishi +Cc: Tejun Heo +Cc: Ubisectech Sirius +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/btnode.c | 2 -- + fs/nilfs2/gcinode.c | 4 +--- + fs/nilfs2/mdt.c | 1 - + fs/nilfs2/page.c | 1 + + 4 files changed, 2 insertions(+), 6 deletions(-) + +--- a/fs/nilfs2/btnode.c ++++ b/fs/nilfs2/btnode.c +@@ -68,7 +68,6 @@ nilfs_btnode_create_block(struct address + goto failed; + } + memset(bh->b_data, 0, i_blocksize(inode)); +- bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = blocknr; + set_buffer_mapped(bh); + set_buffer_uptodate(bh); +@@ -133,7 +132,6 @@ int nilfs_btnode_submit_block(struct add + goto found; + } + set_buffer_mapped(bh); +- bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = pblocknr; /* set block address for read */ + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); +--- a/fs/nilfs2/gcinode.c ++++ b/fs/nilfs2/gcinode.c +@@ -83,10 +83,8 @@ int nilfs_gccache_submit_read_data(struc + goto out; + } + +- if (!buffer_mapped(bh)) { +- bh->b_bdev = inode->i_sb->s_bdev; ++ if (!buffer_mapped(bh)) + set_buffer_mapped(bh); +- } + bh->b_blocknr = pbn; + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); +--- a/fs/nilfs2/mdt.c ++++ b/fs/nilfs2/mdt.c +@@ -89,7 +89,6 @@ static int nilfs_mdt_create_block(struct + if (buffer_uptodate(bh)) + goto failed_bh; + +- bh->b_bdev = sb->s_bdev; + err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); + if (likely(!err)) { + get_bh(bh); +--- a/fs/nilfs2/page.c ++++ b/fs/nilfs2/page.c +@@ -63,6 +63,7 @@ struct buffer_head *nilfs_grab_buffer(st + put_page(page); + return NULL; + } ++ bh->b_bdev = inode->i_sb->s_bdev; + return bh; + } + diff --git a/queue-5.15/nilfs2-fix-null-ptr-deref-in-block_touch_buffer-tracepoint.patch b/queue-5.15/nilfs2-fix-null-ptr-deref-in-block_touch_buffer-tracepoint.patch new 
file mode 100644 index 00000000000..e7341d83ce4 --- /dev/null +++ b/queue-5.15/nilfs2-fix-null-ptr-deref-in-block_touch_buffer-tracepoint.patch @@ -0,0 +1,63 @@ +From cd45e963e44b0f10d90b9e6c0e8b4f47f3c92471 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Thu, 7 Nov 2024 01:07:32 +0900 +Subject: nilfs2: fix null-ptr-deref in block_touch_buffer tracepoint + +From: Ryusuke Konishi + +commit cd45e963e44b0f10d90b9e6c0e8b4f47f3c92471 upstream. + +Patch series "nilfs2: fix null-ptr-deref bugs on block tracepoints". + +This series fixes null pointer dereference bugs that occur when using +nilfs2 and two block-related tracepoints. + + +This patch (of 2): + +It has been reported that when using "block:block_touch_buffer" +tracepoint, touch_buffer() called from __nilfs_get_folio_block() causes a +NULL pointer dereference, or a general protection fault when KASAN is +enabled. + +This happens because since the tracepoint was added in touch_buffer(), it +references the dev_t member bh->b_bdev->bd_dev regardless of whether the +buffer head has a pointer to a block_device structure. In the current +implementation, the block_device structure is set after the function +returns to the caller. + +Here, touch_buffer() is used to mark the folio/page that owns the buffer +head as accessed, but the common search helper for folio/page used by the +caller function was optimized to mark the folio/page as accessed when it +was reimplemented a long time ago, eliminating the need to call +touch_buffer() here in the first place. + +So this solves the issue by eliminating the touch_buffer() call itself. 
+ +Link: https://lkml.kernel.org/r/20241106160811.3316-1-konishi.ryusuke@gmail.com +Link: https://lkml.kernel.org/r/20241106160811.3316-2-konishi.ryusuke@gmail.com +Fixes: 5305cb830834 ("block: add block_{touch|dirty}_buffer tracepoint") +Signed-off-by: Ryusuke Konishi +Reported-by: Ubisectech Sirius +Closes: https://lkml.kernel.org/r/86bd3013-887e-4e38-960f-ca45c657f032.bugreport@valiantsec.com +Reported-by: syzbot+9982fb8d18eba905abe2@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=9982fb8d18eba905abe2 +Tested-by: syzbot+9982fb8d18eba905abe2@syzkaller.appspotmail.com +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/page.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/nilfs2/page.c ++++ b/fs/nilfs2/page.c +@@ -39,7 +39,6 @@ __nilfs_get_page_block(struct page *page + first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); + bh = nilfs_page_get_nth_block(page, block - first_block); + +- touch_buffer(bh); + wait_on_buffer(bh); + return bh; + } diff --git a/queue-5.15/ocfs2-fix-ubsan-warning-in-ocfs2_verify_volume.patch b/queue-5.15/ocfs2-fix-ubsan-warning-in-ocfs2_verify_volume.patch new file mode 100644 index 00000000000..3e6edf3103c --- /dev/null +++ b/queue-5.15/ocfs2-fix-ubsan-warning-in-ocfs2_verify_volume.patch @@ -0,0 +1,125 @@ +From 23aab037106d46e6168ce1214a958ce9bf317f2e Mon Sep 17 00:00:00 2001 +From: Dmitry Antipov +Date: Wed, 6 Nov 2024 12:21:00 +0300 +Subject: ocfs2: fix UBSAN warning in ocfs2_verify_volume() + +From: Dmitry Antipov + +commit 23aab037106d46e6168ce1214a958ce9bf317f2e upstream. 
+ +Syzbot has reported the following splat triggered by UBSAN: + +UBSAN: shift-out-of-bounds in fs/ocfs2/super.c:2336:10 +shift exponent 32768 is too large for 32-bit type 'int' +CPU: 2 UID: 0 PID: 5255 Comm: repro Not tainted 6.12.0-rc4-syzkaller-00047-gc2ee9f594da8 #0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 +Call Trace: + + dump_stack_lvl+0x241/0x360 + ? __pfx_dump_stack_lvl+0x10/0x10 + ? __pfx__printk+0x10/0x10 + ? __asan_memset+0x23/0x50 + ? lockdep_init_map_type+0xa1/0x910 + __ubsan_handle_shift_out_of_bounds+0x3c8/0x420 + ocfs2_fill_super+0xf9c/0x5750 + ? __pfx_ocfs2_fill_super+0x10/0x10 + ? __pfx_validate_chain+0x10/0x10 + ? __pfx_validate_chain+0x10/0x10 + ? validate_chain+0x11e/0x5920 + ? __lock_acquire+0x1384/0x2050 + ? __pfx_validate_chain+0x10/0x10 + ? string+0x26a/0x2b0 + ? widen_string+0x3a/0x310 + ? string+0x26a/0x2b0 + ? bdev_name+0x2b1/0x3c0 + ? pointer+0x703/0x1210 + ? __pfx_pointer+0x10/0x10 + ? __pfx_format_decode+0x10/0x10 + ? __lock_acquire+0x1384/0x2050 + ? vsnprintf+0x1ccd/0x1da0 + ? snprintf+0xda/0x120 + ? __pfx_lock_release+0x10/0x10 + ? do_raw_spin_lock+0x14f/0x370 + ? __pfx_snprintf+0x10/0x10 + ? set_blocksize+0x1f9/0x360 + ? sb_set_blocksize+0x98/0xf0 + ? setup_bdev_super+0x4e6/0x5d0 + mount_bdev+0x20c/0x2d0 + ? __pfx_ocfs2_fill_super+0x10/0x10 + ? __pfx_mount_bdev+0x10/0x10 + ? vfs_parse_fs_string+0x190/0x230 + ? __pfx_vfs_parse_fs_string+0x10/0x10 + legacy_get_tree+0xf0/0x190 + ? __pfx_ocfs2_mount+0x10/0x10 + vfs_get_tree+0x92/0x2b0 + do_new_mount+0x2be/0xb40 + ? __pfx_do_new_mount+0x10/0x10 + __se_sys_mount+0x2d6/0x3c0 + ? __pfx___se_sys_mount+0x10/0x10 + ? do_syscall_64+0x100/0x230 + ? 
__x64_sys_mount+0x20/0xc0 + do_syscall_64+0xf3/0x230 + entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7f37cae96fda +Code: 48 8b 0d 51 ce 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1e ce 0c 00 f7 d8 64 89 01 48 +RSP: 002b:00007fff6c1aa228 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 +RAX: ffffffffffffffda RBX: 00007fff6c1aa240 RCX: 00007f37cae96fda +RDX: 00000000200002c0 RSI: 0000000020000040 RDI: 00007fff6c1aa240 +RBP: 0000000000000004 R08: 00007fff6c1aa280 R09: 0000000000000000 +R10: 00000000000008c0 R11: 0000000000000206 R12: 00000000000008c0 +R13: 00007fff6c1aa280 R14: 0000000000000003 R15: 0000000001000000 + + +For a really damaged superblock, the value of 'i_super.s_blocksize_bits' +may exceed the maximum possible shift for an underlying 'int'. So add an +extra check whether the aforementioned field represents the valid block +size, which is 512 bytes, 1K, 2K, or 4K. + +Link: https://lkml.kernel.org/r/20241106092100.2661330-1-dmantipov@yandex.ru +Fixes: ccd979bdbce9 ("[PATCH] OCFS2: The Second Oracle Cluster Filesystem") +Signed-off-by: Dmitry Antipov +Reported-by: syzbot+56f7cd1abe4b8e475180@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=56f7cd1abe4b8e475180 +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/super.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -2325,6 +2325,7 @@ static int ocfs2_verify_volume(struct oc + struct ocfs2_blockcheck_stats *stats) + { + int status = -EAGAIN; ++ u32 blksz_bits; + + if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, + strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { +@@ -2339,11 +2340,15 @@ static int ocfs2_verify_volume(struct oc + goto out; + } + status = 
-EINVAL; +- if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { ++ /* Acceptable block sizes are 512 bytes, 1K, 2K and 4K. */ ++ blksz_bits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); ++ if (blksz_bits < 9 || blksz_bits > 12) { + mlog(ML_ERROR, "found superblock with incorrect block " +- "size: found %u, should be %u\n", +- 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits), +- blksz); ++ "size bits: found %u, should be 9, 10, 11, or 12\n", ++ blksz_bits); ++ } else if ((1 << le32_to_cpu(blksz_bits)) != blksz) { ++ mlog(ML_ERROR, "found superblock with incorrect block " ++ "size: found %u, should be %u\n", 1 << blksz_bits, blksz); + } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != + OCFS2_MAJOR_REV_LEVEL || + le16_to_cpu(di->id2.i_super.s_minor_rev_level) != diff --git a/queue-5.15/ocfs2-uncache-inode-which-has-failed-entering-the-group.patch b/queue-5.15/ocfs2-uncache-inode-which-has-failed-entering-the-group.patch new file mode 100644 index 00000000000..83d2444d74f --- /dev/null +++ b/queue-5.15/ocfs2-uncache-inode-which-has-failed-entering-the-group.patch @@ -0,0 +1,91 @@ +From 737f34137844d6572ab7d473c998c7f977ff30eb Mon Sep 17 00:00:00 2001 +From: Dmitry Antipov +Date: Thu, 14 Nov 2024 07:38:44 +0300 +Subject: ocfs2: uncache inode which has failed entering the group + +From: Dmitry Antipov + +commit 737f34137844d6572ab7d473c998c7f977ff30eb upstream. + +Syzbot has reported the following BUG: + +kernel BUG at fs/ocfs2/uptodate.c:509! +... +Call Trace: + + ? __die_body+0x5f/0xb0 + ? die+0x9e/0xc0 + ? do_trap+0x15a/0x3a0 + ? ocfs2_set_new_buffer_uptodate+0x145/0x160 + ? do_error_trap+0x1dc/0x2c0 + ? ocfs2_set_new_buffer_uptodate+0x145/0x160 + ? __pfx_do_error_trap+0x10/0x10 + ? handle_invalid_op+0x34/0x40 + ? ocfs2_set_new_buffer_uptodate+0x145/0x160 + ? exc_invalid_op+0x38/0x50 + ? asm_exc_invalid_op+0x1a/0x20 + ? ocfs2_set_new_buffer_uptodate+0x2e/0x160 + ? ocfs2_set_new_buffer_uptodate+0x144/0x160 + ? 
ocfs2_set_new_buffer_uptodate+0x145/0x160 + ocfs2_group_add+0x39f/0x15a0 + ? __pfx_ocfs2_group_add+0x10/0x10 + ? __pfx_lock_acquire+0x10/0x10 + ? mnt_get_write_access+0x68/0x2b0 + ? __pfx_lock_release+0x10/0x10 + ? rcu_read_lock_any_held+0xb7/0x160 + ? __pfx_rcu_read_lock_any_held+0x10/0x10 + ? smack_log+0x123/0x540 + ? mnt_get_write_access+0x68/0x2b0 + ? mnt_get_write_access+0x68/0x2b0 + ? mnt_get_write_access+0x226/0x2b0 + ocfs2_ioctl+0x65e/0x7d0 + ? __pfx_ocfs2_ioctl+0x10/0x10 + ? smack_file_ioctl+0x29e/0x3a0 + ? __pfx_smack_file_ioctl+0x10/0x10 + ? lockdep_hardirqs_on_prepare+0x43d/0x780 + ? __pfx_lockdep_hardirqs_on_prepare+0x10/0x10 + ? __pfx_ocfs2_ioctl+0x10/0x10 + __se_sys_ioctl+0xfb/0x170 + do_syscall_64+0xf3/0x230 + entry_SYSCALL_64_after_hwframe+0x77/0x7f +... + + +When 'ioctl(OCFS2_IOC_GROUP_ADD, ...)' has failed for the particular +inode in 'ocfs2_verify_group_and_input()', corresponding buffer head +remains cached and subsequent call to the same 'ioctl()' for the same +inode issues the BUG() in 'ocfs2_set_new_buffer_uptodate()' (trying +to cache the same buffer head of that inode). Fix this by uncaching +the buffer head with 'ocfs2_remove_from_cache()' on error path in +'ocfs2_group_add()'. 
+ +Link: https://lkml.kernel.org/r/20241114043844.111847-1-dmantipov@yandex.ru +Fixes: 7909f2bf8353 ("[PATCH 2/2] ocfs2: Implement group add for online resize") +Signed-off-by: Dmitry Antipov +Reported-by: syzbot+453873f1588c2d75b447@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=453873f1588c2d75b447 +Reviewed-by: Joseph Qi +Cc: Dmitry Antipov +Cc: Joel Becker +Cc: Mark Fasheh +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/resize.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ocfs2/resize.c ++++ b/fs/ocfs2/resize.c +@@ -566,6 +566,8 @@ out_commit: + ocfs2_commit_trans(osb, handle); + + out_free_group_bh: ++ if (ret < 0) ++ ocfs2_remove_from_cache(INODE_CACHE(inode), group_bh); + brelse(group_bh); + + out_unlock: diff --git a/queue-5.15/revert-mmc-dw_mmc-fix-idmac-operation-with-pages-bigger-than-4k.patch b/queue-5.15/revert-mmc-dw_mmc-fix-idmac-operation-with-pages-bigger-than-4k.patch new file mode 100644 index 00000000000..0b2b278fd9d --- /dev/null +++ b/queue-5.15/revert-mmc-dw_mmc-fix-idmac-operation-with-pages-bigger-than-4k.patch @@ -0,0 +1,47 @@ +From 1635e407a4a64d08a8517ac59ca14ad4fc785e75 Mon Sep 17 00:00:00 2001 +From: Aurelien Jarno +Date: Sun, 10 Nov 2024 12:46:36 +0100 +Subject: Revert "mmc: dw_mmc: Fix IDMAC operation with pages bigger than 4K" + +From: Aurelien Jarno + +commit 1635e407a4a64d08a8517ac59ca14ad4fc785e75 upstream. + +The commit 8396c793ffdf ("mmc: dw_mmc: Fix IDMAC operation with pages +bigger than 4K") increased the max_req_size, even for 4K pages, causing +various issues: +- Panic booting the kernel/rootfs from an SD card on Rockchip RK3566 +- Panic booting the kernel/rootfs from an SD card on StarFive JH7100 +- "swiotlb buffer is full" and data corruption on StarFive JH7110 + +At this stage no fix has been found, so it's probably better to just +revert the change. 
+ +This reverts commit 8396c793ffdf28bb8aee7cfe0891080f8cab7890. + +Cc: stable@vger.kernel.org +Cc: Sam Protsenko +Fixes: 8396c793ffdf ("mmc: dw_mmc: Fix IDMAC operation with pages bigger than 4K") +Closes: https://lore.kernel.org/linux-mmc/614692b4-1dbe-31b8-a34d-cb6db1909bb7@w6rz.net/ +Closes: https://lore.kernel.org/linux-mmc/CAC8uq=Ppnmv98mpa1CrWLawWoPnu5abtU69v-=G-P7ysATQ2Pw@mail.gmail.com/ +Signed-off-by: Aurelien Jarno +Message-ID: <20241110114700.622372-1-aurelien@aurel32.net> +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/dw_mmc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/host/dw_mmc.c ++++ b/drivers/mmc/host/dw_mmc.c +@@ -2903,8 +2903,8 @@ static int dw_mci_init_slot(struct dw_mc + if (host->use_dma == TRANS_MODE_IDMAC) { + mmc->max_segs = host->ring_size; + mmc->max_blk_size = 65535; +- mmc->max_req_size = DW_MCI_DESC_DATA_LENGTH * host->ring_size; +- mmc->max_seg_size = mmc->max_req_size; ++ mmc->max_seg_size = 0x1000; ++ mmc->max_req_size = mmc->max_seg_size * host->ring_size; + mmc->max_blk_count = mmc->max_req_size / 512; + } else if (host->use_dma == TRANS_MODE_EDMAC) { + mmc->max_segs = 64; diff --git a/queue-5.15/series b/queue-5.15/series index 9f87bcd6bfb..fd2cdf256c5 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -5,3 +5,15 @@ net-mlx5e-ktls-fix-incorrect-page-refcounting.patch net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch samples-pktgen-correct-dev-to-dev.patch arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch +x86-mm-fix-a-kdump-kernel-failure-on-sme-system-when-config_ima_kexec-y.patch +mm-fix-null-pointer-dereference-in-alloc_pages_bulk_noprof.patch +ocfs2-uncache-inode-which-has-failed-entering-the-group.patch +vdpa-mlx5-fix-pa-offset-with-unaligned-starting-iotlb-map.patch +kvm-nvmx-treat-vpid01-as-current-if-l2-is-active-but-with-vpid-disabled.patch +kvm-vmx-bury-intel-pt-virtualization-guest-host-mode-behind-config_broken.patch 
+nilfs2-fix-null-ptr-deref-in-block_touch_buffer-tracepoint.patch +ocfs2-fix-ubsan-warning-in-ocfs2_verify_volume.patch +nilfs2-fix-null-ptr-deref-in-block_dirty_buffer-tracepoint.patch +revert-mmc-dw_mmc-fix-idmac-operation-with-pages-bigger-than-4k.patch +mmc-sunxi-mmc-fix-a100-compatible-description.patch +drm-bridge-tc358768-fix-dsi-command-tx.patch diff --git a/queue-5.15/vdpa-mlx5-fix-pa-offset-with-unaligned-starting-iotlb-map.patch b/queue-5.15/vdpa-mlx5-fix-pa-offset-with-unaligned-starting-iotlb-map.patch new file mode 100644 index 00000000000..85491c7ffb7 --- /dev/null +++ b/queue-5.15/vdpa-mlx5-fix-pa-offset-with-unaligned-starting-iotlb-map.patch @@ -0,0 +1,54 @@ +From 29ce8b8a4fa74e841342c8b8f8941848a3c6f29f Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Mon, 21 Oct 2024 16:40:39 +0300 +Subject: vdpa/mlx5: Fix PA offset with unaligned starting iotlb map + +From: Si-Wei Liu + +commit 29ce8b8a4fa74e841342c8b8f8941848a3c6f29f upstream. + +When calculating the physical address range based on the iotlb and mr +[start,end) ranges, the offset of mr->start relative to map->start +is not taken into account. This leads to some incorrect and duplicate +mappings. + +For the case when mr->start < map->start the code is already correct: +the range in [mr->start, map->start) was handled by a different +iteration. + +Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") +Cc: stable@vger.kernel.org +Signed-off-by: Si-Wei Liu +Signed-off-by: Dragos Tatulea +Message-Id: <20241021134040.975221-2-dtatulea@nvidia.com> +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vdpa/mlx5/core/mr.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/vdpa/mlx5/core/mr.c ++++ b/drivers/vdpa/mlx5/core/mr.c +@@ -232,7 +232,7 @@ static int map_direct_mr(struct mlx5_vdp + struct page *pg; + unsigned int nsg; + int sglen; +- u64 pa; ++ u64 pa, offset; + u64 paend; + struct scatterlist *sg; + struct device *dma = mvdev->vdev.dma_dev; +@@ -255,8 +255,10 @@ static int map_direct_mr(struct mlx5_vdp + sg = mr->sg_head.sgl; + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { +- paend = map->addr + maplen(map, mr); +- for (pa = map->addr; pa < paend; pa += sglen) { ++ offset = mr->start > map->start ? mr->start - map->start : 0; ++ pa = map->addr + offset; ++ paend = map->addr + offset + maplen(map, mr); ++ for (; pa < paend; pa += sglen) { + pg = pfn_to_page(__phys_to_pfn(pa)); + if (!sg) { + mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n", diff --git a/queue-5.15/x86-mm-fix-a-kdump-kernel-failure-on-sme-system-when-config_ima_kexec-y.patch b/queue-5.15/x86-mm-fix-a-kdump-kernel-failure-on-sme-system-when-config_ima_kexec-y.patch new file mode 100644 index 00000000000..3a22abe9fad --- /dev/null +++ b/queue-5.15/x86-mm-fix-a-kdump-kernel-failure-on-sme-system-when-config_ima_kexec-y.patch @@ -0,0 +1,117 @@ +From 8d9ffb2fe65a6c4ef114e8d4f947958a12751bbe Mon Sep 17 00:00:00 2001 +From: Baoquan He +Date: Wed, 11 Sep 2024 16:16:15 +0800 +Subject: x86/mm: Fix a kdump kernel failure on SME system when CONFIG_IMA_KEXEC=y +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Baoquan He + +commit 8d9ffb2fe65a6c4ef114e8d4f947958a12751bbe upstream. + +The kdump kernel is broken on SME systems with CONFIG_IMA_KEXEC=y enabled. 
+Debugging traced the issue back to + + b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec"). + +Testing was previously not conducted on SME systems with CONFIG_IMA_KEXEC +enabled, which led to the oversight, with the following incarnation: + +... + ima: No TPM chip found, activating TPM-bypass! + Loading compiled-in module X.509 certificates + Loaded X.509 cert 'Build time autogenerated kernel key: 18ae0bc7e79b64700122bb1d6a904b070fef2656' + ima: Allocated hash algorithm: sha256 + Oops: general protection fault, probably for non-canonical address 0xcfacfdfe6660003e: 0000 [#1] PREEMPT SMP NOPTI + CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc2+ #14 + Hardware name: Dell Inc. PowerEdge R7425/02MJ3T, BIOS 1.20.0 05/03/2023 + RIP: 0010:ima_restore_measurement_list + Call Trace: + + ? show_trace_log_lvl + ? show_trace_log_lvl + ? ima_load_kexec_buffer + ? __die_body.cold + ? die_addr + ? exc_general_protection + ? asm_exc_general_protection + ? ima_restore_measurement_list + ? vprintk_emit + ? ima_load_kexec_buffer + ima_load_kexec_buffer + ima_init + ? __pfx_init_ima + init_ima + ? __pfx_init_ima + do_one_initcall + do_initcalls + ? __pfx_kernel_init + kernel_init_freeable + kernel_init + ret_from_fork + ? __pfx_kernel_init + ret_from_fork_asm + + Modules linked in: + ---[ end trace 0000000000000000 ]--- + ... + Kernel panic - not syncing: Fatal exception + Kernel Offset: disabled + Rebooting in 10 seconds.. + +Adding debug printks showed that the stored addr and size of ima_kexec buffer +are not decrypted correctly like: + + ima: ima_load_kexec_buffer, buffer:0xcfacfdfe6660003e, size:0xe48066052d5df359 + +Three types of setup_data info + + - SETUP_EFI, + - SETUP_IMA, and + - SETUP_RNG_SEED + +are passed to the kexec/kdump kernel. Only the ima_kexec buffer +experienced incorrect decryption. 
Debugging identified a bug in +early_memremap_is_setup_data(), where an incorrect range calculation +occurred due to the len variable in struct setup_data ended up only +representing the length of the data field, excluding the struct's size, +and thus leading to miscalculation. + +Address a similar issue in memremap_is_setup_data() while at it. + + [ bp: Heavily massage. ] + +Fixes: b3c72fc9a78e ("x86/boot: Introduce setup_indirect") +Signed-off-by: Baoquan He +Signed-off-by: Borislav Petkov (AMD) +Acked-by: Tom Lendacky +Cc: +Link: https://lore.kernel.org/r/20240911081615.262202-3-bhe@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/ioremap.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/mm/ioremap.c ++++ b/arch/x86/mm/ioremap.c +@@ -641,7 +641,8 @@ static bool memremap_is_setup_data(resou + paddr_next = data->next; + len = data->len; + +- if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { ++ if ((phys_addr > paddr) && ++ (phys_addr < (paddr + sizeof(struct setup_data) + len))) { + memunmap(data); + return true; + } +@@ -703,7 +704,8 @@ static bool __init early_memremap_is_set + paddr_next = data->next; + len = data->len; + +- if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { ++ if ((phys_addr > paddr) && ++ (phys_addr < (paddr + sizeof(struct setup_data) + len))) { + early_memunmap(data, sizeof(*data)); + return true; + } -- 2.47.2