git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.17-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Nov 2025 09:49:09 +0000 (10:49 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Nov 2025 09:49:09 +0000 (10:49 +0100)
added patches:
asoc-da7213-convert-to-define_runtime_dev_pm_ops.patch
asoc-da7213-use-component-driver-suspend-resume.patch
kvm-vmx-inject-ud-if-guest-tries-to-execute-seamcall-or-tdcall.patch
kvm-x86-add-support-for-rdmsr-wrmsrns-w-immediate-on-intel.patch
kvm-x86-rename-local-ecx-variables-to-msr-and-pmc-as-appropriate.patch
mm-huge_memory-do-not-change-split_huge_page-target-order-silently.patch
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
scripts-decode_stacktrace.sh-fix-build-id-and-pc-source-parsing.patch
scripts-decode_stacktrace.sh-symbol-avoid-trailing-whitespaces.patch
scripts-decode_stacktrace.sh-symbol-preserve-alignment.patch

queue-6.17/asoc-da7213-convert-to-define_runtime_dev_pm_ops.patch [new file with mode: 0644]
queue-6.17/asoc-da7213-use-component-driver-suspend-resume.patch [new file with mode: 0644]
queue-6.17/kvm-vmx-inject-ud-if-guest-tries-to-execute-seamcall-or-tdcall.patch [new file with mode: 0644]
queue-6.17/kvm-x86-add-support-for-rdmsr-wrmsrns-w-immediate-on-intel.patch [new file with mode: 0644]
queue-6.17/kvm-x86-rename-local-ecx-variables-to-msr-and-pmc-as-appropriate.patch [new file with mode: 0644]
queue-6.17/mm-huge_memory-do-not-change-split_huge_page-target-order-silently.patch [new file with mode: 0644]
queue-6.17/mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch [new file with mode: 0644]
queue-6.17/scripts-decode_stacktrace.sh-fix-build-id-and-pc-source-parsing.patch [new file with mode: 0644]
queue-6.17/scripts-decode_stacktrace.sh-symbol-avoid-trailing-whitespaces.patch [new file with mode: 0644]
queue-6.17/scripts-decode_stacktrace.sh-symbol-preserve-alignment.patch [new file with mode: 0644]
queue-6.17/series

diff --git a/queue-6.17/asoc-da7213-convert-to-define_runtime_dev_pm_ops.patch b/queue-6.17/asoc-da7213-convert-to-define_runtime_dev_pm_ops.patch
new file mode 100644 (file)
index 0000000..4f23e24
--- /dev/null
@@ -0,0 +1,40 @@
+From stable+bounces-195444-greg=kroah.com@vger.kernel.org Fri Nov 21 03:09:36 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Nov 2025 21:04:05 -0500
+Subject: ASoC: da7213: Convert to DEFINE_RUNTIME_DEV_PM_OPS()
+To: stable@vger.kernel.org
+Cc: Geert Uytterhoeven <geert+renesas@glider.be>, Mark Brown <broonie@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251121020406.2340125-1-sashal@kernel.org>
+
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+
+[ Upstream commit 2aa28b748fc967a2f2566c06bdad155fba8af7d8 ]
+
+Convert the Dialog DA7213 CODEC driver from an open-coded dev_pm_ops
+structure to DEFINE_RUNTIME_DEV_PM_OPS(), to simplify the code.
+
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Link: https://patch.msgid.link/0c001e0f7658c2d5f33faea963d6ca64f60ccea8.1756999876.git.geert+renesas@glider.be
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Stable-dep-of: 249d96b492ef ("ASoC: da7213: Use component driver suspend/resume")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/codecs/da7213.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/sound/soc/codecs/da7213.c
++++ b/sound/soc/codecs/da7213.c
+@@ -2247,10 +2247,8 @@ static int da7213_runtime_resume(struct
+       return regcache_sync(da7213->regmap);
+ }
+-static const struct dev_pm_ops da7213_pm = {
+-      RUNTIME_PM_OPS(da7213_runtime_suspend, da7213_runtime_resume, NULL)
+-      SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+-};
++static DEFINE_RUNTIME_DEV_PM_OPS(da7213_pm, da7213_runtime_suspend,
++                               da7213_runtime_resume, NULL);
+ static const struct i2c_device_id da7213_i2c_id[] = {
+       { "da7213" },
diff --git a/queue-6.17/asoc-da7213-use-component-driver-suspend-resume.patch b/queue-6.17/asoc-da7213-use-component-driver-suspend-resume.patch
new file mode 100644 (file)
index 0000000..f2f7fc5
--- /dev/null
@@ -0,0 +1,147 @@
+From stable+bounces-195445-greg=kroah.com@vger.kernel.org Fri Nov 21 03:09:42 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Nov 2025 21:04:06 -0500
+Subject: ASoC: da7213: Use component driver suspend/resume
+To: stable@vger.kernel.org
+Cc: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>, Mark Brown <broonie@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251121020406.2340125-2-sashal@kernel.org>
+
+From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+
+[ Upstream commit 249d96b492efb7a773296ab2c62179918301c146 ]
+
+Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(),
+and snd_soc_pm_ops is associated with the soc_driver (defined in
+sound/soc/soc-core.c), and there is no parent-child relationship between
+the soc_driver and the DA7213 codec driver, the power management subsystem
+does not enforce a specific suspend/resume order between the DA7213 driver
+and the soc_driver.
+
+Because of this, the different codec component functionalities, called from
+snd_soc_resume() to reconfigure various functions, can race with the
+DA7213 struct dev_pm_ops::resume function, leading to misapplied
+configuration. This occasionally results in clipped sound.
+
+Fix this by dropping the struct dev_pm_ops::{suspend, resume} and use
+instead struct snd_soc_component_driver::{suspend, resume}. This ensures
+the proper configuration sequence is handled by the ASoC subsystem.
+
+Cc: stable@vger.kernel.org
+Fixes: 431e040065c8 ("ASoC: da7213: Add suspend to RAM support")
+Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+Link: https://patch.msgid.link/20251104114914.2060603-1-claudiu.beznea.uj@bp.renesas.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/codecs/da7213.c |   69 +++++++++++++++++++++++++++++-----------------
+ sound/soc/codecs/da7213.h |    1 
+ 2 files changed, 45 insertions(+), 25 deletions(-)
+
+--- a/sound/soc/codecs/da7213.c
++++ b/sound/soc/codecs/da7213.c
+@@ -2124,11 +2124,50 @@ static int da7213_probe(struct snd_soc_c
+       return 0;
+ }
++static int da7213_runtime_suspend(struct device *dev)
++{
++      struct da7213_priv *da7213 = dev_get_drvdata(dev);
++
++      regcache_cache_only(da7213->regmap, true);
++      regcache_mark_dirty(da7213->regmap);
++      regulator_bulk_disable(DA7213_NUM_SUPPLIES, da7213->supplies);
++
++      return 0;
++}
++
++static int da7213_runtime_resume(struct device *dev)
++{
++      struct da7213_priv *da7213 = dev_get_drvdata(dev);
++      int ret;
++
++      ret = regulator_bulk_enable(DA7213_NUM_SUPPLIES, da7213->supplies);
++      if (ret < 0)
++              return ret;
++      regcache_cache_only(da7213->regmap, false);
++      return regcache_sync(da7213->regmap);
++}
++
++static int da7213_suspend(struct snd_soc_component *component)
++{
++      struct da7213_priv *da7213 = snd_soc_component_get_drvdata(component);
++
++      return da7213_runtime_suspend(da7213->dev);
++}
++
++static int da7213_resume(struct snd_soc_component *component)
++{
++      struct da7213_priv *da7213 = snd_soc_component_get_drvdata(component);
++
++      return da7213_runtime_resume(da7213->dev);
++}
++
+ static const struct snd_soc_component_driver soc_component_dev_da7213 = {
+       .probe                  = da7213_probe,
+       .set_bias_level         = da7213_set_bias_level,
+       .controls               = da7213_snd_controls,
+       .num_controls           = ARRAY_SIZE(da7213_snd_controls),
++      .suspend                = da7213_suspend,
++      .resume                 = da7213_resume,
+       .dapm_widgets           = da7213_dapm_widgets,
+       .num_dapm_widgets       = ARRAY_SIZE(da7213_dapm_widgets),
+       .dapm_routes            = da7213_audio_map,
+@@ -2175,6 +2214,8 @@ static int da7213_i2c_probe(struct i2c_c
+       if (!da7213->fin_min_rate)
+               return -EINVAL;
++      da7213->dev = &i2c->dev;
++
+       i2c_set_clientdata(i2c, da7213);
+       /* Get required supplies */
+@@ -2224,31 +2265,9 @@ static void da7213_i2c_remove(struct i2c
+       pm_runtime_disable(&i2c->dev);
+ }
+-static int da7213_runtime_suspend(struct device *dev)
+-{
+-      struct da7213_priv *da7213 = dev_get_drvdata(dev);
+-
+-      regcache_cache_only(da7213->regmap, true);
+-      regcache_mark_dirty(da7213->regmap);
+-      regulator_bulk_disable(DA7213_NUM_SUPPLIES, da7213->supplies);
+-
+-      return 0;
+-}
+-
+-static int da7213_runtime_resume(struct device *dev)
+-{
+-      struct da7213_priv *da7213 = dev_get_drvdata(dev);
+-      int ret;
+-
+-      ret = regulator_bulk_enable(DA7213_NUM_SUPPLIES, da7213->supplies);
+-      if (ret < 0)
+-              return ret;
+-      regcache_cache_only(da7213->regmap, false);
+-      return regcache_sync(da7213->regmap);
+-}
+-
+-static DEFINE_RUNTIME_DEV_PM_OPS(da7213_pm, da7213_runtime_suspend,
+-                               da7213_runtime_resume, NULL);
++static const struct dev_pm_ops da7213_pm = {
++      RUNTIME_PM_OPS(da7213_runtime_suspend, da7213_runtime_resume, NULL)
++};
+ static const struct i2c_device_id da7213_i2c_id[] = {
+       { "da7213" },
+--- a/sound/soc/codecs/da7213.h
++++ b/sound/soc/codecs/da7213.h
+@@ -595,6 +595,7 @@ enum da7213_supplies {
+ /* Codec private data */
+ struct da7213_priv {
+       struct regmap *regmap;
++      struct device *dev;
+       struct mutex ctrl_lock;
+       struct regulator_bulk_data supplies[DA7213_NUM_SUPPLIES];
+       struct clk *mclk;
diff --git a/queue-6.17/kvm-vmx-inject-ud-if-guest-tries-to-execute-seamcall-or-tdcall.patch b/queue-6.17/kvm-vmx-inject-ud-if-guest-tries-to-execute-seamcall-or-tdcall.patch
new file mode 100644 (file)
index 0000000..74fbe68
--- /dev/null
@@ -0,0 +1,109 @@
+From stable+bounces-195398-greg=kroah.com@vger.kernel.org Thu Nov 20 20:07:31 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Nov 2025 14:07:07 -0500
+Subject: KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL
+To: stable@vger.kernel.org
+Cc: Sean Christopherson <seanjc@google.com>, Kai Huang <kai.huang@intel.com>, Xiaoyao Li <xiaoyao.li@intel.com>, Rick Edgecombe <rick.p.edgecombe@intel.com>, Dan Williams <dan.j.williams@intel.com>, Binbin Wu <binbin.wu@linux.intel.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251120190708.2275081-3-sashal@kernel.org>
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 9d7dfb95da2cb5c1287df2f3468bcb70d8b31087 ]
+
+Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD
+guest attempts to execute SEAMCALL or TDCALL.  Neither SEAMCALL nor TDCALL
+is gated by any software enablement other than VMXON, and so will generate
+a VM-Exit instead of e.g. a native #UD when executed from the guest kernel.
+
+Note!  No unprivileged DoS of the L1 kernel is possible as TDCALL and
+SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX
+non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a
+nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can
+crash itself, but not L1.
+
+Note #2!  The Intel® Trust Domain CPU Architectural Extensions spec's
+pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit,
+but that appears to be a documentation bug (likely because the CPL > 0
+check was incorrectly bundled with other lower-priority #GP checks).
+Testing on SPR and EMR shows that the CPL > 0 check is performed before
+the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode.
+
+Note #3!  The aforementioned Trust Domain spec uses confusing pseudocode
+that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM"
+specifically means in SEAM Root Mode, i.e. in the TDX-Module.  The long-
+form description explicitly states that SEAMCALL generates an exit when
+executed in "SEAM VMX non-root operation".  But that's a moot point as the
+TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as
+documented in the "Unconditionally Blocked Instructions" section of the
+TDX-Module base specification.
+
+Cc: stable@vger.kernel.org
+Cc: Kai Huang <kai.huang@intel.com>
+Cc: Xiaoyao Li <xiaoyao.li@intel.com>
+Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Binbin Wu <binbin.wu@linux.intel.com>
+Reviewed-by: Kai Huang <kai.huang@intel.com>
+Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
+Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Link: https://lore.kernel.org/r/20251016182148.69085-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/uapi/asm/vmx.h |    1 +
+ arch/x86/kvm/vmx/nested.c       |    8 ++++++++
+ arch/x86/kvm/vmx/vmx.c          |    8 ++++++++
+ 3 files changed, 17 insertions(+)
+
+--- a/arch/x86/include/uapi/asm/vmx.h
++++ b/arch/x86/include/uapi/asm/vmx.h
+@@ -93,6 +93,7 @@
+ #define EXIT_REASON_TPAUSE              68
+ #define EXIT_REASON_BUS_LOCK            74
+ #define EXIT_REASON_NOTIFY              75
++#define EXIT_REASON_SEAMCALL            76
+ #define EXIT_REASON_TDCALL              77
+ #define EXIT_REASON_MSR_READ_IMM        84
+ #define EXIT_REASON_MSR_WRITE_IMM       85
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -6587,6 +6587,14 @@ static bool nested_vmx_l1_wants_exit(str
+       case EXIT_REASON_NOTIFY:
+               /* Notify VM exit is not exposed to L1 */
+               return false;
++      case EXIT_REASON_SEAMCALL:
++      case EXIT_REASON_TDCALL:
++              /*
++               * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
++               * virtualized by KVM for L1 hypervisors, i.e. L1 should
++               * never want or expect such an exit.
++               */
++              return false;
+       default:
+               return true;
+       }
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -5953,6 +5953,12 @@ static int handle_vmx_instruction(struct
+       return 1;
+ }
++static int handle_tdx_instruction(struct kvm_vcpu *vcpu)
++{
++      kvm_queue_exception(vcpu, UD_VECTOR);
++      return 1;
++}
++
+ #ifndef CONFIG_X86_SGX_KVM
+ static int handle_encls(struct kvm_vcpu *vcpu)
+ {
+@@ -6078,6 +6084,8 @@ static int (*kvm_vmx_exit_handlers[])(st
+       [EXIT_REASON_ENCLS]                   = handle_encls,
+       [EXIT_REASON_BUS_LOCK]                = handle_bus_lock_vmexit,
+       [EXIT_REASON_NOTIFY]                  = handle_notify,
++      [EXIT_REASON_SEAMCALL]                = handle_tdx_instruction,
++      [EXIT_REASON_TDCALL]                  = handle_tdx_instruction,
+       [EXIT_REASON_MSR_READ_IMM]            = handle_rdmsr_imm,
+       [EXIT_REASON_MSR_WRITE_IMM]           = handle_wrmsr_imm,
+ };
diff --git a/queue-6.17/kvm-x86-add-support-for-rdmsr-wrmsrns-w-immediate-on-intel.patch b/queue-6.17/kvm-x86-add-support-for-rdmsr-wrmsrns-w-immediate-on-intel.patch
new file mode 100644 (file)
index 0000000..a94403b
--- /dev/null
@@ -0,0 +1,310 @@
+From stable+bounces-195397-greg=kroah.com@vger.kernel.org Thu Nov 20 20:07:25 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Nov 2025 14:07:06 -0500
+Subject: KVM: x86: Add support for RDMSR/WRMSRNS w/ immediate on Intel
+To: stable@vger.kernel.org
+Cc: Xin Li <xin@zytor.com>, Sean Christopherson <seanjc@google.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251120190708.2275081-2-sashal@kernel.org>
+
+From: Xin Li <xin@zytor.com>
+
+[ Upstream commit 885df2d2109a60f84d84639ce6d95a91045f6c45 ]
+
+Add support for the immediate forms of RDMSR and WRMSRNS (currently
+Intel-only).  The immediate variants are only valid in 64-bit mode, and
+use a single general purpose register for the data (the register is also
+encoded in the instruction, i.e. not implicit like regular RDMSR/WRMSR).
+
+The immediate variants are primarily motivated by performance, not code
+size: by having the MSR index in an immediate, it is available *much*
+earlier in the CPU pipeline, which allows hardware much more leeway about
+how a particular MSR is handled.
+
+Intel VMX support for the immediate forms of MSR accesses communicates
+exit information to the host as follows:
+
+  1) The immediate form of RDMSR uses VM-Exit Reason 84.
+
+  2) The immediate form of WRMSRNS uses VM-Exit Reason 85.
+
+  3) For both VM-Exit reasons 84 and 85, the Exit Qualification field is
+     set to the MSR index that triggered the VM-Exit.
+
+  4) Bits 3 ~ 6 of the VM-Exit Instruction Information field are set to
+     the register encoding used by the immediate form of the instruction,
+     i.e. the destination register for RDMSR, and the source for WRMSRNS.
+
+  5) The VM-Exit Instruction Length field records the size of the
+     immediate form of the MSR instruction.
+
+To deal with userspace RDMSR exits, stash the destination register in a
+new kvm_vcpu_arch field, similar to cui_linear_rip, pio, etc.
+Alternatively, the register could be saved in kvm_run.msr or re-retrieved
+from the VMCS, but the former would require sanitizing the value to ensure
+userspace doesn't clobber the value to an out-of-bounds index, and the
+latter would require a new one-off kvm_x86_ops hook.
+
+Don't bother adding support for the instructions in KVM's emulator, as the
+only way for RDMSR/WRMSR to be encountered is if KVM is emulating large
+swaths of code due to invalid guest state, and a vCPU cannot have invalid
+guest state while in 64-bit mode.
+
+Signed-off-by: Xin Li (Intel) <xin@zytor.com>
+[sean: minor tweaks, massage and expand changelog]
+Link: https://lore.kernel.org/r/20250805202224.1475590-5-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Stable-dep-of: 9d7dfb95da2c ("KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    3 ++
+ arch/x86/include/uapi/asm/vmx.h |    6 +++-
+ arch/x86/kvm/vmx/nested.c       |   13 ++++++++-
+ arch/x86/kvm/vmx/vmx.c          |   21 +++++++++++++++
+ arch/x86/kvm/vmx/vmx.h          |    5 +++
+ arch/x86/kvm/x86.c              |   55 ++++++++++++++++++++++++++++++++--------
+ 6 files changed, 90 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -926,6 +926,7 @@ struct kvm_vcpu_arch {
+       bool emulate_regs_need_sync_from_vcpu;
+       int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
+       unsigned long cui_linear_rip;
++      int cui_rdmsr_imm_reg;
+       gpa_t time;
+       s8  pvclock_tsc_shift;
+@@ -2155,7 +2156,9 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu,
+ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
++int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
+ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
++int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
+ int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
+ int kvm_emulate_invd(struct kvm_vcpu *vcpu);
+ int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
+--- a/arch/x86/include/uapi/asm/vmx.h
++++ b/arch/x86/include/uapi/asm/vmx.h
+@@ -94,6 +94,8 @@
+ #define EXIT_REASON_BUS_LOCK            74
+ #define EXIT_REASON_NOTIFY              75
+ #define EXIT_REASON_TDCALL              77
++#define EXIT_REASON_MSR_READ_IMM        84
++#define EXIT_REASON_MSR_WRITE_IMM       85
+ #define VMX_EXIT_REASONS \
+       { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
+@@ -158,7 +160,9 @@
+       { EXIT_REASON_TPAUSE,                "TPAUSE" }, \
+       { EXIT_REASON_BUS_LOCK,              "BUS_LOCK" }, \
+       { EXIT_REASON_NOTIFY,                "NOTIFY" }, \
+-      { EXIT_REASON_TDCALL,                "TDCALL" }
++      { EXIT_REASON_TDCALL,                "TDCALL" }, \
++      { EXIT_REASON_MSR_READ_IMM,          "MSR_READ_IMM" }, \
++      { EXIT_REASON_MSR_WRITE_IMM,         "MSR_WRITE_IMM" }
+ #define VMX_EXIT_REASON_FLAGS \
+       { VMX_EXIT_REASONS_FAILED_VMENTRY,      "FAILED_VMENTRY" }
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -6216,19 +6216,26 @@ static bool nested_vmx_exit_handled_msr(
+                                       struct vmcs12 *vmcs12,
+                                       union vmx_exit_reason exit_reason)
+ {
+-      u32 msr_index = kvm_rcx_read(vcpu);
++      u32 msr_index;
+       gpa_t bitmap;
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+               return true;
++      if (exit_reason.basic == EXIT_REASON_MSR_READ_IMM ||
++          exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
++              msr_index = vmx_get_exit_qual(vcpu);
++      else
++              msr_index = kvm_rcx_read(vcpu);
++
+       /*
+        * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
+        * for the four combinations of read/write and low/high MSR numbers.
+        * First we need to figure out which of the four to use:
+        */
+       bitmap = vmcs12->msr_bitmap;
+-      if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
++      if (exit_reason.basic == EXIT_REASON_MSR_WRITE ||
++          exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+               bitmap += 2048;
+       if (msr_index >= 0xc0000000) {
+               msr_index -= 0xc0000000;
+@@ -6527,6 +6534,8 @@ static bool nested_vmx_l1_wants_exit(str
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
+       case EXIT_REASON_MSR_READ:
+       case EXIT_REASON_MSR_WRITE:
++      case EXIT_REASON_MSR_READ_IMM:
++      case EXIT_REASON_MSR_WRITE_IMM:
+               return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
+       case EXIT_REASON_INVALID_STATE:
+               return true;
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6003,6 +6003,23 @@ static int handle_notify(struct kvm_vcpu
+       return 1;
+ }
++static int vmx_get_msr_imm_reg(struct kvm_vcpu *vcpu)
++{
++      return vmx_get_instr_info_reg(vmcs_read32(VMX_INSTRUCTION_INFO));
++}
++
++static int handle_rdmsr_imm(struct kvm_vcpu *vcpu)
++{
++      return kvm_emulate_rdmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
++                                   vmx_get_msr_imm_reg(vcpu));
++}
++
++static int handle_wrmsr_imm(struct kvm_vcpu *vcpu)
++{
++      return kvm_emulate_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
++                                   vmx_get_msr_imm_reg(vcpu));
++}
++
+ /*
+  * The exit handlers return 1 if the exit was handled fully and guest execution
+  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
+@@ -6061,6 +6078,8 @@ static int (*kvm_vmx_exit_handlers[])(st
+       [EXIT_REASON_ENCLS]                   = handle_encls,
+       [EXIT_REASON_BUS_LOCK]                = handle_bus_lock_vmexit,
+       [EXIT_REASON_NOTIFY]                  = handle_notify,
++      [EXIT_REASON_MSR_READ_IMM]            = handle_rdmsr_imm,
++      [EXIT_REASON_MSR_WRITE_IMM]           = handle_wrmsr_imm,
+ };
+ static const int kvm_vmx_max_exit_handlers =
+@@ -6495,6 +6514,8 @@ static int __vmx_handle_exit(struct kvm_
+ #ifdef CONFIG_MITIGATION_RETPOLINE
+       if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
+               return kvm_emulate_wrmsr(vcpu);
++      else if (exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
++              return handle_wrmsr_imm(vcpu);
+       else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
+               return handle_preemption_timer(vcpu);
+       else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -706,6 +706,11 @@ static inline bool vmx_guest_state_valid
+ void dump_vmcs(struct kvm_vcpu *vcpu);
++static inline int vmx_get_instr_info_reg(u32 vmx_instr_info)
++{
++      return (vmx_instr_info >> 3) & 0xf;
++}
++
+ static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
+ {
+       return (vmx_instr_info >> 28) & 0xf;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1997,6 +1997,15 @@ static int complete_fast_rdmsr(struct kv
+       return complete_fast_msr_access(vcpu);
+ }
++static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu)
++{
++      if (!vcpu->run->msr.error)
++              kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg,
++                                 vcpu->run->msr.data);
++
++      return complete_fast_msr_access(vcpu);
++}
++
+ static u64 kvm_msr_reason(int r)
+ {
+       switch (r) {
+@@ -2031,39 +2040,53 @@ static int kvm_msr_user_space(struct kvm
+       return 1;
+ }
+-int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
++static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg,
++                             int (*complete_rdmsr)(struct kvm_vcpu *))
+ {
+-      u32 msr = kvm_rcx_read(vcpu);
+       u64 data;
+       int r;
+       r = kvm_get_msr_with_filter(vcpu, msr, &data);
+-
+       if (!r) {
+               trace_kvm_msr_read(msr, data);
+-              kvm_rax_write(vcpu, data & -1u);
+-              kvm_rdx_write(vcpu, (data >> 32) & -1u);
++              if (reg < 0) {
++                      kvm_rax_write(vcpu, data & -1u);
++                      kvm_rdx_write(vcpu, (data >> 32) & -1u);
++              } else {
++                      kvm_register_write(vcpu, reg, data);
++              }
+       } else {
+               /* MSR read failed? See if we should ask user space */
+               if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0,
+-                                     complete_fast_rdmsr, r))
++                                     complete_rdmsr, r))
+                       return 0;
+               trace_kvm_msr_read_ex(msr);
+       }
+       return kvm_x86_call(complete_emulated_msr)(vcpu, r);
+ }
++
++int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
++{
++      return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1,
++                                 complete_fast_rdmsr);
++}
+ EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
+-int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
++int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
++{
++      vcpu->arch.cui_rdmsr_imm_reg = reg;
++
++      return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm);
++}
++EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm);
++
++static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+ {
+-      u32 msr = kvm_rcx_read(vcpu);
+-      u64 data = kvm_read_edx_eax(vcpu);
+       int r;
+       r = kvm_set_msr_with_filter(vcpu, msr, data);
+-
+       if (!r) {
+               trace_kvm_msr_write(msr, data);
+       } else {
+@@ -2079,8 +2102,20 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *v
+       return kvm_x86_call(complete_emulated_msr)(vcpu, r);
+ }
++
++int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
++{
++      return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu),
++                                 kvm_read_edx_eax(vcpu));
++}
+ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
++int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
++{
++      return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
++}
++EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm);
++
+ int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
+ {
+       return kvm_skip_emulated_instruction(vcpu);
diff --git a/queue-6.17/kvm-x86-rename-local-ecx-variables-to-msr-and-pmc-as-appropriate.patch b/queue-6.17/kvm-x86-rename-local-ecx-variables-to-msr-and-pmc-as-appropriate.patch
new file mode 100644 (file)
index 0000000..942ad6e
--- /dev/null
@@ -0,0 +1,101 @@
+From stable+bounces-195396-greg=kroah.com@vger.kernel.org Thu Nov 20 20:07:26 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Nov 2025 14:07:05 -0500
+Subject: KVM: x86: Rename local "ecx" variables to "msr" and "pmc" as appropriate
+To: stable@vger.kernel.org
+Cc: Sean Christopherson <seanjc@google.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251120190708.2275081-1-sashal@kernel.org>
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit ec400f6c2f2703cb6c698dd00b28cfdb8ee5cdcc ]
+
+Rename "ecx" variables in {RD,WR}MSR and RDPMC helpers to "msr" and "pmc"
+respectively, in anticipation of adding support for the immediate variants
+of RDMSR and WRMSRNS, and to better document what the variables hold
+(versus where the data originated).
+
+No functional change intended.
+
+Link: https://lore.kernel.org/r/20250805202224.1475590-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Stable-dep-of: 9d7dfb95da2c ("KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1579,10 +1579,10 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
+ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
+ {
+-      u32 ecx = kvm_rcx_read(vcpu);
++      u32 pmc = kvm_rcx_read(vcpu);
+       u64 data;
+-      if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
++      if (kvm_pmu_rdpmc(vcpu, pmc, &data)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+@@ -2033,23 +2033,23 @@ static int kvm_msr_user_space(struct kvm
+ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+ {
+-      u32 ecx = kvm_rcx_read(vcpu);
++      u32 msr = kvm_rcx_read(vcpu);
+       u64 data;
+       int r;
+-      r = kvm_get_msr_with_filter(vcpu, ecx, &data);
++      r = kvm_get_msr_with_filter(vcpu, msr, &data);
+       if (!r) {
+-              trace_kvm_msr_read(ecx, data);
++              trace_kvm_msr_read(msr, data);
+               kvm_rax_write(vcpu, data & -1u);
+               kvm_rdx_write(vcpu, (data >> 32) & -1u);
+       } else {
+               /* MSR read failed? See if we should ask user space */
+-              if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
++              if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0,
+                                      complete_fast_rdmsr, r))
+                       return 0;
+-              trace_kvm_msr_read_ex(ecx);
++              trace_kvm_msr_read_ex(msr);
+       }
+       return kvm_x86_call(complete_emulated_msr)(vcpu, r);
+@@ -2058,23 +2058,23 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
+ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+ {
+-      u32 ecx = kvm_rcx_read(vcpu);
++      u32 msr = kvm_rcx_read(vcpu);
+       u64 data = kvm_read_edx_eax(vcpu);
+       int r;
+-      r = kvm_set_msr_with_filter(vcpu, ecx, data);
++      r = kvm_set_msr_with_filter(vcpu, msr, data);
+       if (!r) {
+-              trace_kvm_msr_write(ecx, data);
++              trace_kvm_msr_write(msr, data);
+       } else {
+               /* MSR write failed? See if we should ask user space */
+-              if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
++              if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data,
+                                      complete_fast_msr_access, r))
+                       return 0;
+               /* Signal all other negative errors to userspace */
+               if (r < 0)
+                       return r;
+-              trace_kvm_msr_write_ex(ecx, data);
++              trace_kvm_msr_write_ex(msr, data);
+       }
+       return kvm_x86_call(complete_emulated_msr)(vcpu, r);
diff --git a/queue-6.17/mm-huge_memory-do-not-change-split_huge_page-target-order-silently.patch b/queue-6.17/mm-huge_memory-do-not-change-split_huge_page-target-order-silently.patch
new file mode 100644 (file)
index 0000000..0d3398e
--- /dev/null
@@ -0,0 +1,200 @@
+From 77008e1b2ef73249bceb078a321a3ff6bc087afb Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Thu, 16 Oct 2025 21:36:30 -0400
+Subject: mm/huge_memory: do not change split_huge_page*() target order silently
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit 77008e1b2ef73249bceb078a321a3ff6bc087afb upstream.
+
+Page cache folios from a file system that support large block size (LBS)
+can have minimal folio order greater than 0, thus a high order folio might
+not be able to be split down to order-0.  Commit e220917fa507 ("mm: split
+a folio in minimum folio order chunks") bumps the target order of
+split_huge_page*() to the minimum allowed order when splitting a LBS
+folio.  This causes confusion for some split_huge_page*() callers like
+memory failure handling code, since they expect after-split folios all
+have order-0 when split succeeds but in reality get min_order_for_split()
+order folios and give warnings.
+
+Fix it by failing a split if the folio cannot be split to the target
+order.  Rename try_folio_split() to try_folio_split_to_order() to reflect
+the added new_order parameter.  Remove its unused list parameter.
+
+[The test poisons LBS folios, which cannot be split to order-0 folios, and
+also tries to poison all memory.  The non split LBS folios take more
+memory than the test anticipated, leading to OOM.  The patch fixed the
+kernel warning and the test needs some change to avoid OOM.]
+
+Link: https://lkml.kernel.org/r/20251017013630.139907-1-ziy@nvidia.com
+Fixes: e220917fa507 ("mm: split a folio in minimum folio order chunks")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Reported-by: syzbot+e6367ea2fdab6ed46056@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/68d2c943.a70a0220.1b52b.02b3.GAE@google.com/
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
+Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jane Chu <jane.chu@oracle.com>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Mariano Pache <npache@redhat.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/huge_mm.h |   55 ++++++++++++++++++++----------------------------
+ mm/huge_memory.c        |    9 -------
+ mm/truncate.c           |    6 +++--
+ 3 files changed, 28 insertions(+), 42 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -354,45 +354,30 @@ bool non_uniform_split_supported(struct
+ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
+               struct list_head *list);
+ /*
+- * try_folio_split - try to split a @folio at @page using non uniform split.
++ * try_folio_split_to_order - try to split a @folio at @page to @new_order using
++ * non uniform split.
+  * @folio: folio to be split
+- * @page: split to order-0 at the given page
+- * @list: store the after-split folios
++ * @page: split to @new_order at the given page
++ * @new_order: the target split order
+  *
+- * Try to split a @folio at @page using non uniform split to order-0, if
+- * non uniform split is not supported, fall back to uniform split.
++ * Try to split a @folio at @page using non uniform split to @new_order, if
++ * non uniform split is not supported, fall back to uniform split. After-split
++ * folios are put back to LRU list. Use min_order_for_split() to get the lower
++ * bound of @new_order.
+  *
+  * Return: 0: split is successful, otherwise split failed.
+  */
+-static inline int try_folio_split(struct folio *folio, struct page *page,
+-              struct list_head *list)
++static inline int try_folio_split_to_order(struct folio *folio,
++              struct page *page, unsigned int new_order)
+ {
+-      int ret = min_order_for_split(folio);
+-
+-      if (ret < 0)
+-              return ret;
+-
+-      if (!non_uniform_split_supported(folio, 0, false))
+-              return split_huge_page_to_list_to_order(&folio->page, list,
+-                              ret);
+-      return folio_split(folio, ret, page, list);
++      if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
++              return split_huge_page_to_list_to_order(&folio->page, NULL,
++                              new_order);
++      return folio_split(folio, new_order, page, NULL);
+ }
+ static inline int split_huge_page(struct page *page)
+ {
+-      struct folio *folio = page_folio(page);
+-      int ret = min_order_for_split(folio);
+-
+-      if (ret < 0)
+-              return ret;
+-
+-      /*
+-       * split_huge_page() locks the page before splitting and
+-       * expects the same page that has been split to be locked when
+-       * returned. split_folio(page_folio(page)) cannot be used here
+-       * because it converts the page to folio and passes the head
+-       * page to be split.
+-       */
+-      return split_huge_page_to_list_to_order(page, NULL, ret);
++      return split_huge_page_to_list_to_order(page, NULL, 0);
+ }
+ void deferred_split_folio(struct folio *folio, bool partially_mapped);
+@@ -560,13 +545,19 @@ static inline int split_huge_page(struct
+       return 0;
+ }
++static inline int min_order_for_split(struct folio *folio)
++{
++      VM_WARN_ON_ONCE_FOLIO(1, folio);
++      return -EINVAL;
++}
++
+ static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
+ {
+       return 0;
+ }
+-static inline int try_folio_split(struct folio *folio, struct page *page,
+-              struct list_head *list)
++static inline int try_folio_split_to_order(struct folio *folio,
++              struct page *page, unsigned int new_order)
+ {
+       return 0;
+ }
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -3680,8 +3680,6 @@ static int __folio_split(struct folio *f
+               min_order = mapping_min_folio_order(folio->mapping);
+               if (new_order < min_order) {
+-                      VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
+-                                   min_order);
+                       ret = -EINVAL;
+                       goto out;
+               }
+@@ -4016,12 +4014,7 @@ int min_order_for_split(struct folio *fo
+ int split_folio_to_list(struct folio *folio, struct list_head *list)
+ {
+-      int ret = min_order_for_split(folio);
+-
+-      if (ret < 0)
+-              return ret;
+-
+-      return split_huge_page_to_list_to_order(&folio->page, list, ret);
++      return split_huge_page_to_list_to_order(&folio->page, list, 0);
+ }
+ /*
+--- a/mm/truncate.c
++++ b/mm/truncate.c
+@@ -194,6 +194,7 @@ bool truncate_inode_partial_folio(struct
+       size_t size = folio_size(folio);
+       unsigned int offset, length;
+       struct page *split_at, *split_at2;
++      unsigned int min_order;
+       if (pos < start)
+               offset = start - pos;
+@@ -223,8 +224,9 @@ bool truncate_inode_partial_folio(struct
+       if (!folio_test_large(folio))
+               return true;
++      min_order = mapping_min_folio_order(folio->mapping);
+       split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
+-      if (!try_folio_split(folio, split_at, NULL)) {
++      if (!try_folio_split_to_order(folio, split_at, min_order)) {
+               /*
+                * try to split at offset + length to make sure folios within
+                * the range can be dropped, especially to avoid memory waste
+@@ -254,7 +256,7 @@ bool truncate_inode_partial_folio(struct
+                */
+               if (folio_test_large(folio2) &&
+                   folio2->mapping == folio->mapping)
+-                      try_folio_split(folio2, split_at2, NULL);
++                      try_folio_split_to_order(folio2, split_at2, min_order);
+               folio_unlock(folio2);
+ out:
diff --git a/queue-6.17/mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch b/queue-6.17/mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
new file mode 100644 (file)
index 0000000..de54e7d
--- /dev/null
@@ -0,0 +1,182 @@
+From 74207de2ba10c2973334906822dc94d2e859ffc5 Mon Sep 17 00:00:00 2001
+From: Kiryl Shutsemau <kas@kernel.org>
+Date: Mon, 27 Oct 2025 11:56:35 +0000
+Subject: mm/memory: do not populate page table entries beyond i_size
+
+From: Kiryl Shutsemau <kas@kernel.org>
+
+commit 74207de2ba10c2973334906822dc94d2e859ffc5 upstream.
+
+Patch series "Fix SIGBUS semantics with large folios", v3.
+
+Accessing memory within a VMA, but beyond i_size rounded up to the next
+page size, is supposed to generate SIGBUS.
+
+Darrick reported[1] an xfstests regression in v6.18-rc1.  generic/749
+failed due to missing SIGBUS.  This was caused by my recent changes that
+try to fault in the whole folio where possible:
+
+        19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
+        357b92761d94 ("mm/filemap: map entire large folio faultaround")
+
+These changes did not consider i_size when setting up PTEs, leading to
+xfstest breakage.
+
+However, the problem has been present in the kernel for a long time -
+since huge tmpfs was introduced in 2016.  The kernel happily maps
+PMD-sized folios as PMD without checking i_size.  And huge=always tmpfs
+allocates PMD-size folios on any writes.
+
+I considered this corner case when I implemented a large tmpfs, and my
+conclusion was that no one in their right mind should rely on receiving a
+SIGBUS signal when accessing beyond i_size.  I cannot imagine how it could
+be useful for the workload.
+
+But apparently filesystem folks care a lot about preserving strict SIGBUS
+semantics.
+
+Generic/749 was introduced last year with reference to POSIX, but no real
+workloads were mentioned.  It also acknowledged the tmpfs deviation from
+the test case.
+
+POSIX indeed says[3]:
+
+        References within the address range starting at pa and
+        continuing for len bytes to whole pages following the end of an
+        object shall result in delivery of a SIGBUS signal.
+
+The patchset fixes the regression introduced by recent changes as well as
+more subtle SIGBUS breakage due to split failure on truncation.
+
+
+This patch (of 2):
+
+Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
+supposed to generate SIGBUS.
+
+Recent changes attempted to fault in full folio where possible.  They did
+not respect i_size, which led to populating PTEs beyond i_size and
+breaking SIGBUS semantics.
+
+Darrick reported generic/749 breakage because of this.
+
+However, the problem existed before the recent changes.  With huge=always
+tmpfs, any write to a file leads to PMD-size allocation.  Following the
+fault-in of the folio will install PMD mapping regardless of i_size.
+
+Fix filemap_map_pages() and finish_fault() to not install:
+  - PTEs beyond i_size;
+  - PMD mappings across i_size;
+
+Make an exception for shmem/tmpfs that for long time intentionally
+mapped with PMDs across i_size.
+
+Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
+Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Fixes: 6795801366da ("xfs: Support large folios")
+Reported-by: "Darrick J. Wong" <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |   20 +++++++++++++++-----
+ mm/memory.c  |   20 +++++++++++++++++++-
+ 2 files changed, 34 insertions(+), 6 deletions(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3743,13 +3743,27 @@ vm_fault_t filemap_map_pages(struct vm_f
+       unsigned long rss = 0;
+       unsigned int nr_pages = 0, folio_type;
+       unsigned short mmap_miss = 0, mmap_miss_saved;
++      bool can_map_large;
+       rcu_read_lock();
+       folio = next_uptodate_folio(&xas, mapping, end_pgoff);
+       if (!folio)
+               goto out;
+-      if (filemap_map_pmd(vmf, folio, start_pgoff)) {
++      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
++      end_pgoff = min(end_pgoff, file_end);
++
++      /*
++       * Do not allow to map with PTEs beyond i_size and with PMD
++       * across i_size to preserve SIGBUS semantics.
++       *
++       * Make an exception for shmem/tmpfs that for long time
++       * intentionally mapped with PMDs across i_size.
++       */
++      can_map_large = shmem_mapping(mapping) ||
++              file_end >= folio_next_index(folio);
++
++      if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
+               ret = VM_FAULT_NOPAGE;
+               goto out;
+       }
+@@ -3762,10 +3776,6 @@ vm_fault_t filemap_map_pages(struct vm_f
+               goto out;
+       }
+-      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+-      if (end_pgoff > file_end)
+-              end_pgoff = file_end;
+-
+       folio_type = mm_counter_file(folio);
+       do {
+               unsigned long end;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -65,6 +65,7 @@
+ #include <linux/gfp.h>
+ #include <linux/migrate.h>
+ #include <linux/string.h>
++#include <linux/shmem_fs.h>
+ #include <linux/memory-tiers.h>
+ #include <linux/debugfs.h>
+ #include <linux/userfaultfd_k.h>
+@@ -5371,8 +5372,25 @@ fallback:
+                       return ret;
+       }
++      if (!needs_fallback && vma->vm_file) {
++              struct address_space *mapping = vma->vm_file->f_mapping;
++              pgoff_t file_end;
++
++              file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
++
++              /*
++               * Do not allow to map with PTEs beyond i_size and with PMD
++               * across i_size to preserve SIGBUS semantics.
++               *
++               * Make an exception for shmem/tmpfs that for long time
++               * intentionally mapped with PMDs across i_size.
++               */
++              needs_fallback = !shmem_mapping(mapping) &&
++                      file_end < folio_next_index(folio);
++      }
++
+       if (pmd_none(*vmf->pmd)) {
+-              if (folio_test_pmd_mappable(folio)) {
++              if (!needs_fallback && folio_test_pmd_mappable(folio)) {
+                       ret = do_set_pmd(vmf, folio, page);
+                       if (ret != VM_FAULT_FALLBACK)
+                               return ret;
diff --git a/queue-6.17/scripts-decode_stacktrace.sh-fix-build-id-and-pc-source-parsing.patch b/queue-6.17/scripts-decode_stacktrace.sh-fix-build-id-and-pc-source-parsing.patch
new file mode 100644 (file)
index 0000000..6b4aaa4
--- /dev/null
@@ -0,0 +1,74 @@
+From 7d9f7d390f6af3a29614e81e802e2b9c238eb7b2 Mon Sep 17 00:00:00 2001
+From: Carlos Llamas <cmllamas@google.com>
+Date: Thu, 30 Oct 2025 01:03:33 +0000
+Subject: scripts/decode_stacktrace.sh: fix build ID and PC source parsing
+
+From: Carlos Llamas <cmllamas@google.com>
+
+commit 7d9f7d390f6af3a29614e81e802e2b9c238eb7b2 upstream.
+
+Support for parsing PC source info in stacktraces (e.g.  '(P)') was added
+in commit 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of
+lines with an additional info").  However, this logic was placed after the
+build ID processing.  This incorrect order fails to parse lines containing
+both elements, e.g.:
+
+  drm_gem_mmap_obj+0x114/0x200 [drm 03d0564e0529947d67bb2008c3548be77279fd27] (P)
+
+This patch fixes the problem by extracting the PC source info first and
+then processing the module build ID.  With this change, the line above is
+now properly parsed as such:
+
+  drm_gem_mmap_obj (./include/linux/mmap_lock.h:212 ./include/linux/mm.h:811 drivers/gpu/drm/drm_gem.c:1177) drm (P)
+
+While here, also add a brief explanation the build ID section.
+
+Link: https://lkml.kernel.org/r/20251030010347.2731925-1-cmllamas@google.com
+Fixes: 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of lines with an additional info")
+Signed-off-by: Carlos Llamas <cmllamas@google.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
+Cc: Breno Leitao <leitao@debian.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Marc Rutland <mark.rutland@arm.com>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Matthieu Baerts <matttbe@kernel.org>
+Cc: Miroslav Benes <mbenes@suse.cz>
+Cc: Puranjay Mohan <puranjay@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/decode_stacktrace.sh |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/scripts/decode_stacktrace.sh
++++ b/scripts/decode_stacktrace.sh
+@@ -275,12 +275,6 @@ handle_line() {
+               fi
+       done
+-      if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
+-              words[$last-1]="${words[$last-1]} ${words[$last]}"
+-              unset words[$last] spaces[$last]
+-              last=$(( $last - 1 ))
+-      fi
+-
+       # Extract info after the symbol if present. E.g.:
+       # func_name+0x54/0x80 (P)
+       #                     ^^^
+@@ -292,6 +286,14 @@ handle_line() {
+               unset words[$last] spaces[$last]
+               last=$(( $last - 1 ))
+       fi
++
++      # Join module name with its build id if present, as these were
++      # split during tokenization (e.g. "[module" and "modbuildid]").
++      if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
++              words[$last-1]="${words[$last-1]} ${words[$last]}"
++              unset words[$last] spaces[$last]
++              last=$(( $last - 1 ))
++      fi
+       if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
+               module=${words[$last]}
diff --git a/queue-6.17/scripts-decode_stacktrace.sh-symbol-avoid-trailing-whitespaces.patch b/queue-6.17/scripts-decode_stacktrace.sh-symbol-avoid-trailing-whitespaces.patch
new file mode 100644 (file)
index 0000000..a101161
--- /dev/null
@@ -0,0 +1,56 @@
+From d322f6a24ee5964a58294f61bf96a1b6404c676d Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Mon, 8 Sep 2025 17:41:57 +0200
+Subject: scripts/decode_stacktrace.sh: symbol: avoid trailing whitespaces
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit d322f6a24ee5964a58294f61bf96a1b6404c676d upstream.
+
+A few patches slightly improving the output generated by
+decode_stacktrace.sh.
+
+
+This patch (of 3):
+
+Lines having a symbol to decode might not always have info after this
+symbol.  It means ${info_str} might not be set, but it will always be
+printed after a space, causing trailing whitespaces.
+
+That's a detail, but when the output is opened with an editor marking
+these trailing whitespaces, that's a bit disturbing.  It is easy to remove
+them by printing this variable with a space only if it is set.
+
+While at it, do the same with ${module} and print everything in one line.
+
+Link: https://lkml.kernel.org/r/20250908-decode_strace_indent-v1-0-28e5e4758080@kernel.org
+Link: https://lkml.kernel.org/r/20250908-decode_strace_indent-v1-1-28e5e4758080@kernel.org
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Reviewed-by: Carlos Llamas <cmllamas@google.com>
+Reviewed-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
+Cc: Carlos Llamas <cmllamas@google.com>
+Cc: Elliot Berman <quic_eberman@quicinc.com>
+Cc: Stephen Boyd <swboyd@chromium.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/decode_stacktrace.sh |    7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+--- a/scripts/decode_stacktrace.sh
++++ b/scripts/decode_stacktrace.sh
+@@ -323,12 +323,7 @@ handle_line() {
+       parse_symbol # modifies $symbol
+       # Add up the line number to the symbol
+-      if [[ -z ${module} ]]
+-      then
+-              echo "${words[@]}" "$symbol ${info_str}"
+-      else
+-              echo "${words[@]}" "$symbol $module ${info_str}"
+-      fi
++      echo "${words[@]}" "${symbol}${module:+ ${module}}${info_str:+ ${info_str}}"
+ }
+ while read line; do
diff --git a/queue-6.17/scripts-decode_stacktrace.sh-symbol-preserve-alignment.patch b/queue-6.17/scripts-decode_stacktrace.sh-symbol-preserve-alignment.patch
new file mode 100644 (file)
index 0000000..b7f50a9
--- /dev/null
@@ -0,0 +1,148 @@
+From 4a2fc4897b5e0ca1e7a3cb4e32f44c7db3367dee Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Mon, 8 Sep 2025 17:41:58 +0200
+Subject: scripts/decode_stacktrace.sh: symbol: preserve alignment
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 4a2fc4897b5e0ca1e7a3cb4e32f44c7db3367dee upstream.
+
+With lines having a symbol to decode, the script was only trying to
+preserve the alignment for the timestamps, but not the rest, nor when the
+caller was set (CONFIG_PRINTK_CALLER=y).
+
+With this sample ...
+
+  [   52.080924] Call Trace:
+  [   52.080926]  <TASK>
+  [   52.080931]  dump_stack_lvl+0x6f/0xb0
+
+... the script was producing the following output:
+
+  [   52.080924] Call Trace:
+  [   52.080926]  <TASK>
+  [   52.080931] dump_stack_lvl (arch/x86/include/asm/irqflags.h:19)
+
+  (dump_stack_lvl is no longer aligned with <TASK>: one missing space)
+
+With this other sample ...
+
+  [   52.080924][   T48] Call Trace:
+  [   52.080926][   T48]  <TASK>
+  [   52.080931][   T48]  dump_stack_lvl+0x6f/0xb0
+
+... the script was producing the following output:
+
+  [   52.080924][   T48] Call Trace:
+  [   52.080926][   T48]  <TASK>
+  [ 52.080931][ T48] dump_stack_lvl (arch/x86/include/asm/irqflags.h:19)
+
+  (the misalignment is clearer here)
+
+That's because the script had a workaround for CONFIG_PRINTK_TIME=y only,
+see the previous comment called "Format timestamps with tabs".
+
+To always preserve spaces, they need to be recorded along the words.  That
+is what is now done with the new 'spaces' array.
+
+Some notes:
+
+- 'extglob' is needed only for this operation, and that's why it is set
+  in a dedicated subshell.
+
+- 'read' is used with '-r' not to treat a <backslash> character in any
+  special way, e.g. when followed by a space.
+
+- When a word is removed from the 'words' array, the corresponding space
+  needs to be removed from the 'spaces' array as well.
+
+With the last sample, we now have:
+
+  [   52.080924][   T48] Call Trace:
+  [   52.080926][   T48]  <TASK>
+  [   52.080931][   T48]  dump_stack_lvl (arch/x86/include/asm/irqflags.h:19)
+
+  (the alignment is preserved)
+
+Link: https://lkml.kernel.org/r/20250908-decode_strace_indent-v1-2-28e5e4758080@kernel.org
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Tested-by: Carlos Llamas <cmllamas@google.com>
+Cc: Breno Leitao <leitao@debian.org>
+Cc: Elliot Berman <quic_eberman@quicinc.com>
+Cc: Luca Ceresoli <luca.ceresoli@bootlin.com>
+Cc: Stephen Boyd <swboyd@chromium.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/decode_stacktrace.sh |   26 ++++++++++++--------------
+ 1 file changed, 12 insertions(+), 14 deletions(-)
+
+--- a/scripts/decode_stacktrace.sh
++++ b/scripts/decode_stacktrace.sh
+@@ -255,10 +255,11 @@ handle_line() {
+               basepath=${basepath%/init/main.c:*)}
+       fi
+-      local words
++      local words spaces
+-      # Tokenize
+-      read -a words <<<"$1"
++      # Tokenize: words and spaces to preserve the alignment
++      read -ra words <<<"$1"
++      IFS='#' read -ra spaces <<<"$(shopt -s extglob; echo "${1//+([^[:space:]])/#}")"
+       # Remove hex numbers. Do it ourselves until it happens in the
+       # kernel
+@@ -270,19 +271,13 @@ handle_line() {
+       for i in "${!words[@]}"; do
+               # Remove the address
+               if [[ ${words[$i]} =~ \[\<([^]]+)\>\] ]]; then
+-                      unset words[$i]
+-              fi
+-
+-              # Format timestamps with tabs
+-              if [[ ${words[$i]} == \[ && ${words[$i+1]} == *\] ]]; then
+-                      unset words[$i]
+-                      words[$i+1]=$(printf "[%13s\n" "${words[$i+1]}")
++                      unset words[$i] spaces[$i]
+               fi
+       done
+       if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
+               words[$last-1]="${words[$last-1]} ${words[$last]}"
+-              unset words[$last]
++              unset words[$last] spaces[$last]
+               last=$(( $last - 1 ))
+       fi
+@@ -294,7 +289,7 @@ handle_line() {
+       local info_str=""
+       if [[ ${words[$last]} =~ \([A-Z]*\) ]]; then
+               info_str=${words[$last]}
+-              unset words[$last]
++              unset words[$last] spaces[$last]
+               last=$(( $last - 1 ))
+       fi
+@@ -311,7 +306,7 @@ handle_line() {
+                       modbuildid=
+               fi
+               symbol=${words[$last-1]}
+-              unset words[$last-1]
++              unset words[$last-1] spaces[$last-1]
+       else
+               # The symbol is the last element, process it
+               symbol=${words[$last]}
+@@ -323,7 +318,10 @@ handle_line() {
+       parse_symbol # modifies $symbol
+       # Add up the line number to the symbol
+-      echo "${words[@]}" "${symbol}${module:+ ${module}}${info_str:+ ${info_str}}"
++      for i in "${!words[@]}"; do
++              echo -n "${spaces[i]}${words[i]}"
++      done
++      echo "${spaces[$last]}${symbol}${module:+ ${module}}${info_str:+ ${info_str}}"
+ }
+ while read line; do
index 5a61c45b248a0edc23df5a6adcf8f9139870393d..e4c2ca9e86342a84dfd54f3e1bc82c77f666ee2b 100644 (file)
@@ -233,3 +233,13 @@ selftests-mptcp-join-endpoints-longer-transfer.patch
 selftests-mptcp-connect-trunc-read-all-recv-data.patch
 selftests-mptcp-join-userspace-longer-transfer.patch
 selftests-mptcp-join-properly-kill-background-tasks.patch
+mm-huge_memory-do-not-change-split_huge_page-target-order-silently.patch
+mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
+scripts-decode_stacktrace.sh-symbol-avoid-trailing-whitespaces.patch
+scripts-decode_stacktrace.sh-symbol-preserve-alignment.patch
+scripts-decode_stacktrace.sh-fix-build-id-and-pc-source-parsing.patch
+asoc-da7213-convert-to-define_runtime_dev_pm_ops.patch
+asoc-da7213-use-component-driver-suspend-resume.patch
+kvm-x86-rename-local-ecx-variables-to-msr-and-pmc-as-appropriate.patch
+kvm-x86-add-support-for-rdmsr-wrmsrns-w-immediate-on-intel.patch
+kvm-vmx-inject-ud-if-guest-tries-to-execute-seamcall-or-tdcall.patch